diff --git a/adapter/README.md b/adapter/README.md new file mode 100644 index 000000000..66ebcc854 --- /dev/null +++ b/adapter/README.md @@ -0,0 +1,13 @@ +This directory stores some adapters based on the F-Stack lib library, currently includes `micro_thread` and `syscall`. + +## micro_thread + +Provides micro thread interface. Various applications with stateful applications can easily use F-Stack to get high performance without processing complex asynchronous logic. + +## syscall + +Hijack Linux kernel syscall with f-stack api, can use `LD_PRELOAD` to support existing applications, such as Nginx. + +It also can support f-stack and kernel stack at the same time. + + \ No newline at end of file diff --git a/app/micro_thread/Makefile b/adapter/micro_thread/Makefile similarity index 100% rename from app/micro_thread/Makefile rename to adapter/micro_thread/Makefile diff --git a/app/micro_thread/arch_ctx.S b/adapter/micro_thread/arch_ctx.S similarity index 100% rename from app/micro_thread/arch_ctx.S rename to adapter/micro_thread/arch_ctx.S diff --git a/app/micro_thread/echo.cpp b/adapter/micro_thread/echo.cpp similarity index 100% rename from app/micro_thread/echo.cpp rename to adapter/micro_thread/echo.cpp diff --git a/app/micro_thread/ff_hook.cpp b/adapter/micro_thread/ff_hook.cpp similarity index 100% rename from app/micro_thread/ff_hook.cpp rename to adapter/micro_thread/ff_hook.cpp diff --git a/app/micro_thread/ff_hook.h b/adapter/micro_thread/ff_hook.h similarity index 100% rename from app/micro_thread/ff_hook.h rename to adapter/micro_thread/ff_hook.h diff --git a/app/micro_thread/hash_list.h b/adapter/micro_thread/hash_list.h similarity index 100% rename from app/micro_thread/hash_list.h rename to adapter/micro_thread/hash_list.h diff --git a/app/micro_thread/heap.h b/adapter/micro_thread/heap.h similarity index 100% rename from app/micro_thread/heap.h rename to adapter/micro_thread/heap.h diff --git a/app/micro_thread/heap_timer.cpp 
b/adapter/micro_thread/heap_timer.cpp similarity index 100% rename from app/micro_thread/heap_timer.cpp rename to adapter/micro_thread/heap_timer.cpp diff --git a/app/micro_thread/heap_timer.h b/adapter/micro_thread/heap_timer.h similarity index 100% rename from app/micro_thread/heap_timer.h rename to adapter/micro_thread/heap_timer.h diff --git a/app/micro_thread/kqueue_proxy.cpp b/adapter/micro_thread/kqueue_proxy.cpp similarity index 100% rename from app/micro_thread/kqueue_proxy.cpp rename to adapter/micro_thread/kqueue_proxy.cpp diff --git a/app/micro_thread/kqueue_proxy.h b/adapter/micro_thread/kqueue_proxy.h similarity index 100% rename from app/micro_thread/kqueue_proxy.h rename to adapter/micro_thread/kqueue_proxy.h diff --git a/app/micro_thread/micro_thread.cpp b/adapter/micro_thread/micro_thread.cpp similarity index 100% rename from app/micro_thread/micro_thread.cpp rename to adapter/micro_thread/micro_thread.cpp diff --git a/app/micro_thread/micro_thread.h b/adapter/micro_thread/micro_thread.h similarity index 100% rename from app/micro_thread/micro_thread.h rename to adapter/micro_thread/micro_thread.h diff --git a/app/micro_thread/mt_action.cpp b/adapter/micro_thread/mt_action.cpp similarity index 100% rename from app/micro_thread/mt_action.cpp rename to adapter/micro_thread/mt_action.cpp diff --git a/app/micro_thread/mt_action.h b/adapter/micro_thread/mt_action.h similarity index 100% rename from app/micro_thread/mt_action.h rename to adapter/micro_thread/mt_action.h diff --git a/app/micro_thread/mt_api.cpp b/adapter/micro_thread/mt_api.cpp similarity index 100% rename from app/micro_thread/mt_api.cpp rename to adapter/micro_thread/mt_api.cpp diff --git a/app/micro_thread/mt_api.h b/adapter/micro_thread/mt_api.h similarity index 100% rename from app/micro_thread/mt_api.h rename to adapter/micro_thread/mt_api.h diff --git a/app/micro_thread/mt_cache.cpp b/adapter/micro_thread/mt_cache.cpp similarity index 100% rename from app/micro_thread/mt_cache.cpp 
rename to adapter/micro_thread/mt_cache.cpp diff --git a/app/micro_thread/mt_cache.h b/adapter/micro_thread/mt_cache.h similarity index 100% rename from app/micro_thread/mt_cache.h rename to adapter/micro_thread/mt_cache.h diff --git a/app/micro_thread/mt_concurrent.cpp b/adapter/micro_thread/mt_concurrent.cpp similarity index 100% rename from app/micro_thread/mt_concurrent.cpp rename to adapter/micro_thread/mt_concurrent.cpp diff --git a/app/micro_thread/mt_concurrent.h b/adapter/micro_thread/mt_concurrent.h similarity index 100% rename from app/micro_thread/mt_concurrent.h rename to adapter/micro_thread/mt_concurrent.h diff --git a/app/micro_thread/mt_connection.cpp b/adapter/micro_thread/mt_connection.cpp similarity index 100% rename from app/micro_thread/mt_connection.cpp rename to adapter/micro_thread/mt_connection.cpp diff --git a/app/micro_thread/mt_connection.h b/adapter/micro_thread/mt_connection.h similarity index 100% rename from app/micro_thread/mt_connection.h rename to adapter/micro_thread/mt_connection.h diff --git a/app/micro_thread/mt_incl.h b/adapter/micro_thread/mt_incl.h similarity index 100% rename from app/micro_thread/mt_incl.h rename to adapter/micro_thread/mt_incl.h diff --git a/app/micro_thread/mt_mbuf_pool.cpp b/adapter/micro_thread/mt_mbuf_pool.cpp similarity index 100% rename from app/micro_thread/mt_mbuf_pool.cpp rename to adapter/micro_thread/mt_mbuf_pool.cpp diff --git a/app/micro_thread/mt_mbuf_pool.h b/adapter/micro_thread/mt_mbuf_pool.h similarity index 100% rename from app/micro_thread/mt_mbuf_pool.h rename to adapter/micro_thread/mt_mbuf_pool.h diff --git a/app/micro_thread/mt_msg.h b/adapter/micro_thread/mt_msg.h similarity index 100% rename from app/micro_thread/mt_msg.h rename to adapter/micro_thread/mt_msg.h diff --git a/app/micro_thread/mt_net.cpp b/adapter/micro_thread/mt_net.cpp similarity index 100% rename from app/micro_thread/mt_net.cpp rename to adapter/micro_thread/mt_net.cpp diff --git a/app/micro_thread/mt_net.h 
b/adapter/micro_thread/mt_net.h similarity index 100% rename from app/micro_thread/mt_net.h rename to adapter/micro_thread/mt_net.h diff --git a/app/micro_thread/mt_net_api.h b/adapter/micro_thread/mt_net_api.h similarity index 100% rename from app/micro_thread/mt_net_api.h rename to adapter/micro_thread/mt_net_api.h diff --git a/app/micro_thread/mt_notify.cpp b/adapter/micro_thread/mt_notify.cpp similarity index 100% rename from app/micro_thread/mt_notify.cpp rename to adapter/micro_thread/mt_notify.cpp diff --git a/app/micro_thread/mt_notify.h b/adapter/micro_thread/mt_notify.h similarity index 100% rename from app/micro_thread/mt_notify.h rename to adapter/micro_thread/mt_notify.h diff --git a/app/micro_thread/mt_session.cpp b/adapter/micro_thread/mt_session.cpp similarity index 100% rename from app/micro_thread/mt_session.cpp rename to adapter/micro_thread/mt_session.cpp diff --git a/app/micro_thread/mt_session.h b/adapter/micro_thread/mt_session.h similarity index 100% rename from app/micro_thread/mt_session.h rename to adapter/micro_thread/mt_session.h diff --git a/app/micro_thread/mt_sys_hook.cpp b/adapter/micro_thread/mt_sys_hook.cpp similarity index 100% rename from app/micro_thread/mt_sys_hook.cpp rename to adapter/micro_thread/mt_sys_hook.cpp diff --git a/app/micro_thread/mt_sys_hook.h b/adapter/micro_thread/mt_sys_hook.h similarity index 100% rename from app/micro_thread/mt_sys_hook.h rename to adapter/micro_thread/mt_sys_hook.h diff --git a/app/micro_thread/mt_version.h b/adapter/micro_thread/mt_version.h similarity index 100% rename from app/micro_thread/mt_version.h rename to adapter/micro_thread/mt_version.h diff --git a/app/micro_thread/valgrind/valgrind.h b/adapter/micro_thread/valgrind/valgrind.h similarity index 100% rename from app/micro_thread/valgrind/valgrind.h rename to adapter/micro_thread/valgrind/valgrind.h diff --git a/adapter/syscall/LICENSE b/adapter/syscall/LICENSE new file mode 100644 index 000000000..1f72a9380 --- /dev/null +++ 
b/adapter/syscall/LICENSE @@ -0,0 +1,25 @@ +This is BSD 2-Clause licensed. +Copyright (C) 2023 THL A29 Limited, a Tencent company. +All rights reserved. + +Terms of the BSD 2-Clause License: +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/adapter/syscall/Makefile b/adapter/syscall/Makefile new file mode 100644 index 000000000..551462ca9 --- /dev/null +++ b/adapter/syscall/Makefile @@ -0,0 +1,107 @@ +TOPDIR=.. 
+ +ifeq ($(FF_PATH),) + FF_PATH=${TOPDIR} +endif + +ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) + $(error "No installation of DPDK found, maybe you should export environment variable `PKG_CONFIG_PATH`") +endif + +#DEBUG=-O0 -gdwarf-2 -g3 + +# Per thread separate initialization dpdk lib and attach sc when needed, +# such as listen same port in different threads, and socket can use in own thread. +# Default disable. +# +# If disable it, one socket can use in all threads. +#FF_THREAD_SOCKET=1 + +# If enable FF_KERNEL_EVENT, epoll_create/epoll_clt/epoll_wait always call f-stack and system API at the same time. +# Use for some scenarios similar to Nginx. +#FF_KERNEL_EVENT=1 + +PKGCONF ?= pkg-config + +ifndef DEBUG + CFLAGS+= -g -O2 -DNDEBUG +else + CFLAGS+= ${DEBUG} +endif + +ifdef FF_THREAD_SOCKET + CFLAGS+= -DFF_THREAD_SOCKET +endif + +ifdef FF_KERNEL_EVENT + CFLAGS+= -DFF_KERNEL_EVENT +endif + +ifdef FF_MULTI_SC + CFLAGS+= -DFF_MULTI_SC +endif + +CFLAGS += -fPIC -Wall -Werror $(shell $(PKGCONF) --cflags libdpdk) + +INCLUDES= -I. 
-I${FF_PATH}/lib + +LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto -pthread -lnuma +FF_LIBS= -L${FF_PATH}/lib -Wl,--whole-archive,-lfstack,--no-whole-archive + +DPDK_LIBS+= $(shell $(PKGCONF) --static --libs libdpdk) +DPDK_LIBS+= ${LIBS} + +#DPDK_CFLAGS= -Wall -Werror -include ${FF_DPDK}/include/rte_config.h +#DPDK_CFLAGS+= -march=native -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 +#DPDK_CFLAGS+= -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2 +#DPDK_CFLAGS+= -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2 +#DPDK_CFLAGS+= -I${FF_DPDK}/include + +CFLAGS+= ${INCLUDES} ${DPDK_CFLAGS} + +FSTACK_LIBS= ${FF_LIBS} ${DPDK_LIBS} +FF_SYSCALL_LIBS= + +FSTACK_SRCS= \ + fstack.c \ + ff_so_zone.c \ + ff_socket_ops.c + +FF_SYSCALL_SRCS= \ + ff_so_zone.c \ + ff_hook_syscall.c \ + ff_linux_syscall.c + +FSTACK_OBJS= $(patsubst %.c,%.o,${FSTACK_SRCS}) + +FF_SYSCALL_OBJS= $(patsubst %.c,%.o,${FF_SYSCALL_SRCS}) + +#TARGET= fstack libff_syscall.a +TARGET= fstack libff_syscall.so example + +all: ${TARGET} + +fstack: ${FSTACK_OBJS} + cc -o $@ $^ ${FSTACK_LIBS} + +#libff_syscall.a: ${FF_SYSCALL_OBJS} +libff_syscall.so: ${FF_SYSCALL_OBJS} + #ar -rcs $@ $^ ${FF_SYSCALL_LIBS} + ${CC} -fPIC -shared -o $@ $^ ${FF_SYSCALL_LIBS} ${DPDK_LIBS} + +example: + cc ${CFLAGS} -I ${FF_PATH}/adapter/syscall -L ${FF_PATH}/adapter/syscall -lff_syscall -o helloworld_stack main_stack.c ${LIBS} + cc ${CFLAGS} -I ${FF_PATH}/adapter/syscall -L ${FF_PATH}/adapter/syscall -lff_syscall -o helloworld_stack_thread_socket main_stack_thread_socket.c ${LIBS} + cc ${CFLAGS} -I ${FF_PATH}/adapter/syscall -o helloworld_stack_epoll main_stack_epoll.c ${LIBS} + cc ${CFLAGS} -I ${FF_PATH}/adapter/syscall -o helloworld_stack_epoll_thread_socket main_stack_epoll_thread_socket.c ${LIBS} + cc ${CFLAGS} -I ${FF_PATH}/adapter/syscall -o helloworld_stack_epoll_kernel 
main_stack_epoll_kernel.c ${LIBS} + +${FSTACK_OBJS}: %.o: %.c + ${CC} -c $(CFLAGS) ${PROF} $< + +${FF_SYSCALL_OBJS}: %.o: %.c + ${CC} -c $(CFLAGS) ${PROF} $< + +.PHONY: clean +clean: + rm -f *.o ${TARGET} diff --git a/adapter/syscall/README.md b/adapter/syscall/README.md new file mode 100644 index 000000000..8f19fd357 --- /dev/null +++ b/adapter/syscall/README.md @@ -0,0 +1,4 @@ +# F-Stack LD_PRELOAD Beta Introduction + +[To be translated](https://mp.weixin.qq.com/s?__biz=MzI5ODY3Nzc5OA==&tempkey=MTIxNl9Kd0pabEJ4em95c3J6OFJJdUYzZ2F2aGthLURKQmplYnJIVmE0cTFsSzNqdkhFM3JjV1F1SjlrMWFxeVo0Nks0b0lwOG5UbEZueVRTb0RFcnQ5LW42UE1aNVFUYW9mMHVaRjFiWXBKcG9VY0FYTTJIdUgxcU5UdTltb0hId0EtODBHR3lZM1FuX1FxdHNMQkJsWExTb3IyQ0MycDkxWXlBTllNOVlBfn4%3D&chksm=eca3623edbd4eb28e8d7dc68001f46381d39f4ffca91037a4ad8f8ed5b59bad2354ca273601b&token=39899456&lang=zh_CN#rd). + diff --git a/adapter/syscall/ff_adapter.h b/adapter/syscall/ff_adapter.h new file mode 100644 index 000000000..a985993a5 --- /dev/null +++ b/adapter/syscall/ff_adapter.h @@ -0,0 +1,23 @@ +#ifndef _FF_ADAPTER_H +#define _FF_ADAPTER_H + +/* socket.h */ +//#define SOCK_CLOEXEC 0x10000000 +//#define SOCK_NONBLOCK 0x20000000 +#define SOCK_FSTACK 0x01000000 +#define SOCK_KERNEL 0x02000000 + +int ff_adapter_init(); +//int __attribute__((constructor)) ff_adapter_init(int argc, char * const argv[]); + +void alarm_event_sem(); + +/*- + * Verify whether the socket is supported by fstack or not. + */ +int fstack_territory(int domain, int type, int protocol); + +/* Tell whether a 'sockfd' belongs to fstack. 
*/ +int is_fstack_fd(int fd); + +#endif diff --git a/adapter/syscall/ff_declare_syscalls.h b/adapter/syscall/ff_declare_syscalls.h new file mode 100644 index 000000000..28d3b9808 --- /dev/null +++ b/adapter/syscall/ff_declare_syscalls.h @@ -0,0 +1,31 @@ +FF_SYSCALL_DECL(int, socket, (int, int, int)); +FF_SYSCALL_DECL(int, bind, (int, const struct sockaddr *, socklen_t)); +FF_SYSCALL_DECL(int, listen, (int, int)); +FF_SYSCALL_DECL(int, shutdown, (int, int)); +FF_SYSCALL_DECL(int, getsockname, (int, struct sockaddr *, socklen_t *)); +FF_SYSCALL_DECL(int, getpeername, (int, struct sockaddr *, socklen_t *)); +FF_SYSCALL_DECL(int, getsockopt, (int, int, int, void *, socklen_t *)); +FF_SYSCALL_DECL(int, setsockopt, (int, int, int, const void *, socklen_t)); +FF_SYSCALL_DECL(int, accept, (int, struct sockaddr *, socklen_t *)); +FF_SYSCALL_DECL(int, accept4,(int, struct sockaddr *, socklen_t *, int flags)); +FF_SYSCALL_DECL(int, connect, (int, const struct sockaddr *, socklen_t)); +FF_SYSCALL_DECL(ssize_t, recv, (int, void *, size_t, int)); +FF_SYSCALL_DECL(ssize_t, send, (int, const void *, size_t, int)); +FF_SYSCALL_DECL(ssize_t, read, (int, void *, size_t)); +FF_SYSCALL_DECL(ssize_t, write, (int, const void *, size_t)); +FF_SYSCALL_DECL(ssize_t, writev, (int, const struct iovec *, int)); +FF_SYSCALL_DECL(ssize_t, readv, (int, const struct iovec *, int)); +FF_SYSCALL_DECL(ssize_t, sendto, (int, const void *, size_t, int, + const struct sockaddr *, socklen_t)); +FF_SYSCALL_DECL(ssize_t, recvfrom, (int, void *, size_t, int, + struct sockaddr *, socklen_t *)); +FF_SYSCALL_DECL(ssize_t, sendmsg, (int, const struct msghdr *, int flags)); +FF_SYSCALL_DECL(ssize_t, recvmsg, (int, struct msghdr *, int flags)); +FF_SYSCALL_DECL(int, close, (int)); +FF_SYSCALL_DECL(int, ioctl, (int, unsigned long, unsigned long)); +FF_SYSCALL_DECL(int, fcntl, (int, int, unsigned long)); +FF_SYSCALL_DECL(int, epoll_create, (int)); +FF_SYSCALL_DECL(int, epoll_ctl, (int, int, int, struct epoll_event 
*)); +FF_SYSCALL_DECL(int, epoll_wait, (int, struct epoll_event *, int, int)); +FF_SYSCALL_DECL(pid_t, fork, (void)); +#undef FF_SYSCALL_DECL diff --git a/adapter/syscall/ff_hook_syscall.c b/adapter/syscall/ff_hook_syscall.c new file mode 100644 index 000000000..811a7a74c --- /dev/null +++ b/adapter/syscall/ff_hook_syscall.c @@ -0,0 +1,2434 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ff_config.h" +#include "ff_socket_ops.h" +#include "ff_sysproto.h" +#include "ff_event.h" +#include "ff_hook_syscall.h" +#include "ff_linux_syscall.h" +#include "ff_adapter.h" + +/* Just for so, no used */ +struct ff_config ff_global_cfg; + +#define NS_PER_SECOND 1000000000 + +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif + +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif + +#define strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + +#undef FF_SYSCALL_DECL +#define FF_SYSCALL_DECL(ret, fn, args) strong_alias(ff_hook_##fn, fn) +#include + +#define share_mem_alloc(size) rte_malloc(NULL, (size), 0) +#define share_mem_free(addr) rte_free((addr)) + +#define CHECK_FD_OWNERSHIP(name, args) \ +{ \ + if (!is_fstack_fd(fd)) { \ + return ff_linux_##name args; \ + } \ + fd = restore_fstack_fd(fd); \ +} + +#define DEFINE_REQ_ARGS(name) \ + struct ff_##name##_args *args; \ + int ret = -1; \ + size_t size = sizeof(struct ff_##name##_args); \ + args = share_mem_alloc(size); \ + if (args == NULL) { \ + errno = ENOMEM; \ + return ret; \ + } + +/* Always use __thread, but no __FF_THREAD */ +static __thread struct ff_shutdown_args *shutdown_args = NULL; +static __thread struct ff_getsockname_args *getsockname_args = NULL; +static __thread struct ff_getpeername_args *getpeername_args = NULL; +static __thread struct ff_setsockopt_args *setsockopt_args = NULL; +static __thread struct ff_accept_args *accept_args = NULL; +static __thread struct 
ff_connect_args *connect_args = NULL; +static __thread struct ff_recvfrom_args *recvfrom_args = NULL; +static __thread struct ff_recvmsg_args *recvmsg_args = NULL; +static __thread struct ff_read_args *read_args = NULL; +static __thread struct ff_readv_args *readv_args = NULL; +static __thread struct ff_sendto_args *sendto_args = NULL; +static __thread struct ff_sendmsg_args *sendmsg_args = NULL; +static __thread struct ff_write_args *write_args = NULL; +static __thread struct ff_writev_args *writev_args = NULL; +static __thread struct ff_close_args *close_args = NULL; +static __thread struct ff_ioctl_args *ioctl_args = NULL; +static __thread struct ff_fcntl_args *fcntl_args = NULL; +static __thread struct ff_epoll_ctl_args *epoll_ctl_args = NULL; +static __thread struct ff_epoll_wait_args *epoll_wait_args = NULL; +static __thread struct ff_kevent_args *kevent_args = NULL; + +#define IOV_MAX 16 +#define IOV_LEN_MAX 2048 + +static __thread struct iovec *sh_iov_static = NULL; +static __thread void *sh_iov_static_base[IOV_MAX]; +static __thread int sh_iov_static_fill_idx_local = 0; +static __thread int sh_iov_static_fill_idx_share = 0; + +#define DEFINE_REQ_ARGS_STATIC(name) \ + int ret = -1; \ + struct ff_##name##_args *args = NULL; \ + if (name##_args == NULL) { \ + size_t size = sizeof(struct ff_##name##_args); \ + name##_args = share_mem_alloc(size); \ + if (name##_args == NULL) { \ + errno = ENOMEM; \ + return ret; \ + } \ + } \ + args = name##_args; + +/* Dirty read first, and then try to lock sc and real read. 
*/ +#define ACQUIRE_ZONE_LOCK(exp) do { \ + while (1) { \ + while (sc->status != exp) { \ + rte_pause(); \ + } \ + rte_spinlock_lock(&sc->lock); \ + if (sc->status == exp) { \ + break; \ + } \ + rte_spinlock_unlock(&sc->lock); \ + } \ +} while (0) + +#define RELEASE_ZONE_LOCK(s) do { \ + sc->status = s; \ + rte_spinlock_unlock(&sc->lock); \ +} while (0) + +/* NOTE: deadlock prone while fstack adapter run error */ +#define SYSCALL(op, arg) do { \ + ACQUIRE_ZONE_LOCK(FF_SC_IDLE); \ + sc->ops = (op); \ + sc->args = (arg); \ + RELEASE_ZONE_LOCK(FF_SC_REQ); \ + ACQUIRE_ZONE_LOCK(FF_SC_REP); \ + ret = sc->result; \ + if (ret < 0) { \ + errno = sc->error; \ + } \ + RELEASE_ZONE_LOCK(FF_SC_IDLE); \ +} while (0) + +#define RETURN_NOFREE() do { \ + DEBUG_LOG("RETURN_NOFREE ret:%d, errno:%d\n", ret, errno); \ + return ret; \ +} while (0) + +#define RETURN_ERROR_NOFREE(err) do { \ + errno = err; \ + DEBUG_LOG("RETURN_ERROR_NOFREE ret:%d, errno:%d\n", ret, errno); \ + return ret; \ +} while (0) + +#define RETURN() do { \ + share_mem_free(args); \ + DEBUG_LOG("RETURN ret:%d, errno:%d\n", ret, errno); \ + return ret; \ +} while (0) + +#define RETURN_ERROR(err) do { \ + errno = err; \ + share_mem_free(args); \ + DEBUG_LOG("RETURN_ERROR ret:%d, errno:%d\n", ret, errno); \ + return ret; \ +} while (0) + +static __FF_THREAD int inited = 0; +static __FF_THREAD struct ff_so_context *sc; + +/* + * For parent process socket/bind/listen multi sockets + * and use them in different child process, + * like Nginx with reuseport. + */ +#ifdef FF_MULTI_SC +typedef struct ff_multi_sc { + int worker_id; + int fd; + struct ff_so_context *sc; +} ff_multi_sc_type; + +static ff_multi_sc_type scs[SOCKET_OPS_CONTEXT_MAX_NUM]; + +/* + * For child worker process, + * All workers must be forked by the same process, scilicet + * support master fork child1, [child1 fork child2], chilid2 fork worker1/worker2/worker3... + * But not support master fork worker1, worker fork worker2, worker2 fork worker3... 
+ */ +#define CURRENT_WORKER_ID_DEFAULT 0 +static int current_worker_id = CURRENT_WORKER_ID_DEFAULT; +#endif + +static pthread_key_t key; + +#ifdef FF_KERNEL_EVENT +/* kern.maxfiles: 33554432 */ +#define FF_MAX_FREEBSD_FILES 65536 +int fstack_kernel_fd_map[FF_MAX_FREEBSD_FILES]; +#endif + +/* process-level initialization flag */ +static int proc_inited = 0; + +/* Use from lcore 2 by default, can set by environment variable FF_INITIAL_LCORE_ID */ +#define INITIAL_LCORE_ID_DEFAULT 0x4 /* lcore 2 */ +#define INITIAL_LCORE_ID_MAX 0x4000000000000 /* lcore 50 */ +#define FF_INITIAL_LCORE_ID_STR "FF_INITIAL_LCORE_ID" +static uint64_t initial_lcore_id = INITIAL_LCORE_ID_DEFAULT; + +#define WORKER_ID_DEFAULT 0 +#define FF_PROC_ID_STR "FF_PROC_ID" +static int worker_id = WORKER_ID_DEFAULT; +rte_spinlock_t worker_id_lock; + +/* The num of F-Stack process instance, default 1 */ +#define NB_FSTACK_INSTANCE_DEFAULT 1 +#define FF_NB_FSTACK_INSTANCE_STR "FF_NB_FSTACK_INSTANCE" +static int nb_procs = NB_FSTACK_INSTANCE_DEFAULT; + +#define FF_KERNEL_MAX_FD_DEFAULT 1024 +static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_DEFAULT; + +/* not support thread socket now */ +static int need_alarm_sem = 0; + +static inline int convert_fstack_fd(int sockfd) { + return sockfd + ff_kernel_max_fd; +} + +/* Restore socket fd. 
*/ +static inline int restore_fstack_fd(int sockfd) { + if(sockfd < ff_kernel_max_fd) { + return sockfd; + } + + return sockfd - ff_kernel_max_fd; +} + +int is_fstack_fd(int sockfd) { + if (unlikely(inited == 0/* && ff_adapter_init() < 0*/)) { + return 0; + } + + /* FIXED ME: ff_linux_socket not limit fd < ff_kernel_max_fd, may be Misjudgment */ + return sockfd >= ff_kernel_max_fd; +} + +int +fstack_territory(int domain, int type, int protocol) +{ + /* Remove creation flags */ + type &= ~SOCK_CLOEXEC; + type &= ~SOCK_NONBLOCK; + type &= ~SOCK_FSTACK; + type &= ~SOCK_KERNEL; + + if ((AF_INET != domain && AF_INET6 != domain) || (SOCK_STREAM != type && + SOCK_DGRAM != type)) { + return 0; + } + + return 1; +} + +/* + * APP need set type |= SOCK_FSTACK + */ +int +ff_hook_socket(int domain, int type, int protocol) +{ + ERR_LOG("ff_hook_socket, domain:%d, type:%d, protocol:%d\n", domain, type, protocol); + if (unlikely(fstack_territory(domain, type, protocol) == 0)) { + return ff_linux_socket(domain, type, protocol); + } + + if (unlikely(type & SOCK_KERNEL) && !(type & SOCK_FSTACK)) { + type &= ~SOCK_KERNEL; + return ff_linux_socket(domain, type, protocol); + } + + if (unlikely(inited == 0)) { + if (ff_adapter_init() < 0) { + return ff_linux_socket(domain, type, protocol); + } + } +#ifdef FF_MULTI_SC + else { + if (ff_adapter_init() < 0) { + ERR_LOG("FF_MUTLI_SC ff_adapter_init failed\n"); + return -1; + } + } +#endif + + type &= ~SOCK_FSTACK; + + DEFINE_REQ_ARGS(socket); + + args->domain = domain; + args->type = type; + args->protocol = protocol; + + SYSCALL(FF_SO_SOCKET, args); + +#ifdef FF_MULTI_SC + scs[worker_id - 1].fd = ret; +#endif + + if (ret >= 0) { + ret = convert_fstack_fd(ret); + } + + ERR_LOG("ff_hook_socket return fd:%d\n", ret); + + RETURN(); +} + +int +ff_hook_bind(int fd, const struct sockaddr *addr, + socklen_t addrlen) +{ + ERR_LOG("ff_hook_bind, fd:%d, addr:%p, addrlen:%d\n", fd, addr, addrlen); + + if (addr == NULL) { + errno = EINVAL; + return -1; 
+ } + + CHECK_FD_OWNERSHIP(bind, (fd, addr, addrlen)); + + DEFINE_REQ_ARGS(bind); + struct sockaddr *sh_addr = NULL; + + sh_addr = share_mem_alloc(addrlen); + if (sh_addr == NULL) { + RETURN_ERROR(ENOMEM); + } + rte_memcpy(sh_addr, addr, addrlen); + + args->fd = fd; + args->addr = sh_addr; + args->addrlen = addrlen; + + SYSCALL(FF_SO_BIND, args); + + share_mem_free(sh_addr); + RETURN(); +} + +int +ff_hook_listen(int fd, int backlog) +{ + ERR_LOG("ff_hook_listen, fd:%d, backlog:%d\n", fd, backlog); + + CHECK_FD_OWNERSHIP(listen, (fd, backlog)); + + DEFINE_REQ_ARGS(listen); + + args->fd = fd; + args->backlog = backlog; + + SYSCALL(FF_SO_LISTEN, args); + + RETURN(); +} + +int +ff_hook_shutdown(int fd, int how) +{ + CHECK_FD_OWNERSHIP(shutdown, (fd, how)); + + DEFINE_REQ_ARGS_STATIC(shutdown); + + args->fd = fd; + args->how = how; + + SYSCALL(FF_SO_SHUTDOWN, args); + + RETURN_NOFREE(); +} + +int +ff_hook_getsockname(int fd, struct sockaddr *name, + socklen_t *namelen) +{ + if (name == NULL || namelen == NULL) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(getsockname, (fd, name, namelen)); + + DEFINE_REQ_ARGS_STATIC(getsockname); + static __thread struct sockaddr *sh_name = NULL; + static __thread socklen_t sh_name_len = 0; + static __thread socklen_t *sh_namelen = NULL; + + if (sh_name == NULL || sh_name_len < *namelen) { + if (sh_name) { + share_mem_free(sh_name); + } + + sh_name_len = *namelen; + sh_name = share_mem_alloc(sh_name_len); + if (sh_name == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + if (sh_namelen == NULL) { + sh_namelen = share_mem_alloc(sizeof(socklen_t)); + if (sh_namelen == NULL) { + //share_mem_free(sh_name); + RETURN_ERROR_NOFREE(ENOMEM); + } + } + *sh_namelen = *namelen; + + args->fd = fd; + args->name = sh_name; + args->namelen = sh_namelen; + + SYSCALL(FF_SO_GETSOCKNAME, args); + + if (ret == 0) { + socklen_t cplen = *namelen ? 
*sh_namelen > *namelen + : *sh_namelen; + rte_memcpy(name, sh_name, cplen); + *namelen = *sh_namelen; + } + + //share_mem_free(sh_name); + //share_mem_free(sh_namelen); + + RETURN_NOFREE(); +} + +int +ff_hook_getpeername(int fd, struct sockaddr *name, + socklen_t *namelen) +{ + if (name == NULL || namelen == NULL) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(getpeername, (fd, name, namelen)); + + DEFINE_REQ_ARGS_STATIC(getpeername); + static __thread struct sockaddr *sh_name = NULL; + static __thread socklen_t sh_name_len = 0; + static __thread socklen_t *sh_namelen = NULL; + + if (sh_name == NULL || sh_name_len < *namelen) { + if (sh_name) { + share_mem_free(sh_name); + } + + sh_name_len = *namelen; + sh_name = share_mem_alloc(sh_name_len); + if (sh_name == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + if (sh_namelen == NULL) { + sh_namelen = share_mem_alloc(sizeof(socklen_t)); + if (sh_namelen == NULL) { + //share_mem_free(sh_name); + RETURN_ERROR_NOFREE(ENOMEM); + } + } + *sh_namelen = *namelen; + + args->fd = fd; + args->name = sh_name; + args->namelen = sh_namelen; + + SYSCALL(FF_SO_GETPEERNAME, args); + + if (ret == 0) { + socklen_t cplen = *namelen ? 
*sh_namelen > *namelen + : *sh_namelen; + rte_memcpy(name, sh_name, cplen); + *namelen = *sh_namelen; + } + + //share_mem_free(sh_name); + //share_mem_free(sh_namelen); + + RETURN_NOFREE(); +} + +int +ff_hook_getsockopt(int fd, int level, int optname, + void *optval, socklen_t *optlen) +{ + if (optlen == NULL) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(getsockopt, (fd, level, optname, + optval, optlen)); + + DEFINE_REQ_ARGS(getsockopt); + void *sh_optval = NULL; + socklen_t *sh_optlen = NULL; + + if (optval != NULL) { + sh_optval = share_mem_alloc(*optlen); + if (sh_optval == NULL) { + RETURN_ERROR(ENOMEM); + } + } + + sh_optlen = share_mem_alloc(sizeof(socklen_t)); + if (sh_optlen == NULL) { + if (sh_optval) { + share_mem_free(sh_optval); + } + + RETURN_ERROR(ENOMEM); + } + *sh_optlen = *optlen; + + args->fd = fd; + args->level = level; + args->name = optname; + args->optval = sh_optval; + args->optlen = sh_optlen; + + SYSCALL(FF_SO_GETSOCKOPT, args); + + if (ret == 0) { + if (optval) { + rte_memcpy(optval, sh_optval, *sh_optlen); + } + *optlen = *sh_optlen; + } + + if (sh_optval) { + share_mem_free(sh_optval); + } + share_mem_free(sh_optlen); + RETURN(); +} + +int +ff_hook_setsockopt(int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + if (optval == NULL && optlen != 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(setsockopt, (fd, level, optname, + optval, optlen)); + + DEFINE_REQ_ARGS_STATIC(setsockopt); + static __thread void *sh_optval = NULL; + static __thread socklen_t sh_optval_len = 0; + + if (optval != NULL) { + if (sh_optval == NULL || sh_optval_len < optlen) { + if (sh_optval) { + share_mem_free(sh_optval); + } + + sh_optval_len = optlen; + sh_optval = share_mem_alloc(sh_optval_len); + if (sh_optval == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + } + + args->fd = fd; + args->level = level; + args->name = optname; + args->optval = sh_optval; + args->optlen = optlen; + + SYSCALL(FF_SO_SETSOCKOPT, 
args); + + /*if (sh_optval) { + share_mem_free(sh_optval); + }*/ + + RETURN_NOFREE(); +} + +int +ff_hook_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) +{ + DEBUG_LOG("ff_hook_accept, fd:%d, addr:%p, len:%p\n", fd, addr, addrlen); + + if ((addr == NULL && addrlen != NULL) || + (addr != NULL && addrlen == NULL)) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(accept, (fd, addr, addrlen)); + + DEFINE_REQ_ARGS_STATIC(accept); + static __thread struct sockaddr *sh_addr = NULL; + static __thread socklen_t sh_addr_len = 0; + static __thread socklen_t *sh_addrlen = NULL; + + if (addr != NULL) { + if (sh_addr == NULL || sh_addr_len < *addrlen) { + if(sh_addr) { + share_mem_free(sh_addr); + } + + sh_addr_len = *addrlen; + sh_addr = share_mem_alloc(sh_addr_len); + if (sh_addr == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + if (sh_addrlen == NULL) { + sh_addrlen = share_mem_alloc(sizeof(socklen_t)); + if (sh_addrlen == NULL) { + //share_mem_free(sh_addr); // Don't free + RETURN_ERROR_NOFREE(ENOMEM); + } + } + *sh_addrlen = *addrlen; + + args->addr = sh_addr; + args->addrlen = sh_addrlen; + }else { + args->addr = NULL; + args->addrlen = NULL; + } + + args->fd = fd; + + SYSCALL(FF_SO_ACCEPT, args); + + if (ret > 0) { + ret = convert_fstack_fd(ret); + } + + if (addr) { + if (ret > 0) { + socklen_t cplen = *sh_addrlen > *addrlen ? 
+ *addrlen : *sh_addrlen; + rte_memcpy(addr, sh_addr, cplen); + *addrlen = *sh_addrlen; + } + //share_mem_free(sh_addr); // Don't free + //share_mem_free(sh_addrlen); + } + + RETURN_NOFREE(); +} + +int +ff_hook_accept4(int fd, struct sockaddr *addr, + socklen_t *addrlen, int flags) +{ + DEBUG_LOG("ff_hook_accept4, fd:%d, addr:%p, addrlen:%p, flags:%d\n", fd, addr, addrlen, flags); + + CHECK_FD_OWNERSHIP(accept4, (fd, addr, addrlen, flags)); + + errno = ENOSYS; + return -1; +} + +int +ff_hook_connect(int fd, const struct sockaddr *addr, + socklen_t addrlen) +{ + DEBUG_LOG("ff_hook_connect, fd:%d, addr:%p, addrlen:%u\n", fd, addr, addrlen); + + if (addr == NULL) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(connect, (fd, addr, addrlen)); + + DEFINE_REQ_ARGS_STATIC(connect); + static __thread struct sockaddr *sh_addr = NULL; + static __thread socklen_t sh_addr_len = 0; + + if (sh_addr == NULL || sh_addr_len < addrlen) { + if(sh_addr) { + share_mem_free(sh_addr); + } + + sh_addr_len = addrlen; + sh_addr = share_mem_alloc(sh_addr_len); + if (sh_addr == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_addr, addr, addrlen); + + args->fd = fd; + args->addr = sh_addr; + args->addrlen = addrlen; + + SYSCALL(FF_SO_CONNECT, args); + + //share_mem_free(sh_addr); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_recv(int fd, void *buf, size_t len, int flags) +{ + DEBUG_LOG("ff_hook_recv, fd:%d, buf:%p, len:%lu, flags:%d\n", + fd, buf, len, flags); + return ff_hook_recvfrom(fd, buf, len, flags, NULL, NULL); +} + +ssize_t +ff_hook_recvfrom(int fd, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen) +{ + DEBUG_LOG("ff_hook_recvfrom, fd:%d, buf:%p, len:%lu, flags:%d, from:%p, fromlen:%p\n", + fd, buf, len, flags, from, fromlen); + + if (buf == NULL || len == 0) { + errno = EINVAL; + return -1; + } + + if ((from == NULL && fromlen != NULL) || + (from != NULL && fromlen == NULL)) { + errno = EINVAL; + return -1; + } + + 
CHECK_FD_OWNERSHIP(recvfrom, (fd, buf, len, flags, from, fromlen)); + + DEFINE_REQ_ARGS_STATIC(recvfrom); + static __thread void *sh_buf = NULL; + static __thread size_t sh_buf_len = 0; + static __thread struct sockaddr *sh_from = NULL; + static __thread socklen_t sh_from_len = 0; + static __thread socklen_t *sh_fromlen = NULL; + + if (from != NULL) { + if (sh_from == NULL || sh_from_len < *fromlen) { + if (sh_from) { + share_mem_free(sh_from); + } + + sh_from_len = *fromlen; + sh_from = share_mem_alloc(sh_from_len); + if (sh_from == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + if (sh_fromlen == NULL) { + sh_fromlen = share_mem_alloc(sizeof(socklen_t)); + if (sh_fromlen == NULL) { + //share_mem_free(sh_from); + RETURN_ERROR_NOFREE(ENOMEM); + } + } + *sh_fromlen = *fromlen; /* seed the in/out length with the caller's buffer size, as ff_hook_accept does */ + args->from = sh_from; + args->fromlen = sh_fromlen; + } else { + args->from = NULL; + args->fromlen = NULL; + } + + if (sh_buf == NULL || sh_buf_len < (len * 4)) { + if (sh_buf) { + share_mem_free(sh_buf); + } + + sh_buf_len = len * 4; + sh_buf = share_mem_alloc(sh_buf_len); + if (sh_buf == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + args->fd = fd; + args->buf = sh_buf; + args->len = len; + args->flags = flags; + + SYSCALL(FF_SO_RECVFROM, args); + + if (ret >= 0) { + rte_memcpy(buf, sh_buf, ret); + if (from) { + socklen_t cplen = *sh_fromlen > *fromlen ? 
*fromlen + : *sh_fromlen; /* never copy more than the caller's buffer holds */ + rte_memcpy(from, sh_from, cplen); + *fromlen = *sh_fromlen; + } + } + + /*if (from) { + share_mem_free(sh_from); + share_mem_free(sh_fromlen); + } + + share_mem_free(sh_buf);*/ + + RETURN_NOFREE(); +} + +static void +iovec_share_free(struct iovec *iov, int iovcnt) +{ + int i; + + if (iov == NULL) { + return; + } + + for (i = 0; i < iovcnt; i++) { + if (iov[i].iov_base) { + share_mem_free(iov[i].iov_base); + } + } + + share_mem_free(iov); +} +/* Allocate, via share_mem_alloc(), an iovec array mirroring the entry lengths of iov (payload is not copied). Returns NULL if iov is NULL, iovcnt is 0, or any allocation fails. */ +static struct iovec * +iovec_share_alloc(const struct iovec *iov, int iovcnt) +{ + struct iovec *sh_iov; + int i; + + if (iov == NULL || iovcnt == 0) { + return NULL; + } + + sh_iov = share_mem_alloc(sizeof(struct iovec) * iovcnt); + if (sh_iov == NULL) { + return NULL; + } + + for (i = 0; i < iovcnt; i++) { + sh_iov[i].iov_len = iov[i].iov_len; + void *iov_base = share_mem_alloc(sh_iov[i].iov_len); + sh_iov[i].iov_base = iov_base; + + if (iov_base == NULL) { + goto ERROR; + } + } + + return sh_iov; + +ERROR: + iovec_share_free(sh_iov, i); /* only entries [0, i) hold allocations; later entries were never written */ + return NULL; +} + +static void +iovec_local2share(struct iovec *share, const struct iovec *local, + int iovcnt) +{ + int i; + + if (share == NULL || local == NULL || iovcnt == 0) { + return; + } + + for (i = 0; i < iovcnt; i++) { + assert(share[i].iov_len == local[i].iov_len); + + rte_memcpy(share[i].iov_base, local[i].iov_base, + share[i].iov_len); + } +} + +static void +iovec_share2local(struct iovec *share, + const struct iovec *local, int iovcnt, + ssize_t total, int copy) +{ + int i; + for (i = 0; i < iovcnt && total > 0; i++) { + ssize_t count = local[i].iov_len; + if (total <= count) { + count = total; + } + + share[i].iov_base = + (char *)share[i].iov_base - count; + share[i].iov_len += count; + + if (copy) { + rte_memcpy(local[i].iov_base, + share[i].iov_base, count); + } + + total -= count; + } +} + +static struct iovec * +iovec_share_alloc_s() +{ + int i, iovcnt = IOV_MAX; + + sh_iov_static = share_mem_alloc(sizeof(struct iovec) * 
iovcnt); + if (sh_iov_static == NULL) { + ERR_LOG("share_mem_alloc shiov failed, oom\n"); + errno = ENOMEM; + return NULL; + } + + for (i = 0; i < iovcnt; i++) { + sh_iov_static[i].iov_len = IOV_LEN_MAX; + void *iov_base = share_mem_alloc(sh_iov_static[i].iov_len); + sh_iov_static[i].iov_base = iov_base; + sh_iov_static_base[i] = iov_base; + + if (iov_base == NULL) { + ERR_LOG("share_mem_alloc iov_base:%d failed, oom\n", i); + errno = ENOMEM; + goto ERROR; + } + } + + ERR_LOG("iovec_share_alloc_s alloc sh_iov_static:%p success, iovcnt:%d, per iov_len:%d\n", + sh_iov_static, IOV_MAX, IOV_LEN_MAX); + + return sh_iov_static; + +ERROR: + iovec_share_free(sh_iov_static, i); + return NULL; +} + +static int +_iovec_local2share_s(const struct iovec *local, int iovcnt, size_t skip) +{ + int i, j; + size_t len, total = 0; + + DEBUG_LOG("_iovec_local2share_s local iov:%p, iovcnt:%d, skip:%lu, sh_iov_static:%p, " + "first iov_base:%p, iov_len:%lu\n", + local, iovcnt, skip, sh_iov_static, + sh_iov_static[0].iov_base, sh_iov_static[0].iov_len); + + if (local == NULL || iovcnt == 0) { + errno = EINVAL; + return -1; + } + + for (i = sh_iov_static_fill_idx_local, j = 0; i < iovcnt && j < IOV_MAX; i++, j++) { + DEBUG_LOG("local[%d].iov_len:%lu, skip:%lu, total:%lu\n", + i, local[i].iov_len, skip, total); + + if (local[i].iov_len <= skip) { + skip -= local[i].iov_len; + continue; + } + + if ((local[i].iov_len - skip) <= IOV_LEN_MAX) { + sh_iov_static[j].iov_len = local[i].iov_len - skip; + rte_memcpy(sh_iov_static[j].iov_base, local[i].iov_base, + sh_iov_static[j].iov_len); + total += sh_iov_static[j].iov_len; + DEBUG_LOG("sh_iov_static[%d].iov_base:%p, len:%lu, skip:%lu, total:%lu\n", + j, sh_iov_static[j].iov_base, sh_iov_static[j].iov_len, skip, total); + } else { + len = local[i].iov_len - skip; + DEBUG_LOG("local[%d].iov_len:%lu, skip:%lu, total:%lu, len(iov_len - skip):%lu\n", + i, local[i].iov_len, skip, total, len); + for (; j < IOV_MAX ; j++) { + sh_iov_static[j].iov_len = 
RTE_MIN(IOV_LEN_MAX, len); + rte_memcpy(sh_iov_static[j].iov_base, local[i].iov_base + (local[i].iov_len - len), + sh_iov_static[j].iov_len); + + len -= sh_iov_static[j].iov_len; + total += sh_iov_static[j].iov_len; + + DEBUG_LOG("sh_iov_static[%d].iov_base:%p, len:%lu, skip:%lu, total:%lu, len:%lu\n", + j, sh_iov_static[j].iov_base, sh_iov_static[j].iov_len, skip, total, len); + + if (len == 0) { + break; + } + } + + if (j == IOV_MAX) { + ERR_LOG("Too large buf to send/write, you best to reduce it.\n"); + break; + } + } + } + + sh_iov_static_fill_idx_local = i; + sh_iov_static_fill_idx_share = j; + + DEBUG_LOG("sh_iov_static_fill_idx_local(i):%d, sh_iov_static_fill_idx_share(j):%d, skip:%lu, total:%lu\n", + sh_iov_static_fill_idx_local, sh_iov_static_fill_idx_share, skip, total); + + return total; +} + +static int +iovec_local2share_s(const struct iovec *iov, int iovcnt, size_t skip) +{ + int sent = 0; + + DEBUG_LOG("iovec_local2share_s iov:%p, iovcnt:%d, skip:%lu, sh_iov_static:%p\n", + iov, iovcnt, skip, sh_iov_static); + + if (sh_iov_static == NULL) { + sh_iov_static = iovec_share_alloc_s(); + if (sh_iov_static == NULL) { + ERR_LOG("iovec_share_alloc_s failed, oom\n"); + errno = ENOMEM; + return -1; + } + } + + sent = _iovec_local2share_s(iov, iovcnt, skip); + + return sent; +} + +static void +iovec_share2local_s() +{ + int i; + + DEBUG_LOG("iovec_share2local_s sh_iov_static:%p, sh_iov_static_fill_idx_share:%d\n", + sh_iov_static, sh_iov_static_fill_idx_share); + + for (i = 0; i < sh_iov_static_fill_idx_share; i++) { + sh_iov_static[i].iov_base = sh_iov_static_base[i]; + sh_iov_static[i].iov_len = IOV_LEN_MAX; + } +} + +static void +msghdr_share_free(struct msghdr *msg) +{ + if (msg == NULL) { + return; + } + + if (msg->msg_name) { + share_mem_free(msg->msg_name); + } + + if (msg->msg_control) { + share_mem_free(msg->msg_control); + } + + if (msg->msg_iov) { + iovec_share_free(msg->msg_iov, msg->msg_iovlen); + } + + share_mem_free(msg); +} + +static struct 
msghdr * +msghdr_share_alloc(const struct msghdr *msg) +{ + struct msghdr *hdr; + + if (msg == NULL) { + return NULL; + } + + hdr = share_mem_alloc(sizeof(struct msghdr)); + if (hdr == NULL) { + return NULL; + } + memset(hdr, 0, sizeof(struct msghdr)); + + hdr->msg_namelen = msg->msg_namelen; + hdr->msg_iovlen = msg->msg_iovlen; + hdr->msg_controllen = msg->msg_controllen; + hdr->msg_flags = msg->msg_flags; + + if (msg->msg_name) { + hdr->msg_name = share_mem_alloc(hdr->msg_namelen); + if (hdr->msg_name == NULL) { + goto ERROR; + } + } + + if (msg->msg_control) { + hdr->msg_control = share_mem_alloc(hdr->msg_controllen); + if (hdr->msg_control == NULL) { + goto ERROR; + } + } + + hdr->msg_iov = iovec_share_alloc(msg->msg_iov, hdr->msg_iovlen); + if (hdr->msg_iov == NULL) { + goto ERROR; + } + + return hdr; + +ERROR: + msghdr_share_free(hdr); + return NULL; +} + +static void +msghdr_share_memcpy(const struct msghdr *dst, const struct msghdr *src) +{ + if (dst == NULL || src == NULL) { + return; + } + + assert((dst->msg_name == NULL && src->msg_name == NULL) + || (dst->msg_name != NULL && src->msg_name != NULL)); + assert(dst->msg_namelen == src->msg_namelen); + + assert((dst->msg_control == NULL && src->msg_control == NULL) + || (dst->msg_control != NULL && src->msg_control != NULL)); + assert(dst->msg_controllen == src->msg_controllen); + + if (dst->msg_name) { + rte_memcpy(dst->msg_name, src->msg_name, src->msg_namelen); + } + + if (dst->msg_control) { + rte_memcpy(dst->msg_control, src->msg_control, + src->msg_controllen); + } + + //do iovec_memcpy by caller. 
+} + +ssize_t +ff_hook_recvmsg(int fd, struct msghdr *msg, int flags) +{ + DEBUG_LOG("ff_hook_recvmsg, fd:%d, msg:%p, flags:%d\n", fd, msg, flags); + + if (msg == NULL || msg->msg_iov == NULL || + msg->msg_iovlen == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(recvmsg, (fd, msg, flags)); + + DEFINE_REQ_ARGS_STATIC(recvmsg); + + /* + * If calling very frequently, + * may need to not free the memory malloc with rte_malloc, + * to improve proformance. + * + * Because this API support it relatively troublesome, + * so no support right now. + */ + struct msghdr *sh_msg = NULL; + + sh_msg = msghdr_share_alloc(msg); + if (sh_msg == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + + args->fd = fd; + args->msg = sh_msg; + args->flags = flags; + + SYSCALL(FF_SO_RECVMSG, args); + + if (ret >= 0) { + msghdr_share_memcpy(msg, sh_msg); + if (ret > 0) { + iovec_share2local(sh_msg->msg_iov, + msg->msg_iov, msg->msg_iovlen, + ret, 1); + } + } + + msghdr_share_free(sh_msg); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_read(int fd, void *buf, size_t len) +{ + DEBUG_LOG("ff_hook_read, fd:%d, buf:%p, len:%lu\n", fd, buf, len); + + if (buf == NULL || len == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(read, (fd, buf, len)); + + DEFINE_REQ_ARGS_STATIC(read); + static __thread void *sh_buf = NULL; + static __thread size_t sh_buf_len = 0; + + /* alloc or realloc sh_buf */ + if (sh_buf == NULL || sh_buf_len < (len * 4)) { + if (sh_buf) { + share_mem_free(sh_buf);; + } + + /* alloc 4 times buf space */ + sh_buf_len = len * 4; + sh_buf = share_mem_alloc(sh_buf_len); + if (sh_buf == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + args->fd = fd; + args->buf = sh_buf; + args->len = len; + + SYSCALL(FF_SO_READ, args); + + if (ret > 0) { + rte_memcpy(buf, sh_buf, ret); + } + + //share_mem_free(sh_buf); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_readv(int fd, const struct iovec *iov, int iovcnt) +{ + DEBUG_LOG("ff_hook_readv, fd:%d, iov:%p, iovcnt:%d\n", fd, iov, 
iovcnt); + + if (iov == NULL || iovcnt == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(readv, (fd, iov, iovcnt)); + + DEFINE_REQ_ARGS_STATIC(readv); + + /* + * If calling very frequently, + * may need to not free the memory malloc with rte_malloc, + * to improve proformance, see ff_hook_writev(). + */ + struct iovec *sh_iov = NULL; + + sh_iov = iovec_share_alloc(iov, iovcnt); + if (sh_iov == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + + args->fd = fd; + args->iov = sh_iov; + args->iovcnt = iovcnt; + + SYSCALL(FF_SO_READV, args); + + if (ret > 0) { + iovec_share2local(sh_iov, iov, iovcnt, ret, 1); + } + + iovec_share_free(sh_iov, iovcnt); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_sendto(int fd, const void *buf, size_t len, int flags, + const struct sockaddr *to, socklen_t tolen) +{ + DEBUG_LOG("ff_hook_sendto, fd:%d, buf:%p, len:%lu, flags:%d, to:%p, tolen:%d\n", + fd, buf, len, flags, to, tolen); + + if (buf == NULL || len == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(sendto, (fd, buf, len, flags, to, tolen)); + + DEFINE_REQ_ARGS_STATIC(sendto); + static __thread void *sh_buf = NULL; + static __thread size_t sh_buf_len = 0; + static __thread void *sh_to = NULL; + static __thread socklen_t sh_to_len = 0; + + if (sh_buf == NULL || sh_buf_len < (len * 4)) { + if (sh_buf) { + share_mem_free(sh_buf);; + } + + /* alloc 4 times buf space */ + sh_buf_len = len * 4; + sh_buf = share_mem_alloc(sh_buf_len); + if (sh_buf == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_buf, buf, len); + + if (to) { + if (sh_to == NULL || sh_to_len < tolen) { + if (sh_to) { + share_mem_free(sh_to); + } + + sh_to_len = tolen; + sh_to = share_mem_alloc(sh_to_len); + if (sh_to == NULL) { + //share_mem_free(sh_buf); + RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_to, to, tolen); + args->to = sh_to; + args->tolen = tolen; + } else { + args->to = NULL; + args->tolen = 0; + } + + args->fd = fd; + args->buf = sh_buf; + args->len = len; + 
args->flags = flags; + + SYSCALL(FF_SO_SENDTO, args); + + /*share_mem_free(sh_buf); + if (sh_to) { + share_mem_free(sh_to); + }*/ + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_sendmsg(int fd, const struct msghdr *msg, int flags) +{ + DEBUG_LOG("ff_hook_sendmsg, fd:%d, msg:%p, flags:%d\n", + fd, msg, flags); + + if (msg == NULL || msg->msg_iov == NULL || + msg->msg_iovlen == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(sendmsg, (fd, msg, flags)); + + DEFINE_REQ_ARGS_STATIC(sendmsg); + + /* + * If calling very frequently, + * may need to not free the memory malloc with rte_malloc, + * to improve proformance. + * + * Because this API support it relatively troublesome, + * so no support right now. + */ + struct msghdr *sh_msg = NULL; + + sh_msg = msghdr_share_alloc(msg); + if (sh_msg == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + msghdr_share_memcpy(sh_msg, msg); + iovec_local2share(sh_msg->msg_iov, + msg->msg_iov, msg->msg_iovlen); + + args->fd = fd; + args->msg = sh_msg; + args->flags = flags; + + SYSCALL(FF_SO_SENDMSG, args); + + if (ret > 0) { + iovec_share2local(sh_msg->msg_iov, + msg->msg_iov, msg->msg_iovlen, + ret, 0); + } + + msghdr_share_free(sh_msg); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_send(int fd, const void *buf, size_t len, int flags) +{ + DEBUG_LOG("ff_hook_send, fd:%d, buf:%p, len:%lu, flags:%d\n", fd, buf, len, flags); + return ff_hook_sendto(fd, buf, len, flags, NULL, 0); +} + +ssize_t +ff_hook_write(int fd, const void *buf, size_t len) +{ + DEBUG_LOG("ff_hook_write, fd:%d, len:%lu\n", fd, len); + + if (buf == NULL || len == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(write, (fd, buf, len)); + + DEFINE_REQ_ARGS_STATIC(write); + static __thread void *sh_buf = NULL; + static __thread size_t sh_buf_len = 0; + + if (sh_buf == NULL || sh_buf_len < (len * 4)) { + if (sh_buf) { + share_mem_free(sh_buf); + } + + sh_buf_len = len * 4; + sh_buf = share_mem_alloc(sh_buf_len); + if (sh_buf == NULL) { + 
RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_buf, buf, len); + + args->fd = fd; + args->buf = sh_buf; + args->len = len; + + SYSCALL(FF_SO_WRITE, args); + + //share_mem_free(sh_buf); + + RETURN_NOFREE(); +} + +ssize_t +ff_hook_writev(int fd, const struct iovec *iov, int iovcnt) +{ + size_t sent = 0; + int ret_s = -1; + + DEBUG_LOG("ff_hook_writev, fd:%d, iov:%p, iovcnt:%d\n", fd, iov, iovcnt); + + if (iov == NULL || iovcnt == 0) { + errno = EINVAL; + return -1; + } + + CHECK_FD_OWNERSHIP(writev, (fd, iov, iovcnt)); + + DEFINE_REQ_ARGS_STATIC(writev); + + errno = 0; + args->fd = fd; + + do { + sh_iov_static_fill_idx_local = 0; + sh_iov_static_fill_idx_share = 0; + ret_s = iovec_local2share_s(iov, iovcnt, sent); + DEBUG_LOG("iovec_local2share_s ret_s:%d, iov:%p, ipvcnt:%d, send:%lu, " + "sh_iov_static:%p, sh_iov_static_fill_idx_local:%d, sh_iov_static_fill_idx_share:%d\n", + ret_s, iov, iovcnt, sent, + sh_iov_static, sh_iov_static_fill_idx_local, sh_iov_static_fill_idx_share); + if (ret_s < 0) { + ERR_LOG("get_iovec_share failed, iov:%p, iovcnt:%d, sh_iov_static_fill_idx_local:%d," + " sh_iov_static_fill_idx_share:%d", + iov, iovcnt, sh_iov_static_fill_idx_local, + sh_iov_static_fill_idx_share); + return -1; + } + + args->iov = sh_iov_static; + args->iovcnt = sh_iov_static_fill_idx_share; + + SYSCALL(FF_SO_WRITEV, args); + + /* + * This API can be igroned while use sh_iov_static_base[i] directly + * in _iovec_local2share_s. But don't do like that now + */ + iovec_share2local_s(); + + if (ret > 0) { + sent += ret; + } + + /* + * Don't try to send again in this case. 
+ */ + DEBUG_LOG("iovec_local2share_s ret_s:%d, f-stack writev ret:%d, total sent:%lu\n", ret_s, ret, sent); + if (ret != ret_s) { + break; + } + } while (sh_iov_static_fill_idx_local < iovcnt); + sh_iov_static_fill_idx_share = 0; + + if (sent > 0) { + ret = sent; + } + + RETURN_NOFREE(); +} + +int +ff_hook_close(int fd) +{ + DEBUG_LOG("ff_hook_close, fd:%d\n", fd); + + CHECK_FD_OWNERSHIP(close, (fd)); + + DEFINE_REQ_ARGS_STATIC(close); + +#ifdef FF_MULTI_SC + /* + * Hear don't care if the fd belong to this worker sc, + * just scs[i].fd == fd, to close it + * until the loop close all fd. + */ + if (unlikely(current_worker_id == worker_id)) { + int i; + for (i = 0; i < worker_id; i++) { + if (scs[i].fd == fd) { + ERR_LOG("worker_id:%d, fd:%d, sc:%p, sc->fd:%d, sc->worker_id:%d\n", + i, fd, scs[i].sc, scs[i].fd, scs[i].worker_id); + sc = scs[i].sc; + scs[i].fd = -1; + break; + } + } + } +#endif + args->fd = fd; + + SYSCALL(FF_SO_CLOSE, args); + + RETURN_NOFREE(); +} + +int +ff_hook_ioctl(int fd, unsigned long req, unsigned long data) +{ + #ifndef FIOASYNC + #define FIOASYNC 0x5452 + #endif + #ifndef FIONBIO + #define FIONBIO 0x5421 + #endif + + if (req != FIOASYNC && req != FIONBIO) { + errno = ENOTSUP; + return -1; + } + + CHECK_FD_OWNERSHIP(ioctl, (fd, req, data)); + + DEFINE_REQ_ARGS_STATIC(ioctl); + + static __thread unsigned long *sh_data = NULL; + + if (sh_data == NULL) { + sh_data = share_mem_alloc(sizeof(int)); + if (sh_data == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + *((int *)sh_data) = *((int *)data); + + args->fd = fd; + args->com = req; + args->data = sh_data; + + SYSCALL(FF_SO_IOCTL, args); + + if (ret == 0) { + *((int *)data) = *((int *)sh_data); + } + + //share_mem_free(sh_data); + + RETURN_NOFREE(); +} + +int +ff_hook_fcntl(int fd, int cmd, unsigned long data) +{ + CHECK_FD_OWNERSHIP(fcntl, (fd, cmd, data)); + + DEFINE_REQ_ARGS_STATIC(fcntl); + + args->fd = fd; + args->cmd = cmd; + args->data = data; + + SYSCALL(FF_SO_FCNTL, args); + + 
RETURN_NOFREE(); +} + +/* + * Use F-Stack stack by default. + * + * If fdsize set SOCK_KERNEL(0x01000000) and not set SOCK_FSTACK(0x02000000), means use kernel stack. + * And the max fdsize shoud be <= (SOCK_KERNEL - 1). + * + * If fdsize set [1, 16], means use kernel stack, need to consider a better implementation. + */ +int +ff_hook_epoll_create(int fdsize) +{ + + ERR_LOG("ff_hook_epoll_create, fdsize:%d\n", fdsize); + if (inited == 0 || ((fdsize & SOCK_KERNEL) && !(fdsize & SOCK_FSTACK))/* || (fdsize >= 1 && fdsize <= 16)*/) { + fdsize &= ~SOCK_KERNEL; + return ff_linux_epoll_create(fdsize); + } + + DEFINE_REQ_ARGS(epoll_create); + + args->size = size; + + SYSCALL(FF_SO_EPOLL_CREATE, args); + + if (ret >= 0) { +#ifdef FF_KERNEL_EVENT + int kernel_fd; + + kernel_fd = ff_linux_epoll_create(fdsize); + fstack_kernel_fd_map[ret] = kernel_fd; + ERR_LOG("ff_hook_epoll_create fstack fd:%d, FF_KERNEL_EVENT kernel_fd:%d:\n", ret, kernel_fd); +#endif + ret = convert_fstack_fd(ret); + } + + ERR_LOG("ff_hook_epoll_create return fd:%d\n", ret); + + RETURN(); +} + +int +ff_hook_epoll_ctl(int epfd, int op, int fd, + struct epoll_event *event) +{ + int ff_epfd; + + DEBUG_LOG("ff_hook_epoll_ctl, epfd:%d, op:%d, fd:%d\n", epfd, op, fd); + +#ifdef FF_KERNEL_EVENT + if (unlikely(!is_fstack_fd(fd))) { + if (is_fstack_fd(epfd)) { + ff_epfd = restore_fstack_fd(epfd); + if (likely(fstack_kernel_fd_map[ff_epfd] > 0)) { + epfd = fstack_kernel_fd_map[ff_epfd]; + DEBUG_LOG("ff_epfd:%d, kernel epfd:%d\n", ff_epfd, epfd); + } else { + ERR_LOG("invalid fd and ff_epfd:%d, epfd:%d, op:%d, fd:%d\n", ff_epfd, epfd, op, fd); + errno = EBADF; + return -1; + } + } + return ff_linux_epoll_ctl(epfd, op, fd, event); + } + fd = restore_fstack_fd(fd); +#else + CHECK_FD_OWNERSHIP(epoll_ctl, (epfd, op, fd, event)); +#endif + ff_epfd = restore_fstack_fd(epfd); + + DEFINE_REQ_ARGS_STATIC(epoll_ctl); + static __thread struct epoll_event *sh_event = NULL; + + if ((!event && op != EPOLL_CTL_DEL) || + (op != 
EPOLL_CTL_ADD && + op != EPOLL_CTL_MOD && + op != EPOLL_CTL_DEL)) { + errno = EINVAL; + return -1; + } + + if (event) { + if (sh_event == NULL) { + sh_event = share_mem_alloc(sizeof(struct epoll_event)); + if (sh_event == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_event, event, sizeof(struct epoll_event)); + args->event = sh_event; + } else { + args->event = NULL; + } + + args->epfd = ff_epfd; + args->op = op; + args->fd = fd; + + SYSCALL(FF_SO_EPOLL_CTL, args); + + /*if (sh_event) { + share_mem_free(sh_event); + }*/ + + RETURN_NOFREE(); +} + +int +ff_hook_epoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + DEBUG_LOG("ff_hook_epoll_wait, epfd:%d, maxevents:%d, timeout:%d\n", epfd, maxevents, timeout); + int fd = epfd; + struct timespec abs_timeout; + + CHECK_FD_OWNERSHIP(epoll_wait, (epfd, events, maxevents, timeout)); + + DEFINE_REQ_ARGS_STATIC(epoll_wait); + static __thread struct epoll_event *sh_events = NULL; + static __thread int sh_events_len = 0; + +#ifdef FF_KERNEL_EVENT + /* maxevents must >= 2, if use FF_KERNEL_EVENT */ + if (unlikely(maxevents < 2)) { + ERR_LOG("maxevents must >= 2, if use FF_KERNEL_EVENT, now is %d\n", maxevents); + RETURN_ERROR_NOFREE(EINVAL); + } + + int kernel_ret = 0; + int kernel_maxevents = kernel_maxevents = maxevents / 16; + + if (kernel_maxevents > SOCKET_OPS_CONTEXT_MAX_NUM) { + kernel_maxevents = SOCKET_OPS_CONTEXT_MAX_NUM; + } else if (kernel_maxevents <= 0) { + kernel_maxevents = 1; + } + maxevents -= kernel_maxevents; +#endif + + if (sh_events == NULL || sh_events_len < maxevents) { + if (sh_events) { + share_mem_free(sh_events); + } + + sh_events_len = maxevents; + sh_events = share_mem_alloc(sizeof(struct epoll_event) * sh_events_len); + if (sh_events == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + + if (timeout > 0) { + clock_gettime(CLOCK_REALTIME, &abs_timeout); + DEBUG_LOG("before wait, sec:%ld, nsec:%ld\n", abs_timeout.tv_sec, abs_timeout.tv_nsec); + 
abs_timeout.tv_sec += timeout / 1000; + /* must % 1000 first, otherwise type(int) maybe overflow, and sem_timedwait failed */ + abs_timeout.tv_nsec += (timeout % 1000) * 1000 * 1000; + if (abs_timeout.tv_nsec > NS_PER_SECOND) { + abs_timeout.tv_nsec -= NS_PER_SECOND; + abs_timeout.tv_sec += 1; + } + if (unlikely(abs_timeout.tv_sec < 0 || abs_timeout.tv_nsec < 0)) { + ERR_LOG("invalid timeout argument, the sec:%ld, nsec:%ld\n", + abs_timeout.tv_sec, abs_timeout.tv_nsec); + RETURN_ERROR_NOFREE(EINVAL); + } + } + + args->epfd = fd; + args->events = sh_events; + args->maxevents = maxevents; + args->timeout = timeout; + +RETRY: + /* for timeout, Although not really effective in FreeBSD stack */ + //SYSCALL(FF_SO_EPOLL_WAIT, args); + ACQUIRE_ZONE_LOCK(FF_SC_IDLE); + sc->ops = FF_SO_EPOLL_WAIT; + sc->args = args; + + /* + * sc->result, sc->error must reset in epoll_wait and kevent. + * Otherwise can access last sc call's result. + * + * Because if sem_timedwait timeouted, but fstack instance still + * call sem_post later, and next or next's next sem_timedwait will + * return 0 directly, then get invalid result and error. + */ + sc->result = 0; + sc->error = 0; + errno = 0; + if (timeout <= 0) { + need_alarm_sem = 1; + } + + RELEASE_ZONE_LOCK(FF_SC_REQ); + +#ifdef FF_KERNEL_EVENT + /* + * Call ff_linux_epoll_wait before sem_timedwait/sem_wait. + * And set timeout is 0. + * + * If there are events return, and move event offset to unused event for copy F-Stack events. 
+ */ + DEBUG_LOG("call ff_linux_epoll_wait at the same time, epfd:%d, fstack_kernel_fd_map[epfd]:%d, kernel_maxevents:%d\n", + fd, fstack_kernel_fd_map[fd], kernel_maxevents); + if (likely(fstack_kernel_fd_map[fd] > 0)) { + static uint64_t count = 0; + if (unlikely((count & 0xff) == 0)) { + kernel_ret = ff_linux_epoll_wait(fstack_kernel_fd_map[fd], events, kernel_maxevents, 0); + DEBUG_LOG("ff_linux_epoll_wait count:%lu, kernel_ret:%d, errno:%d\n", count, ret, errno); + if (kernel_ret < 0) { + kernel_ret = 0; + } else if (kernel_ret > 0) { + events += kernel_ret; + } + } + count++; + } +#endif + + if (timeout > 0) { + DEBUG_LOG("ready to wait, sec:%ld, nsec:%ld\n", abs_timeout.tv_sec, abs_timeout.tv_nsec); + ret = sem_timedwait(&sc->wait_sem, &abs_timeout); + + clock_gettime(CLOCK_REALTIME, &abs_timeout); + DEBUG_LOG("after wait, sec:%ld, nsec:%ld\n", abs_timeout.tv_sec, abs_timeout.tv_nsec); + } else { + ret = sem_wait(&sc->wait_sem); + } + + rte_spinlock_lock(&sc->lock); + + if (timeout <= 0) { + need_alarm_sem = 0; + } + + /* + * After sem_timedwait, and before lock sc, sc->status may be modify from FF_SC_REQ to FF_SC_RSP, + * so it can't use to check. + * + * And only ret == 0, means sem_timedwait return normal, + * can set ret = sc->result, otherwise may use last sc->result. 
+ */ + DEBUG_LOG("sem wait, ret:%d, sc->result:%d, sc->errno:%d\n", + ret, sc->result, sc->error); + if (unlikely(ret == -1 && errno == ETIMEDOUT /* sc->status == FF_SC_REQ */)) { + ret = 0; + } else if (likely(ret == 0)) { + ret = sc->result; + if (ret < 0) { + errno = sc->error; + } + } + + sc->status = FF_SC_IDLE; + rte_spinlock_unlock(&sc->lock); + + if (likely(ret > 0)) { + if (unlikely(ret > maxevents)) { + ERR_LOG("return events:%d, maxevents:%d, set return events to maxevents, may be some error occur\n", + ret, maxevents); + ret = maxevents; + } + rte_memcpy(events, sh_events, sizeof(struct epoll_event) * ret); + } + +#ifdef FF_KERNEL_EVENT + if (unlikely(kernel_ret > 0)) { + if (likely(ret > 0)) { + ret += kernel_ret; + } else { + ret = kernel_ret; + } + } +#endif + + /* If timeout is -1, always retry epoll_wait until ret not 0 */ + if (timeout <= 0 && ret == 0) { + //usleep(100); + rte_pause(); + goto RETRY; + } + + /* + * Don't free, to improve proformance. + * Will cause memory leak while APP exit , but fstack adapter not exit. + * May set them as gloabl variable and free in thread_destructor. + */ + /*if (sh_events) { + share_mem_free(sh_events); + sh_events = NULL; + }*/ + + RETURN_NOFREE(); +} + +pid_t +ff_hook_fork(void) +{ + pid_t pid; + + ERR_LOG("ff_hook_fork\n"); +#ifdef FF_MULTI_SC + /* Let the child process inherit the specified sc and ff_so_zone*/ + sc = scs[current_worker_id].sc; + ff_so_zone = ff_so_zones[current_worker_id]; +#endif + + if (sc) { + rte_spinlock_lock(&sc->lock); + } + + pid = ff_linux_fork(); + + if (sc) { + /* Parent process set refcount. 
*/ + if (pid > 0) { + sc->refcount++; + ERR_LOG("parent process, chilid pid:%d, sc:%p, sc->refcount:%d, ff_so_zone:%p\n", + pid, sc, sc->refcount, ff_so_zone); +#ifdef FF_MULTI_SC + current_worker_id++; + ERR_LOG("parent process, current_worker_id++:%d\n", current_worker_id); +#endif + } + else if (pid == 0) { + ERR_LOG("chilid process, sc:%p, sc->refcount:%d, ff_so_zone:%p\n", + sc, sc->refcount, ff_so_zone); +#ifdef FF_MULTI_SC + ERR_LOG("chilid process, current_worker_id:%d\n", current_worker_id); +#endif + } + + /* Parent process unlock sc, fork success of failed. */ + if (pid != 0) { + rte_spinlock_unlock(&sc->lock); + } + } + + return pid; +} + +int +kqueue() +{ + int ret = -1; + + DEBUG_LOG("run kqueue\n"); + + if (unlikely(inited == 0)) { + errno = ENOSYS; + return -1; + } + + SYSCALL(FF_SO_KQUEUE, NULL); + + if (ret >= 0) { + ret = convert_fstack_fd(ret); + } + + DEBUG_LOG("get fd:%d\n", ret); + + return ret; +} + +int +kevent(int kq, const struct kevent *changelist, int nchanges, + struct kevent *eventlist, int nevents, + const struct timespec *timeout) +{ + int i; + int maxevents = nevents; + struct kevent *kev; + + DEBUG_LOG("kq:%d, nchanges:%d, nevents:%d\n", kq, nchanges, nevents); + + if (unlikely(inited == 0 && ff_adapter_init() < 0)) { + errno = ENOSYS; + return -1; + } + + kq = restore_fstack_fd(kq); + + DEFINE_REQ_ARGS_STATIC(kevent); + static __thread struct kevent *sh_changelist = NULL; + static __thread int sh_changelist_len = 0; + static __thread struct kevent *sh_eventlist = NULL; + static __thread int sh_eventlist_len = 0; + + if (changelist != NULL && nchanges > 0) { + if (sh_changelist == NULL || sh_changelist_len < nchanges) { + if (sh_changelist) { + share_mem_free(sh_changelist); + } + + sh_changelist_len = nchanges; + sh_changelist = share_mem_alloc(sizeof(struct kevent) * sh_changelist_len); + if (sh_changelist == NULL) { + RETURN_ERROR_NOFREE(ENOMEM); + } + } + rte_memcpy(sh_changelist, changelist, sizeof(struct kevent) * nchanges); 
+ + for(i = 0; i < nchanges; i++) { + kev = (struct kevent *)&sh_changelist[i]; + switch (kev->filter) { + case EVFILT_READ: + case EVFILT_WRITE: + case EVFILT_VNODE: + kev->ident = restore_fstack_fd(kev->ident); + break; + default: + break; + } + } + args->changelist = sh_changelist; + args->nchanges = nchanges; + } else { + args->changelist = NULL; + args->nchanges = 0; + } + + if (eventlist != NULL && nevents > 0) { + if (sh_eventlist == NULL || sh_eventlist_len < nevents) { + if (sh_eventlist) { + share_mem_free(sh_eventlist); + } + + sh_eventlist_len = nevents; + sh_eventlist = share_mem_alloc(sizeof(struct kevent) * sh_eventlist_len); + if (sh_eventlist == NULL) { + //share_mem_free(sh_changelist); // don't free + RETURN_ERROR_NOFREE(ENOMEM); + } + } + args->eventlist = sh_eventlist; + args->nevents = nevents; + } else { + args->eventlist = NULL; + args->nevents = 0; + } + + args->kq = kq; + args->timeout = (struct timespec *)timeout; + + ACQUIRE_ZONE_LOCK(FF_SC_IDLE); + //rte_spinlock_lock(&sc->lock); + + sc->ops = FF_SO_KEVENT; + sc->args = args; + sc->status = FF_SC_REQ; + + /* + * sc->result, sc->error must reset in epoll_wait and kevent. + * Otherwise can access last sc call's result. + * + * Because if sem_timedwait timeouted, but fstack instance still + * call sem_post later, and next or next's next sem_timedwait will + * return 0 directly, then get invalid result and error. 
+ */ + sc->result = 0; + sc->error = 0; + errno = 0; + if (timeout == NULL) { + need_alarm_sem = 1; + } + + rte_spinlock_unlock(&sc->lock); + + if (timeout != NULL) { + struct timespec abs_timeout; + + clock_gettime(CLOCK_REALTIME, &abs_timeout); + abs_timeout.tv_sec += timeout->tv_sec; + abs_timeout.tv_nsec += timeout->tv_nsec; + if (abs_timeout.tv_nsec > NS_PER_SECOND) { + abs_timeout.tv_nsec -= NS_PER_SECOND; + abs_timeout.tv_sec += 1; + } + if (unlikely(abs_timeout.tv_sec < 0 || abs_timeout.tv_nsec < 0)) { + ERR_LOG("invalid timeout argument, the sec:%ld, nsec:%ld\n", + abs_timeout.tv_sec, abs_timeout.tv_nsec); + errno = EINVAL; + ret = -1; + } else { + ret = sem_timedwait(&sc->wait_sem, &abs_timeout); + } + } else { + ret = sem_wait(&sc->wait_sem); + } + + rte_spinlock_lock(&sc->lock); + + if (timeout == NULL) { + need_alarm_sem = 0; + } + + /* + * After sem_timedwait, and before lock sc, sc->status may be modify from FF_SC_REQ to FF_SC_RSP, + * so it can't use to check. + * + * And only ret == 0, means sem_timedwait return normal, + * can set ret = sc->result, otherwise may use last sc->result. + */ + if (ret == -1 && errno == ETIMEDOUT /* sc->status == FF_SC_REQ */) { + ret = 0; + } else if (ret == 0) { + ret = sc->result; + if (ret < 0) { + errno = sc->error; + } + } + + sc->status = FF_SC_IDLE; + + rte_spinlock_unlock(&sc->lock); + + if (ret > 0) { + if (eventlist && nevents) { + if (unlikely(nevents > maxevents)) { + ERR_LOG("return events:%d, maxevents:%d, set return events to maxevents, may be some error occur\n", + nevents, maxevents); + nevents = maxevents; + } + rte_memcpy(eventlist, sh_eventlist, + sizeof(struct kevent) * ret); + + for (i = 0; i < nevents; i++) { + kev = &eventlist[i]; + kev->ident = convert_fstack_fd(kev->ident); + } + } + } + + /* + * Don't free, to improve performance. + * Will cause memory leak while APP exit , but fstack adapter not exit. + * May set them as gloabl variable and free in thread_destructor. 
+ */ + /*if (sh_changelist) { + share_mem_free(sh_changelist); + sh_changelist = NULL; + } + + if (sh_eventlist) { + share_mem_free(sh_eventlist); + sh_eventlist = NULL; + }*/ + + RETURN_NOFREE(); +} + +static void +thread_destructor(void *sc) +{ +#ifdef FF_THREAD_SOCKET + DEBUG_LOG("pthread self tid:%lu, detach sc:%p\n", pthread_self(), sc); + ff_detach_so_context(sc); + sc = NULL; +#endif + + if (shutdown_args) { + share_mem_free(shutdown_args); + } + if (getsockname_args) { + share_mem_free(getsockname_args); + } + if (getpeername_args) { + share_mem_free(getpeername_args); + } + if (setsockopt_args) { + share_mem_free(setsockopt_args); + } + if (accept_args) { + share_mem_free(accept_args); + } + if (connect_args) { + share_mem_free(connect_args); + } + if (recvfrom_args) { + share_mem_free(recvfrom_args); + } + if (recvmsg_args) { + share_mem_free(recvmsg_args); + } + if (read_args) { + share_mem_free(read_args); + } + if (readv_args) { + share_mem_free(readv_args); + } + if (sendto_args) { + share_mem_free(sendto_args); + } + if (sendmsg_args) { + share_mem_free(sendmsg_args); + } + if (write_args) { + share_mem_free(write_args); + } + if (writev_args) { + share_mem_free(writev_args); + } + if (close_args) { + share_mem_free(close_args); + } + if (ioctl_args) { + share_mem_free(ioctl_args); + } + if (fcntl_args) { + share_mem_free(fcntl_args); + } + if (epoll_ctl_args) { + share_mem_free(epoll_ctl_args); + } + if (epoll_wait_args) { + share_mem_free(epoll_wait_args); + } + if (kevent_args) { + share_mem_free(kevent_args); + } + + if (sh_iov_static) { + iovec_share2local_s(); + iovec_share_free(sh_iov_static, IOV_MAX); + } +} + +void __attribute__((destructor)) +ff_adapter_exit() +{ + pthread_key_delete(key); + +#ifndef FF_THREAD_SOCKET + +#ifdef FF_MULTI_SC + if (current_worker_id == worker_id) { + int i; + for (i = 0; i < worker_id; i++) { + ERR_LOG("pthread self tid:%lu, detach sc:%p\n", pthread_self(), scs[i].sc); + ff_so_zone = ff_so_zones[i]; + 
ff_detach_so_context(scs[i].sc); + } + } else +#endif + { + ERR_LOG("pthread self tid:%lu, detach sc:%p\n", pthread_self(), sc); + ff_detach_so_context(sc); + sc = NULL; + } +#endif +} + +int +ff_adapter_init() +//int __attribute__((constructor)) +//ff_adapter_init(int argc, char * const argv[]) +{ + int ret; + + ERR_LOG("inited:%d, proc_inited:%d\n", inited, proc_inited); + +#ifndef FF_MULTI_SC + if (inited) { + return 0; + } +#endif + + if (proc_inited == 0) { + /* May conflict */ + rte_spinlock_init(&worker_id_lock); + rte_spinlock_lock(&worker_id_lock); + + pthread_key_create(&key, thread_destructor); + DEBUG_LOG("pthread key:%d\n", key); + + //atexit(ff_adapter_exit); + //on_exit(ff_adapter_exit, NULL); + + /* + * get ulimit -n to distinguish fd between kernel and F-Stack + */ + struct rlimit rlmt; + ret = getrlimit(RLIMIT_NOFILE, &rlmt); + if (ret < 0) { + ERR_LOG("getrlimit(RLIMIT_NOFILE) failed, use default ff_kernel_max_fd:%d\n", ff_kernel_max_fd); + return -1; + } else { + ff_kernel_max_fd = (int)rlmt.rlim_cur; + } + ERR_LOG("getrlimit(RLIMIT_NOFILE) successed, sed ff_kernel_max_fd:%d, and rlim_max is %ld\n", + ff_kernel_max_fd, rlmt.rlim_max); + + /* + * Get environment variable FF_INITIAL_LCORE_ID to set initial_lcore_id + * + * If need later, modify to get config from config file, + * it can consider multiplex F-Stack config.ini + */ + char *ff_init_lcore_id = getenv(FF_INITIAL_LCORE_ID_STR); + if (ff_init_lcore_id != NULL) { + initial_lcore_id = (uint64_t)strtoull(ff_init_lcore_id, NULL, 16); + if (initial_lcore_id > ((uint64_t)INITIAL_LCORE_ID_MAX) /*== UINT64_MAX*/) { + initial_lcore_id = INITIAL_LCORE_ID_DEFAULT; + ERR_LOG("get invalid FF_INITIAL_LCORE_ID=%s, to use default value 0x%0lx\n", + ff_init_lcore_id, initial_lcore_id); + } + ERR_LOG("get FF_INITIAL_LCORE_ID=%s, use 0x%0lx\n", + ff_init_lcore_id, initial_lcore_id); + } + else { + ERR_LOG("environment variable FF_INITIAL_LCORE_ID not found, to use default value 0x%0lx\n", + 
initial_lcore_id); + } + + /* + * Get environment variable FF_NB_FSTACK_INSTANCE to set nb_procs. + */ + char *ff_nb_procs = getenv(FF_NB_FSTACK_INSTANCE_STR); + if (ff_nb_procs != NULL) { + nb_procs = (uint32_t)strtoul(ff_nb_procs, NULL, 10); + if (nb_procs == -1 /*UINT32_MAX*/) { + nb_procs = NB_FSTACK_INSTANCE_DEFAULT; + ERR_LOG("get invalid FF_NB_FSTACK_INSTANCE=%s, to use default value %d\n", + ff_nb_procs, nb_procs); + } + ERR_LOG("get FF_NB_FSTACK_INSTANCE=%s, use %d\n", + ff_nb_procs, nb_procs); + } + else { + ERR_LOG("environment variable FF_NB_FSTACK_INSTANCE not found, to use default value %d\n", + nb_procs); + } + + /* + * Get environment variable FF_PROC_ID to set worker_id. + */ + char *ff_worker_id = getenv(FF_PROC_ID_STR); + if (ff_worker_id != NULL) { + worker_id = (uint32_t)strtoul(ff_worker_id, NULL, 10); + if (worker_id == -1 /*UINT32_MAX*/) { + worker_id = WORKER_ID_DEFAULT; + ERR_LOG("get invalid FF_PROC_ID=%s, to use default value %d\n", + ff_worker_id, worker_id); + } + ERR_LOG("get FF_PROC_ID=%s, use %d\n", + ff_worker_id, worker_id); + } + else { + ERR_LOG("environment variable FF_PROC_ID not found, to use default value %d\n", + worker_id); + } + + char buf[RTE_MAX_LCORE] = {0}; + sprintf(buf, "-c%lx", initial_lcore_id/* << worker_id*/); + + char *dpdk_argv[] = { + "ff-adapter", buf, "-n4", + "--proc-type=secondary", + /* RTE_LOG_WARNING */ + "--log-level=5", + }; + + printf("\n"); + DEBUG_LOG("rte_eal_init, argc:%ld/%ld=%ld\n", sizeof(dpdk_argv), sizeof(dpdk_argv[0]), sizeof(dpdk_argv)/sizeof(dpdk_argv[0])); + for (int i=0; i < sizeof(dpdk_argv)/sizeof(dpdk_argv[0]); i++) { + printf("%s ", dpdk_argv[i]); + } + printf("\n"); + ret = rte_eal_init(sizeof(dpdk_argv)/sizeof(dpdk_argv[0]), + dpdk_argv); + DEBUG_LOG("rte_eal_init ret:%d\n", ret); + if (ret < 0) { + ERR_LOG("ff_adapter_init failed with EAL initialization\n"); + return ret; + } + + if (proc_inited == 0) { + proc_inited = 1; + } + } else { + rte_spinlock_lock(&worker_id_lock); + } 
+ + DEBUG_LOG("worker_id:%d, nb_procs:%d\n", worker_id, nb_procs); + sc = ff_attach_so_context(worker_id % nb_procs); + if (sc == NULL) { + ERR_LOG("ff_attach_so_context failed\n"); + return -1; + } + + pthread_setspecific(key, sc); + +#ifdef FF_MULTI_SC + scs[worker_id].worker_id = worker_id; + scs[worker_id].fd = -1; + scs[worker_id].sc = sc; +#endif + worker_id++; + inited = 1; + + rte_spinlock_unlock(&worker_id_lock); + + ERR_LOG("ff_adapter_init success, sc:%p, status:%d, ops:%d\n", sc, sc->status, sc->ops); + + return 0; +} + +void +alarm_event_sem() +{ +#ifndef FF_THREAD_SOCKET + DEBUG_LOG("check whether need to alarm sem sc:%p, status:%d, ops:%d, need_alarm_sem:%d\n", + sc, sc->status, sc->ops, need_alarm_sem); + rte_spinlock_lock(&sc->lock); + if (need_alarm_sem == 1) { + ERR_LOG("alarm sc:%p, status:%d, ops:%d\n", sc, sc->status, sc->ops); + sem_post(&sc->wait_sem); + need_alarm_sem = 0; + } + rte_spinlock_unlock(&sc->lock); + + DEBUG_LOG("finish alarm sem sc:%p, status:%d, ops:%d, need_alarm_sem:%d\n", + sc, sc->status, sc->ops, need_alarm_sem); +#endif +} + diff --git a/adapter/syscall/ff_hook_syscall.h b/adapter/syscall/ff_hook_syscall.h new file mode 100644 index 000000000..2f943b40a --- /dev/null +++ b/adapter/syscall/ff_hook_syscall.h @@ -0,0 +1,12 @@ +#ifndef _FF_HOOK_SYSCALL_H +#define _FF_HOOK_SYSCALL_H + +#undef FF_SYSCALL_DECL +#define FF_SYSCALL_DECL(ret, fn, args) extern ret ff_hook_##fn args +#include + +extern int kqueue(void); +extern int kevent(int kq, const struct kevent *changelist, int nchanges, + struct kevent *eventlist, int nevents, const struct timespec *timeout); + +#endif diff --git a/adapter/syscall/ff_linux_syscall.c b/adapter/syscall/ff_linux_syscall.c new file mode 100644 index 000000000..adfa2e60c --- /dev/null +++ b/adapter/syscall/ff_linux_syscall.c @@ -0,0 +1,256 @@ +#include +#include +#include +#include + +#include "ff_socket_ops.h" +#include "ff_linux_syscall.h" + +#define SYSCALL(symbol, para) { \ + if 
(linux_syscall_inited == 0) { \ + if (linux_syscall_init() != 0) { \ + return -1; \ + } \ + } \ + if (syscalls.pf_##symbol) { \ + return syscalls.pf_##symbol para; \ + } \ + errno = ENOSYS; \ + return -1; \ +} + +struct ff_linux_syscall { + #define FF_SYSCALL_DECL(ret, fn, args) ret (*pf_##fn) args; + #include "ff_declare_syscalls.h" +}; + +static int linux_syscall_inited; + +static struct ff_linux_syscall syscalls = { 0 }; + +static void *linux_lib_handle = NULL; + +pthread_mutex_t syscall_init_mutex = PTHREAD_MUTEX_INITIALIZER; + +static inline int +linux_syscall_load_symbol() +{ + linux_lib_handle = dlopen ("libc.so.6", RTLD_NOW | RTLD_GLOBAL); + if (linux_lib_handle == NULL) { + ERR_LOG("cannot dlopen libc.so.6] err_string=%s", dlerror()); + return -1; + } + + #define FF_SYSCALL_DECL(ret, fn, args) \ + syscalls.pf_##fn = (typeof(syscalls.pf_##fn))dlsym(linux_lib_handle, #fn); + #include + + return 0; +} + +static inline int +linux_syscall_init() +{ + if (linux_syscall_inited) { + return 0; + } + + pthread_mutex_lock(&syscall_init_mutex); + if (linux_syscall_inited) { + pthread_mutex_unlock(&syscall_init_mutex); + return 0; + } + + if (linux_syscall_load_symbol() != 0) { + pthread_mutex_unlock(&syscall_init_mutex); + return -1; + } + + linux_syscall_inited = 1; + + pthread_mutex_unlock(&syscall_init_mutex); + + return 0; +} + +int +ff_linux_socket(int domain, int type, int protocol) +{ + ERR_LOG("ff_linux_socket, domain:%d, type:%d, protocol:%d\n", domain, type, protocol); + SYSCALL(socket, (domain, type, protocol)); +} + +int +ff_linux_bind(int s, const struct sockaddr *addr, + socklen_t addrlen) +{ + ERR_LOG("ff_linux_bind, fd:%d, addr:%p, addrlen:%u\n", s, addr, addrlen); + SYSCALL(bind, (s, addr, addrlen)); +} + +int +ff_linux_listen(int s, int backlog) +{ + ERR_LOG("ff_linux_listen, fd:%d, backlog:%d\n", s, backlog); + SYSCALL(listen, (s, backlog)); +} + +int ff_linux_shutdown(int s, int how) +{ + SYSCALL(shutdown, (s, how)); +} + +int 
ff_linux_getsockname(int s, struct sockaddr *name, + socklen_t *namelen) +{ + SYSCALL(getsockname, (s, name, namelen)); +} + +int ff_linux_getpeername(int s, struct sockaddr *name, + socklen_t *namelen) +{ + SYSCALL(getpeername, (s, name, namelen)); +} + +int ff_linux_getsockopt(int s, int level, int optname, + void *optval, socklen_t *optlen) +{ + SYSCALL(getsockopt, (s, level, optname, optval, optlen)); +} + +int ff_linux_setsockopt(int s, int level, int optname, + const void *optval, socklen_t optlen) +{ + SYSCALL(setsockopt, (s, level, optname, optval, optlen)); +} + +int ff_linux_accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + DEBUG_LOG("ff_linux_accept, fd:%d, addr:%p, len:%p\n", s, addr, addrlen); + SYSCALL(accept, (s, addr, addrlen)); +} + +int ff_linux_accept4(int s, struct sockaddr *addr, + socklen_t *addrlen, int flags) +{ + DEBUG_LOG("ff_linux_accept4, fd:%d, addr:%p, addrlen:%p, flags:%d\n", s, addr, addrlen, flags); + SYSCALL(accept4, (s, addr, addrlen, flags)); +} + +int ff_linux_connect(int s, const struct sockaddr *addr, + socklen_t addrlen) +{ + DEBUG_LOG("ff_linux_connect, fd:%d, addr:%p, addrlen:%u\n", s, addr, addrlen); + SYSCALL(connect, (s, addr, addrlen)); +} + +ssize_t ff_linux_recv(int s, void *buf, size_t len, int flags) +{ + DEBUG_LOG("ff_linux_recv, fd:%d, buf:%p, len:%lu, flags:%d\n", s, buf, len, flags); + SYSCALL(recv, (s, buf, len, flags)); +} + +ssize_t ff_linux_send(int s, const void *buf, size_t len, int flags) +{ + DEBUG_LOG("ff_linux_send, fd:%d, buf:%p, len:%lu, flags:%d\n", s, buf, len, flags); + SYSCALL(send, (s, buf, len, flags)); +} + +ssize_t ff_linux_read(int s, void *buf, size_t len) +{ + DEBUG_LOG("ff_linux_read, fd:%d, buf:%p, len:%lu\n", s, buf, len); + SYSCALL(read, (s, buf, len)); +} + +ssize_t ff_linux_write(int s, const void *buf, size_t len) +{ + DEBUG_LOG("ff_linux_write, fd:%d, buf:%p, len:%lu\n", s, buf, len); + SYSCALL(write, (s, buf, len)); +} + +ssize_t ff_linux_writev(int s, const struct 
iovec *iov, int iovcnt) +{ + DEBUG_LOG("ff_linux_writev, fd:%d, iov:%p, iovcnt:%d\n", s, iov, iovcnt); + SYSCALL(writev, (s, iov, iovcnt)); +} + +ssize_t ff_linux_readv(int s, const struct iovec *iov, int iovcnt) +{ + DEBUG_LOG("ff_linux_readv, fd:%d, iov:%p, iovcnt:%d\n", s, iov, iovcnt); + SYSCALL(readv, (s, iov, iovcnt)); +} + +ssize_t ff_linux_sendto(int s, const void *buf, size_t len, int flags, + const struct sockaddr *to, socklen_t tolen) +{ + DEBUG_LOG("ff_linux_sendto, fd:%d, buf:%p, len:%lu, flags:%d, to:%p, tolen:%d\n", + s, buf, len, flags, to, tolen); + SYSCALL(sendto, (s, buf, len, flags, to, tolen)); +} + +ssize_t ff_linux_recvfrom(int s, void *buf, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen) + +{ + DEBUG_LOG("ff_linux_recvfrom, fd:%d, buf:%p, len:%lu, flags:%d, from:%p, fromlen:%p\n", + s, buf, len, flags, from, fromlen); + SYSCALL(recvfrom, (s, buf, len, flags, from, fromlen)); +} + +ssize_t ff_linux_sendmsg(int s, const struct msghdr *msg, int flags) +{ + DEBUG_LOG("ff_linux_sendmsg, fd:%d, msg:%p, flags:%d\n", + s, msg, flags); + SYSCALL(sendmsg, (s, msg, flags)); +} + +ssize_t ff_linux_recvmsg(int s, struct msghdr *msg, int flags) +{ + DEBUG_LOG("ff_linux_recvmsg, fd:%d, msg:%p, flags:%d\n", s, msg, flags); + SYSCALL(recvmsg, (s, msg, flags)) +} + +int ff_linux_close(int s) +{ + DEBUG_LOG("ff_linux_close, fd:%d\n", s); + SYSCALL(close, (s)); +} + +int ff_linux_ioctl(int s, unsigned long req, unsigned long data) +{ + SYSCALL(ioctl, (s, req, data)); +} + +int ff_linux_fcntl(int s, int cmd, unsigned long data) +{ + SYSCALL(fcntl, (s, cmd, data)); +} + +int ff_linux_epoll_create(int size) +{ + ERR_LOG("ff_linux_epoll_create, fdsize:%d\n", size); + SYSCALL(epoll_create, (size)); +} + +int ff_linux_epoll_ctl(int epfd, int op, int fd, + struct epoll_event *event) +{ + ERR_LOG("ff_linux_epoll_ctl, epfd:%d, op:%d, fd:%d\n", epfd, op, fd); + SYSCALL(epoll_ctl, (epfd, op, fd, event)); +} + +int +ff_linux_epoll_wait(int epfd, struct 
/*
 * Tell whether n is a power of two.
 *
 * Returns 1 when exactly one bit of n is set, 0 otherwise (including n == 0).
 */
static inline int
is_power_of_2(uint64_t n)
{
    if (n == 0) {
        return 0;
    }

    /* n - 1 flips the lowest set bit and everything below it. */
    return (n & (n - 1)) == 0;
}
+ + ff_max_so_context = count; + + return 0; +} + +int +ff_create_so_memzone() +{ + if (ff_so_zone) { + ERR_LOG("Can not create memzone: memzone has inited\n"); + return -1; + } + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + uint16_t i, proc_id; + for (proc_id = 0; proc_id < ff_global_cfg.dpdk.nb_procs; proc_id++) { + struct ff_socket_ops_zone *so_zone_tmp; + const struct rte_memzone *mz; + char zn[64]; + + size_t zone_size = sizeof(struct ff_socket_ops_zone) + + sizeof(struct ff_so_context) * ff_max_so_context; + snprintf(zn, sizeof(zn), SOCKET_OPS_ZONE_NAME, proc_id); + ERR_LOG("To create memzone:%s\n", zn); + + mz = rte_memzone_reserve(zn, zone_size, rte_socket_id(), 0); + if (mz == NULL) { + ERR_LOG("Cannot reserve memory zone:%s\n", zn); + return -1; + } + + memset(mz->addr, 0, zone_size); + so_zone_tmp = mz->addr; + + rte_spinlock_init(&so_zone_tmp->lock); + so_zone_tmp->count = ff_max_so_context; + so_zone_tmp->mask = so_zone_tmp->count - 1; + so_zone_tmp->free = so_zone_tmp->count; + so_zone_tmp->idx = 0; + memset(so_zone_tmp->inuse, 0, SOCKET_OPS_CONTEXT_MAX_NUM); + so_zone_tmp->sc = (struct ff_so_context *)(so_zone_tmp + 1); + + for (i = 0; i < ff_max_so_context; i++) { + struct ff_so_context *sc = &so_zone_tmp->sc[i]; + rte_spinlock_init(&sc->lock); + sc->status = FF_SC_IDLE; + sc->idx = i; + sc->refcount = 0; + //so_zone_tmp->inuse[i] = 0; + + if (sem_init(&sc->wait_sem, 1, 0) == -1) { + ERR_LOG("Initialize semaphore failed:%d\n", errno); + return -1; + } + } + + if (proc_id == 0) { + ff_so_zone = so_zone_tmp; + } + } + }else { + const struct rte_memzone *mz; + char zn[64]; + + snprintf(zn, sizeof(zn), SOCKET_OPS_ZONE_NAME, ff_global_cfg.dpdk.proc_id); + ERR_LOG("To lookup memzone:%s\n", zn); + + mz = rte_memzone_lookup(zn); + if (mz == NULL) { + ERR_LOG("Lookup memory zone:%s failed\n", zn); + return -1; + } + + ff_so_zone = mz->addr; + } + + return 0; +} + +struct ff_so_context * +ff_attach_so_context(int idx) +{ + struct ff_so_context *sc = 
NULL; + uint16_t i; + +#ifdef FF_MULTI_SC + ff_so_zone = ff_so_zones[idx]; +#endif + + DEBUG_LOG("proc_id:%d, ff_so_zone:%p\n", idx, ff_so_zone); + + if (ff_so_zone == NULL) { + const struct rte_memzone *mz; + char zn[64]; + + snprintf(zn, sizeof(zn), SOCKET_OPS_ZONE_NAME, idx); + ERR_LOG("To lookup memzone:%s\n", zn); + + mz = rte_memzone_lookup(zn); + if (mz == NULL) { + ERR_LOG("Lookup memory zone:%s failed\n", zn); + return NULL; + } + + ff_so_zone = mz->addr; + +#ifdef FF_MULTI_SC + ff_so_zones[idx] = ff_so_zone; + ERR_LOG("FF_MULTI_SC f_so_zones[%d]:%p\n", idx, ff_so_zones[idx]); +#endif + } + + rte_spinlock_lock(&ff_so_zone->lock); + + if (ff_so_zone->free == 0) { + ERR_LOG("Attach memzone failed: no free context\n"); + rte_spinlock_unlock(&ff_so_zone->lock); + return NULL; + } + + for (i = 0; i < ff_so_zone->count; i++) { + uint16_t idx = (ff_so_zone->idx + i) & ff_so_zone->mask; + sc = &ff_so_zone->sc[idx]; + if (ff_so_zone->inuse[idx] == 0) { + ff_so_zone->inuse[idx] = 1; + rte_spinlock_init(&sc->lock); + sc->status = FF_SC_IDLE; + sc->refcount = 1; + ff_so_zone->free--; + ff_so_zone->idx = idx + 1; + break; + } + } + + if (unlikely(i == ff_so_zone->count)) { + ERR_LOG("Attach memzone failed: instance %d no free context," + " fetel error of so status, all sc inuse, count:%d, free:%d\n", + idx, ff_so_zone->count, ff_so_zone->free); + sc = NULL; + } + + ERR_LOG("attach sc:%p, so count:%d, free:%d, idx:%d, i:%d\n", + sc, ff_so_zone->count, ff_so_zone->free, ff_so_zone->idx, i); + + rte_spinlock_unlock(&ff_so_zone->lock); + + return sc; +} + +void +ff_detach_so_context(struct ff_so_context *sc) +{ + ERR_LOG("ff_so_zone:%p, sc:%p\n", ff_so_zone, sc); + + if (ff_so_zone == NULL || sc == NULL) { + return; + } + + ERR_LOG("detach sc:%p, ops:%d, status:%d, idx:%d, sc->refcount:%d, inuse:%d, so free:%u, idx:%u\n", + sc, sc->ops, sc->status, sc->idx, sc->refcount, ff_so_zone->inuse[sc->idx], ff_so_zone->free, ff_so_zone->idx); + + 
rte_spinlock_lock(&ff_so_zone->lock); + rte_spinlock_lock(&sc->lock); + + if (sc->refcount > 1) { + ERR_LOG("sc refcount > 1, to sub it, sc:%p, ops:%d, status:%d, idx:%d, sc->refcount:%d, inuse:%d, so free:%u, idx:%u\n", + sc, sc->ops, sc->status, sc->idx, sc->refcount, ff_so_zone->inuse[sc->idx], ff_so_zone->free, ff_so_zone->idx); + sc->refcount--; + } else { + ERR_LOG("sc refcount is 1, to detach it, sc:%p, ops:%d, status:%d, idx:%d, sc->refcount:%d, inuse:%d, so free:%u, idx:%u\n", + sc, sc->ops, sc->status, sc->idx, sc->refcount, ff_so_zone->inuse[sc->idx], ff_so_zone->free, ff_so_zone->idx); + if (ff_so_zone->inuse[sc->idx] == 1) { + ff_so_zone->inuse[sc->idx] = 0; + + ff_so_zone->free++; + ff_so_zone->idx = sc->idx; + } + } + + ERR_LOG("detach sc:%p, ops:%d, status:%d, idx:%d, sc->refcount:%d, inuse:%d, so free:%u, idx:%u\n", + sc, sc->ops, sc->status, sc->idx, sc->refcount, ff_so_zone->inuse[sc->idx], ff_so_zone->free, ff_so_zone->idx); + + rte_spinlock_unlock(&sc->lock); + rte_spinlock_unlock(&ff_so_zone->lock); +} diff --git a/adapter/syscall/ff_socket_ops.c b/adapter/syscall/ff_socket_ops.c new file mode 100644 index 000000000..d02ec510f --- /dev/null +++ b/adapter/syscall/ff_socket_ops.c @@ -0,0 +1,553 @@ +#include +#include + +#include "ff_socket_ops.h" +#include "ff_sysproto.h" +#include "ff_api.h" +#include "ff_epoll.h" +#include "ff_config.h" + +#undef FF_SYSCALL_DECL +#define FF_SYSCALL_DECL(ret, fn, none) \ + static ret ff_sys_##fn(struct ff_##fn##_args *args); +#include +static int ff_sys_kqueue(struct ff_kqueue_args *args); +static int ff_sys_kevent(struct ff_kevent_args *args); + +#define FF_MAX_BOUND_NUM 8 + +/* Where to call sem_post in kevent or epoll_wait */ +static int sem_flag = 0; + +/* + * The event num kevent or epoll_wait returned. + * Use for burst process event in one F-Stack loop to improve performance. 
/*
 * Compare two socket addresses through their sockaddr_in view.
 *
 * Family, port and 32-bit address are compared in that order.
 * Returns 0 when all three match, 1 otherwise.
 */
static int
sockaddr_cmp(struct sockaddr *a, struct sockaddr *b)
{
    struct sockaddr_in *lhs = (struct sockaddr_in *)a;
    struct sockaddr_in *rhs = (struct sockaddr_in *)b;

    int same = lhs->sin_family == rhs->sin_family
        && lhs->sin_port == rhs->sin_port
        && lhs->sin_addr.s_addr == rhs->sin_addr.s_addr;

    return same ? 0 : 1;
}
ff_listen_args *args) +{ + return ff_listen(args->fd, args->backlog); +} + +static int +ff_sys_shutdown(struct ff_shutdown_args *args) +{ + return ff_shutdown(args->fd, args->how); +} + +static int +ff_sys_getsockname(struct ff_getsockname_args *args) +{ + return ff_getsockname(args->fd, args->name, args->namelen); +} + +static int +ff_sys_getpeername(struct ff_getpeername_args *args) +{ + return ff_getpeername(args->fd, args->name, args->namelen); +} + +static int +ff_sys_getsockopt(struct ff_getsockopt_args *args) +{ + return ff_getsockopt(args->fd, args->level, args->name, + args->optval, args->optlen); +} + +static int +ff_sys_setsockopt(struct ff_setsockopt_args *args) +{ + return ff_setsockopt(args->fd, args->level, args->name, + args->optval, args->optlen); +} + +static int +ff_sys_accept(struct ff_accept_args *args) +{ + return ff_accept(args->fd, args->addr, args->addrlen); +} + +static int +ff_sys_accept4(struct ff_accept4_args *args) +{ + errno = ENOSYS; + return -1; +} + +static int +ff_sys_connect(struct ff_connect_args *args) +{ + return ff_connect(args->fd, args->addr, args->addrlen); +} + +static ssize_t +ff_sys_recv(struct ff_recv_args *args) +{ + return ff_recv(args->fd, args->buf, args->len, args->flags); +} + +static ssize_t +ff_sys_recvfrom(struct ff_recvfrom_args *args) +{ + return ff_recvfrom(args->fd, args->buf, args->len, args->flags, + args->from, args->fromlen); +} + +static ssize_t +ff_sys_recvmsg(struct ff_recvmsg_args *args) +{ + return ff_recvmsg(args->fd, args->msg, args->flags); +} + +static ssize_t +ff_sys_read(struct ff_read_args *args) +{ + DEBUG_LOG("ff_sys_read, fd:%d, len:%lu\n", args->fd, args->len); + return ff_read(args->fd, args->buf, args->len); +} + +static ssize_t +ff_sys_readv(struct ff_readv_args *args) +{ + return ff_readv(args->fd, args->iov, args->iovcnt); +} + +static ssize_t +ff_sys_send(struct ff_send_args *args) +{ + return ff_send(args->fd, args->buf, args->len, args->flags); +} + +static ssize_t 
+ff_sys_sendto(struct ff_sendto_args *args) +{ + return ff_sendto(args->fd, args->buf, args->len, args->flags, + args->to, args->tolen); +} + +static ssize_t +ff_sys_sendmsg(struct ff_sendmsg_args *args) +{ + return ff_sendmsg(args->fd, args->msg, args->flags); +} + +static ssize_t +ff_sys_write(struct ff_write_args *args) +{ + DEBUG_LOG("ff_sys_write, fd:%d, len:%lu\n", args->fd, args->len); + return ff_write(args->fd, args->buf, args->len); +} + +static ssize_t +ff_sys_writev(struct ff_writev_args *args) +{ + return ff_writev(args->fd, args->iov, args->iovcnt); +} + +static int +ff_sys_close(struct ff_close_args *args) +{ + DEBUG_LOG("ff_sys_close, fd:%d\n", args->fd); + sockaddr_unbind(args->fd); + return ff_close(args->fd); +} + +static int +ff_sys_ioctl(struct ff_ioctl_args *args) +{ + return ff_ioctl(args->fd, args->com, args->data); +} + +static int +ff_sys_fcntl(struct ff_fcntl_args *args) +{ + return ff_fcntl(args->fd, args->cmd, args->data); +} + +static int +ff_sys_epoll_create(struct ff_epoll_create_args *args) +{ + DEBUG_LOG("to run ff_epoll_create, size:%d\n", args->size); + return ff_epoll_create(args->size); +} + +static int +ff_sys_epoll_ctl(struct ff_epoll_ctl_args *args) +{ + DEBUG_LOG("to run ff_epoll_ctl, epfd:%d, op:%d, fd:%d\n", + args->epfd, args->op, args->fd); + return ff_epoll_ctl(args->epfd, args->op, args->fd, + args->event); +} + +static int +ff_sys_epoll_wait(struct ff_epoll_wait_args *args) +{ + int ret; + + DEBUG_LOG("to run ff_epoll_wait, epfd:%d, maxevents:%d, timeout:%d\n", + args->epfd, args->maxevents, args->timeout); + ret = ff_epoll_wait(args->epfd, args->events, + args->maxevents, args->timeout); + + /* + * If timeout is 0, and no event triggered, + * no post sem, and next loop will continue to call ff_sys_epoll_wait, + * until some event triggered + */ + if (args->timeout == 0 && ret == 0 && args->maxevents != 0) { + sem_flag = 0; + } else { + sem_flag = 1; + } + + return ret; +} + +static int +ff_sys_kqueue(struct 
ff_kqueue_args *args) +{ + return ff_kqueue(); +} + +static int +ff_sys_kevent(struct ff_kevent_args *args) +{ + int ret; + + ret = ff_kevent(args->kq, args->changelist, args->nchanges, + args->eventlist, args->nevents, args->timeout); + + if (args->nchanges) { + args->nchanges = 0; + } + + /* + * If timeout is NULL, and no event triggered, + * no post sem, and next loop will continue to call ff_sys_kevent, + * until some event triggered + */ + if (args->timeout == NULL && ret == 0 && args->nevents != 0) { + sem_flag = 0; + } else { + sem_flag = 1; + } + + return ret; +} + +static pid_t +ff_sys_fork(struct ff_fork_args *args) +{ + errno = ENOSYS; + return -1; +} + +static int +ff_so_handler(int ops, void *args) +{ + DEBUG_LOG("ff_so_handler ops:%d, epoll create ops:%d\n", ops, FF_SO_EPOLL_CREATE); + switch(ops) { + case FF_SO_SOCKET: + return ff_sys_socket((struct ff_socket_args *)args); + case FF_SO_BIND: + return ff_sys_bind((struct ff_bind_args *)args); + case FF_SO_LISTEN: + return ff_sys_listen((struct ff_listen_args *)args); + case FF_SO_CONNECT: + return ff_sys_connect((struct ff_connect_args *)args); + case FF_SO_SHUTDOWN: + return ff_sys_shutdown((struct ff_shutdown_args *)args); + case FF_SO_GETSOCKNAME: + return ff_sys_getsockname((struct ff_getsockname_args *)args); + case FF_SO_GETPEERNAME: + return ff_sys_getpeername((struct ff_getpeername_args *)args); + case FF_SO_GETSOCKOPT: + return ff_sys_getsockopt((struct ff_getsockopt_args *)args); + case FF_SO_SETSOCKOPT: + return ff_sys_setsockopt((struct ff_setsockopt_args *)args); + case FF_SO_ACCEPT: + return ff_sys_accept((struct ff_accept_args *)args); + case FF_SO_ACCEPT4: + return ff_sys_accept4((struct ff_accept4_args *)args); + case FF_SO_RECV: + return ff_sys_recv((struct ff_recv_args *)args); + case FF_SO_RECVFROM: + return ff_sys_recvfrom((struct ff_recvfrom_args *)args); + case FF_SO_RECVMSG: + return ff_sys_recvmsg((struct ff_recvmsg_args *)args); + case FF_SO_READ: + return ff_sys_read((struct 
ff_read_args *)args); + case FF_SO_READV: + return ff_sys_readv((struct ff_readv_args *)args); + case FF_SO_SEND: + return ff_sys_send((struct ff_send_args *)args); + case FF_SO_SENDTO: + return ff_sys_sendto((struct ff_sendto_args *)args); + case FF_SO_SENDMSG: + return ff_sys_sendmsg((struct ff_sendmsg_args *)args); + case FF_SO_WRITE: + return ff_sys_write((struct ff_write_args *)args); + case FF_SO_WRITEV: + return ff_sys_writev((struct ff_writev_args *)args); + case FF_SO_CLOSE: + return ff_sys_close((struct ff_close_args *)args); + case FF_SO_IOCTL: + return ff_sys_ioctl((struct ff_ioctl_args *)args); + case FF_SO_FCNTL: + return ff_sys_fcntl((struct ff_fcntl_args *)args); + case FF_SO_EPOLL_CREATE: + return ff_sys_epoll_create((struct ff_epoll_create_args *)args); + case FF_SO_EPOLL_CTL: + return ff_sys_epoll_ctl((struct ff_epoll_ctl_args *)args); + case FF_SO_EPOLL_WAIT: + return ff_sys_epoll_wait((struct ff_epoll_wait_args *)args); + case FF_SO_KQUEUE: + return ff_sys_kqueue((struct ff_kqueue_args *)args); + case FF_SO_KEVENT: + return ff_sys_kevent((struct ff_kevent_args *)args); + case FF_SO_FORK: + return ff_sys_fork((struct ff_fork_args *)args); + default: + break; + } + + errno = EINVAL; + DEBUG_LOG("ff_so_handler error:%d, ops:%d\n", errno, ops); + return (-1); +} +/*
+ * Serve the request stored in `sc`, if there is one.
+ *
+ * Returns without doing anything when sc->lock cannot be taken right now
+ * (trylock) or when, once locked, sc->status is not FF_SC_REQ.
+ * Otherwise runs ff_so_handler(sc->ops, sc->args) and stores its return
+ * value in sc->result and the resulting errno in sc->error.
+ *
+ * For FF_SO_EPOLL_WAIT / FF_SO_KEVENT the reply is published (status set
+ * to FF_SC_REP and sc->wait_sem posted) only when sem_flag == 1; in the
+ * other case the status is not modified by this function. Every other
+ * operation always ends with status FF_SC_REP.
+ */
+static inline void
+ff_handle_socket_ops(struct ff_so_context *sc)
+{
+    if (!rte_spinlock_trylock(&sc->lock)) {
+        return;
+    }
+
+    /* Re-check under the lock: the caller only did an unlocked read. */
+    if (sc->status != FF_SC_REQ) {
+        rte_spinlock_unlock(&sc->lock);
+        return;
+    }
+
+    DEBUG_LOG("ff_handle_socket_ops sc:%p, status:%d, ops:%d\n", sc, sc->status, sc->ops);
+
+    /* Clear errno first so sc->error only reflects this operation. */
+    errno = 0;
+    sc->result = ff_so_handler(sc->ops, sc->args);
+    sc->error = errno;
+    DEBUG_LOG("ff_handle_socket_ops error:%d, ops:%d, result:%d\n", errno, sc->ops, sc->result);
+
+    if (sc->ops == FF_SO_EPOLL_WAIT || sc->ops == FF_SO_KEVENT) {
+        /*DEBUG_LOG("ff_event_loop_nb:%d, ff_next_event_flag:%d\n",
+            ff_event_loop_nb, ff_next_event_flag);
+        if (ff_event_loop_nb > 0) {
+            ff_next_event_flag = 1;
+        } else {
+            ff_next_event_flag = 0;
+        }
+
+        if (sc->result > 0) {
+            ff_event_loop_nb = (sc->result * EVENT_LOOP_TIMES);
+        } else {
+            ff_event_loop_nb = 0;
+        }*/
+
+        /* sem_flag is defined outside this view; NOTE(review): confirm who sets it. */
+        if (sem_flag == 1) {
+            sc->status = FF_SC_REP;
+            sem_post(&sc->wait_sem);
+        } else {
+            // do nothing with this sc (status is left as it was)
+        }
+    } else {
+        sc->status = FF_SC_REP;
+    }
+
+    rte_spinlock_unlock(&sc->lock);
+}
+/*
+ * Scan the contexts of ff_so_zone and serve every in-use context whose
+ * status is FF_SC_REQ, holding ff_so_zone->lock for the whole call.
+ *
+ * The scan is repeated until the TSC cycles elapsed since entry reach
+ * drain_tsc. drain_tsc is computed once, from
+ * ff_global_cfg.dpdk.pkt_tx_delay multiplied by the rounded-up number of
+ * TSC cycles per microsecond; while pkt_tx_delay is 0 it stays 0, so the
+ * loop ends after a single pass.
+ */
+void
+ff_handle_each_context()
+{
+    uint16_t i, nb_handled, tmp;
+    static uint64_t loop_count = 0;
+    static uint64_t cur_tsc, diff_tsc, drain_tsc = 0;
+
+    if (unlikely(drain_tsc == 0 && ff_global_cfg.dpdk.pkt_tx_delay)) {
+        drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * ff_global_cfg.dpdk.pkt_tx_delay;
+        /* NOTE(review): the label says handle_sc_delay but the value printed is pkt_tx_delay. */
+        ERR_LOG("ff_global_cfg.dpdk.handle_sc_delay%d, drain_tsc:%lu\n",
+            ff_global_cfg.dpdk.pkt_tx_delay, drain_tsc);
+    }
+
+    ff_event_loop_nb = 0;
+
+    cur_tsc = rte_rdtsc();
+
+    rte_spinlock_lock(&ff_so_zone->lock);
+
+    assert(ff_so_zone->count >= ff_so_zone->free);
+    /* Number of contexts currently in use (total minus free). */
+    tmp = nb_handled = ff_so_zone->count - ff_so_zone->free;
+
+    while(1) {
+        nb_handled = tmp;
+        if (nb_handled) {
+            for (i = 0; i < ff_so_zone->count; i++) {
+                struct ff_so_context *sc = &ff_so_zone->sc[i];
+
+                /* 1048575 == 2^20 - 1: emit this trace once every 2^20 calls. */
+                if ((loop_count & 1048575) == 0) {
+                    DEBUG_LOG("so:%p, so->count:%d,%p, sc:%p, sc->inuse:%d,%p, i:%d, nb:%d, all_nb:%d\n",
+                        ff_so_zone, ff_so_zone->count, &ff_so_zone->count,
+                        sc, ff_so_zone->inuse[i], &ff_so_zone->inuse[i], i, nb_handled, tmp);
+                }
+
+                if (ff_so_zone->inuse[i] == 0) {
+                    continue;
+                }
+
+                /* Dirty read first, and then try to lock sc and real read. */
+                if (sc->status == FF_SC_REQ) {
+                    ff_handle_socket_ops(sc);
+                }
+
+                /* Stop the pass early once every in-use context was visited. */
+                nb_handled--;
+                if (!nb_handled) {
+                    break;
+                }
+            }
+        }
+
+        /*if (--ff_event_loop_nb <= 0 || ff_next_event_flag == 1) {
+            break;
+        }*/
+        diff_tsc = rte_rdtsc() - cur_tsc;
+        DEBUG_LOG("cur_tsc:%lu, diff_tsc:%lu, drain_tsc:%lu\n", cur_tsc, diff_tsc, drain_tsc);
+        if (diff_tsc >= drain_tsc) {
+            break;
+        }
+
+        rte_pause();
+    }
+
+    rte_spinlock_unlock(&ff_so_zone->lock);
+
+    loop_count++;
+
+    DEBUG_LOG("loop_count:%lu, nb:%d, all_nb:%d\n",
+        loop_count, nb_handled, tmp/*, ff_event_loop_nb, ff_next_event_flag*/);
+    //, ff_event_loop_nb:%d, ff_next_event_flag:%d
+}
+
diff --git a/adapter/syscall/ff_socket_ops.h b/adapter/syscall/ff_socket_ops.h new file mode 100644 index 000000000..10b20014f --- /dev/null +++ b/adapter/syscall/ff_socket_ops.h @@ -0,0 +1,129 @@ +#ifndef _FF_SOCKET_OPS_H_ +#define _FF_SOCKET_OPS_H_ + +#include +#include + +#include +#include + +/* + * Per thread separate initialization dpdk lib and attach sc when needed, + * such as listen same port in different threads, and socket can use in own thread. + * + * Otherwise, one socket can use in all threads. + */ +#ifdef FF_THREAD_SOCKET +#define __FF_THREAD __thread +#else +#define __FF_THREAD +#endif + +#define ERR_LOG(fmt, ...) do { \ + printf("file:%s, line:%u, fun:%s, pid:%d, "fmt, \ + __FILE__, __LINE__, __func__, getpid(), ##__VA_ARGS__); \ + } while (0) + +#ifdef NDEBUG +#define DEBUG_LOG(...)
+#else
+#define DEBUG_LOG ERR_LOG
+#endif
+
+/* Must be power of 2 */
+#define SOCKET_OPS_CONTEXT_MAX_NUM (1 << 5)
+/*
+ * Operation codes stored in ff_so_context.ops; ff_so_handler() switches on
+ * them and calls the matching ff_sys_*() function.
+ */
+enum FF_SOCKET_OPS {
+    FF_SO_SOCKET,
+    FF_SO_LISTEN,
+    FF_SO_BIND,
+    FF_SO_CONNECT,
+    FF_SO_SHUTDOWN,
+    FF_SO_GETSOCKNAME,
+    FF_SO_GETPEERNAME,
+    FF_SO_GETSOCKOPT,
+    FF_SO_SETSOCKOPT,
+    FF_SO_ACCEPT,
+    FF_SO_ACCEPT4, // 10
+    FF_SO_RECV,
+    FF_SO_RECVFROM,
+    FF_SO_RECVMSG,
+    FF_SO_READ,
+    FF_SO_READV,
+    FF_SO_SEND,
+    FF_SO_SENDTO,
+    FF_SO_SENDMSG,
+    FF_SO_WRITE,
+    FF_SO_WRITEV, // 20
+    FF_SO_CLOSE,
+    FF_SO_IOCTL,
+    FF_SO_FCNTL,
+    FF_SO_EPOLL_CREATE,
+    FF_SO_EPOLL_CTL,
+    FF_SO_EPOLL_WAIT,
+    FF_SO_KQUEUE,
+    FF_SO_KEVENT,
+    FF_SO_FORK, // 29
+};
+/*
+ * State of one ff_so_context.
+ * FF_SC_REQ: the only state ff_handle_socket_ops() acts on.
+ * FF_SC_REP: set by ff_handle_socket_ops() after running the operation.
+ * FF_SC_IDLE: presumably "no request pending" - TODO confirm with the client side.
+ */
+enum FF_SO_CONTEXT_STATUS {
+    FF_SC_IDLE,
+    FF_SC_REQ,
+    FF_SC_REP,
+};
+/* Zone header describing the array of contexts pointed to by `sc`. */
+struct ff_socket_ops_zone {
+    rte_spinlock_t lock;
+
+    /* total number of so_context, must be power of 2 */
+    uint8_t count;
+    uint8_t mask;
+
+    /* free number of so_context */
+    uint8_t free;
+
+    uint8_t idx;
+
+    /* 1 if used, else 0, most access */
+    uint8_t inuse[SOCKET_OPS_CONTEXT_MAX_NUM];
+    struct ff_so_context *sc;
+
+    uint8_t padding[16];
+} __attribute__((aligned(RTE_CACHE_LINE_SIZE)));
+/* One request/reply slot: operation, its argument block, and the outcome. */
+struct ff_so_context {
+    /* CACHE LINE 0 */
+    enum FF_SOCKET_OPS ops;
+    enum FF_SO_CONTEXT_STATUS status;
+    void *args; /* cast by ff_so_handler() to the struct ff_*_args matching ops */
+
+    rte_spinlock_t lock;
+
+    /* errno if failed */
+    int error;
+    /* result of ops processing */
+    int result;
+    int idx;
+
+    sem_t wait_sem; /* 32 bytes */
+
+    /* CACHE LINE 1 */
+    /* listen fd, refcount.. */
+    int refcount;
+} __attribute__((aligned(RTE_CACHE_LINE_SIZE)));
+
+extern __FF_THREAD struct ff_socket_ops_zone *ff_so_zone;
+#ifdef FF_MULTI_SC
+extern struct ff_socket_ops_zone *ff_so_zones[SOCKET_OPS_CONTEXT_MAX_NUM];
+#endif
+
+/* For primary process */
+int ff_set_max_so_context(uint16_t count);
+int ff_create_so_memzone(); /* NOTE(review): empty parentheses are not a prototype before C23; prefer (void) */
+void ff_handle_each_context(); /* NOTE(review): same, prefer (void) */
+
+/* For secondary process */
+struct ff_so_context *ff_attach_so_context(int proc_id);
+void ff_detach_so_context(struct ff_so_context *context);
+
+#endif
diff --git a/adapter/syscall/ff_sysproto.h b/adapter/syscall/ff_sysproto.h
new file mode 100644
index 000000000..57e63eca8
--- /dev/null
+++ b/adapter/syscall/ff_sysproto.h
@@ -0,0 +1,197 @@
+#ifndef _FF_SYSPROTO_H_
+#define _FF_SYSPROTO_H_
+
+#include
+#include
+#include
+#include
+/*
+ * Argument blocks, one struct per operation. ff_so_handler() casts
+ * ff_so_context.args to the struct that matches the operation code.
+ */
+struct ff_socket_args {
+    int domain;
+    int type;
+    int protocol;
+};
+
+struct ff_bind_args {
+    int fd;
+    void *addr;
+    int addrlen;
+};
+
+struct ff_listen_args {
+    int fd;
+    int backlog;
+};
+
+struct ff_shutdown_args {
+    int fd;
+    int how;
+};
+
+struct ff_getpeername_args {
+    int fd;
+    void *name;
+    socklen_t *namelen;
+};
+
+struct ff_getsockname_args {
+    int fd;
+    void *name;
+    socklen_t *namelen;
+};
+
+struct ff_setsockopt_args {
+    int fd;
+    int level;
+    int name;
+    void *optval;
+    socklen_t optlen;
+};
+
+struct ff_getsockopt_args {
+    int fd;
+    int level;
+    int name;
+    void *optval;
+    socklen_t *optlen;
+};
+
+struct ff_accept_args {
+    int fd;
+    void *addr;
+    socklen_t *addrlen;
+};
+
+struct ff_accept4_args {
+    int fd;
+    void *addr;
+    socklen_t *addrlen;
+    int flags;
+};
+
+struct ff_connect_args {
+    int fd;
+    void *addr;
+    int addrlen;
+};
+
+struct ff_recv_args {
+    int fd;
+    void *buf;
+    size_t len;
+    int flags;
+};
+
+struct ff_recvfrom_args {
+    int fd;
+    void *buf;
+    size_t len;
+    int flags;
+    void *from;
+    socklen_t *fromlen;
+};
+
+struct ff_recvmsg_args {
+    int fd;
+    struct msghdr *msg;
+    int flags;
+};
+
+struct ff_read_args {
+    int fd;
+    void *buf;
+    size_t len;
+};
+
+struct ff_readv_args {
+    int fd;
+    struct iovec *iov;
+    int iovcnt;
+};
+
+struct ff_send_args {
+    int fd;
+    void *buf;
+    size_t len;
+    int flags;
+};
+
+struct ff_sendto_args {
+    int fd;
+    void *buf;
+    size_t len;
+    int flags;
+    void *to;
+    int tolen;
+};
+
+struct ff_sendmsg_args {
+    int fd;
+    struct msghdr * msg;
+    int flags;
+};
+
+struct ff_write_args {
+    int fd;
+    void *buf;
+    size_t len;
+};
+
+struct ff_writev_args {
+    int fd;
+    struct iovec *iov;
+    int iovcnt;
+};
+
+struct ff_close_args {
+    int fd;
+};
+
+struct ff_ioctl_args {
+    int fd;
+    unsigned long com;
+    void *data;
+};
+
+struct ff_fcntl_args {
+    int fd;
+    int cmd;
+    long data;
+};
+
+struct ff_epoll_create_args {
+    int size;
+};
+
+struct ff_epoll_ctl_args {
+    int epfd;
+    int op;
+    int fd;
+    struct epoll_event *event;
+};
+
+struct ff_epoll_wait_args {
+    int epfd;
+    struct epoll_event *events;
+    int maxevents;
+    int timeout;
+};
+/* No members: an empty struct is a GNU C extension, not ISO C. */
+struct ff_kqueue_args {
+
+};
+
+struct ff_kevent_args {
+    int kq;
+    struct kevent *changelist;
+    int nchanges;
+    struct kevent *eventlist;
+    int nevents;
+    struct timespec *timeout;
+};
+/* No members, see the note on ff_kqueue_args. */
+struct ff_fork_args {
+
+};
+
+#endif
diff --git a/adapter/syscall/fstack.c b/adapter/syscall/fstack.c
new file mode 100644
index 000000000..9d1031a5a
--- /dev/null
+++ b/adapter/syscall/fstack.c
@@ -0,0 +1,36 @@
+#include "ff_api.h"
+#include "ff_socket_ops.h"
+
+#define WORKERS 32
+/* Callback handed to ff_run(): serves the pending contexts once; `arg` is unused. */
+int
+loop(void *arg)
+{
+    ff_handle_each_context();
+
+    return 0;
+}
+/*
+ * Initialise F-Stack, size and create the socket-ops zone, then enter
+ * ff_run() with loop() as the callback. Returns -1 when either zone
+ * setup call reports a negative result.
+ */
+int
+main(int argc, char * argv[])
+{
+    int ret;
+
+    ff_init(argc, argv); /* NOTE(review): return value is not checked */
+
+    ret = ff_set_max_so_context(WORKERS);
+    if (ret < 0) {
+        return -1;
+    }
+
+    ret = ff_create_so_memzone();
+    if (ret < 0) {
+        return -1;
+    }
+
+    ERR_LOG("ff_create_so_memzone successful\n");
+
+    ff_run(loop, NULL);
+
+    return 0;
+}
diff --git a/adapter/syscall/main_stack.c b/adapter/syscall/main_stack.c new file mode 100644 index 000000000..86da3b898 --- /dev/null +++ b/adapter/syscall/main_stack.c @@ -0,0 +1,216 @@ +#include +#include
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#include "ff_config.h" +//#include "ff_api.h" +#include "ff_event.h" +#include "ff_adapter.h" +#include "ff_hook_syscall.h" + +pthread_t hworker; + +#define MAX_EVENTS 512 + +/* kevent set */ +struct kevent kevSet; +/* events */ +struct kevent events[MAX_EVENTS]; +/* kq */ +int kq; +int sockfd; + +struct timespec timeout = {0, 100000}; + +static int exit_flag = 0; + +char html[] = +"HTTP/1.1 200 OK\r\n" +"Server: F-Stack\r\n" +"Date: Sat, 25 Feb 2017 09:26:33 GMT\r\n" +"Content-Type: text/html\r\n" +"Content-Length: 438\r\n" +"Last-Modified: Tue, 21 Feb 2017 09:44:03 GMT\r\n" +"Connection: keep-alive\r\n" +"Accept-Ranges: bytes\r\n" +"\r\n" +"\r\n" +"\r\n" +"\r\n" +"Welcome to F-Stack!\r\n" +"\r\n" +"\r\n" +"\r\n" +"

Welcome to F-Stack!

\r\n" +"\r\n" +"

For online documentation and support please refer to\r\n" +"F-Stack.org.
\r\n" +"\r\n" +"

Thank you for using F-Stack.

\r\n" +"\r\n" +"";
+
+/*
+ * SIGINT/SIGTERM handler: raises exit_flag so loop() stops, then calls
+ * alarm_event_sem() (see the note in loop() about NULL timeouts).
+ */
+void sig_term(int sig)
+{
+    printf("we caught signal %d, to exit helloworld\n", sig);
+    exit_flag = 1;
+    alarm_event_sem();
+    return;
+}
+
+/*
+ * Worker thread body: polls the global kqueue `kq` until exit_flag is set.
+ * New connections on `sockfd` are accepted and registered for EVFILT_READ;
+ * a readable client gets the canned `html` response; EV_EOF closes the fd.
+ * `arg` is unused. Always returns NULL.
+ */
+void *loop(void *arg)
+{
+    /* Wait for events to happen */
+    while (!exit_flag) {
+        /*
+         * If timeout is NULL, must call alarm_event_sem();
+         */
+        int nevents = kevent(kq, NULL, 0, events, MAX_EVENTS, &timeout);
+        int i;
+
+        if (nevents <= 0) {
+            if (nevents) {
+                printf("ff_kevent failed:%d, %s\n", errno,
+                    strerror(errno));
+                return NULL;
+            }
+            //usleep(100);
+            //sleep(1);
+        }
+        //printf("get nevents:%d\n", nevents);
+
+        for (i = 0; i < nevents; ++i) {
+            struct kevent event = events[i];
+            int clientfd = (int)event.ident;
+
+            /* Handle disconnect */
+            if (event.flags & EV_EOF) {
+                /* Simply close socket */
+                close(clientfd);
+            } else if (clientfd == sockfd) {
+                /* event.data is used as the number of accepts to attempt. */
+                int available = (int)event.data;
+                do {
+                    int nclientfd = accept(clientfd, NULL, NULL);
+                    if (nclientfd < 0) {
+                        printf("ff_accept failed:%d, %s\n", errno,
+                            strerror(errno));
+                        break;
+                    }
+
+                    /* Add to event list */
+                    EV_SET(&kevSet, nclientfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+
+                    if(kevent(kq, &kevSet, 1, NULL, 0, NULL) < 0) {
+                        printf("ff_kevent error:%d, %s\n", errno,
+                            strerror(errno));
+                        close(nclientfd);
+                        break;
+                    }
+
+                    available--;
+                } while (available);
+            } else if (event.filter == EVFILT_READ) {
+                char buf[256];
+                ssize_t readlen = read(clientfd, buf, sizeof(buf));
+                ssize_t writelen = write(clientfd, html, sizeof(html) - 1);
+                if (writelen < 0){
+                    /* ssize_t is signed: %zd, so a failed call prints as -1. */
+                    printf("ff_write failed, readlen:%zd, writelen:%zd, :%d, %s\n",
+                        readlen, writelen, errno, strerror(errno));
+                    close(clientfd);
+                }
+            } else {
+                printf("unknown event: %8.8X\n", event.flags);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Creates a non-blocking listening socket on port 80, registers it with a
+ * new kqueue, runs loop() in one worker thread and waits for it.
+ * Returns 0 on a normal exit and -1 when any setup step fails.
+ */
+int main(int argc, char * argv[])
+{
+    signal(SIGINT, sig_term);
+    signal(SIGTERM, sig_term);
+
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    printf("sockfd:%d\n", sockfd);
+    if (sockfd < 0) {
+        printf("ff_socket failed, sockfd:%d, errno:%d, %s\n", sockfd, errno, strerror(errno));
+        return -1;
+    }
+
+    /* Set non blocking */
+    int on = 1;
+    ioctl(sockfd, FIONBIO, &on);
+
+    struct sockaddr_in my_addr;
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    int ret = bind(sockfd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("ff_bind failed, sockfd:%d, errno:%d, %s\n", sockfd, errno, strerror(errno));
+        close(sockfd);
+        return -1;
+    }
+
+    ret = listen(sockfd, MAX_EVENTS);
+    if (ret < 0) {
+        printf("ff_listen failed, sockfd:%d, errno:%d, %s\n", sockfd, errno, strerror(errno));
+        close(sockfd);
+        return -1;
+    }
+
+    kq = kqueue();
+    printf("kq:%d\n", kq);
+    if (kq < 0) {
+        printf("ff_kqueue failed, errno:%d, %s\n", errno, strerror(errno));
+        close(sockfd);
+        return -1;
+    }
+
+    EV_SET(&kevSet, sockfd, EVFILT_READ, EV_ADD, 0, MAX_EVENTS, NULL);
+    /* Update kqueue */
+    ret = kevent(kq, &kevSet, 1, NULL, 0, &timeout);
+    if (ret < 0) {
+        printf("kevent failed\n");
+        close(kq);
+        close(sockfd);
+        return -1;
+    }
+
+    /*
+     * pthread_create() returns 0 or a positive error number and does not
+     * set errno, so test for non-zero and report the returned code.
+     */
+    ret = pthread_create(&hworker, NULL, loop, NULL);
+    if (ret != 0) {
+        printf("create loop thread failed., errno:%d/%s\n",
+            ret, strerror(ret));
+        close(kq);
+        close(sockfd);
+        return -1;
+    }
+
+    pthread_join(hworker, NULL);
+
+    close(kq);
+    close(sockfd);
+
+    return 0;
+}
diff --git a/adapter/syscall/main_stack_epoll.c b/adapter/syscall/main_stack_epoll.c new file mode 100644 index 000000000..a1ae19557 --- /dev/null +++ b/adapter/syscall/main_stack_epoll.c @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_WORKERS 128 +pthread_t hworker[MAX_WORKERS]; + +#define MAX_EVENTS 512 +struct epoll_event ev; +struct epoll_event events[MAX_EVENTS]; +int epfd; +int sockfd; + +static int exit_flag = 0; + +char html[] = +"HTTP/1.1 200 OK\r\n" +"Server: F-Stack\r\n" +"Date: Sat, 25 Feb 2017
09:26:33 GMT\r\n" +"Content-Type: text/html\r\n" +"Content-Length: 438\r\n" +"Last-Modified: Tue, 21 Feb 2017 09:44:03 GMT\r\n" +"Connection: keep-alive\r\n" +"Accept-Ranges: bytes\r\n" +"\r\n" +"\r\n" +"\r\n" +"\r\n" +"Welcome to F-Stack!\r\n" +"\r\n" +"\r\n" +"\r\n" +"

Welcome to F-Stack!

\r\n" +"\r\n" +"

For online documentation and support please refer to\r\n" +"F-Stack.org.
\r\n" +"\r\n" +"

Thank you for using F-Stack.

\r\n" +"\r\n" +"";
+
+/* SIGINT/SIGTERM handler: raises exit_flag so loop() leaves its event loop. */
+void sig_term(int sig)
+{
+    printf("we caught signal %d, to exit helloworld\n", sig);
+    exit_flag = 1;
+    //alarm_event_sem();
+    return;
+}
+
+/*
+ * Worker thread body: polls the global epoll instance `epfd` until
+ * exit_flag is set or epoll_wait() fails. New connections on `sockfd` are
+ * accepted and registered for EPOLLIN; a readable client gets the canned
+ * `html` response; EPOLLERR, end-of-file or a read error closes the fd.
+ * `arg` is unused. Always returns NULL.
+ */
+void *loop(void *arg)
+{
+    /* Wait for events to happen */
+    while (!exit_flag) {
+        /*
+         * If not call alarm_event_sem, and epoll_wait timeout is 0,
+         * it can't exit normal, so timeout can't set to 0.
+         */
+        int nevents = epoll_wait(epfd, events, MAX_EVENTS, 100);
+        int i;
+
+        if (nevents <= 0) {
+            if (nevents) {
+                printf("hello world epoll wait ret %d, errno:%d, %s\n",
+                    nevents, errno, strerror(errno));
+                break;
+            }
+            //usleep(100);
+            sleep(1);
+        }
+        //printf("get nevents:%d\n", nevents);
+
+        for (i = 0; i < nevents; ++i) {
+            /* Handle new connect */
+            if (events[i].data.fd == sockfd) {
+                /* Drain the accept queue; ends when accept() fails. */
+                while (1) {
+                    int nclientfd = accept(sockfd, NULL, NULL);
+                    printf("accept sockfd:%d, nclientfd:%d, errono:%d/%s\n", sockfd, nclientfd, errno, strerror(errno));
+                    if (nclientfd < 0) {
+                        break;
+                    }
+
+                    /* Add to event list */
+                    ev.data.fd = nclientfd;
+                    ev.events = EPOLLIN;
+                    if (epoll_ctl(epfd, EPOLL_CTL_ADD, nclientfd, &ev) != 0) {
+                        printf("ff_epoll_ctl failed:%d, %s\n",
+                            errno, strerror(errno));
+                        close(nclientfd);
+                        break;
+                    }
+                }
+            } else {
+                if (events[i].events & EPOLLERR ) {
+                    /* Simply close socket */
+                    epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                    close(events[i].data.fd);
+                } else if (events[i].events & EPOLLIN) {
+                    char buf[256];
+                    /*
+                     * read() returns ssize_t; stored in an unsigned size_t
+                     * a -1 error would pass the "> 0" test below.
+                     */
+                    ssize_t readlen = read( events[i].data.fd, buf, sizeof(buf));
+                    if(readlen > 0) {
+                        write( events[i].data.fd, html, sizeof(html) - 1);
+                    } else {
+                        epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                        close(events[i].data.fd);
+                    }
+                } else {
+                    printf("unknown event: %d:%8.8X\n", i, events[i].events);
+                }
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Creates a non-blocking listening socket on port 80, registers it with a
+ * new epoll instance, starts worker_num threads running loop() and waits
+ * for them. Returns 0 on a normal exit and -1 when any setup step fails.
+ */
+int main(int argc, char * argv[])
+{
+    int i, worker_num = 1;
+
+    signal(SIGINT, sig_term);
+    signal(SIGTERM, sig_term);
+
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    printf("sockfd:%d\n", sockfd);
+    if (sockfd < 0) {
+        printf("ff_socket failed\n");
+        return -1;
+    }
+
+    int on = 1;
+    ioctl(sockfd, FIONBIO, &on);
+
+    struct sockaddr_in my_addr;
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    int ret = bind(sockfd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("ff_bind failed\n");
+        close(sockfd);
+        return -1;
+    }
+
+    ret = listen(sockfd, MAX_EVENTS);
+    if (ret < 0) {
+        printf("ff_listen failed\n");
+        close(sockfd);
+        return -1;
+    }
+
+    epfd = epoll_create(512);
+    printf("epfd:%d\n", epfd);
+    if (epfd <= 0) {
+        printf("ff_epoll_create failed, errno:%d, %s\n",
+            errno, strerror(errno));
+        close(sockfd);
+        return -1;
+    }
+
+    ev.data.fd = sockfd;
+    ev.events = EPOLLIN;
+    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd, &ev);
+    if (ret < 0) {
+        /* Name the call that actually failed. */
+        printf("ff_epoll_ctl failed\n");
+        close(epfd);
+        close(sockfd);
+        return -1;
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        /*
+         * pthread_create() returns 0 or a positive error number and does
+         * not set errno. loop() ignores its argument, so pass NULL rather
+         * than the address of the changing loop counter.
+         */
+        ret = pthread_create(&hworker[i], NULL, loop, NULL);
+        if (ret != 0) {
+            printf("create loop thread failed., errno:%d/%s\n",
+                ret, strerror(ret));
+            close(epfd);
+            close(sockfd);
+            return -1;
+        }
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        pthread_join(hworker[i], NULL);
+    }
+
+    close(epfd);
+    close(sockfd);
+
+    return 0;
+}
diff --git a/adapter/syscall/main_stack_epoll_kernel.c b/adapter/syscall/main_stack_epoll_kernel.c new file mode 100644 index 000000000..0557ffe79 --- /dev/null +++ b/adapter/syscall/main_stack_epoll_kernel.c @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SOCK_FSTACK 0x01000000 +#define SOCK_KERNEL 0x02000000 + +#define MAX_WORKERS 128 +pthread_t hworker[MAX_WORKERS]; + +#define MAX_EVENTS 512 +struct epoll_event ev; +struct epoll_event events[MAX_EVENTS]; +int epfd; +int sockfd, sockfd_kernel; + +static int exit_flag = 0; + +char html[] = +"HTTP/1.1
200 OK\r\n" +"Server: F-Stack\r\n" +"Date: Sat, 25 Feb 2017 09:26:33 GMT\r\n" +"Content-Type: text/html\r\n" +"Content-Length: 438\r\n" +"Last-Modified: Tue, 21 Feb 2017 09:44:03 GMT\r\n" +"Connection: keep-alive\r\n" +"Accept-Ranges: bytes\r\n" +"\r\n" +"\r\n" +"\r\n" +"\r\n" +"Welcome to F-Stack!\r\n" +"\r\n" +"\r\n" +"\r\n" +"

Welcome to F-Stack!

\r\n" +"\r\n" +"

For online documentation and support please refer to\r\n" +"F-Stack.org.
\r\n" +"\r\n" +"

Thank you for using F-Stack.

\r\n" +"\r\n" +"";
+
+/* SIGINT/SIGTERM handler: raises exit_flag so loop() leaves its event loop. */
+void sig_term(int sig)
+{
+    printf("we caught signal %d, to exit helloworld\n", sig);
+    exit_flag = 1;
+    //alarm_event_sem();
+    return;
+}
+
+/*
+ * Worker thread body: polls the global epoll instance `epfd` until
+ * exit_flag is set or epoll_wait() fails. Both listening sockets
+ * (`sockfd` and `sockfd_kernel`) are served: new connections are accepted
+ * and registered for EPOLLIN; a readable client gets the canned `html`
+ * response; EPOLLERR, end-of-file or a read error closes the fd.
+ * `arg` is unused. Always returns NULL.
+ */
+void *loop(void *arg)
+{
+    /* Wait for events to happen */
+    while (!exit_flag) {
+        /*
+         * If not call alarm_event_sem, and epoll_wait timeout is 0,
+         * it can't exit normal, so timeout can't set to 0.
+         */
+        int nevents = epoll_wait(epfd, events, MAX_EVENTS, -1);
+        int i;
+
+        if (nevents <= 0) {
+            if (nevents) {
+                printf("hello world epoll wait ret %d, errno:%d, %s\n",
+                    nevents, errno, strerror(errno));
+                break;
+            }
+            usleep(100);
+            //sleep(1);
+        }
+        //printf("get nevents:%d\n", nevents);
+
+        for (i = 0; i < nevents; ++i) {
+            /* Handle new connect */
+            if (events[i].data.fd == sockfd || events[i].data.fd == sockfd_kernel) {
+                while (1) {
+                    int nclientfd = accept(events[i].data.fd, NULL, NULL);
+                    printf("accept sockfd(_kernel):%d, nclientfd:%d, errono:%d/%s\n", events[i].data.fd, nclientfd, errno, strerror(errno));
+                    if (nclientfd < 0) {
+                        break;
+                    }
+
+                    /* Add to event list */
+                    ev.data.fd = nclientfd;
+                    ev.events = EPOLLIN;
+                    if (epoll_ctl(epfd, EPOLL_CTL_ADD, nclientfd, &ev) != 0) {
+                        printf("ff_epoll_ctl failed:%d, %s\n",
+                            errno, strerror(errno));
+                        close(nclientfd);
+                        break;
+                    }
+                    /*
+                     * Only one accept per event on the kernel socket; main()
+                     * does not set FIONBIO on it, so presumably a second
+                     * accept() could block - TODO confirm.
+                     */
+                    if (events[i].data.fd == sockfd_kernel) {
+                        break;
+                    }
+                }
+            } else {
+                if (events[i].events & EPOLLERR ) {
+                    /* Simply close socket */
+                    epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                    close(events[i].data.fd);
+                } else if (events[i].events & EPOLLIN) {
+                    char buf[256];
+                    /*
+                     * read() returns ssize_t; stored in an unsigned size_t
+                     * a -1 error would pass the "> 0" test below.
+                     */
+                    ssize_t readlen = read( events[i].data.fd, buf, sizeof(buf));
+                    if(readlen > 0) {
+                        write( events[i].data.fd, html, sizeof(html) - 1);
+                    } else {
+                        epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                        close(events[i].data.fd);
+                    }
+                } else {
+                    printf("unknown event: %d:%8.8X\n", i, events[i].events);
+                }
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Opens two listening sockets on port 80 (a default one and one created
+ * with SOCK_KERNEL), registers both with one epoll instance, starts
+ * worker_num threads running loop() and waits for them.
+ * Returns 0 on a normal exit and -1 when any setup step fails; every
+ * descriptor opened so far is closed on the failure paths.
+ */
+int main(int argc, char * argv[])
+{
+    int i, worker_num = 1;
+
+    signal(SIGINT, sig_term);
+    signal(SIGTERM, sig_term);
+
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    printf("sockfd:%d\n", sockfd);
+    if (sockfd < 0) {
+        printf("ff_socket failed\n");
+        return -1;
+    }
+
+    int on = 1;
+    ioctl(sockfd, FIONBIO, &on);
+
+    struct sockaddr_in my_addr;
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    int ret = bind(sockfd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("ff_bind failed\n");
+        close(sockfd);
+        return -1;
+    }
+
+    ret = listen(sockfd, MAX_EVENTS);
+    if (ret < 0) {
+        printf("ff_listen failed\n");
+        close(sockfd);
+        return -1;
+    }
+
+    sockfd_kernel = socket(AF_INET, SOCK_STREAM | SOCK_KERNEL, 0);
+    printf("sockfd_kernel:%d\n", sockfd_kernel);
+    if (sockfd_kernel < 0) {
+        printf("ff_socket failed\n");
+        /* The first socket is already open: do not leak it. */
+        close(sockfd);
+        return -1;
+    }
+
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    ret = bind(sockfd_kernel, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("ff_bind failed\n");
+        close(sockfd);
+        close(sockfd_kernel);
+        return -1;
+    }
+
+    ret = listen(sockfd_kernel, MAX_EVENTS);
+    if (ret < 0) {
+        printf("ff_listen failed\n");
+        close(sockfd);
+        close(sockfd_kernel);
+        return -1;
+    }
+
+    epfd = epoll_create(512);
+    printf("epfd:%d\n", epfd);
+    if (epfd <= 0) {
+        printf("ff_epoll_create failed, errno:%d, %s\n",
+            errno, strerror(errno));
+        close(sockfd);
+        /* Both sockets are open at this point: close the second one too. */
+        close(sockfd_kernel);
+        return -1;
+    }
+
+    ev.data.fd = sockfd;
+    ev.events = EPOLLIN;
+    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd, &ev);
+    if (ret < 0) {
+        /* Name the call that actually failed. */
+        printf("ff_epoll_ctl failed\n");
+        close(epfd);
+        close(sockfd);
+        close(sockfd_kernel);
+        return -1;
+    }
+    ev.data.fd = sockfd_kernel;
+    ev.events = EPOLLIN;
+    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd_kernel, &ev);
+    if (ret < 0) {
+        printf("ff_epoll_ctl failed\n");
+        close(epfd);
+        close(sockfd);
+        close(sockfd_kernel);
+        return -1;
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        /*
+         * pthread_create() returns 0 or a positive error number and does
+         * not set errno. loop() ignores its argument, so pass NULL rather
+         * than the address of the changing loop counter.
+         */
+        ret = pthread_create(&hworker[i], NULL, loop, NULL);
+        if (ret != 0) {
+            printf("create loop thread failed., errno:%d/%s\n",
+                ret, strerror(ret));
+            close(epfd);
+            close(sockfd);
+            close(sockfd_kernel);
+            return -1;
+        }
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        pthread_join(hworker[i], NULL);
+    }
+
+    close(epfd);
+    close(sockfd);
+    close(sockfd_kernel);
+
+    return 0;
+}
diff --git a/adapter/syscall/main_stack_epoll_thread_socket.c b/adapter/syscall/main_stack_epoll_thread_socket.c new file mode 100644 index 000000000..f4d6317c0 --- /dev/null +++ b/adapter/syscall/main_stack_epoll_thread_socket.c @@ -0,0 +1,254 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_WORKERS 128 +pthread_t hworker[MAX_WORKERS]; +pthread_spinlock_t worker_lock; +#define MAX_EVENTS 512 + +static int exit_flag = 0; + +char html[] = +"HTTP/1.1 200 OK\r\n" +"Server: F-Stack\r\n" +"Date: Sat, 25 Feb 2017 09:26:33 GMT\r\n" +"Content-Type: text/html\r\n" +"Content-Length: 438\r\n" +"Last-Modified: Tue, 21 Feb 2017 09:44:03 GMT\r\n" +"Connection: keep-alive\r\n" +"Accept-Ranges: bytes\r\n" +"\r\n" +"\r\n" +"\r\n" +"\r\n" +"Welcome to F-Stack!\r\n" +"\r\n" +"\r\n" +"\r\n" +"

Welcome to F-Stack!

\r\n" +"\r\n" +"

For online documentation and support please refer to\r\n" +"F-Stack.org.
\r\n" +"\r\n" +"

Thank you for using F-Stack.

\r\n" +"\r\n" +"";
+
+/* SIGINT/SIGTERM handler: raises exit_flag so every loop() leaves its event loop. */
+void sig_term(int sig)
+{
+    printf("we caught signal %d, to exit helloworld\n", sig);
+    exit_flag = 1;
+    //alarm_event_sem();
+    return;
+}
+
+#define SOCK_FSTACK 0x01000000
+#define SOCK_KERNEL 0x02000000
+/*
+ * Worker thread body. `arg` points to an int holding the thread id; it is
+ * read once at entry. Each thread creates its own SO_REUSEPORT listening
+ * socket on port 80 and its own epoll instance, then serves connections
+ * until exit_flag is set or epoll_wait() fails.
+ *
+ * worker_lock is released right after socket() returns (success or
+ * failure); main() uses it to start the workers one at a time.
+ * NOTE(review): POSIX leaves pthread_spin_unlock() undefined when the
+ * caller does not hold the lock, and here main() is the locker; a
+ * semaphore would be the portable handshake.
+ *
+ * Always returns NULL.
+ */
+void *loop(void *arg)
+{
+    struct epoll_event ev;
+    struct epoll_event events[MAX_EVENTS];
+    int epfd;
+    int sockfd;
+    int thread_id;
+
+    thread_id = *(int *)arg;
+    printf("start thread %d\n", thread_id);
+
+    sockfd = socket(AF_INET, SOCK_STREAM | SOCK_FSTACK, 0);
+    printf("thread %d, sockfd:%d\n", thread_id, sockfd);
+    if (sockfd < 0) {
+        printf("thread %d, ff_socket failed\n", thread_id);
+        pthread_spin_unlock(&worker_lock);
+        return NULL;
+    }
+
+    /* socket will init adapter,so unlock after socket */
+    pthread_spin_unlock(&worker_lock);
+
+    int on = 1;
+    //ioctl(sockfd, FIONBIO, &on);
+    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)) < 0) {
+        printf("thread %d, setsockopt SO_REUSEPORT failed\n", thread_id);
+        /* The socket is already open: do not leak it. */
+        close(sockfd);
+        return NULL;
+    }
+
+    struct sockaddr_in my_addr;
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    int ret = bind(sockfd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("thread %d, ff_bind failed\n", thread_id);
+        close(sockfd);
+        return NULL;
+    }
+
+    ret = listen(sockfd, MAX_EVENTS);
+    if (ret < 0) {
+        printf("thread %d, ff_listen failed\n", thread_id);
+        close(sockfd);
+        return NULL;
+    }
+
+    epfd = epoll_create(512|SOCK_FSTACK);
+    printf("thread %d, epfd:%d\n", thread_id, epfd);
+    if (epfd <= 0) {
+        printf("thread %d, ff_epoll_create failed, errno:%d, %s\n",
+            thread_id, errno, strerror(errno));
+        close(sockfd);
+        return NULL;
+    }
+    ev.data.fd = sockfd;
+    ev.events = EPOLLIN;
+    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd, &ev);
+    if (ret < 0) {
+        /* Name the call that actually failed. */
+        printf("ff_epoll_ctl failed\n");
+        close(epfd);
+        close(sockfd);
+        return NULL;
+    }
+
+    /* Wait for events to happen */
+    while (!exit_flag) {
+        /*
+         * If not call alarm_event_sem, and epoll_wait timeout is 0,
+         * it can't exit normal, so timeout can't set to 0.
+         */
+        int nevents = epoll_wait(epfd, events, MAX_EVENTS, 100);
+        int i;
+
+        if (nevents <= 0) {
+            if (nevents) {
+                printf("thread %d, hello world epoll wait ret %d, errno:%d, %s\n",
+                    thread_id, nevents, errno, strerror(errno));
+                break;
+            }
+            //usleep(100);
+            //sleep(1);
+        }
+        /*if (nevents > 100) {
+            printf("error event nb:%d, to exit\n", nevents);
+            break;
+        }*/
+        //printf("thread %d, get nevents:%d\n", thread_id, nevents);
+
+        for (i = 0; i < nevents; ++i) {
+            /* Handle new connect */
+            if (events[i].data.fd == sockfd) {
+                /*
+                 * One accept per event (the accept loop is disabled). A
+                 * failure must only skip this event: `break` here would
+                 * leave the for loop and drop the rest of the batch.
+                 */
+                //while (1) {
+                    int nclientfd = accept(sockfd, NULL, NULL);
+                    if (nclientfd < 0) {
+                        continue;
+                    }
+
+                    /* Add to event list */
+                    ev.data.fd = nclientfd;
+                    ev.events = EPOLLIN;
+                    if (epoll_ctl(epfd, EPOLL_CTL_ADD, nclientfd, &ev) != 0) {
+                        printf("thread %d, ff_epoll_ctl failed:%d, %s\n",
+                            thread_id, errno, strerror(errno));
+                        close(nclientfd);
+                        continue;
+                    }
+                //}
+            } else {
+                if (events[i].events & EPOLLERR ) {
+                    /* Simply close socket */
+                    epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                    close(events[i].data.fd);
+                } else if (events[i].events & EPOLLIN) {
+                    char buf[256];
+                    /*
+                     * read() returns ssize_t; stored in an unsigned size_t
+                     * a -1 error would pass the "> 0" test below.
+                     */
+                    ssize_t readlen = read( events[i].data.fd, buf, sizeof(buf));
+                    if(readlen > 0) {
+                        write( events[i].data.fd, html, sizeof(html) - 1);
+                    } else {
+                        epoll_ctl(epfd, EPOLL_CTL_DEL, events[i].data.fd, NULL);
+                        close(events[i].data.fd);
+                    }
+                } else {
+                    printf("thread %d, unknown event: %8.8X\n", thread_id, events[i].events);
+                }
+            }
+        }
+    }
+
+    close(epfd);
+    close(sockfd);
+
+    return NULL;
+}
+
+/*
+ * Starts worker_num threads running loop(), one at a time, and waits for
+ * them. worker_num is 1 without arguments, otherwise argv[1]; it must be
+ * within 1..MAX_WORKERS because hworker[] has MAX_WORKERS slots.
+ * Threads after the first are pinned to core 2 + index.
+ * Returns 0 on a normal exit and -1 on bad input or thread creation failure.
+ */
+int main(int argc, char * argv[])
+{
+    int i, worker_num, ret;
+
+    signal(SIGINT, sig_term);
+    signal(SIGTERM, sig_term);
+
+    if (argc == 1) {
+        worker_num = 1;
+    } else {
+        worker_num = atoi(argv[1]);
+    }
+    /* Reject counts that would index past the end of hworker[]. */
+    if (worker_num < 1 || worker_num > MAX_WORKERS) {
+        printf("invalid worker number %d, must be 1..%d\n", worker_num, MAX_WORKERS);
+        return -1;
+    }
+    printf("to init %d workers.\n", worker_num);
+
+    pthread_spin_init(&worker_lock, PTHREAD_PROCESS_PRIVATE);
+    pthread_spin_lock(&worker_lock);
+
+    for (i = 0; i < worker_num; i++) {
+        /*
+         * pthread_create() returns 0 or a positive error number and does
+         * not set errno. &i stays valid for the new thread: i is not
+         * changed until the lock below is re-acquired, which happens only
+         * after the thread has read it and released worker_lock.
+         */
+        ret = pthread_create(&hworker[i], NULL, loop, (void *)&i);
+        if (ret != 0) {
+            printf("create loop thread failed., errno:%d/%s\n",
+                ret, strerror(ret));
+            pthread_spin_unlock(&worker_lock);
+            pthread_spin_destroy(&worker_lock);
+            return -1;
+        }
+        if (i > 0) {
+            cpu_set_t cpuinfo;
+            int lcore_id = 2 + i;
+
+            CPU_ZERO(&cpuinfo);
+            CPU_SET_S(lcore_id, sizeof(cpuinfo), &cpuinfo);
+            if(0 != pthread_setaffinity_np(hworker[i], sizeof(cpu_set_t), &cpuinfo))
+            {
+                printf("set affinity recver faild\n");
+                exit(0);
+            }
+            printf("set affinity recver sucssed, thread:%d, lcore_id:%d\n", i, lcore_id);
+        }
+        /* Blocks until the thread just started releases worker_lock. */
+        pthread_spin_lock(&worker_lock);
+        //sleep(1);
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        pthread_join(hworker[i], NULL);
+    }
+
+    pthread_spin_destroy(&worker_lock);
+
+    return 0;
+}
diff --git a/adapter/syscall/main_stack_thread_socket.c b/adapter/syscall/main_stack_thread_socket.c new file mode 100644 index 000000000..2f3b2f410 --- /dev/null +++ b/adapter/syscall/main_stack_thread_socket.c @@ -0,0 +1,262 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#include "ff_config.h" +//#include "ff_api.h" +#include "ff_event.h" +#include "ff_adapter.h" +#include "ff_hook_syscall.h" + +#define MAX_WORKERS 128 +pthread_t hworker[MAX_WORKERS]; +pthread_spinlock_t worker_lock; +#define MAX_EVENTS 512 + +/* 100 ms */ +struct timespec timeout = {0, 100000000}; + +static int exit_flag = 0; + +char html[] = +"HTTP/1.1 200 OK\r\n" +"Server: F-Stack\r\n" +"Date: Sat, 25 Feb 2017 09:26:33 GMT\r\n" +"Content-Type: text/html\r\n" +"Content-Length: 438\r\n" +"Last-Modified: Tue, 21 Feb 2017 09:44:03 GMT\r\n" +"Connection: keep-alive\r\n" +"Accept-Ranges: bytes\r\n" +"\r\n" +"\r\n" +"\r\n" +"\r\n" +"Welcome to F-Stack!\r\n" +"\r\n" +"\r\n" +"\r\n" +"

Welcome to F-Stack!

\r\n" +"\r\n" +"

For online documentation and support please refer to\r\n" +"F-Stack.org.
\r\n" +"\r\n" +"

Thank you for using F-Stack.

\r\n" +"\r\n" +"";
+
+/* SIGINT/SIGTERM handler: raises exit_flag so every loop() leaves its event loop. */
+void sig_term(int sig)
+{
+    printf("we caught signal %d, to exit helloworld\n", sig);
+    exit_flag = 1;
+    //alarm_event_sem();
+    return;
+}
+
+/*
+ * Worker thread body. `arg` points to an int holding the thread id; it is
+ * read once at entry. Each thread creates its own non-blocking listening
+ * socket on port 80 and its own kqueue, then serves connections until
+ * exit_flag is set or kevent() fails.
+ *
+ * worker_lock is released right after socket() returns (success or
+ * failure); main() uses it to start the workers one at a time.
+ * NOTE(review): POSIX leaves pthread_spin_unlock() undefined when the
+ * caller does not hold the lock, and here main() is the locker; a
+ * semaphore would be the portable handshake.
+ *
+ * Always returns NULL.
+ */
+void *loop(void *arg)
+{
+    /* kevent set */
+    struct kevent kevSet;
+    /* events */
+    struct kevent events[MAX_EVENTS];
+    /* kq */
+    int kq;
+    int sockfd;
+
+    int thread_id;
+
+    thread_id = *(int *)arg;
+    printf("start thread %d\n", thread_id);
+
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    printf("thread %d, sockfd:%d\n", thread_id, sockfd);
+    if (sockfd < 0) {
+        printf("thread %d, ff_socket failed\n", thread_id);
+        pthread_spin_unlock(&worker_lock);
+        return NULL;
+    }
+
+    /* socket will init adapter,so unlock after socket */
+    pthread_spin_unlock(&worker_lock);
+
+    int on = 1;
+    ioctl(sockfd, FIONBIO, &on);
+
+    struct sockaddr_in my_addr;
+    bzero(&my_addr, sizeof(my_addr));
+    my_addr.sin_family = AF_INET;
+    my_addr.sin_port = htons(80);
+    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    int ret = bind(sockfd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
+    if (ret < 0) {
+        printf("thread %d, ff_bind failed\n", thread_id);
+        close(sockfd);
+        return NULL;
+    }
+
+    ret = listen(sockfd, MAX_EVENTS);
+    if (ret < 0) {
+        printf("thread %d, ff_listen failed\n", thread_id);
+        close(sockfd);
+        return NULL;
+    }
+
+
+    kq = kqueue();
+    printf("thread %d, kq:%d\n", thread_id, kq);
+    if (kq < 0) {
+        printf("thread %d, ff_kqueue failed, errno:%d, %s\n", thread_id, errno, strerror(errno));
+        close(sockfd);
+        return NULL;
+    }
+
+    EV_SET(&kevSet, sockfd, EVFILT_READ, EV_ADD, 0, MAX_EVENTS, NULL);
+    /* Update kqueue */
+    ret = kevent(kq, &kevSet, 1, NULL, 0, &timeout);
+    if (ret < 0) {
+        printf("thread %d, kevent failed\n", thread_id);
+        close(kq);
+        close(sockfd);
+        return NULL;
+    }
+
+    /* Wait for events to happen */
+    while (!exit_flag) {
+        /*
+         * If not call alarm_event_sem, and epoll_wait timeout is NULL,
+         * it can't exit normal, so timeout can't set to NULL.
+         */
+        int nevents = kevent(kq, NULL, 0, events, MAX_EVENTS, &timeout);
+        int i;
+
+        if (nevents <= 0) {
+            if (nevents) {
+                printf("thread %d, ff_kevent failed:%d, %s\n", thread_id, errno,
+                    strerror(errno));
+                /* Leave the loop so kq and sockfd are closed below. */
+                break;
+            }
+            //usleep(100);
+            //sleep(1);
+        }
+        //printf("thread %d, get nevents:%d\n", thread_id, nevents);
+
+        for (i = 0; i < nevents; ++i) {
+            struct kevent event = events[i];
+            int clientfd = (int)event.ident;
+
+            /* Handle disconnect */
+            if (event.flags & EV_EOF) {
+                /* Simply close socket */
+                close(clientfd);
+            } else if (clientfd == sockfd) {
+                /* event.data is used as the number of accepts to attempt. */
+                int available = (int)event.data;
+                do {
+                    int nclientfd = accept(clientfd, NULL, NULL);
+                    if (nclientfd < 0) {
+                        printf("thread %d, ff_accept failed:%d, %s\n", thread_id, errno,
+                            strerror(errno));
+                        break;
+                    }
+
+                    /* Add to event list */
+                    EV_SET(&kevSet, nclientfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+
+                    if(kevent(kq, &kevSet, 1, NULL, 0, NULL) < 0) {
+                        printf("thread %d, ff_kevent error:%d, %s\n", thread_id, errno,
+                            strerror(errno));
+                        close(nclientfd);
+                        break;
+                    }
+
+                    available--;
+                } while (available);
+            } else if (event.filter == EVFILT_READ) {
+                char buf[256];
+                ssize_t readlen = read(clientfd, buf, sizeof(buf));
+                ssize_t writelen = write(clientfd, html, sizeof(html) - 1);
+                if (writelen < 0){
+                    /* ssize_t is signed: %zd, so a failed call prints as -1. */
+                    printf("thread %d, ff_write failed, readlen:%zd, writelen:%zd, :%d, %s\n",
+                        thread_id, readlen, writelen, errno, strerror(errno));
+                    close(clientfd);
+                }
+            } else {
+                printf("thread %d, unknown event: %d:%8.8X\n", thread_id, i, event.flags);
+            }
+        }
+    }
+
+    close(kq);
+    close(sockfd);
+
+    return NULL;
+}
+
+/*
+ * Starts worker_num threads running loop(), one at a time, and waits for
+ * them. worker_num is 1 without arguments, otherwise argv[1]; it must be
+ * within 1..MAX_WORKERS because hworker[] has MAX_WORKERS slots.
+ * Threads after the first are pinned to core 2 + index.
+ * Returns 0 on a normal exit and -1 on bad input or thread creation failure.
+ */
+int main(int argc, char * argv[])
+{
+    int i, worker_num, ret;
+
+    signal(SIGINT, sig_term);
+    signal(SIGTERM, sig_term);
+
+    if (argc == 1) {
+        worker_num = 1;
+    } else {
+        worker_num = atoi(argv[1]);
+    }
+    /* Reject counts that would index past the end of hworker[]. */
+    if (worker_num < 1 || worker_num > MAX_WORKERS) {
+        printf("invalid worker number %d, must be 1..%d\n", worker_num, MAX_WORKERS);
+        return -1;
+    }
+    printf("to init %d workers.\n", worker_num);
+
+    pthread_spin_init(&worker_lock, PTHREAD_PROCESS_PRIVATE);
+    pthread_spin_lock(&worker_lock);
+
+    for (i = 0; i < worker_num; i++) {
+        /*
+         * pthread_create() returns 0 or a positive error number and does
+         * not set errno. &i stays valid for the new thread: i is not
+         * changed until the lock below is re-acquired, which happens only
+         * after the thread has read it and released worker_lock.
+         */
+        ret = pthread_create(&hworker[i], NULL, loop, (void *)&i);
+        if (ret != 0) {
+            printf("create loop thread failed., errno:%d/%s\n",
+                ret, strerror(ret));
+            pthread_spin_unlock(&worker_lock);
+            pthread_spin_destroy(&worker_lock);
+            return -1;
+        }
+        if (i > 0) {
+            cpu_set_t cpuinfo;
+            int lcore_id = 2 + i;
+
+            CPU_ZERO(&cpuinfo);
+            CPU_SET_S(lcore_id, sizeof(cpuinfo), &cpuinfo);
+            if(0 != pthread_setaffinity_np(hworker[i], sizeof(cpu_set_t), &cpuinfo))
+            {
+                printf("set affinity recver faild\n");
+                exit(0);
+            }
+            printf("set affinity recver sucssed, thread:%d, lcore_id:%d\n", i, lcore_id);
+        }
+        /* Blocks until the thread just started releases worker_lock. */
+        pthread_spin_lock(&worker_lock);
+    }
+
+    for (i = 0; i < worker_num; i++) {
+        pthread_join(hworker[i], NULL);
+    }
+
+    pthread_spin_destroy(&worker_lock);
+
+    return 0;
+}
diff --git a/adapter/syscall/nginx-close.png b/adapter/syscall/nginx-close.png new file mode 100644 index 000000000..855688050 Binary files /dev/null and b/adapter/syscall/nginx-close.png differ diff --git a/adapter/syscall/nginx-keep-alive.png b/adapter/syscall/nginx-keep-alive.png new file mode 100644 index 000000000..784727523 Binary files /dev/null and b/adapter/syscall/nginx-keep-alive.png differ diff --git a/example/Makefile b/example/Makefile index 5c52df9a6..77d413d92 100644 --- a/example/Makefile +++ b/example/Makefile @@ -10,7 +10,7 @@ endif PKGCONF ?= pkg-config -CFLAGS += -O -gdwarf-2 $(shell $(PKGCONF) --cflags libdpdk) +CFLAGS += -O0 -g -gdwarf-2 $(shell $(PKGCONF) --cflags libdpdk) LIBS+= $(shell $(PKGCONF) --static --libs libdpdk) LIBS+= -L${FF_PATH}/lib -Wl,--whole-archive,-lfstack,--no-whole-archive