vcpe/srcs/service/dhcpd/dhcpd_network.c

372 lines
12 KiB
C

//
// Created by xajhuang on 2023/3/16.
//
#include <uv.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include "service/dhcpd.h"
#include "user_errno.h"
#include "task_manager.h"
#include "zlog_module.h"
#include "network/vlan.h"
// Geometry of the PACKET_MMAP receive ring requested in create_udp_socket().
#define PKG_MMAP_BLOCKSIZ (4 * 1024 * 1024) // bytes per ring block (4 MiB)
#define PKG_MMAP_FRAMESIZ (2 * 1024)        // bytes per frame (2 KiB)
#define PKG_MMAP_BLOCKNUM (64)              // number of blocks in the ring
// State of the memory-mapped receive ring shared with the kernel.
typedef struct {
    struct iovec       *rd;  // one entry per ring block: base address and length
    uint8_t            *map; // start of the mmap()ed ring memory
    struct tpacket_req3 req; // ring parameters passed to PACKET_RX_RING
} PACKET_MMAP_RING;

typedef PACKET_MMAP_RING *PPACKET_MMAP_RING;
// View of the header found at the start of every ring block
// (raw_sock_recv_cb() casts each block's base address to this type).
// Field order must not change: it mirrors the layout written by the kernel.
struct block_desc {
    uint32_t              version;
    uint32_t              offset_to_priv;
    struct tpacket_hdr_v1 h1; // block status, packet count, offset of first packet
};
// Overlay for a captured frame: the headers described by VLAN_PKG_HDR followed
// by the DHCP message. Only this struct is packed, because it is laid directly
// over raw packet bytes (see on_sock_recv()).
#pragma pack(push, 1)
typedef struct {
    VLAN_PKG_HDR vlan_hdr;
    DHCP_PROTO   dhcp;
} DHCP_PACKAGE, *PDHCP_PACKAGE;
#pragma pack(pop)

// The two structs below are in-memory bookkeeping only. They are deliberately
// NOT packed: packing would drop the alignment of uv_work_t and of the pointer
// members to 1 byte, so every element after the first in a PKG_PROCESS_INFO
// array would hand libuv a misaligned uv_work_t.

// One queued packet: the libuv work request plus the location of its data.
typedef struct {
    uv_work_t      uvWork;   // work request submitted through uv_queue_work()
    unsigned short nSize;    // captured length of the packet in bytes
    unsigned char *pPkgBase; // first byte of the captured frame
    void          *pData;    // owning PKG_MSG (see after_msg_recv())
} PKG_PROCESS_INFO, *PPKG_PROCESS_INFO;

// One batch of packets taken from a single ring block.
typedef struct {
    unsigned int      nf;       // work items of this batch still outstanding
    PPKG_PROCESS_INFO pPkgInfo; // array with one entry per packet in the batch
} PKG_MSG, *PPKG_MSG;
/*
TORs (Top of Rack switches) at Facebook run DHCP relayers; these relayers are
responsible for relaying broadcast DHCP traffic (DISCOVERY and SOLICIT
messages) originating within their racks to anycast VIPs, one for DHCPv4 and
one for DHCPv6.
*/
// Classic BPF program attached to the raw socket through `bpf` and
// SO_ATTACH_FILTER (see create_udp_socket()). Each entry is a
// struct sock_filter {code, jt, jf, k} as emitted by `tcpdump -dd`.
// The table is generated bytecode: jump offsets (jt/jf) are relative, so
// entries must not be inserted, removed or reordered by hand. Regenerate the
// whole program with tcpdump instead.
static struct sock_filter g_filterCode[] = {
#ifdef UDP_DHCP_FILTER // generated by: tcpdump "udp and port 67 and port 68" -dd
// NOTE(review): when UDP_DHCP_FILTER is defined these entries are placed in
// front of the VLAN program below inside the same array. This section ends
// with return instructions, so the VLAN program would presumably never be
// reached in that configuration. Confirm that this is intended.
{0x28, 0, 0, 0x0000000c},
{0x15, 0, 9, 0x000086dd},
{0x30, 0, 0, 0x00000014},
{0x15, 0, 21, 0x00000011},
{0x28, 0, 0, 0x00000036},
{0x15, 0, 2, 0x00000043},
{0x28, 0, 0, 0x00000038},
{0x15, 16, 17, 0x00000044},
{0x15, 0, 16, 0x00000044},
{0x28, 0, 0, 0x00000038},
{0x15, 13, 14, 0x00000043},
{0x15, 0, 13, 0x00000800},
{0x30, 0, 0, 0x00000017},
{0x15, 0, 11, 0x00000011},
{0x28, 0, 0, 0x00000014},
{0x45, 9, 0, 0x00001fff},
{0xb1, 0, 0, 0x0000000e},
{0x48, 0, 0, 0x0000000e},
{0x15, 0, 2, 0x00000043},
{0x48, 0, 0, 0x00000010},
{0x15, 3, 4, 0x00000044},
{0x15, 0, 3, 0x00000044},
{0x48, 0, 0, 0x00000010},
{0x15, 0, 1, 0x00000043},
{0x6, 0, 0, 0x00040000},
{0x6, 0, 0, 0x00000000},
#endif
#if 0
// Disabled alternative, kept for reference only.
// generated by: tcpdump "vxlan" -dd
// NOTE(review): the values compared here (0x8100, 0x88a8, 0x9100) look like
// VLAN tag ethertypes, so the expression was presumably "vlan" rather than
// "vxlan". Confirm before re-enabling.
{0x28, 0, 0, 0x0000000c},
{0x15, 2, 0, 0x00008100},
{0x15, 1, 0, 0x000088a8},
{0x15, 0, 1, 0x00009100},
{0x6, 0, 0, 0x00040000},
{0x6, 0, 0, 0x00000000},
#endif
// Always compiled in.
// generated by: tcpdump "vlan and udp and port 67 and port 68" -dd
{0x0, 0, 0, 0x00000000},
{0x2, 0, 0, 0x00000000},
{0x2, 0, 0, 0x00000001},
{0x30, 0, 0, 0xfffff030},
{0x15, 7, 0, 0x00000001},
{0x0, 0, 0, 0x00000004},
{0x2, 0, 0, 0x00000000},
{0x2, 0, 0, 0x00000001},
{0x28, 0, 0, 0x0000000c},
{0x15, 2, 0, 0x00008100},
{0x15, 1, 0, 0x000088a8},
{0x15, 0, 56, 0x00009100},
{0x61, 0, 0, 0x00000001},
{0x48, 0, 0, 0x0000000c},
{0x15, 0, 13, 0x000086dd},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x00000014},
{0x15, 0, 50, 0x00000011},
{0x61, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000036},
{0x15, 0, 3, 0x00000043},
{0x61, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000038},
{0x15, 43, 44, 0x00000044},
{0x15, 0, 43, 0x00000044},
{0x61, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000038},
{0x15, 39, 40, 0x00000043},
{0x15, 0, 39, 0x00000800},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x00000017},
{0x15, 0, 36, 0x00000011},
{0x61, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000014},
{0x45, 33, 0, 0x00001fff},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x0000000e},
{0x54, 0, 0, 0x0000000f},
{0x64, 0, 0, 0x00000002},
{0xc, 0, 0, 0x00000000},
{0x7, 0, 0, 0x00000000},
{0x48, 0, 0, 0x0000000e},
{0x15, 0, 8, 0x00000043},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x0000000e},
{0x54, 0, 0, 0x0000000f},
{0x64, 0, 0, 0x00000002},
{0xc, 0, 0, 0x00000000},
{0x7, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000010},
{0x15, 16, 17, 0x00000044},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x0000000e},
{0x54, 0, 0, 0x0000000f},
{0x64, 0, 0, 0x00000002},
{0xc, 0, 0, 0x00000000},
{0x7, 0, 0, 0x00000000},
{0x48, 0, 0, 0x0000000e},
{0x15, 0, 9, 0x00000044},
{0x61, 0, 0, 0x00000000},
{0x50, 0, 0, 0x0000000e},
{0x54, 0, 0, 0x0000000f},
{0x64, 0, 0, 0x00000002},
{0xc, 0, 0, 0x00000000},
{0x7, 0, 0, 0x00000000},
{0x48, 0, 0, 0x00000010},
{0x15, 0, 1, 0x00000043},
{0x6, 0, 0, 0x00040000}, // return 0x40000: accept the packet (up to 262144 bytes)
{0x6, 0, 0, 0x00000000}, // return 0: drop the packet
};
// Receive ring state: filled in by create_udp_socket(), read by raw_sock_recv_cb().
static PACKET_MMAP_RING g_pkgRing;

// Filter program descriptor handed to SO_ATTACH_FILTER; wraps g_filterCode.
static struct sock_fprog bpf = {
    .filter = g_filterCode,
    .len    = sizeof(g_filterCode) / sizeof(g_filterCode[0]),
};
/**
 * Work callback submitted through uv_queue_work() for one captured packet.
 *
 * Interprets the packet bytes as a DHCP_PACKAGE and logs the DHCP transaction
 * id. Packets whose captured length does not reach the end of the xid field
 * are dropped, so the function never reads past the captured data.
 *
 * @param req work request; req->data points at the PKG_PROCESS_INFO that
 *            describes the packet (set up in raw_sock_recv_cb()).
 */
static void on_sock_recv(uv_work_t *req) {
    PPKG_PROCESS_INFO pWork = (PPKG_PROCESS_INFO)req->data;
    PDHCP_PACKAGE     pkg   = (PDHCP_PACKAGE)pWork->pPkgBase;
    // Smallest capture that still contains every byte of dhcp.xid.
    const size_t minLen = offsetof(DHCP_PACKAGE, dhcp.xid) + sizeof(pkg->dhcp.xid);

    if (pWork->nSize < minLen) {
        LOG_MOD(error,
                ZLOG_MOD_OPENDHCPD,
                "Drop short packet: %u bytes, need at least %zu\n",
                (unsigned int)pWork->nSize,
                minLen);
        return;
    }

    //LOG_MSG_HEX(debug, pkg, pWork->nSize);
    LOG_MSG(info, "xid: 0x%08X\n", ntohl(pkg->dhcp.xid));
#if 0
    LOG_MOD(info, ZLOG_MOD_OPENDHCPD, "vlan = %u\n", VXLAN_VIN_ID_PACK(pkg->vlan_hdr.vlan.id));
    LOG_MSG(info, "xid: 0x%08X\n", ntohl(pkg->dhcp.xid));
    LOG_MSG(info,
            "dest mac: %02X:%02X:%02X:%02X:%02X:%02X\n",
            pkg->vlan_hdr.eth.h_dest[0],
            pkg->vlan_hdr.eth.h_dest[1],
            pkg->vlan_hdr.eth.h_dest[2],
            pkg->vlan_hdr.eth.h_dest[3],
            pkg->vlan_hdr.eth.h_dest[4],
            pkg->vlan_hdr.eth.h_dest[5]);
    LOG_MSG(info,
            "client mac: %02X:%02X:%02X:%02X:%02X:%02X\n",
            pkg->dhcp.chaddr[0],
            pkg->dhcp.chaddr[1],
            pkg->dhcp.chaddr[2],
            pkg->dhcp.chaddr[3],
            pkg->dhcp.chaddr[4],
            pkg->dhcp.chaddr[5]);
#endif
}
/**
 * Completion callback passed to uv_queue_work() together with on_sock_recv().
 *
 * Every packet of a batch shares one PKG_MSG. Each completion decrements the
 * batch's outstanding counter; the completion that brings it to zero frees the
 * per-packet array and the PKG_MSG itself.
 *
 * @param req    finished work request; req->data is its PKG_PROCESS_INFO.
 * @param status completion status supplied by libuv (not used).
 */
static void after_msg_recv(uv_work_t *req, int status) {
    PPKG_PROCESS_INFO pDone  = (PPKG_PROCESS_INFO)req->data;
    PPKG_MSG          pBatch = (PPKG_MSG)pDone->pData;

    pBatch->nf--;
    if (pBatch->nf != 0) {
        // Other packets of this batch are still being processed.
        return;
    }

    LOG_MOD(trace, ZLOG_MOD_OPENDHCPD, "---Free resources: %p\n", pBatch);
    free(pBatch->pPkgInfo);
    free(pBatch);
}
void raw_sock_recv_cb(uv_poll_t *handle, int status, int events) {
static unsigned int block_num = 0;
if (status >= 0) {
struct block_desc *pbd = (struct block_desc *)g_pkgRing.rd[block_num].iov_base;
if ((pbd->h1.block_status & TP_STATUS_USER) && (pbd->h1.num_pkts > 0)) {
int i;
struct tpacket3_hdr *ppd;
unsigned int memSize = sizeof(PKG_PROCESS_INFO) * pbd->h1.num_pkts;
PPKG_MSG pMsg = (PPKG_MSG)malloc(sizeof(PKG_MSG));
if (pMsg == NULL) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Malloc memory error: %lu\n", sizeof(PKG_MSG));
return;
}
LOG_MOD(trace, ZLOG_MOD_OPENDHCPD, "++++Malloc resources: %p\n", pMsg);
memset(pMsg, 0, sizeof(PKG_MSG));
pMsg->pPkgInfo = (PPKG_PROCESS_INFO)malloc(memSize);
if (pMsg->pPkgInfo == NULL) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Malloc memory error: %u\n", memSize);
free(pMsg);
return;
}
memset(pMsg->pPkgInfo, 0, memSize);
pMsg->nf = pbd->h1.num_pkts;
ppd = (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->h1.offset_to_first_pkt);
for (i = 0; i < pbd->h1.num_pkts; i++) {
pMsg->pPkgInfo[i].pPkgBase = ((uint8_t *)ppd + ppd->tp_mac);
pMsg->pPkgInfo[i].nSize = ppd->tp_snaplen;
pMsg->pPkgInfo[i].uvWork.data = &pMsg->pPkgInfo[i];
pMsg->pPkgInfo[i].pData = pMsg;
uv_queue_work(get_task_manager(), &(pMsg->pPkgInfo[i].uvWork), on_sock_recv, after_msg_recv);
ppd = (struct tpacket3_hdr *)((uint8_t *)ppd + ppd->tp_next_offset);
}
}
pbd->h1.block_status = TP_STATUS_KERNEL;
block_num = (block_num + 1) % PKG_MMAP_BLOCKNUM;
}
}
static int create_udp_socket() {
int i;
int err;
int v = TPACKET_V3;
struct sockaddr_ll addr;
// 1. create socket
int sock_fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
if (sock_fd < 0) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Socket created failure\n");
return -ERR_SOCK_CREATE;
}
// 2. attach filter (no need to call bind)
if ((err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf))) < 0) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Attaching filter failed: %d\n", err);
return -ERR_SOCK_SETOPT;
}
#if 1
// 3. set PACKET_MMAP version
if ((err = setsockopt(sock_fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v))) < 0) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Set PACKET_VERSION option failed: %d\n", err);
return -ERR_SOCK_SETOPT;
}
// 4. setup PAKCET_MMAP ring
memset(&g_pkgRing.req, 0, sizeof(g_pkgRing.req));
g_pkgRing.req.tp_block_size = PKG_MMAP_BLOCKSIZ;
g_pkgRing.req.tp_frame_size = PKG_MMAP_FRAMESIZ;
g_pkgRing.req.tp_block_nr = PKG_MMAP_BLOCKNUM;
g_pkgRing.req.tp_frame_nr = (PKG_MMAP_BLOCKSIZ * PKG_MMAP_BLOCKNUM) / PKG_MMAP_FRAMESIZ;
g_pkgRing.req.tp_retire_blk_tov = 60;
g_pkgRing.req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
if ((err = setsockopt(sock_fd, SOL_PACKET, PACKET_RX_RING, &g_pkgRing.req, sizeof(g_pkgRing.req))) < 0) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Set PACKET_RX_RING option failed: %d\n", err);
return -ERR_SOCK_SETOPT;
}
g_pkgRing.map = mmap(NULL,
g_pkgRing.req.tp_block_size * g_pkgRing.req.tp_block_nr,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_LOCKED,
sock_fd,
0);
if (g_pkgRing.map == MAP_FAILED) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "MMAP socket ring failed\n");
return -ERR_MMAP_MEMORY;
}
// 5. malloc read buffer
g_pkgRing.rd = malloc(g_pkgRing.req.tp_block_nr * sizeof(struct iovec));
if (g_pkgRing.rd == NULL) {
LOG_MOD(error,
ZLOG_MOD_OPENDHCPD,
"Malloc memory failed: %lu\n",
g_pkgRing.req.tp_block_nr * sizeof(struct iovec));
return -ERR_MMAP_MEMORY;
}
for (i = 0; i < g_pkgRing.req.tp_block_nr; ++i) {
g_pkgRing.rd[i].iov_base = g_pkgRing.map + (i * g_pkgRing.req.tp_block_size);
g_pkgRing.rd[i].iov_len = g_pkgRing.req.tp_block_size;
}
#endif
// 6. bind socket
const char *iface_name = "ens192";
memset(&addr, 0, sizeof(addr));
addr.sll_ifindex = (int)if_nametoindex(iface_name);
addr.sll_family = AF_PACKET;
addr.sll_protocol = htons(ETH_P_ALL);
addr.sll_hatype = 0;
addr.sll_pkttype = 0;
addr.sll_halen = 0;
if ((err = bind(sock_fd, (struct sockaddr *)&addr, sizeof(addr))) < 0) {
LOG_MOD(error, ZLOG_MOD_OPENDHCPD, "Bind raw socket failed: %d\n", err);
return -ERR_SOCK_SETOPT;
}
return sock_fd;
}
int dhcpd_init() {
static uv_udp_t uvRaw;
static uv_poll_t uvSocket;
int sock = create_udp_socket();
if (sock <= 0) {
return sock;
}
//LOG_MSG(info, "sizeof DHCP_PACKAGE = %lu\n", sizeof(DHCP_PACKAGE));
uv_udp_init(get_task_manager(), &uvRaw);
uv_udp_open(&uvRaw, sock);
uv_poll_init_socket(get_task_manager(), &uvSocket, sock);
uv_poll_start(&uvSocket, UV_READABLE, raw_sock_recv_cb);
return ERR_SUCCESS;
}