diff --git a/app/nginx-1.11.10/auto/make b/app/nginx-1.11.10/auto/make index 145d22dc0..5721c6c2a 100644 --- a/app/nginx-1.11.10/auto/make +++ b/app/nginx-1.11.10/auto/make @@ -33,7 +33,7 @@ CORE_LIBS+=" -g -Wl,--no-as-needed -fvisibility=default -pthread -lm -lrt" CORE_LIBS+=" -Wl,--whole-archive -lrte_pmd_vmxnet3_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring" CORE_LIBS+=" -Wl,--whole-archive -lrte_hash -lrte_kvargs -Wl,-lrte_mbuf -lethdev -lrte_eal -Wl,-lrte_mempool" CORE_LIBS+=" -lrte_ring -lrte_cmdline -lrte_cfgfile -lrte_kni -lrte_timer -Wl,-lrte_pmd_virtio" -CORE_LIBS+=" -Wl,--no-whole-archive -lrt -lm -ldl -lm -lcrypto" +CORE_LIBS+=" -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto" cat << END > $NGX_MAKEFILE diff --git a/example/Makefile b/example/Makefile index 6c0b88ac0..c53819689 100644 --- a/example/Makefile +++ b/example/Makefile @@ -1,9 +1,19 @@ +TOPDIR=.. + +ifeq ($(FF_PATH),) + FF_PATH=${TOPDIR} +endif + +ifeq ($(FF_DPDK),) + FF_DPDK=${TOPDIR}/dpdk/x86_64-native-linuxapp-gcc +endif + LIBS+= -L${FF_PATH}/lib -L${FF_DPDK}/lib -Wl,--whole-archive,-lfstack,--no-whole-archive LIBS+= -g -Wl,--no-as-needed -fvisibility=default -pthread -lm -lrt LIBS+= -Wl,--whole-archive -lrte_pmd_vmxnet3_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring LIBS+= -Wl,--whole-archive -lrte_hash -lrte_kvargs -Wl,-lrte_mbuf -lethdev -lrte_eal -Wl,-lrte_mempool LIBS+= -lrte_ring -lrte_cmdline -lrte_cfgfile -lrte_kni -lrte_timer -Wl,-lrte_pmd_virtio -LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lm -lcrypto +LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto TARGET="helloworld" all: diff --git a/lib/ff_dpdk_if.c b/lib/ff_dpdk_if.c index cae397b68..1f8cd4fb4 100644 --- a/lib/ff_dpdk_if.c +++ b/lib/ff_dpdk_if.c @@ -55,11 +55,15 @@ #include "ff_config.h" #include "ff_veth.h" #include "ff_host_interface.h" +#include "ff_msg.h" +#include "ff_api.h" #define MEMPOOL_CACHE_SIZE 256 #define ARP_RING_SIZE 2048 +#define MSG_RING_SIZE 32 + /* * Configurable 
number of RX/TX ring descriptors */ @@ -153,6 +157,16 @@ static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; static struct rte_ring **arp_ring[RTE_MAX_LCORE]; +struct ff_msg_ring { + char ring_name[2][RTE_RING_NAMESIZE]; + /* ring[0] for lcore recv msg, other send */ + /* ring[1] for lcore send msg, other read */ + struct rte_ring *ring[2]; +} __rte_cache_aligned; + +static struct ff_msg_ring msg_ring[RTE_MAX_LCORE]; +static struct rte_mempool *message_pool; + struct ff_dpdk_if_context { void *sc; void *ifp; @@ -441,6 +455,25 @@ init_mem_pool(void) return 0; } +static struct rte_ring * +create_ring(const char *name, unsigned count, int socket_id, unsigned flags) +{ + struct rte_ring *ring; + + if (name == NULL) + return NULL; + + /* If already create, just attached it */ + if (likely((ring = rte_ring_lookup(name)) != NULL)) + return ring; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + return rte_ring_create(name, count, socket_id, flags); + } else { + return rte_ring_lookup(name); + } +} + static int init_arp_ring(void) { @@ -472,21 +505,14 @@ init_arp_ring(void) uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[j].port_id; for(i = 0; i < nb_procs; ++i) { - snprintf(name_buf, RTE_RING_NAMESIZE, "ring_%d_%d", i, port_id); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - arp_ring[i][port_id] = rte_ring_create(name_buf, - ARP_RING_SIZE, socketid, - RING_F_SC_DEQ); - if (rte_ring_lookup(name_buf) != arp_ring[i][port_id]) - rte_panic("lookup arp ring:%s failed!\n", name_buf); - } else { - arp_ring[i][port_id] = rte_ring_lookup(name_buf); - } + snprintf(name_buf, RTE_RING_NAMESIZE, "arp_ring_%d_%d", i, port_id); + arp_ring[i][port_id] = create_ring(name_buf, ARP_RING_SIZE, + socketid, RING_F_SC_DEQ); if (arp_ring[i][port_id] == NULL) - rte_panic("create arp ring::%s failed!\n", name_buf); + rte_panic("create ring:%s failed!\n", name_buf); - printf("create arp ring:%s success, %u ring entries are now free!\n", + printf("create ring:%s success, %u ring entries 
are now free!\n", name_buf, rte_ring_free_count(arp_ring[i][port_id])); } } @@ -494,6 +520,58 @@ init_arp_ring(void) return 0; } +static void +ff_msg_init(struct rte_mempool *mp, + __attribute__((unused)) void *opaque_arg, + void *obj, __attribute__((unused)) unsigned i) +{ + struct ff_msg *msg = (struct ff_msg *)obj; + msg->buf_addr = (char *)msg + sizeof(struct ff_msg); + msg->buf_len = mp->elt_size - sizeof(struct ff_msg); +} + +static int +init_msg_ring(void) +{ + uint16_t i; + uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs; + unsigned socketid = lcore_conf.socket_id; + + /* Primary process creates the shared message pool; other processes look it up by name */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + message_pool = rte_mempool_create(FF_MSG_POOL, + MSG_RING_SIZE * 2 * nb_procs, + MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0, + NULL, NULL, ff_msg_init, NULL, + socketid, 0); + } else { + message_pool = rte_mempool_lookup(FF_MSG_POOL); + } + + if (message_pool == NULL) { + rte_panic("Create msg mempool failed\n"); + } + + for(i = 0; i < nb_procs; ++i) { + snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE, + "%s%u", FF_MSG_RING_IN, i); + snprintf(msg_ring[i].ring_name[1], RTE_RING_NAMESIZE, + "%s%u", FF_MSG_RING_OUT, i); + + msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0], + MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (msg_ring[i].ring[0] == NULL) + rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]); + + msg_ring[i].ring[1] = create_ring(msg_ring[i].ring_name[1], + MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (msg_ring[i].ring[1] == NULL) + rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[1]); + } + + return 0; +} + static int init_kni(void) { @@ -730,6 +808,8 @@ ff_dpdk_init(int argc, char **argv) init_arp_ring(); + init_msg_ring(); + enable_kni = ff_global_cfg.kni.enable; if (enable_kni) { init_kni(); @@ -872,12 +952,61 @@ process_arp_ring(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context 
*ctx) { /* read packet from ring buf and to process */ - uint16_t nb_tx; - nb_tx = rte_ring_dequeue_burst(arp_ring[queue_id][port_id], + uint16_t nb_rb; + nb_rb = rte_ring_dequeue_burst(arp_ring[queue_id][port_id], (void **)pkts_burst, MAX_PKT_BURST); - if(nb_tx > 0) { - process_packets(port_id, queue_id, pkts_burst, nb_tx, ctx, 1); + if(nb_rb > 0) { + process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1); + } + + return 0; +} + +static inline void +handle_sysctl_msg(struct ff_msg *msg, uint16_t proc_id) +{ + int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen, + msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new, + msg->sysctl.newlen); + + if (ret < 0) { + msg->result = errno; + } else { + msg->result = 0; + } + + rte_ring_enqueue(msg_ring[proc_id].ring[1], msg); +} + +static inline void +handle_default_msg(struct ff_msg *msg, uint16_t proc_id) +{ + msg->result = EINVAL; + rte_ring_enqueue(msg_ring[proc_id].ring[1], msg); +} + +static inline void +handle_msg(struct ff_msg *msg, uint16_t proc_id) +{ + switch (msg->msg_type) { + case FF_SYSCTL: + handle_sysctl_msg(msg, proc_id); + break; + default: + handle_default_msg(msg, proc_id); + break; + } +} + +static inline int +process_msg_ring(uint16_t proc_id) +{ + void *msg; + int ret = rte_ring_dequeue(msg_ring[proc_id].ring[0], &msg); + + if (unlikely(ret == 0)) { + handle_msg((struct ff_msg *)msg, proc_id); } return 0; @@ -1104,6 +1233,8 @@ main_loop(void *arg) } } + process_msg_ring(qconf->proc_id); + if (likely(lr->loop != NULL)) { lr->loop(lr->arg); } diff --git a/lib/ff_dpdk_kni.c b/lib/ff_dpdk_kni.c index 8222a6198..739d745a7 100644 --- a/lib/ff_dpdk_kni.c +++ b/lib/ff_dpdk_kni.c @@ -383,6 +383,9 @@ ff_kni_alloc(uint8_t port_id, unsigned socket_id, if (rte_eal_process_type() == RTE_PROC_PRIMARY) { kni_rp[port_id] = rte_ring_create(ring_name, KNI_QUEUE_SIZE, socket_id, RING_F_SC_DEQ); + + if (rte_ring_lookup(ring_name) != kni_rp[port_id]) + rte_panic("lookup kni ring failed!\n"); } else { 
kni_rp[port_id] = rte_ring_lookup(ring_name); } @@ -390,9 +393,6 @@ ff_kni_alloc(uint8_t port_id, unsigned socket_id, if (kni_rp[port_id] == NULL) rte_panic("create kni ring failed!\n"); - if (rte_ring_lookup(ring_name) != kni_rp[port_id]) - rte_panic("lookup kni ring failed!\n"); - printf("create kni ring success, %u ring entries are now free!\n", rte_ring_free_count(kni_rp[port_id])); } diff --git a/lib/ff_msg.h b/lib/ff_msg.h new file mode 100644 index 000000000..1ab300b55 --- /dev/null +++ b/lib/ff_msg.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2017 THL A29 Limited, a Tencent company. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef _FF_MSG_H_ +#define _FF_MSG_H_ + +#include + +#define FF_MSG_RING_IN "ff_msg_ring_in_" +#define FF_MSG_RING_OUT "ff_msg_ring_out_" +#define FF_MSG_POOL "ff_msg_pool" + +/* MSG TYPE: sysctl, sysctlbyname, etc.. */ +enum FF_MSG_TYPE { + FF_UNKNOWN = 0, + FF_SYSCTL, +}; + +struct ff_sysctl_args { + int *name; + unsigned namelen; + void *old; + size_t *oldlenp; + void *new; + size_t newlen; +}; + +#define MAX_MSG_BUF_SIZE 10240 + +/* structure of ipc msg */ +struct ff_msg { + enum FF_MSG_TYPE msg_type; + /* Result of msg processing */ + int result; + /* Length of segment buffer. */ + uint16_t buf_len; + /* Address of segment buffer. */ + char *buf_addr; + + union { + struct ff_sysctl_args sysctl; + }; +} __attribute__((packed)) __rte_cache_aligned; + +#endif diff --git a/tools/Makefile b/tools/Makefile new file mode 100644 index 000000000..24ff7f9e2 --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,7 @@ +SUBDIRS=ipc sysctl + +all: + for d in $(SUBDIRS); do $(MAKE) -C $$d all || exit 1; done + +clean: + for d in $(SUBDIRS); do $(MAKE) -C $$d clean || exit 1; done diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 000000000..43c9606bc --- /dev/null +++ b/tools/README.md @@ -0,0 +1,120 @@ +# Introduction + +Directory `ipc` implements an IPC library based on the DPDK `rte_ring`, which can be used to communicate with F-Stack processes. + +All other directories are useful tools ported from FreeBSD. + +# ipc + +This is a simple implementation using the DPDK `rte_ring`. +`ff_ipc_msg_alloc` gets a msg structure from the rte_mempool. +`ff_ipc_msg_free` puts a msg back into the rte_mempool. +`ff_ipc_send` enqueues a msg to the rte_ring. +`ff_ipc_recv` dequeues a msg from the rte_ring. + +Since F-Stack uses a multi-process architecture and every process has an independent stack, we must communicate with each F-Stack process separately. 
+ +# sysctl +Usage: +`sysctl -p <f-stack proc_id> [-bdehiNnoqTtWx] [ -B <bufsize> ] [-f filename] name[=value] ...` +`sysctl -p <f-stack proc_id> [-bdehNnoqTtWx] [ -B <bufsize> ] -a` + + -p <f-stack proc_id> Which F-Stack process to communicate with, default 0. + +Except for this option, it behaves the same as the original FreeBSD sysctl, see [Manual page](https://www.freebsd.org/cgi/man.cgi?sysctl). + +# how to implement a custom tool for communicating with F-Stack process + +Add a new FF_MSG_TYPE in ff_msg.h: +``` +enum FF_MSG_TYPE { + FF_UNKNOWN = 0, + FF_SYSCTL, + FF_HELLOWORLD, +}; +``` + +Define a structure used to communicate: +``` +struct ff_helloworld_args { + void *request; + size_t req_len; + void *reply; + size_t rep_len; +}; +``` +Note that, when using struct ff_helloworld_args, pointers in this structure must point to addresses within the range from ff_msg.buf_addr to ff_msg.buf_addr+ff_msg.buf_len; ff_msg.buf_len is (10240 - sizeof(struct ff_msg)). + +And add it to ff_msg: +``` +struct ff_msg { + ... + union { + struct ff_sysctl_args sysctl; + struct ff_helloworld_args helloworld; + }; +}; +``` + +Modify ff_dpdk_if.c, add a handle function: +``` +static inline void +handle_helloworld_msg(struct ff_msg *msg, uint16_t proc_id) +{ + printf("helloworld msg recved.\n"); + msg->result = 0; + rte_ring_enqueue(msg_ring[proc_id].ring[1], msg); +} + +static inline void +handle_msg(struct ff_msg *msg, uint16_t proc_id) +{ + switch (msg->msg_type) { + case FF_SYSCTL: + handle_sysctl_msg(msg, proc_id); + break; + case FF_HELLOWORLD: + handle_helloworld_msg(msg, proc_id); + break; + default: + handle_default_msg(msg, proc_id); + break; + } +} +``` + +Create helloworld.c: + +``` +int main() +{ + struct ff_msg *msg = ff_ipc_msg_alloc(); + + char *buf = msg->buf_addr; + + msg->helloworld.request = buf; + memcpy(msg->helloworld.request, "hello", 5); + msg->helloworld.req_len = 5; + buf += 5; + + msg->helloworld.reply = buf; + msg->helloworld.rep_len = 10; + + ff_ipc_send(msg, 0); + + struct ff_msg *retmsg; + ff_ipc_recv(&retmsg, 0); + assert(retmsg==msg); + + 
ff_ipc_msg_free(msg); +} + +``` + +The Makefile may look like this: +``` +TOPDIR?=${CURDIR}/../.. + +PROG=helloworld + +include ${TOPDIR}/tools/prog.mk +``` \ No newline at end of file diff --git a/tools/ipc/Makefile b/tools/ipc/Makefile new file mode 100644 index 000000000..0c204b88d --- /dev/null +++ b/tools/ipc/Makefile @@ -0,0 +1,32 @@ +TOPDIR?=${CURDIR}/../.. + +ifeq ($(FF_DPDK),) + FF_DPDK=${TOPDIR}/dpdk/x86_64-native-linuxapp-gcc +endif + +TARGET=libfstack_ipc.a + +DPDK_CFLAGS= -g -Wall -Werror -include ${FF_DPDK}/include/rte_config.h +DPDK_CFLAGS+= -march=native -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 +DPDK_CFLAGS+= -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2 +DPDK_CFLAGS+= -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2 +DPDK_CFLAGS+= -I${FF_DPDK}/include + +CFLAGS+= ${DPDK_CFLAGS} +CFLAGS+= -I${TOPDIR}/lib + +SRCS=ff_ipc.c +OBJS=$(patsubst %.c,%.o,${SRCS}) + +all: ${TARGET} + +${TARGET}: ${OBJS} + ar -rcs $@ ${OBJS} + +${OBJS}: %.o: %.c + ${CC} -c $< ${CFLAGS} -o $@ + + +.PHONY: clean +clean: + rm -f *.o ${TARGET} diff --git a/tools/ipc/ff_ipc.c b/tools/ipc/ff_ipc.c new file mode 100644 index 000000000..891e64470 --- /dev/null +++ b/tools/ipc/ff_ipc.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2017 THL A29 Limited, a Tencent company. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ff_ipc.h" + +static int inited; + +static struct rte_mempool *message_pool; + +static int +ff_ipc_init(void) +{ + if (inited) { + return 0; + } + + char *dpdk_argv[] = { + "-c1", "-n4", + "--proc-type=secondary", + "--log-level=0", + }; + + int ret = rte_eal_init(sizeof(dpdk_argv)/sizeof(dpdk_argv[0]), dpdk_argv); + if (ret < 0) { + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + } + + message_pool = rte_mempool_lookup(FF_MSG_POOL); + if (message_pool == NULL) { + rte_exit(EXIT_FAILURE, "lookup message pool:%s failed!\n", FF_MSG_POOL); + } + + inited = 1; + + return 0; +} + +struct ff_msg * +ff_ipc_msg_alloc(void) +{ + if (inited == 0) { + int ret = ff_ipc_init(); + if (ret < 0) { + return NULL; + } + } + + void *msg; + if (rte_mempool_get(message_pool, &msg) < 0) { + printf("get buffer from message pool failed.\n"); + return NULL; + } + + return (struct ff_msg *)msg; +} + +int +ff_ipc_msg_free(struct ff_msg *msg) +{ + if (inited == 0) { + printf("ff ipc not inited\n"); + return -1; + } + + rte_mempool_put(message_pool, msg); + + return 0; +} + +int 
+ff_ipc_send(const struct ff_msg *msg, uint16_t proc_id) +{ + int ret; + + if (inited == 0) { + printf("ff ipc not inited\n"); + return -1; + } + + char name[RTE_RING_NAMESIZE]; + snprintf(name, RTE_RING_NAMESIZE, "%s%u", + FF_MSG_RING_IN, proc_id); + struct rte_ring *ring = rte_ring_lookup(name); + if (ring == NULL) { + printf("lookup message ring:%s failed!\n", name); + return -1; + } + + ret = rte_ring_enqueue(ring, (void *)msg); + if (ret < 0) { + printf("ff_ipc_send failed\n"); + return ret; + } + + return 0; +} + +int +ff_ipc_recv(struct ff_msg **msg, uint16_t proc_id) +{ + int ret, i; + if (inited == 0) { + printf("ff ipc not inited\n"); + return -1; + } + + char name[RTE_RING_NAMESIZE]; + snprintf(name, RTE_RING_NAMESIZE, "%s%u", + FF_MSG_RING_OUT, proc_id); + struct rte_ring *ring = rte_ring_lookup(name); + if (ring == NULL) { + printf("lookup message ring:%s failed!\n", name); + return -1; + } + + void *obj; + #define MAX_ATTEMPTS_NUM 1000 + for (i = 0; i < MAX_ATTEMPTS_NUM; i++) { + ret = rte_ring_dequeue(ring, &obj); + if (ret == 0) { + *msg = (struct ff_msg *)obj; + break; + } + + usleep(1000); + } + + return ret; +} diff --git a/tools/ipc/ff_ipc.h b/tools/ipc/ff_ipc.h new file mode 100644 index 000000000..89102a3a3 --- /dev/null +++ b/tools/ipc/ff_ipc.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2017 THL A29 Limited, a Tencent company. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _FF_IPC_H_ +#define _FF_IPC_H_ + +#include "ff_msg.h" + +struct ff_msg *ff_ipc_msg_alloc(void); +int ff_ipc_msg_free(struct ff_msg *msg); + +int ff_ipc_send(const struct ff_msg *msg, uint16_t proc_id); +int ff_ipc_recv(struct ff_msg **msg, uint16_t proc_id); + +#endif diff --git a/tools/prog.mk b/tools/prog.mk new file mode 100644 index 000000000..ec0685846 --- /dev/null +++ b/tools/prog.mk @@ -0,0 +1,71 @@ +# +# Derived from FreeBSD src/share/mk/bsd.prog.mk +# + +ifdef DEBUG_FLAGS +CFLAGS+=${DEBUG_FLAGS} +CXXFLAGS+=${DEBUG_FLAGS} +endif + +ifdef NO_SHARED +ifneq (${NO_SHARED},no) +ifneq (${NO_SHARED},NO) +LDFLAGS+= -static +endif +endif +endif + +ifdef PROG_CXX +PROG= ${PROG_CXX} +endif + +ifndef PROG +$(error PROG or PROG_CXX must be defined.) +endif + +ifndef TOPDIR +$(error TOPDIR must be defined.) 
+endif + +ifndef SRCS +ifdef PROG_CXX +SRCS= ${PROG}.cc +else +SRCS= ${PROG}.c +endif +endif + +OBJS+= $(patsubst %.cc,%.o,$(patsubst %.c,%.o,${SRCS})) + +ifeq ($(FF_DPDK),) + FF_DPDK=${TOPDIR}/dpdk/x86_64-native-linuxapp-gcc +endif + +FF_PROG_CFLAGS:= -g -Wall -Werror -DFSTACK +FF_PROG_CFLAGS+= -I${TOPDIR}/lib -I${TOPDIR}/tools/ipc +FF_PROG_CFLAGS+= -include ${FF_DPDK}/include/rte_config.h +FF_PROG_CFLAGS+= -I${FF_DPDK}/include + +FF_PROG_LIBS:= -L${TOPDIR}/tools/ipc -lfstack_ipc +FF_PROG_LIBS+= -L${FF_DPDK}/lib +FF_PROG_LIBS+= -g -Wl,--no-as-needed -fvisibility=default -pthread -lm -lrt +FF_PROG_LIBS+= -Wl,--whole-archive -lrte_eal -Wl,-lrte_mempool -lrte_ring +FF_PROG_LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto + +CFLAGS+= ${FF_PROG_CFLAGS} +CXXFLAGS+= ${FF_PROG_CFLAGS} + +LIBS+= ${FF_PROG_LIBS} + +${PROG}: ${OBJS} +ifdef PROG_CXX + ${CXX} ${CXXFLAGS} ${LDFLAGS} -o $@ ${OBJS} ${LIBS} +else + ${CC} ${CFLAGS} ${LDFLAGS} -o $@ ${OBJS} ${LIBS} +endif + + +clean: + @rm -f ${PROG} ${OBJS} + +all: ${PROG} diff --git a/tools/sysctl/Makefile b/tools/sysctl/Makefile new file mode 100644 index 000000000..ed9e3166a --- /dev/null +++ b/tools/sysctl/Makefile @@ -0,0 +1,5 @@ +TOPDIR?=${CURDIR}/../.. + +PROG=sysctl + +include ${TOPDIR}/tools/prog.mk diff --git a/tools/sysctl/sysctl.8 b/tools/sysctl/sysctl.8 new file mode 100644 index 000000000..8d2d31603 --- /dev/null +++ b/tools/sysctl/sysctl.8 @@ -0,0 +1,326 @@ +.\" Copyright (c) 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" From: @(#)sysctl.8 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD$ +.\" +.Dd December 10, 2015 +.Dt SYSCTL 8 +.Os +.Sh NAME +.Nm sysctl +.Nd get or set kernel state +.Sh SYNOPSIS +.Nm +.Op Fl bdehiNnoRTtqx +.Op Fl B Ar bufsize +.Op Fl f Ar filename +.Ar name Ns Op = Ns Ar value +.Ar ... +.Nm +.Op Fl bdehNnoRTtqx +.Op Fl B Ar bufsize +.Fl a +.Sh DESCRIPTION +The +.Nm +utility retrieves kernel state and allows processes with appropriate +privilege to set kernel state. +The state to be retrieved or set is described using a +.Dq Management Information Base +.Pq Dq MIB +style name, described as a dotted set of +components. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl A +Equivalent to +.Fl o a +(for compatibility). 
+.It Fl a +List all the currently available non-opaque values. +This option is ignored if one or more variable names are specified on +the command line. +.It Fl b +Force the value of the variable(s) to be output in raw, binary format. +No names are printed and no terminating newlines are output. +This is mostly useful with a single variable. +.It Fl B Ar bufsize +Set the buffer size to read from the +.Nm +to +.Ar bufsize . +This is necessary for a +.Nm +that has variable length, and the probe value of 0 is a valid length, such as +.Va kern.arandom . +.It Fl d +Print the description of the variable instead of its value. +.It Fl e +Separate the name and the value of the variable(s) with +.Ql = . +This is useful for producing output which can be fed back to the +.Nm +utility. +This option is ignored if either +.Fl N +or +.Fl n +is specified, or a variable is being set. +.It Fl f Ar filename +Specify a file which contains a pair of name and value in each line. +.Nm +reads and processes the specified file first and then processes the name +and value pairs in the command line argument. +.It Fl h +Format output for human, rather than machine, readability. +.It Fl i +Ignore unknown OIDs. +The purpose is to make use of +.Nm +for collecting data from a variety of machines (not all of which +are necessarily running exactly the same software) easier. +.It Fl N +Show only variable names, not their values. +This is particularly useful with shells that offer programmable +completion. +To enable completion of variable names in +.Xr zsh 1 Pq Pa ports/shells/zsh , +use the following code: +.Bd -literal -offset indent +listsysctls () { set -A reply $(sysctl -AN ${1%.*}) } +compctl -K listsysctls sysctl +.Ed +.Pp +To enable completion of variable names in +.Xr tcsh 1 , +use: +.Pp +.Dl "complete sysctl 'n/*/`sysctl -Na`/'" +.It Fl n +Show only variable values, not their names. +This option is useful for setting shell variables. 
+For instance, to save the pagesize in variable +.Va psize , +use: +.Pp +.Dl "set psize=`sysctl -n hw.pagesize`" +.It Fl o +Show opaque variables (which are normally suppressed). +The format and length are printed, as well as a hex dump of the first +sixteen bytes of the value. +.It Fl q +Suppress some warnings generated by +.Nm +to standard error. +.It Fl T +Display only variables that are settable via loader (CTLFLAG_TUN). +.It Fl t +Print the type of the variable. +.It Fl W +Display only writable variables that are not statistical. +Useful for determining the set of runtime tunable sysctls. +.It Fl X +Equivalent to +.Fl x a +(for compatibility). +.It Fl x +As +.Fl o , +but prints a hex dump of the entire value instead of just the first +few bytes. +.El +.Pp +The information available from +.Nm +consists of integers, strings, and opaque types. +The +.Nm +utility +only knows about a couple of opaque types, and will resort to hexdumps +for the rest. +The opaque information is much more useful if retrieved by special +purpose programs such as +.Xr ps 1 , +.Xr systat 1 , +and +.Xr netstat 1 . +.Pp +Some of the variables which cannot be modified during normal system +operation can be initialized via +.Xr loader 8 +tunables. +This can for example be done by setting them in +.Xr loader.conf 5 . +Please refer to +.Xr loader.conf 5 +for more information on which tunables are available and how to set them. +.Pp +The string and integer information is summarized below. +For a detailed description of these variable see +.Xr sysctl 3 . +.Pp +The changeable column indicates whether a process with appropriate +privilege can change the value. +String and integer values can be set using +.Nm . 
+.Bl -column security.bsd.unprivileged_read_msgbuf integerxxx +.It Sy "Name Type Changeable" +.It "kern.ostype string no" +.It "kern.osrelease string no" +.It "kern.osrevision integer no" +.It "kern.version string no" +.It "kern.maxvnodes integer yes" +.It "kern.maxproc integer no" +.It "kern.maxprocperuid integer yes" +.It "kern.maxfiles integer yes" +.It "kern.maxfilesperproc integer yes" +.It "kern.argmax integer no" +.It "kern.securelevel integer raise only" +.It "kern.hostname string yes" +.It "kern.hostid integer yes" +.It "kern.clockrate struct no" +.It "kern.posix1version integer no" +.It "kern.ngroups integer no" +.It "kern.job_control integer no" +.It "kern.saved_ids integer no" +.It "kern.boottime struct no" +.It "kern.domainname string yes" +.It "kern.filedelay integer yes" +.It "kern.dirdelay integer yes" +.It "kern.metadelay integer yes" +.It "kern.osreldate string no" +.It "kern.bootfile string yes" +.It "kern.corefile string yes" +.It "kern.logsigexit integer yes" +.It "security.bsd.suser_enabled integer yes" +.It "security.bsd.see_other_uids integer yes" +.It "security.bsd.unprivileged_proc_debug integer yes" +.It "security.bsd.unprivileged_read_msgbuf integer yes" +.It "vm.loadavg struct no" +.It "hw.machine string no" +.It "hw.model string no" +.It "hw.ncpu integer no" +.It "hw.byteorder integer no" +.It "hw.physmem integer no" +.It "hw.usermem integer no" +.It "hw.pagesize integer no" +.It "hw.floatingpoint integer no" +.It "hw.machine_arch string no" +.It "hw.realmem integer no" +.It "machdep.adjkerntz integer yes" +.It "machdep.disable_rtc_set integer yes" +.It "machdep.guessed_bootdev string no" +.It "user.cs_path string no" +.It "user.bc_base_max integer no" +.It "user.bc_dim_max integer no" +.It "user.bc_scale_max integer no" +.It "user.bc_string_max integer no" +.It "user.coll_weights_max integer no" +.It "user.expr_nest_max integer no" +.It "user.line_max integer no" +.It "user.re_dup_max integer no" +.It "user.posix2_version integer no" 
+.It "user.posix2_c_bind integer no" +.It "user.posix2_c_dev integer no" +.It "user.posix2_char_term integer no" +.It "user.posix2_fort_dev integer no" +.It "user.posix2_fort_run integer no" +.It "user.posix2_localedef integer no" +.It "user.posix2_sw_dev integer no" +.It "user.posix2_upe integer no" +.It "user.stream_max integer no" +.It "user.tzname_max integer no" +.El +.Sh FILES +.Bl -tag -width ".In netinet/icmp_var.h" -compact +.It In sys/sysctl.h +definitions for top level identifiers, second level kernel and hardware +identifiers, and user level identifiers +.It In sys/socket.h +definitions for second level network identifiers +.It In sys/gmon.h +definitions for third level profiling identifiers +.It In vm/vm_param.h +definitions for second level virtual memory identifiers +.It In netinet/in.h +definitions for third level Internet identifiers and +fourth level IP identifiers +.It In netinet/icmp_var.h +definitions for fourth level ICMP identifiers +.It In netinet/udp_var.h +definitions for fourth level UDP identifiers +.El +.Sh EXAMPLES +For example, to retrieve the maximum number of processes allowed +in the system, one would use the following request: +.Pp +.Dl "sysctl kern.maxproc" +.Pp +To set the maximum number of processes allowed +per uid to 1000, one would use the following request: +.Pp +.Dl "sysctl kern.maxprocperuid=1000" +.Pp +Information about the system clock rate may be obtained with: +.Pp +.Dl "sysctl kern.clockrate" +.Pp +Information about the load average history may be obtained with: +.Pp +.Dl "sysctl vm.loadavg" +.Pp +More variables than these exist, and the best and likely only place +to search for their deeper meaning is undoubtedly the source where +they are defined. +.Sh COMPATIBILITY +The +.Fl w +option has been deprecated and is silently ignored. +.Sh SEE ALSO +.Xr sysctl 3 , +.Xr loader.conf 5 , +.Xr sysctl.conf 5 , +.Xr loader 8 +.Sh HISTORY +A +.Nm +utility first appeared in +.Bx 4.4 . 
+.Pp +In +.Fx 2.2 , +.Nm +was significantly remodeled. +.Sh BUGS +The +.Nm +utility presently exploits an undocumented interface to the kernel +sysctl facility to traverse the sysctl tree and to retrieve format +and name information. +The correct interface is being thought about for the time being. diff --git a/tools/sysctl/sysctl.c b/tools/sysctl/sysctl.c new file mode 100644 index 000000000..e9b29e876 --- /dev/null +++ b/tools/sysctl/sysctl.c @@ -0,0 +1,1370 @@ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Copyright (C) 2017 THL A29 Limited, a Tencent company. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived in part from FreeBSD's /sbin/sysctl/sysctl.c. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)from: sysctl.c 8.1 (Berkeley) 6/6/93"; +#endif +static const char rcsid[] = "$FreeBSD$"; +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ff_ipc.h" + +/* + * Definitions for sysctl call. The sysctl call uses a hierarchical name + * for objects that can be examined or modified. The name is expressed as + * a sequence of integers. Like a file path name, the meaning of each + * component depends on its place in the hierarchy. The top-level and kern + * identifiers are defined here, and other identifiers are defined in the + * respective subsystem header files. 
+ */ + +#define CTL_MAXNAME 24 /* largest number of components supported */ + +#define CTLTYPE 0xf /* mask for the type */ +#define CTLTYPE_NODE 1 /* name is a node */ +#define CTLTYPE_INT 2 /* name describes an integer */ +#define CTLTYPE_STRING 3 /* name describes a string */ +#define CTLTYPE_S64 4 /* name describes a signed 64-bit number */ +#define CTLTYPE_OPAQUE 5 /* name describes a structure */ +#define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */ +#define CTLTYPE_UINT 6 /* name describes an unsigned integer */ +#define CTLTYPE_LONG 7 /* name describes a long */ +#define CTLTYPE_ULONG 8 /* name describes an unsigned long */ +#define CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */ +#define CTLTYPE_U8 0xa /* name describes an unsigned 8-bit number */ +#define CTLTYPE_U16 0xb /* name describes an unsigned 16-bit number */ +#define CTLTYPE_S8 0xc /* name describes a signed 8-bit number */ +#define CTLTYPE_S16 0xd /* name describes a signed 16-bit number */ +#define CTLTYPE_S32 0xe /* name describes a signed 32-bit number */ +#define CTLTYPE_U32 0xf /* name describes an unsigned 32-bit number */ + +#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */ +#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ +#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR) +#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ +#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ +#define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ +#define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */ +#define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */ +#define CTLMASK_SECURE 0x00F00000 /* Secure level */ +#define CTLFLAG_TUN 0x00080000 /* Default value is loaded from getenv() */ +#define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN) +#define CTLFLAG_RWTUN (CTLFLAG_RW|CTLFLAG_TUN) +#define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */ +#define CTLFLAG_VNET 0x00020000 /* Prisons with 
vnet can fiddle */ +#define CTLFLAG_DYING 0x00010000 /* Oid is being removed */ +#define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */ +#define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */ +#define CTLFLAG_STATS 0x00002000 /* Statistics, not a tuneable */ +#define CTLFLAG_NOFETCH 0x00001000 /* Don't fetch tunable from getenv() */ +#define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR) + + +struct clockinfo { + int hz; /* clock frequency */ + int tick; /* micro-seconds per hz tick */ + int spare; + int stathz; /* statistics clock frequency */ + int profhz; /* profiling clock frequency */ +}; + +struct loadavg { + __uint32_t ldavg[3]; + long fscale; +}; + +/* Structure extended to include extended attribute field in ACPI 3.0. */ +struct bios_smap_xattr { + u_int64_t base; + u_int64_t length; + u_int32_t type; + u_int32_t xattr; +} __packed; + +/* systemwide totals computed every five seconds */ +struct vmtotal { + int16_t t_rq; /* length of the run queue */ + int16_t t_dw; /* jobs in ``disk wait'' (neg priority) */ + int16_t t_pw; /* jobs in page wait */ + int16_t t_sl; /* jobs sleeping in core */ + int16_t t_sw; /* swapped out runnable/short block jobs */ + int32_t t_vm; /* total virtual memory */ + int32_t t_avm; /* active virtual memory */ + int32_t t_rm; /* total real memory in use */ + int32_t t_arm; /* active real memory */ + int32_t t_vmshr; /* shared virtual memory */ + int32_t t_avmshr; /* active shared virtual memory */ + int32_t t_rmshr; /* shared real memory */ + int32_t t_armshr; /* active shared real memory */ + int32_t t_free; /* free memory pages */ +}; + +struct efi_md { + uint32_t md_type; +#define EFI_MD_TYPE_NULL 0 +#define EFI_MD_TYPE_CODE 1 /* Loader text. */ +#define EFI_MD_TYPE_DATA 2 /* Loader data. */ +#define EFI_MD_TYPE_BS_CODE 3 /* Boot services text. */ +#define EFI_MD_TYPE_BS_DATA 4 /* Boot services data. */ +#define EFI_MD_TYPE_RT_CODE 5 /* Runtime services text. 
*/ +#define EFI_MD_TYPE_RT_DATA 6 /* Runtime services data. */ +#define EFI_MD_TYPE_FREE 7 /* Unused/free memory. */ +#define EFI_MD_TYPE_BAD 8 /* Bad memory */ +#define EFI_MD_TYPE_RECLAIM 9 /* ACPI reclaimable memory. */ +#define EFI_MD_TYPE_FIRMWARE 10 /* ACPI NV memory */ +#define EFI_MD_TYPE_IOMEM 11 /* Memory-mapped I/O. */ +#define EFI_MD_TYPE_IOPORT 12 /* I/O port space. */ +#define EFI_MD_TYPE_PALCODE 13 /* PAL */ + uint32_t __pad; + uint64_t md_phys; + void *md_virt; + uint64_t md_pages; + uint64_t md_attr; +#define EFI_MD_ATTR_UC 0x0000000000000001UL +#define EFI_MD_ATTR_WC 0x0000000000000002UL +#define EFI_MD_ATTR_WT 0x0000000000000004UL +#define EFI_MD_ATTR_WB 0x0000000000000008UL +#define EFI_MD_ATTR_UCE 0x0000000000000010UL +#define EFI_MD_ATTR_WP 0x0000000000001000UL +#define EFI_MD_ATTR_RP 0x0000000000002000UL +#define EFI_MD_ATTR_XP 0x0000000000004000UL +#define EFI_MD_ATTR_RT 0x8000000000000000UL +}; + +struct efi_map_header { + uint64_t memory_size; + uint64_t descriptor_size; + uint32_t descriptor_version; +}; + +static const char *conffile; + +static int aflag, bflag, Bflag, dflag, eflag, hflag, iflag; +static int Nflag, nflag, oflag, qflag, tflag, Tflag, Wflag, xflag; +static uint16_t proc_id; + +static int oidfmt(int *, int, char *, u_int *); +static int parsefile(const char *); +static int parse(const char *, int); +static int show_var(int *, int); +static int sysctl_all(int *oid, int len); +static int name2oid(const char *, int *); + +static int strIKtoi(const char *, char **, const char *); + +static int sysctl_ipc(int *name, unsigned namelen, void *old, + size_t *oldlenp, const void *new, size_t newlen); + +static int ctl_sign[CTLTYPE+1] = { + [CTLTYPE_INT] = 1, + [CTLTYPE_LONG] = 1, + [CTLTYPE_S8] = 1, + [CTLTYPE_S16] = 1, + [CTLTYPE_S32] = 1, + [CTLTYPE_S64] = 1, +}; + +static int ctl_size[CTLTYPE+1] = { + [CTLTYPE_INT] = sizeof(int), + [CTLTYPE_UINT] = sizeof(u_int), + [CTLTYPE_LONG] = sizeof(long), + [CTLTYPE_ULONG] = sizeof(u_long), 
+ [CTLTYPE_S8] = sizeof(int8_t), + [CTLTYPE_S16] = sizeof(int16_t), + [CTLTYPE_S32] = sizeof(int32_t), + [CTLTYPE_S64] = sizeof(int64_t), + [CTLTYPE_U8] = sizeof(uint8_t), + [CTLTYPE_U16] = sizeof(uint16_t), + [CTLTYPE_U32] = sizeof(uint32_t), + [CTLTYPE_U64] = sizeof(uint64_t), +}; + +static const char *ctl_typename[CTLTYPE+1] = { + [CTLTYPE_INT] = "integer", + [CTLTYPE_UINT] = "unsigned integer", + [CTLTYPE_LONG] = "long integer", + [CTLTYPE_ULONG] = "unsigned long", + [CTLTYPE_U8] = "uint8_t", + [CTLTYPE_U16] = "uint16_t", + [CTLTYPE_U32] = "uint16_t", + [CTLTYPE_U64] = "uint64_t", + [CTLTYPE_S8] = "int8_t", + [CTLTYPE_S16] = "int16_t", + [CTLTYPE_S32] = "int32_t", + [CTLTYPE_S64] = "int64_t", + [CTLTYPE_NODE] = "node", + [CTLTYPE_STRING] = "string", + [CTLTYPE_OPAQUE] = "opaque", +}; + +static void +usage(void) +{ + (void)fprintf(stderr, "%s\n%s\n", + "usage: sysctl -p [-bdehiNnoqTtWx] [ -B ] [-f filename] name[=value] ...", + " sysctl -p [-bdehNnoqTtWx] [ -B ] -a"); + exit(1); +} + +int +main(int argc, char **argv) +{ + int ch; + int warncount = 0; + + setlocale(LC_NUMERIC, ""); + setbuf(stdout,0); + setbuf(stderr,0); + + while ((ch = getopt(argc, argv, "AabB:def:hiNnoqtTwWxXp:")) != -1) { + switch (ch) { + case 'A': + /* compatibility */ + aflag = oflag = 1; + break; + case 'a': + aflag = 1; + break; + case 'b': + bflag = 1; + break; + case 'B': + Bflag = strtol(optarg, NULL, 0); + break; + case 'd': + dflag = 1; + break; + case 'e': + eflag = 1; + break; + case 'f': + conffile = optarg; + break; + case 'h': + hflag = 1; + break; + case 'i': + iflag = 1; + break; + case 'N': + Nflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'o': + oflag = 1; + break; + case 'q': + qflag = 1; + break; + case 't': + tflag = 1; + break; + case 'T': + Tflag = 1; + break; + case 'w': + /* compatibility */ + /* ignored */ + break; + case 'W': + Wflag = 1; + break; + case 'X': + /* compatibility */ + aflag = xflag = 1; + break; + case 'x': + xflag = 1; + break; + case 
'p': + proc_id = atoi(optarg); + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (Nflag && nflag) + usage(); + if (aflag && argc == 0) + exit(sysctl_all(0, 0)); + if (argc == 0 && conffile == NULL) + usage(); + + warncount = 0; + if (conffile != NULL) + warncount += parsefile(conffile); + + while (argc-- > 0) + warncount += parse(*argv++, 0); + + return (warncount); +} + +/* + * Parse a name into a MIB entry. + * Lookup and print out the MIB entry if it exists. + * Set a new value if requested. + */ +static int +parse(const char *string, int lineno) +{ + int len, i, j; + const void *newval; + const char *newvalstr = NULL; + int8_t i8val; + uint8_t u8val; + int16_t i16val; + uint16_t u16val; + int32_t i32val; + uint32_t u32val; + int intval; + unsigned int uintval; + long longval; + unsigned long ulongval; + size_t newsize = Bflag; + int64_t i64val; + uint64_t u64val; + int mib[CTL_MAXNAME]; + char *cp, *bufp, buf[BUFSIZ], *endptr = NULL, fmt[BUFSIZ], line[BUFSIZ]; + u_int kind; + + if (lineno) + snprintf(line, sizeof(line), " at line %d", lineno); + else + line[0] = '\0'; + + cp = buf; + if (snprintf(buf, BUFSIZ, "%s", string) >= BUFSIZ) { + warnx("oid too long: '%s'%s", string, line); + return (1); + } + bufp = strsep(&cp, "=:"); + if (cp != NULL) { + /* Tflag just lists tunables, do not allow assignment */ + if (Tflag || Wflag) { + warnx("Can't set variables when using -T or -W"); + usage(); + } + while (isspace(*cp)) + cp++; + /* Strip a pair of " or ' if any. 
*/ + switch (*cp) { + case '\"': + case '\'': + if (cp[strlen(cp) - 1] == *cp) + cp[strlen(cp) - 1] = '\0'; + cp++; + } + newvalstr = cp; + newsize = strlen(cp); + } + /* Trim spaces */ + cp = bufp + strlen(bufp) - 1; + while (cp >= bufp && isspace((int)*cp)) { + *cp = '\0'; + cp--; + } + len = name2oid(bufp, mib); + + if (len < 0) { + if (iflag) + return (0); + if (qflag) + return (1); + else { + if (errno == ENOENT) { + warnx("unknown oid '%s'%s", bufp, line); + } else { + warn("unknown oid '%s'%s", bufp, line); + } + return (1); + } + } + + if (oidfmt(mib, len, fmt, &kind)) { + warn("couldn't find format of oid '%s'%s", bufp, line); + if (iflag) + return (1); + else + exit(1); + } + + if (newvalstr == NULL || dflag) { + if ((kind & CTLTYPE) == CTLTYPE_NODE) { + if (dflag) { + i = show_var(mib, len); + if (!i && !bflag) + putchar('\n'); + } + sysctl_all(mib, len); + } else { + i = show_var(mib, len); + if (!i && !bflag) + putchar('\n'); + } + } else { + if ((kind & CTLTYPE) == CTLTYPE_NODE) { + warnx("oid '%s' isn't a leaf node%s", bufp, line); + return (1); + } + + if (!(kind & CTLFLAG_WR)) { + if (kind & CTLFLAG_TUN) { + warnx("oid '%s' is a read only tunable%s", bufp, line); + warnx("Tunable values are set in /boot/loader.conf"); + } else + warnx("oid '%s' is read only%s", bufp, line); + return (1); + } + + switch (kind & CTLTYPE) { + case CTLTYPE_INT: + case CTLTYPE_UINT: + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + case CTLTYPE_S8: + case CTLTYPE_S16: + case CTLTYPE_S32: + case CTLTYPE_S64: + case CTLTYPE_U8: + case CTLTYPE_U16: + case CTLTYPE_U32: + case CTLTYPE_U64: + if (strlen(newvalstr) == 0) { + warnx("empty numeric value"); + return (1); + } + /* FALLTHROUGH */ + case CTLTYPE_STRING: + break; + default: + warnx("oid '%s' is type %d," + " cannot set that%s", bufp, + kind & CTLTYPE, line); + return (1); + } + + errno = 0; + + switch (kind & CTLTYPE) { + case CTLTYPE_INT: + if (strncmp(fmt, "IK", 2) == 0) + intval = strIKtoi(newvalstr, &endptr, fmt); + 
else + intval = (int)strtol(newvalstr, &endptr, + 0); + newval = &intval; + newsize = sizeof(intval); + break; + case CTLTYPE_UINT: + uintval = (int) strtoul(newvalstr, &endptr, 0); + newval = &uintval; + newsize = sizeof(uintval); + break; + case CTLTYPE_LONG: + longval = strtol(newvalstr, &endptr, 0); + newval = &longval; + newsize = sizeof(longval); + break; + case CTLTYPE_ULONG: + ulongval = strtoul(newvalstr, &endptr, 0); + newval = &ulongval; + newsize = sizeof(ulongval); + break; + case CTLTYPE_STRING: + newval = newvalstr; + break; + case CTLTYPE_S8: + i8val = (int8_t)strtol(newvalstr, &endptr, 0); + newval = &i8val; + newsize = sizeof(i8val); + break; + case CTLTYPE_S16: + i16val = (int16_t)strtol(newvalstr, &endptr, + 0); + newval = &i16val; + newsize = sizeof(i16val); + break; + case CTLTYPE_S32: + i32val = (int32_t)strtol(newvalstr, &endptr, + 0); + newval = &i32val; + newsize = sizeof(i32val); + break; + case CTLTYPE_S64: + i64val = strtoimax(newvalstr, &endptr, 0); + newval = &i64val; + newsize = sizeof(i64val); + break; + case CTLTYPE_U8: + u8val = (uint8_t)strtoul(newvalstr, &endptr, 0); + newval = &u8val; + newsize = sizeof(u8val); + break; + case CTLTYPE_U16: + u16val = (uint16_t)strtoul(newvalstr, &endptr, + 0); + newval = &u16val; + newsize = sizeof(u16val); + break; + case CTLTYPE_U32: + u32val = (uint32_t)strtoul(newvalstr, &endptr, + 0); + newval = &u32val; + newsize = sizeof(u32val); + break; + case CTLTYPE_U64: + u64val = strtoumax(newvalstr, &endptr, 0); + newval = &u64val; + newsize = sizeof(u64val); + break; + default: + /* NOTREACHED */ + abort(); + } + + if (errno != 0 || endptr == newvalstr || + (endptr != NULL && *endptr != '\0')) { + warnx("invalid %s '%s'%s", ctl_typename[kind & CTLTYPE], + newvalstr, line); + return (1); + } + + i = show_var(mib, len); + if (sysctl_ipc(mib, len, 0, 0, newval, newsize) == -1) { + if (!i && !bflag) + putchar('\n'); + switch (errno) { + case EOPNOTSUPP: + warnx("%s: value is not available%s", + 
/*
 * Apply every setting found in a sysctl configuration file.
 *
 * Each line read from `filename' has its comment removed, is trimmed of
 * leading and trailing whitespace and, if anything is left, is handed to
 * parse() together with its 1-based line number.
 *
 * Comment removal looks at the '#' characters from left to right:
 *   - a '#' located after the first ' on the line (or, failing that, after
 *     the first ") cuts the line just past the next quote of the same
 *     kind, so a '#' inside a quoted value survives; when there is no
 *     such closing quote the line is left untouched;
 *   - otherwise the line is cut at the '#', unless it is preceded by a
 *     backslash, in which case the search continues with the next '#'.
 *
 * Exits through err(EX_NOINPUT, ...) when the file cannot be opened.
 * Returns the sum of the values returned by parse().
 */
static int
parsefile(const char *filename)
{
	FILE *file;
	char line[BUFSIZ], *p, *pq, *pdq;
	size_t n;
	int warncount = 0, lineno = 0;

	file = fopen(filename, "r");
	if (file == NULL)
		err(EX_NOINPUT, "%s", filename);
	while (fgets(line, sizeof(line), file) != NULL) {
		lineno++;
		p = line;
		pq = strchr(line, '\'');
		pdq = strchr(line, '\"');
		/* Replace the first # with \0. */
		while ((p = strchr(p, '#')) != NULL) {
			if (pq != NULL && p > pq) {
				if ((p = strchr(pq + 1, '\'')) != NULL)
					*(++p) = '\0';
				break;
			} else if (pdq != NULL && p > pdq) {
				if ((p = strchr(pdq + 1, '\"')) != NULL)
					*(++p) = '\0';
				break;
			} else if (p == line || *(p - 1) != '\\') {
				*p = '\0';
				break;
			}
			p++;
		}
		/*
		 * Trim trailing whitespace.  Working from the length keeps us
		 * from forming a pointer in front of line[] when the line is
		 * empty (e.g. a line that was nothing but a comment).  The
		 * cast is required because isspace() is only defined for
		 * values representable as unsigned char (or EOF).
		 */
		n = strlen(line);
		while (n > 0 && isspace((unsigned char)line[n - 1]))
			line[--n] = '\0';
		/* Skip leading whitespace. */
		p = line;
		while (isspace((unsigned char)*p))
			p++;
		if (*p == '\0')
			continue;
		warncount += parse(p, lineno);
	}
	fclose(file);

	return (warncount);
}
"{ hz = %'d, tick = %'d, profhz = %'d, stathz = %'d }" : + "{ hz = %d, tick = %d, profhz = %d, stathz = %d }", + ci->hz, ci->tick, ci->profhz, ci->stathz); + return (0); +} + +static int +S_loadavg(size_t l2, void *p) +{ + struct loadavg *tv = (struct loadavg*)p; + + if (l2 != sizeof(*tv)) { + warnx("S_loadavg %zu != %zu", l2, sizeof(*tv)); + return (1); + } + printf(hflag ? "{ %'.2f %'.2f %'.2f }" : "{ %.2f %.2f %.2f }", + (double)tv->ldavg[0]/(double)tv->fscale, + (double)tv->ldavg[1]/(double)tv->fscale, + (double)tv->ldavg[2]/(double)tv->fscale); + return (0); +} + +static int +S_timeval(size_t l2, void *p) +{ + struct timeval *tv = (struct timeval*)p; + time_t tv_sec; + char *p1, *p2; + + if (l2 != sizeof(*tv)) { + warnx("S_timeval %zu != %zu", l2, sizeof(*tv)); + return (1); + } + printf(hflag ? "{ sec = %'jd, usec = %'ld } " : + "{ sec = %jd, usec = %ld } ", + (intmax_t)tv->tv_sec, tv->tv_usec); + tv_sec = tv->tv_sec; + p1 = strdup(ctime(&tv_sec)); + for (p2=p1; *p2 ; p2++) + if (*p2 == '\n') + *p2 = '\0'; + fputs(p1, stdout); + free(p1); + return (0); +} + +static int +S_vmtotal(size_t l2, void *p) +{ + struct vmtotal *v = (struct vmtotal *)p; + int pageKilo = getpagesize() / 1024; + + if (l2 != sizeof(*v)) { + warnx("S_vmtotal %zu != %zu", l2, sizeof(*v)); + return (1); + } + + printf( + "\nSystem wide totals computed every five seconds:" + " (values in kilobytes)\n"); + printf("===============================================\n"); + printf( + "Processes:\t\t(RUNQ: %hd Disk Wait: %hd Page Wait: " + "%hd Sleep: %hd)\n", + v->t_rq, v->t_dw, v->t_pw, v->t_sl); + printf( + "Virtual Memory:\t\t(Total: %jdK Active: %jdK)\n", + (intmax_t)v->t_vm * pageKilo, (intmax_t)v->t_avm * pageKilo); + printf("Real Memory:\t\t(Total: %jdK Active: %jdK)\n", + (intmax_t)v->t_rm * pageKilo, (intmax_t)v->t_arm * pageKilo); + printf("Shared Virtual Memory:\t(Total: %jdK Active: %jdK)\n", + (intmax_t)v->t_vmshr * pageKilo, (intmax_t)v->t_avmshr * pageKilo); + printf("Shared Real 
Memory:\t(Total: %jdK Active: %jdK)\n", + (intmax_t)v->t_rmshr * pageKilo, (intmax_t)v->t_armshr * pageKilo); + printf("Free Memory:\t%jdK", (intmax_t)v->t_free * pageKilo); + + return (0); +} + +#ifdef __amd64__ +#define efi_next_descriptor(ptr, size) \ + ((struct efi_md *)(((uint8_t *) ptr) + size)) + +static int +S_efi_map(size_t l2, void *p) +{ + struct efi_map_header *efihdr; + struct efi_md *map; + const char *type; + size_t efisz; + int ndesc, i; + + static const char *types[] = { + "Reserved", + "LoaderCode", + "LoaderData", + "BootServicesCode", + "BootServicesData", + "RuntimeServicesCode", + "RuntimeServicesData", + "ConventionalMemory", + "UnusableMemory", + "ACPIReclaimMemory", + "ACPIMemoryNVS", + "MemoryMappedIO", + "MemoryMappedIOPortSpace", + "PalCode" + }; + + /* + * Memory map data provided by UEFI via the GetMemoryMap + * Boot Services API. + */ + if (l2 < sizeof(*efihdr)) { + warnx("S_efi_map length less than header"); + return (1); + } + efihdr = p; + efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; + map = (struct efi_md *)((uint8_t *)efihdr + efisz); + + if (efihdr->descriptor_size == 0) + return (0); + if (l2 != efisz + efihdr->memory_size) { + warnx("S_efi_map length mismatch %zu vs %zu", l2, efisz + + efihdr->memory_size); + return (1); + } + ndesc = efihdr->memory_size / efihdr->descriptor_size; + + printf("\n%23s %12s %12s %8s %4s", + "Type", "Physical", "Virtual", "#Pages", "Attr"); + + for (i = 0; i < ndesc; i++, + map = efi_next_descriptor(map, efihdr->descriptor_size)) { + if (map->md_type <= EFI_MD_TYPE_PALCODE) + type = types[map->md_type]; + else + type = ""; + printf("\n%23s %012lx %12p %08lx ", type, map->md_phys, + map->md_virt, map->md_pages); + if (map->md_attr & EFI_MD_ATTR_UC) + printf("UC "); + if (map->md_attr & EFI_MD_ATTR_WC) + printf("WC "); + if (map->md_attr & EFI_MD_ATTR_WT) + printf("WT "); + if (map->md_attr & EFI_MD_ATTR_WB) + printf("WB "); + if (map->md_attr & EFI_MD_ATTR_UCE) + printf("UCE "); + if 
/*
 * Convert a user-supplied temperature string into the integer stored by an
 * "IK"-format sysctl.
 *
 * str     non-empty value text: a number optionally ending in C, F or K.
 * endptrp set to NULL on success; on failure it holds whatever strtof() /
 *         strtol() left there.
 * fmt     the sysctl format string, starting with "IK".
 *
 * A value with a unit suffix is converted to Kelvin and scaled by
 * 10^precision; a value without a suffix is taken as the raw integer.
 * errno must be 0 on entry.  On any parse or range failure errno is set to
 * ERANGE and 0 is returned.
 */
static int
strIKtoi(const char *str, char **endptrp, const char *fmt)
{
	long kelv;
	float temp;
	double rounded;
	size_t len;
	const char *p;
	int prec, i;

	assert(errno == 0);

	len = strlen(str);
	/* caller already checked this */
	assert(len > 0);

	/*
	 * A format of "IK" is in deciKelvin.  A format of "IK3" is in
	 * milliKelvin.  The single digit following IK is log10 of the
	 * multiplying factor to convert Kelvin into the units of this sysctl,
	 * or the dividing factor to convert the sysctl value to Kelvin.  Numbers
	 * larger than 6 will run into precision issues with 32-bit integers.
	 * Characters that aren't ASCII digits after the 'K' are ignored.  No
	 * localization is present because this is an interface from the kernel
	 * to this program (eg not an end-user interface), so isdigit() isn't
	 * used here.
	 */
	if (fmt[2] >= '0' && fmt[2] <= '9')
		prec = fmt[2] - '0';
	else
		prec = 1;
	p = &str[len - 1];
	if (*p == 'C' || *p == 'F' || *p == 'K') {
		temp = strtof(str, endptrp);
		/* The number must end exactly at the unit letter. */
		if (*endptrp != str && *endptrp == p && errno == 0) {
			if (*p == 'F')
				temp = (temp - 32) * 5 / 9;
			if (*p != 'K')
				temp += 273.15;
			for (i = 0; i < prec; i++)
				temp *= 10.0;
			rounded = temp + 0.5;
			/* Also rejects NaN, which fails both comparisons. */
			if (rounded >= (double)INT_MIN &&
			    rounded <= (double)INT_MAX) {
				*endptrp = NULL;
				return ((int)rounded);
			}
		}
	} else {
		/* No unit specified -> treat it as a raw number */
		kelv = strtol(str, endptrp, 10);
		/*
		 * strtol() stops one past the last digit, i.e. at the
		 * terminating NUL, not at the last character; comparing
		 * against p rejected every unit-less value.
		 */
		if (*endptrp != str && **endptrp == '\0' && errno == 0 &&
		    kelv >= INT_MIN && kelv <= INT_MAX) {
			*endptrp = NULL;
			return ((int)kelv);
		}
	}

	errno = ERANGE;
	return (0);
}
+ * Return minus one if we had errors. + */ +static int +show_var(int *oid, int nlen) +{ + u_char buf[BUFSIZ], *val, *oval, *p; + char name[BUFSIZ], fmt[BUFSIZ]; + const char *sep, *sep1, *prntype; + int qoid[CTL_MAXNAME+2]; + uintmax_t umv; + intmax_t mv; + int i, hexlen, sign, ctltype; + size_t intlen; + size_t j, len; + u_int kind; + float base; + int (*func)(size_t, void *); + int prec; + + /* Silence GCC. */ + umv = mv = intlen = 0; + + bzero(buf, BUFSIZ); + bzero(fmt, BUFSIZ); + bzero(name, BUFSIZ); + qoid[0] = 0; + memcpy(qoid + 2, oid, nlen * sizeof(int)); + + qoid[1] = 1; + j = sizeof(name); + i = sysctl_ipc(qoid, nlen + 2, name, &j, 0, 0); + if (i || !j) + err(1, "sysctl name %d %zu %d", i, j, errno); + + oidfmt(oid, nlen, fmt, &kind); + /* if Wflag then only list sysctls that are writeable and not stats. */ + if (Wflag && ((kind & CTLFLAG_WR) == 0 || (kind & CTLFLAG_STATS) != 0)) + return 1; + + /* if Tflag then only list sysctls that are tuneables. */ + if (Tflag && (kind & CTLFLAG_TUN) == 0) + return 1; + + if (Nflag) { + printf("%s", name); + return (0); + } + + if (eflag) + sep = "="; + else + sep = ": "; + + ctltype = (kind & CTLTYPE); + if (tflag || dflag) { + if (!nflag) + printf("%s%s", name, sep); + if (ctl_typename[ctltype] != NULL) + prntype = ctl_typename[ctltype]; + else + prntype = "unknown"; + if (tflag && dflag) + printf("%s%s", prntype, sep); + else if (tflag) { + printf("%s", prntype); + return (0); + } + qoid[1] = 5; + j = sizeof(buf); + i = sysctl_ipc(qoid, nlen + 2, buf, &j, 0, 0); + printf("%s", buf); + return (0); + } + /* find an estimate of how much we need for this var */ + if (Bflag) + j = Bflag; + else { + j = 0; + i = sysctl_ipc(oid, nlen, 0, &j, 0, 0); + j += j; /* we want to be sure :-) */ + } + + val = oval = malloc(j + 1); + if (val == NULL) { + warnx("malloc failed"); + return (1); + } + len = j; + i = sysctl_ipc(oid, nlen, val, &len, 0, 0); + if (i != 0 || (len == 0 && ctltype != CTLTYPE_STRING)) { + free(oval); + 
return (1); + } + + if (bflag) { + fwrite(val, 1, len, stdout); + free(oval); + return (0); + } + val[len] = '\0'; + p = val; + sign = ctl_sign[ctltype]; + intlen = ctl_size[ctltype]; + + switch (ctltype) { + case CTLTYPE_STRING: + if (!nflag) + printf("%s%s", name, sep); + printf("%.*s", (int)len, p); + free(oval); + return (0); + + case CTLTYPE_INT: + case CTLTYPE_UINT: + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + case CTLTYPE_S8: + case CTLTYPE_S16: + case CTLTYPE_S32: + case CTLTYPE_S64: + case CTLTYPE_U8: + case CTLTYPE_U16: + case CTLTYPE_U32: + case CTLTYPE_U64: + if (!nflag) + printf("%s%s", name, sep); + hexlen = 2 + (intlen * CHAR_BIT + 3) / 4; + sep1 = ""; + while (len >= intlen) { + switch (kind & CTLTYPE) { + case CTLTYPE_INT: + case CTLTYPE_UINT: + umv = *(u_int *)p; + mv = *(int *)p; + break; + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + umv = *(u_long *)p; + mv = *(long *)p; + break; + case CTLTYPE_S8: + case CTLTYPE_U8: + umv = *(uint8_t *)p; + mv = *(int8_t *)p; + break; + case CTLTYPE_S16: + case CTLTYPE_U16: + umv = *(uint16_t *)p; + mv = *(int16_t *)p; + break; + case CTLTYPE_S32: + case CTLTYPE_U32: + umv = *(uint32_t *)p; + mv = *(int32_t *)p; + break; + case CTLTYPE_S64: + case CTLTYPE_U64: + umv = *(uint64_t *)p; + mv = *(int64_t *)p; + break; + } + fputs(sep1, stdout); + if (xflag) + printf("%#0*jx", hexlen, umv); + else if (!sign) + printf(hflag ? "%'ju" : "%ju", umv); + else if (fmt[1] == 'K') { + if (mv < 0) + printf("%jd", mv); + else { + /* + * See strIKtoi for details on fmt. + */ + int i; + prec = 1; + if (fmt[2] != '\0') + prec = fmt[2] - '0'; + base = 1.0; + for (i = 0; i < prec; i++) + base *= 10.0; + printf("%.*fC", prec, + (float)mv / base - 273.15); + } + } else + printf(hflag ? 
"%'jd" : "%jd", mv); + sep1 = " "; + len -= intlen; + p += intlen; + } + free(oval); + return (0); + + case CTLTYPE_OPAQUE: + i = 0; + if (strcmp(fmt, "S,clockinfo") == 0) + func = S_clockinfo; + else if (strcmp(fmt, "S,timeval") == 0) + func = S_timeval; + else if (strcmp(fmt, "S,loadavg") == 0) + func = S_loadavg; + else if (strcmp(fmt, "S,vmtotal") == 0) + func = S_vmtotal; +#ifdef __amd64__ + else if (strcmp(fmt, "S,efi_map_header") == 0) + func = S_efi_map; +#endif +#if defined(__amd64__) || defined(__i386__) + else if (strcmp(fmt, "S,bios_smap_xattr") == 0) + func = S_bios_smap_xattr; +#endif + else + func = NULL; + if (func) { + if (!nflag) + printf("%s%s", name, sep); + i = (*func)(len, p); + free(oval); + return (i); + } + /* FALLTHROUGH */ + default: + if (!oflag && !xflag) { + free(oval); + return (1); + } + if (!nflag) + printf("%s%s", name, sep); + printf("Format:%s Length:%zu Dump:0x", fmt, len); + while (len-- && (xflag || p < val + 16)) + printf("%02x", *p++); + if (!xflag && len > 16) + printf("..."); + free(oval); + return (0); + } + free(oval); + return (1); +} + +static int +sysctl_all(int *oid, int len) +{ + int name1[22], name2[22]; + int i, j; + size_t l1, l2; + + name1[0] = 0; + name1[1] = 2; + l1 = 2; + if (len) { + memcpy(name1+2, oid, len * sizeof(int)); + l1 += len; + } else { + name1[2] = 1; + l1++; + } + for (;;) { + l2 = sizeof(name2); + j = sysctl_ipc(name1, l1, name2, &l2, 0, 0); + if (j < 0) { + if (errno == ENOENT) + return (0); + else + err(1, "sysctl_ipc(getnext) %d %zu", j, l2); + } + + l2 /= sizeof(int); + + if (len < 0 || l2 < (unsigned int)len) + return (0); + + for (i = 0; i < len; i++) + if (name2[i] != oid[i]) + return (0); + + i = show_var(name2, l2); + if (!i && !bflag) + putchar('\n'); + + memcpy(name1+2, name2, l2 * sizeof(int)); + l1 = 2 + l2; + } +} + +static int +sysctl_ipc(int *name, unsigned namelen, void *old, + size_t *oldlenp, const void *new, size_t newlen) +{ + struct ff_msg *msg, *retmsg = NULL; + + if (old 
!= NULL && oldlenp == NULL) { + errno = EINVAL; + return -1; + } + + msg = ff_ipc_msg_alloc(); + if (msg == NULL) { + errno = ENOMEM; + return -1; + } + + size_t oldlen = 0; + if (oldlenp) { + oldlen = *oldlenp; + } + + if (namelen + oldlen + newlen > msg->buf_len) { + errno = EINVAL; + ff_ipc_msg_free(msg); + return -1; + } + + char *buf_addr = msg->buf_addr; + + msg->msg_type = FF_SYSCTL; + msg->sysctl.name = (int *)buf_addr; + msg->sysctl.namelen = namelen; + memcpy(msg->sysctl.name, name, namelen*sizeof(int)); + + buf_addr += namelen*sizeof(int); + + if (new != NULL && newlen != 0) { + msg->sysctl.new = buf_addr; + msg->sysctl.newlen = newlen; + memcpy(msg->sysctl.new, new, newlen); + + buf_addr += newlen; + } else { + msg->sysctl.new = NULL; + msg->sysctl.newlen = 0; + } + + if (oldlenp != NULL) { + msg->sysctl.oldlenp = (size_t *)buf_addr; + memcpy(msg->sysctl.oldlenp, oldlenp, sizeof(size_t)); + buf_addr += sizeof(size_t); + + if (old != NULL) { + msg->sysctl.old = (void *)buf_addr; + memcpy(msg->sysctl.old, old, *oldlenp); + buf_addr += *oldlenp; + } else { + msg->sysctl.old = NULL; + } + } else { + msg->sysctl.oldlenp = NULL; + msg->sysctl.old = NULL; + } + + int ret = ff_ipc_send(msg, proc_id); + if (ret < 0) { + errno = EPIPE; + ff_ipc_msg_free(msg); + return -1; + } + + do { + if (retmsg != NULL) { + ff_ipc_msg_free(retmsg); + } + ret = ff_ipc_recv(&retmsg, proc_id); + if (ret < 0) { + errno = EPIPE; + ff_ipc_msg_free(msg); + return -1; + } + } while (msg != retmsg); + + if (retmsg->result == 0) { + ret = 0; + if (oldlenp && retmsg->sysctl.oldlenp) { + *oldlenp = *retmsg->sysctl.oldlenp; + } + + if (old && retmsg->sysctl.old && oldlenp) { + memcpy(old, retmsg->sysctl.old, *oldlenp); + } + } else { + ret = -1; + errno = retmsg->result; + } + + ff_ipc_msg_free(msg); + + return ret; +}