Update socketvar.h

copy from file "freebsd/sys/socketvar.h"
This commit is contained in:
agerguo 2022-03-25 14:34:02 +08:00 committed by GitHub
parent db0a1d59e9
commit 85a2b77b4f
1 changed file with 240 additions and 112 deletions

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1982, 1986, 1990, 1993 * Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
* *
@ -10,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -34,6 +36,12 @@
#ifndef _SYS_SOCKETVAR_H_ #ifndef _SYS_SOCKETVAR_H_
#define _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_
/*
* Socket generation count type. Also used in xinpcb, xtcpcb, xunpcb.
*/
typedef uint64_t so_gen_t;
#if defined(_KERNEL) || defined(_WANT_SOCKET)
#include <sys/queue.h> /* for TAILQ macros */ #include <sys/queue.h> /* for TAILQ macros */
#include <sys/selinfo.h> /* for struct selinfo */ #include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h> #include <sys/_lock.h>
@ -41,7 +49,6 @@
#include <sys/osd.h> #include <sys/osd.h>
#include <sys/_sx.h> #include <sys/_sx.h>
#include <sys/sockbuf.h> #include <sys/sockbuf.h>
#include <sys/sockstate.h>
#ifdef _KERNEL #ifdef _KERNEL
#include <sys/caprights.h> #include <sys/caprights.h>
#include <sys/sockopt.h> #include <sys/sockopt.h>
@ -55,68 +62,52 @@ struct vnet;
* handle on protocol and pointer to protocol * handle on protocol and pointer to protocol
* private data and error information. * private data and error information.
*/ */
typedef u_quad_t so_gen_t; typedef int so_upcall_t(struct socket *, void *, int);
typedef void so_dtor_t(struct socket *);
struct socket; struct socket;
/*
 * Accept-queue membership of a socket.  Each non-zero value is a single
 * distinct bit.
 */
enum socket_qstate {
	SQ_NONE = 0,			/* no queue bit set */
	SQ_INCOMP = 1 << 11,		/* 0x0800: on sol_incomp */
	SQ_COMP = 1 << 12,		/* 0x1000: on sol_comp */
};
/*- /*-
* Locking key to struct socket: * Locking key to struct socket:
* (a) constant after allocation, no locking required. * (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so). * (b) locked by SOCK_LOCK(so).
* (c) locked by SOCKBUF_LOCK(&so->so_rcv). * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
* (e) locked by ACCEPT_LOCK(). * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
* (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic. * (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value. * (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx. * (h) locked by global mutex so_global_mtx.
* (k) locked by KTLS workqueue mutex
*/ */
TAILQ_HEAD(accept_queue, socket);
struct socket { struct socket {
int so_count; /* (b) reference count */ struct mtx so_lock;
volatile u_int so_count; /* (b / refcount) */
struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */ short so_type; /* (a) generic type, see socket.h */
short so_options; /* from socket call, see socket.h */ int so_options; /* (b) from socket call, see socket.h */
short so_linger; /* time to linger while closing */ short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */ short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */ void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */ struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */ struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.
* Socket where accepts occur is so_head in all subsidiary sockets.
* If so_head is 0, socket is not related to an accept.
* For head socket so_incomp queues partially completed connections,
* while so_comp is a queue of connections ready to be accepted.
* If a connection is aborted and it has so_head set, then
* it has to be pulled out of either so_incomp or so_comp.
* We allow connections to queue up based on current queue lengths
* and limit on number of queued connections for this socket.
*/
struct socket *so_head; /* (e) back pointer to listen socket */
TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
u_int so_qlen; /* (e) number of unaccepted connections */
u_int so_incqlen; /* (e) number of unaccepted incomplete
connections */
u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */ short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */ u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */ out of band data (SIGURG) */
u_long so_oobmark; /* (c) chars to oob mark */
struct sockbuf so_rcv, so_snd;
struct ucred *so_cred; /* (a) user credentials */ struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */ struct label *so_label; /* (b) MAC label for socket */
struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */ /* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */ so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */ void *so_emuldata; /* (b) private data for emulators */
struct so_accf { so_dtor_t *so_dtor; /* (b) optional destructor */
struct accept_filter *so_accept_filter;
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
struct osd osd; /* Object Specific extensions */ struct osd osd; /* Object Specific extensions */
/* /*
* so_fibnum, so_user_cookie and friends can be used to attach * so_fibnum, so_user_cookie and friends can be used to attach
@ -127,65 +118,134 @@ struct socket {
int so_fibnum; /* routing domain for this socket */ int so_fibnum; /* routing domain for this socket */
uint32_t so_user_cookie; uint32_t so_user_cookie;
void *so_pspare[2]; /* packet pacing / general use */ int so_ts_clock; /* type of the clock used for timestamps */
int so_ispare[2]; /* packet pacing / general use */ uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
union {
/* Regular (data flow) socket. */
struct {
/* (cr, cs) Receive and send buffers. */
struct sockbuf so_rcv, so_snd;
/* (e) Our place on accept queue. */
TAILQ_ENTRY(socket) so_list;
struct socket *so_listen; /* (b) */
enum socket_qstate so_qstate; /* (b) */
/* (b) cached MAC label for peer */
struct label *so_peerlabel;
u_long so_oobmark; /* chars to oob mark */
/* (k) Our place on KTLS RX work queue. */
STAILQ_ENTRY(socket) so_ktls_rx_list;
};
/*
* Listening socket, where accepts occur, is so_listen in all
* subsidiary sockets. If so_listen is NULL, socket is not
* related to an accept. For a listening socket itself
* sol_incomp queues partially completed connections, while
* sol_comp is a queue of connections ready to be accepted.
* If a connection is aborted and it has so_listen set, then
* it has to be pulled out of either sol_incomp or sol_comp.
* We allow connections to queue up based on current queue
* lengths and limit on number of queued connections for this
* socket.
*/
struct {
/* (e) queue of partial unaccepted connections */
struct accept_queue sol_incomp;
/* (e) queue of complete unaccepted connections */
struct accept_queue sol_comp;
u_int sol_qlen; /* (e) sol_comp length */
u_int sol_incqlen; /* (e) sol_incomp length */
u_int sol_qlimit; /* (e) queue limit */
/* accept_filter(9) optional data */
struct accept_filter *sol_accept_filter;
void *sol_accept_filter_arg; /* saved filter args */
char *sol_accept_filter_str; /* saved user args */
/* Optional upcall, for kernel socket. */
so_upcall_t *sol_upcall; /* (e) */
void *sol_upcallarg; /* (e) */
/* Socket buffer parameters, to be copied to
* dataflow sockets, accepted from this one. */
int sol_sbrcv_lowat;
int sol_sbsnd_lowat;
u_int sol_sbrcv_hiwat;
u_int sol_sbsnd_hiwat;
short sol_sbrcv_flags;
short sol_sbsnd_flags;
sbintime_t sol_sbrcv_timeo;
sbintime_t sol_sbsnd_timeo;
/* Information tracking listen queue overflows. */
struct timeval sol_lastover; /* (e) */
int sol_overcount; /* (e) */
};
};
#ifdef LVS_TCPOPT_TOA
uint8_t so_toa[8]; /* lvs toa option */
#endif
}; };
#endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */
/* /*
* Global accept mutex to serialize access to accept queues and * Socket state bits.
* fields associated with multiple sockets. This allows us to *
* avoid defining a lock order between listen and accept sockets * Historically, these bits were all kept in the so_state field.
* until such time as it proves to be a good idea. * They are now split into separate, lock-specific fields.
* so_state maintains basic socket state protected by the socket lock.
* so_qstate holds information about the socket accept queues.
* Each socket buffer also has a state field holding information
* relevant to that socket buffer (can't send, rcv).
* Many fields will be read without locks to improve performance and avoid
* lock order issues. However, this approach must be used with caution.
*/ */
extern struct mtx accept_mtx; #define SS_NOFDREF 0x0001 /* no file table ref any more */
#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED) #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */
#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED) #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */
#define ACCEPT_LOCK() mtx_lock(&accept_mtx) #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */
#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx) #define SS_NBIO 0x0100 /* non-blocking ops */
#define SS_ASYNC 0x0200 /* async i/o notify */
#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */
#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */
/* /*
* Per-socket mutex: we reuse the receive socket buffer mutex for space * Protocols can mark a socket as SS_PROTOREF to indicate that, following
* efficiency. This decision should probably be revisited as we optimize * pru_detach, they still want the socket to persist, and will free it
* locking for the socket code. * themselves when they are done. Protocols should only ever call sofree()
* following setting this flag in pru_detach(), and never otherwise, as
* sofree() bypasses socket reference counting.
*/ */
#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv) #define SS_PROTOREF 0x4000 /* strong protocol reference */
#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
/*
* Socket state bits stored in so_qstate.
*/
#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
#define SQ_COMP 0x1000 /* unaccepted, complete connection */
/*
* Externalized form of struct socket used by the sysctl(3) interface.
*/
struct xsocket {
size_t xso_len; /* length of this structure */
struct socket *xso_so; /* makes a convenient handle sometimes */
short so_type;
short so_options;
short so_linger;
short so_state;
caddr_t so_pcb; /* another convenient handle */
int xso_protocol;
int xso_family;
u_int so_qlen;
u_int so_incqlen;
u_int so_qlimit;
short so_timeo;
u_short so_error;
pid_t so_pgid;
u_long so_oobmark;
struct xsockbuf so_rcv, so_snd;
uid_t so_uid; /* XXX */
};
#ifdef _KERNEL #ifdef _KERNEL
/*
 * Per-socket lock helpers.  Every macro below acts on the so_lock member
 * of the socket that (so) points to.
 *
 * SOCK_MTX() yields the address of that lock.  The expansion is wrapped in
 * parentheses so that the result behaves as a single operand at the use
 * site (e.g. SOCK_MTX(so)->field); without them a postfix operator would
 * bind to so_lock itself rather than to its address.
 */
#define SOCK_MTX(so) (&(so)->so_lock)
/* Lock, query ownership of, and unlock so_lock via the mtx_*() calls. */
#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
/* mtx_assert() wrappers: MA_OWNED for LOCK_ASSERT, MA_NOTOWNED for UNLOCK_ASSERT. */
#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
/* Evaluates to 1 when SO_ACCEPTCONN is set in so_options, 0 otherwise. */
#define SOLISTENING(lsock) (0 != ((lsock)->so_options & SO_ACCEPTCONN))
/* Take so_lock, then KASSERT that the socket is a listening one. */
#define SOLISTEN_LOCK(lsock) do { \
	mtx_lock(&(lsock)->so_lock); \
	KASSERT(SOLISTENING(lsock), \
	    ("%s: %p not listening", __func__, (lsock))); \
} while (0)
/* Non-blocking attempt on so_lock; yields mtx_trylock()'s result. */
#define SOLISTEN_TRYLOCK(lsock) mtx_trylock(&(lsock)->so_lock)
/* KASSERT that the socket is listening, then drop so_lock. */
#define SOLISTEN_UNLOCK(lsock) do { \
	KASSERT(SOLISTENING(lsock), \
	    ("%s: %p not listening", __func__, (lsock))); \
	mtx_unlock(&(lsock)->so_lock); \
} while (0)
/* Assert so_lock is owned (MA_OWNED) and that the socket is listening. */
#define SOLISTEN_LOCK_ASSERT(lsock) do { \
	mtx_assert(&(lsock)->so_lock, MA_OWNED); \
	KASSERT(SOLISTENING(lsock), \
	    ("%s: %p not listening", __func__, (lsock))); \
} while (0)
/* /*
* Macros for sockets and socket buffering. * Macros for sockets and socket buffering.
*/ */
@ -209,8 +269,7 @@ struct xsocket {
/* can we read something from so? */ /* can we read something from so? */
#define soreadabledata(so) \ #define soreadabledata(so) \
(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
#define soreadable(so) \ #define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@ -223,26 +282,19 @@ struct xsocket {
(so)->so_error) (so)->so_error)
/* /*
* soref()/sorele() ref-count the socket structure. Note that you must * soref()/sorele() ref-count the socket structure.
* still explicitly close the socket, but the last ref count will free * soref() may be called without owning socket lock, but in that case a
* the structure. * caller must own something that holds socket, and so_count must not be 0.
* Note that you must still explicitly close the socket, but the last ref
* count will free the structure.
*/ */
#define soref(so) do { \ #define soref(so) refcount_acquire(&(so)->so_count)
SOCK_LOCK_ASSERT(so); \
++(so)->so_count; \
} while (0)
#define sorele(so) do { \ #define sorele(so) do { \
ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \ SOCK_LOCK_ASSERT(so); \
if ((so)->so_count <= 0) \ if (refcount_release(&(so)->so_count)) \
panic("sorele"); \
if (--(so)->so_count == 0) \
sofree(so); \ sofree(so); \
else { \ else \
SOCK_UNLOCK(so); \ SOCK_UNLOCK(so); \
ACCEPT_UNLOCK(); \
} \
} while (0) } while (0)
/* /*
@ -290,6 +342,22 @@ struct accept_filter {
SLIST_ENTRY(accept_filter) accf_next; SLIST_ENTRY(accept_filter) accf_next;
}; };
/*
 * Boilerplate for an accept filter module.  Expands to:
 *   - a static struct accept_filter named <modid>_filter, filled in with the
 *     filter name string and the callback/create/destroy hooks;
 *   - a static moduledata_t named <modid>_mod whose event handler is
 *     accept_filt_generic_mod_event and whose private data is that filter;
 *   - DECLARE_MODULE() and MODULE_VERSION() invocations for the module.
 * The final MODULE_VERSION() carries no trailing semicolon; the invocation
 * site supplies it.
 */
#define ACCEPT_FILTER_DEFINE(modid, filtstr, cbfn, ctor, dtor, version) \
	static struct accept_filter modid##_filter = { \
		.accf_name = filtstr, \
		.accf_callback = cbfn, \
		.accf_create = ctor, \
		.accf_destroy = dtor, \
	}; \
	static moduledata_t modid##_mod = { \
		.name = __XSTRING(modid), \
		.evhand = accept_filt_generic_mod_event, \
		.priv = &modid##_filter, \
	}; \
	DECLARE_MODULE(modid, modid##_mod, SI_SUB_DRIVERS, \
	    SI_ORDER_MIDDLE); \
	MODULE_VERSION(modid, version)
#ifdef MALLOC_DECLARE #ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ACCF); MALLOC_DECLARE(M_ACCF);
MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_PCB);
@ -321,6 +389,7 @@ extern u_long sb_max;
extern so_gen_t so_gencnt; extern so_gen_t so_gencnt;
struct file; struct file;
struct filecaps;
struct filedesc; struct filedesc;
struct mbuf; struct mbuf;
struct sockaddr; struct sockaddr;
@ -338,9 +407,10 @@ struct uio;
/* /*
* From uipc_socket and friends * From uipc_socket and friends
*/ */
int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr,
size_t len);
int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp, u_int *fflagp); struct file **fpp, u_int *fflagp, struct filecaps *havecaps);
void soabort(struct socket *so); void soabort(struct socket *so);
int soaccept(struct socket *so, struct sockaddr **nam); int soaccept(struct socket *so, struct sockaddr **nam);
void soaio_enqueue(struct task *task); void soaio_enqueue(struct task *task);
@ -358,16 +428,18 @@ int soconnect2(struct socket *so1, struct socket *so2);
int socreate(int dom, struct socket **aso, int type, int proto, int socreate(int dom, struct socket **aso, int type, int proto,
struct ucred *cred, struct thread *td); struct ucred *cred, struct thread *td);
int sodisconnect(struct socket *so); int sodisconnect(struct socket *so);
void sodtor_set(struct socket *, so_dtor_t *);
struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
void sofree(struct socket *so); void sofree(struct socket *so);
void sohasoutofband(struct socket *so); void sohasoutofband(struct socket *so);
int solisten(struct socket *so, int backlog, struct thread *td); int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog); void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so); int solisten_proto_check(struct socket *so);
int solisten_dequeue(struct socket *, struct socket **, int);
struct socket * struct socket *
sonewconn(struct socket *head, int connstatus); sonewconn(struct socket *head, int connstatus);
struct socket *
sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred, int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td); struct thread *td);
int sopoll_generic(struct socket *so, int events, int sopoll_generic(struct socket *so, int events,
@ -395,14 +467,22 @@ int sosend_generic(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control, struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td); int flags, struct thread *td);
int soshutdown(struct socket *so, int how); int soshutdown(struct socket *so, int how);
void sotoxsocket(struct socket *so, struct xsocket *xso); void soupcall_clear(struct socket *, int);
void soupcall_clear(struct socket *so, int which); void soupcall_set(struct socket *, int, so_upcall_t, void *);
void soupcall_set(struct socket *so, int which, void solisten_upcall_set(struct socket *, so_upcall_t, void *);
int (*func)(struct socket *, void *, int), void *arg);
void sowakeup(struct socket *so, struct sockbuf *sb); void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb); void sowakeup_aio(struct socket *so, struct sockbuf *sb);
void solisten_wakeup(struct socket *);
int selsocket(struct socket *so, int events, struct timeval *tv, int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td); struct thread *td);
void soisconnected(struct socket *so);
void soisconnecting(struct socket *so);
void soisdisconnected(struct socket *so);
void soisdisconnecting(struct socket *so);
void socantrcvmore(struct socket *so);
void socantrcvmore_locked(struct socket *so);
void socantsendmore(struct socket *so);
void socantsendmore_locked(struct socket *so);
/* /*
* Accept filter functions (duh). * Accept filter functions (duh).
@ -419,4 +499,52 @@ int accept_filt_generic_mod_event(module_t mod, int event, void *data);
#endif /* _KERNEL */ #endif /* _KERNEL */
/*
 * Structure to export socket from kernel to utilities, via sysctl(3).
 *
 * Members are declared mostly with explicit-width integer types, and are
 * grouped from wider to narrower (presumably so the layout carries no
 * implicit padding; this depends on the widths of ksize_t, kvaddr_t, pid_t
 * and uid_t, which are defined elsewhere -- TODO confirm).
 * Do not reorder or resize members: consumers outside the kernel read this
 * layout directly.
 */
struct xsocket {
ksize_t xso_len; /* length of this structure */
kvaddr_t xso_so; /* kernel address of struct socket */
kvaddr_t so_pcb; /* kernel address of struct inpcb */
uint64_t so_oobmark;
int64_t so_spare64[8]; /* NOTE(review): looks like reserved space for future 64-bit fields */
int32_t xso_protocol;
int32_t xso_family;
uint32_t so_qlen;
uint32_t so_incqlen;
uint32_t so_qlimit;
pid_t so_pgid;
uid_t so_uid;
int32_t so_spare32[8]; /* NOTE(review): looks like reserved space for future 32-bit fields */
int16_t so_type;
int16_t so_options;
int16_t so_linger;
int16_t so_state;
int16_t so_timeo;
uint16_t so_error;
/*
 * Exported form of a socket buffer, declared inline.  In C the tag
 * struct xsockbuf is still usable outside struct xsocket.
 */
struct xsockbuf {
uint32_t sb_cc;
uint32_t sb_hiwat;
uint32_t sb_mbcnt;
uint32_t sb_mcnt;
uint32_t sb_ccnt;
uint32_t sb_mbmax;
int32_t sb_lowat;
int32_t sb_timeo;
int16_t sb_flags;
} so_rcv, so_snd; /* receive-side and send-side buffers */
};
#ifdef _KERNEL
void sotoxsocket(struct socket *so, struct xsocket *xso);
void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
#endif
/*
* Socket buffer state bits. Exported via libprocstat(3).
*/
#define SBS_CANTSENDMORE 0x10 /* no further data can be sent to the peer */
#define SBS_CANTRCVMORE 0x20 /* no further data can be received from the peer */
#define SBS_RCVATMARK 0x40 /* input is at the mark */
#endif /* !_SYS_SOCKETVAR_H_ */ #endif /* !_SYS_SOCKETVAR_H_ */