diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-22 14:59:50 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-09-21 10:29:41 +0200 |
commit | 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch) | |
tree | cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/netinet/in_pcb.h | |
parent | ck: Define CK_MD_PPC32_LWSYNC if available (diff) | |
download | rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2 |
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319.
Update #3472.
Diffstat (limited to 'freebsd/sys/netinet/in_pcb.h')
-rw-r--r-- | freebsd/sys/netinet/in_pcb.h | 167 |
1 files changed, 101 insertions, 66 deletions
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index d00dd456..86c9705c 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -51,8 +51,11 @@ #include <sys/lock.h> #include <sys/rwlock.h> #include <net/vnet.h> +#include <net/if.h> +#include <net/if_var.h> #include <vm/uma.h> #endif +#include <sys/ck.h> #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ @@ -65,8 +68,9 @@ * numbers, and pointers up (to a socket structure) and down (to a * protocol-specific control block) are stored here. */ -LIST_HEAD(inpcbhead, inpcb); -LIST_HEAD(inpcbporthead, inpcbport); +CK_LIST_HEAD(inpcbhead, inpcb); +CK_LIST_HEAD(inpcbporthead, inpcbport); +CK_LIST_HEAD(inpcblbgrouphead, inpcblbgroup); typedef uint64_t inp_gen_t; /* @@ -79,6 +83,11 @@ struct in_addr_4in6 { struct in_addr ia46_addr4; }; +union in_dependaddr { + struct in_addr_4in6 id46_addr; + struct in6_addr id6_addr; +}; + /* * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has * some extra padding to accomplish this. @@ -89,22 +98,14 @@ struct in_endpoints { u_int16_t ie_fport; /* foreign port */ u_int16_t ie_lport; /* local port */ /* protocol dependent part, local and foreign addr */ - union { - /* foreign host table entry */ - struct in_addr_4in6 ie46_foreign; - struct in6_addr ie6_foreign; - } ie_dependfaddr; - union { - /* local host table entry */ - struct in_addr_4in6 ie46_local; - struct in6_addr ie6_local; - } ie_dependladdr; + union in_dependaddr ie_dependfaddr; /* foreign host table entry */ + union in_dependaddr ie_dependladdr; /* local host table entry */ +#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4 +#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4 +#define ie6_faddr ie_dependfaddr.id6_addr +#define ie6_laddr ie_dependladdr.id6_addr u_int32_t ie6_zoneid; /* scope zone id */ }; -#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 -#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 -#define ie6_faddr ie_dependfaddr.ie6_foreign -#define ie6_laddr ie_dependladdr.ie6_local /* * XXX The defines for inc_* are hacks and should be changed to direct @@ -122,8 +123,8 @@ struct in_conninfo { * Flags for inc_flags. */ #define INC_ISIPV6 0x01 +#define INC_IPV6MINMTU 0x02 -#define inc_isipv6 inc_flags /* temp compatibility */ #define inc_fport inc_ie.ie_fport #define inc_lport inc_ie.ie_lport #define inc_faddr inc_ie.ie_faddr @@ -159,6 +160,7 @@ struct in_conninfo { * Key: * (b) - Protected by the hpts lock. * (c) - Constant after initialization + * (e) - Protected by the net_epoch_prempt epoch * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock * (p) - Protected by the pcbinfo lock for the inpcb @@ -233,8 +235,8 @@ struct inpcbpolicy; struct m_snd_tag; struct inpcb { /* Cache line #1 (amd64) */ - LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ - LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ + CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */ + CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ struct rwlock inp_lock; /* Cache line #2 (amd64) */ #define inp_start_zero inp_hpts @@ -278,7 +280,7 @@ struct inpcb { TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ - LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */ + CK_LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ u_char inp_vflag; /* (i) IP version flag (v4/v6) */ @@ -316,18 +318,19 @@ struct inpcb { int in6p_cksum; short in6p_hops; }; - LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ + CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ struct inpcbport *inp_phd; /* (i/h) head of this list */ inp_gen_t inp_gencnt; /* (c) generation count */ - struct llentry *inp_lle; /* cached L2 information */ + void *spare_ptr; /* Spare pointer. */ rt_gen_t inp_rt_cookie; /* generation for route entry */ union { /* cached L3 information */ struct route inp_route; struct route_in6 inp_route6; }; - LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ - /* (p[w]) for list iteration */ - /* (p[r]/l) for addition/removal */ + CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */ + /* (e[r]) for list iteration */ + /* (p[w]/l) for addition/removal */ + struct epoch_context inp_epoch_ctx; }; #endif /* _KERNEL */ @@ -364,14 +367,11 @@ struct inpcb { */ #ifdef _SYS_SOCKETVAR_H_ struct xinpcb { - size_t xi_len; /* length of this structure */ + ksize_t xi_len; /* length of this structure */ struct xsocket xi_socket; /* (s,p) */ struct in_conninfo inp_inc; /* (s,p) */ uint64_t inp_gencnt; /* (s,p) */ - union { - void *inp_ppcb; /* (s) netstat(1) */ - int64_t ph_ppcb; - }; + kvaddr_t inp_ppcb; /* (s) netstat(1) */ int64_t inp_spare64[4]; uint32_t inp_flow; /* (s) */ uint32_t inp_flowid; /* (s) */ @@ -392,10 +392,12 @@ struct xinpcb { } __aligned(8); struct xinpgen { - size_t xig_len; /* length of this structure */ + ksize_t xig_len; /* length of this structure */ u_int xig_count; /* number of PCBs at this time */ + uint32_t _xig_spare32; inp_gen_t xig_gen; /* generation count at this time */ so_gen_t xig_sogen; /* socket generation count this time */ + uint64_t _xig_spare64[4]; } __aligned(8); #ifdef _KERNEL void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); @@ -403,7 +405,8 @@ void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { - LIST_ENTRY(inpcbport) phd_hash; + struct epoch_context phd_epoch_ctx; + CK_LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; u_short phd_port; }; @@ -436,22 +439,23 @@ struct in_pcblist { * Locking key: * * (c) Constant or nearly constant after initialisation + * (e) - Protected by the net_epoch_prempt epoch * (g) Locked by ipi_lock * (l) Locked by ipi_list_lock - * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock * (p) Protected by one or more pcbgroup locks * (x) Synchronisation properties poorly defined */ struct inpcbinfo { /* - * Global lock protecting full inpcb list traversal + * Global lock protecting inpcb list modification */ - struct rwlock ipi_lock; + struct mtx ipi_lock; /* * Global list of inpcbs on the protocol. */ - struct inpcbhead *ipi_listhead; /* (g/l) */ + struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */ u_int ipi_count; /* (l) */ /* @@ -482,9 +486,9 @@ struct inpcbinfo { u_int ipi_hashfields; /* (c) */ /* - * Global lock protecting non-pcbgroup hash lookup tables. + * Global lock protecting modification non-pcbgroup hash lookup tables. */ - struct rwlock ipi_hash_lock; + struct mtx ipi_hash_lock; /* * Global hash of inpcbs, hashed by local and foreign addresses and @@ -508,6 +512,13 @@ struct inpcbinfo { u_long ipi_wildmask; /* (p) */ /* + * Load balance groups used for the SO_REUSEPORT_LB option, + * hashed by local port. + */ + struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */ + u_long ipi_lbgrouphashmask; /* (h) */ + + /* * Pointer to network stack instance */ struct vnet *ipi_vnet; /* (c) */ @@ -549,6 +560,27 @@ struct inpcbgroup { struct mtx ipg_lock; } __aligned(CACHE_LINE_SIZE); +/* + * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group + * (or unique address:port combination) can be re-used at most + * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which + * is dynamically resized as processes bind/unbind to that specific group. + */ +struct inpcblbgroup { + CK_LIST_ENTRY(inpcblbgroup) il_list; + struct epoch_context il_epoch_ctx; + uint16_t il_lport; /* (c) */ + u_char il_vflag; /* (c) */ + u_char il_pad; + uint32_t il_pad2; + union in_dependaddr il_dependladdr; /* (c) */ +#define il_laddr il_dependladdr.id46_addr.ia46_addr4 +#define il6_laddr il_dependladdr.id6_addr + uint32_t il_inpsiz; /* max count in il_inp[] (h) */ + uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ + struct inpcb *il_inp[]; /* (h) */ +}; + #define INP_LOCK_INIT(inp, d, t) \ rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) @@ -593,25 +625,24 @@ struct tcpcb * inp_inpcbtotcpcb(struct inpcb *inp); void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp); -short inp_so_options(const struct inpcb *inp); +int inp_so_options(const struct inpcb *inp); #endif /* _KERNEL */ #define INP_INFO_LOCK_INIT(ipi, d) \ - rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE) -#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock) -#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock) -#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock) -#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock) -#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock) -#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock) -#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock) -#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED) -#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED) -#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED) -#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED) + mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE) +#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock) +#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et)) +#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock) +#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock) +#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock) +#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et)) +#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et) +#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock) +#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock)) +#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt)) +#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) +#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock)) #define INP_LIST_LOCK_INIT(ipi, d) \ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0) @@ -632,17 +663,16 @@ short inp_so_options(const struct inpcb *inp); #define INP_LIST_UNLOCK_ASSERT(ipi) \ rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED) -#define INP_HASH_LOCK_INIT(ipi, d) \ - rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0) -#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock) -#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock) -#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ - RA_LOCKED) -#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ - RA_WLOCKED) +#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF) +#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock) +#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et) +#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et)) +#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock) +#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et) +#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et)) +#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock)) +#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED); #define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ MTX_DEF | MTX_DUPOK) @@ -656,6 +686,10 @@ short inp_so_options(const struct inpcb *inp); (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ (ntohs((lport)) & (mask)) +#define INP_PCBLBGROUP_PORTHASH(lport, mask) \ + (ntohs((lport)) & (mask)) +#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \ + ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) #define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3]) /* @@ -711,8 +745,8 @@ short inp_so_options(const struct inpcb *inp); /* * Flags for inp_flags2. */ -#define INP_LLE_VALID 0x00000001 /* cached lle is valid */ -#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */ +#define INP_2UNUSED1 0x00000001 +#define INP_2UNUSED2 0x00000002 #define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ #define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */ #define INP_FREED 0x00000010 /* inp itself is not valid */ @@ -724,6 +758,7 @@ short inp_so_options(const struct inpcb *inp); #define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ #define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ #define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */ +#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */ /* * Flags passed to in_pcblookup*() functions. |