summaryrefslogtreecommitdiffstats
path: root/freebsd/sys
diff options
context:
space:
mode:
author Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-08-22 14:59:50 +0200
committer Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-09-21 10:29:41 +0200
commit 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch)
tree cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys
parent ck: Define CK_MD_PPC32_LWSYNC if available (diff)
download rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319. Update #3472.
Diffstat (limited to 'freebsd/sys')
-rw-r--r--freebsd/sys/arm/at91/at91_mci.c1713
-rw-r--r--freebsd/sys/arm/at91/at91_mcireg.h183
-rw-r--r--freebsd/sys/arm/at91/at91_pdcreg.h50
-rw-r--r--freebsd/sys/arm/at91/at91reg.h92
-rw-r--r--freebsd/sys/arm/at91/at91var.h175
-rw-r--r--freebsd/sys/arm/include/machine/cpufunc.h157
-rw-r--r--freebsd/sys/cam/cam_ccb.h42
-rw-r--r--freebsd/sys/cam/cam_xpt.h1
-rw-r--r--freebsd/sys/cam/mmc/mmc.h8
-rw-r--r--freebsd/sys/contrib/ck/include/ck_pr.h49
-rw-r--r--freebsd/sys/contrib/ck/include/ck_queue.h14
-rw-r--r--freebsd/sys/contrib/ck/include/gcc/ppc/ck_pr.h32
-rw-r--r--freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h10
-rw-r--r--freebsd/sys/contrib/ck/include/gcc/x86_64/ck_pr.h10
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c126
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h12
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h235
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c92
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h21
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/crypto_verify/sodium/verify.c100
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h67
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_16.h23
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_32.h23
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_64.h23
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/export.h57
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/common.h246
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/implementations.h11
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/randombytes.h68
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/runtime.h52
-rw-r--r--freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/utils.h170
-rw-r--r--freebsd/sys/crypto/chacha20/chacha.c11
-rw-r--r--freebsd/sys/crypto/chacha20/chacha.h12
-rw-r--r--freebsd/sys/crypto/libsodium/utils.c16
-rw-r--r--freebsd/sys/crypto/sha2/sha224.h96
-rw-r--r--freebsd/sys/crypto/sha2/sha256c.c60
-rw-r--r--freebsd/sys/crypto/sha2/sha512c.c8
-rw-r--r--freebsd/sys/crypto/skein/skein.c3
-rw-r--r--freebsd/sys/dev/bfe/if_bfe.c2
-rw-r--r--freebsd/sys/dev/bge/if_bge.c16
-rw-r--r--freebsd/sys/dev/dc/if_dc.c2
-rw-r--r--freebsd/sys/dev/e1000/if_em.c134
-rw-r--r--freebsd/sys/dev/e1000/if_em.h7
-rw-r--r--freebsd/sys/dev/evdev/cdev.c4
-rw-r--r--freebsd/sys/dev/evdev/evdev.c14
-rw-r--r--freebsd/sys/dev/evdev/evdev.h11
-rw-r--r--freebsd/sys/dev/evdev/evdev_utils.c42
-rw-r--r--freebsd/sys/dev/evdev/input-event-codes.h36
-rw-r--r--freebsd/sys/dev/evdev/input.h6
-rw-r--r--freebsd/sys/dev/evdev/uinput.c6
-rw-r--r--freebsd/sys/dev/fxp/if_fxp.c2
-rw-r--r--freebsd/sys/dev/kbd/kbd.c39
-rw-r--r--freebsd/sys/dev/kbd/kbdreg.h4
-rwxr-xr-xfreebsd/sys/dev/mii/icsphy.c2
-rw-r--r--freebsd/sys/dev/mmc/mmc.c57
-rw-r--r--freebsd/sys/dev/mmc/mmcbrvar.h15
-rw-r--r--freebsd/sys/dev/mmc/mmcsd.c18
-rw-r--r--freebsd/sys/dev/nvme/nvme.h13
-rw-r--r--freebsd/sys/dev/ofw/ofw_bus_subr.c4
-rw-r--r--freebsd/sys/dev/ofw/ofw_fdt.c7
-rw-r--r--freebsd/sys/dev/ofw/ofw_subr.c47
-rw-r--r--freebsd/sys/dev/pci/pci.c257
-rw-r--r--freebsd/sys/dev/pci/pci_user.c113
-rw-r--r--freebsd/sys/dev/pci/pcivar.h64
-rw-r--r--freebsd/sys/dev/rtwn/rtl8812a/r12a_reg.h10
-rw-r--r--freebsd/sys/dev/rtwn/rtl8812a/usb/r12au.h1
-rw-r--r--freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_init.c33
-rw-r--r--freebsd/sys/dev/rtwn/rtl8821a/usb/r21au.h1
-rw-r--r--freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c2
-rw-r--r--freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_init.c11
-rw-r--r--freebsd/sys/dev/sdhci/sdhci.c65
-rw-r--r--freebsd/sys/dev/usb/controller/usb_controller.c1
-rw-r--r--freebsd/sys/dev/usb/input/uep.c105
-rw-r--r--freebsd/sys/dev/usb/input/ukbd.c20
-rw-r--r--freebsd/sys/dev/usb/input/ums.c12
-rw-r--r--freebsd/sys/dev/usb/net/if_ure.c1
-rw-r--r--freebsd/sys/dev/usb/serial/u3g.c1
-rw-r--r--freebsd/sys/dev/usb/usb_hid.c18
-rw-r--r--freebsd/sys/dev/usb/usb_request.c29
-rw-r--r--freebsd/sys/dev/usb/wlan/if_run.c2
-rw-r--r--freebsd/sys/i386/include/machine/cpufunc.h9
-rw-r--r--freebsd/sys/i386/include/machine/intr_machdep.h190
-rw-r--r--freebsd/sys/kern/init_main.c24
-rw-r--r--freebsd/sys/kern/kern_event.c116
-rw-r--r--freebsd/sys/kern/kern_intr.c193
-rw-r--r--freebsd/sys/kern/kern_sysctl.c102
-rw-r--r--freebsd/sys/kern/kern_time.c4
-rw-r--r--freebsd/sys/kern/subr_blist.c14
-rw-r--r--freebsd/sys/kern/subr_bus.c133
-rw-r--r--freebsd/sys/kern/subr_counter.c15
-rw-r--r--freebsd/sys/kern/subr_gtaskqueue.c18
-rw-r--r--freebsd/sys/kern/subr_hints.c353
-rw-r--r--freebsd/sys/kern/subr_module.c36
-rw-r--r--freebsd/sys/kern/subr_pcpu.c2
-rw-r--r--freebsd/sys/kern/subr_prf.c25
-rwxr-xr-xfreebsd/sys/kern/sys_pipe.c99
-rw-r--r--freebsd/sys/kern/uipc_sockbuf.c79
-rw-r--r--freebsd/sys/kern/uipc_socket.c28
-rw-r--r--freebsd/sys/kern/uipc_syscalls.c97
-rw-r--r--freebsd/sys/kern/uipc_usrreq.c157
-rw-r--r--freebsd/sys/mips/include/machine/cpuregs.h94
-rw-r--r--freebsd/sys/net/altq/altq.h29
-rw-r--r--freebsd/sys/net/altq/altq_cbq.c2
-rw-r--r--freebsd/sys/net/altq/altq_cbq.h6
-rw-r--r--freebsd/sys/net/altq/altq_codel.c2
-rw-r--r--freebsd/sys/net/altq/altq_codel.h6
-rw-r--r--freebsd/sys/net/altq/altq_fairq.c2
-rw-r--r--freebsd/sys/net/altq/altq_fairq.h6
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.c161
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.h97
-rw-r--r--freebsd/sys/net/altq/altq_priq.c2
-rw-r--r--freebsd/sys/net/altq/altq_priq.h6
-rw-r--r--freebsd/sys/net/altq/altq_subr.c58
-rw-r--r--freebsd/sys/net/altq/altq_var.h13
-rw-r--r--freebsd/sys/net/altq/if_altq.h4
-rw-r--r--freebsd/sys/net/bpf.c16
-rw-r--r--freebsd/sys/net/bpf_jitter.c6
-rw-r--r--freebsd/sys/net/bpf_jitter.h1
-rw-r--r--freebsd/sys/net/bpfdesc.h5
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.c69
-rw-r--r--freebsd/sys/net/if.c89
-rw-r--r--freebsd/sys/net/if_bridge.c24
-rw-r--r--freebsd/sys/net/if_clone.c2
-rw-r--r--freebsd/sys/net/if_disc.c2
-rw-r--r--freebsd/sys/net/if_edsc.c2
-rw-r--r--freebsd/sys/net/if_enc.c12
-rw-r--r--freebsd/sys/net/if_epair.c17
-rw-r--r--freebsd/sys/net/if_ethersubr.c7
-rw-r--r--freebsd/sys/net/if_gif.c547
-rw-r--r--freebsd/sys/net/if_gif.h58
-rw-r--r--freebsd/sys/net/if_gre.c668
-rw-r--r--freebsd/sys/net/if_gre.h46
-rw-r--r--freebsd/sys/net/if_ipsec.c8
-rw-r--r--freebsd/sys/net/if_lagg.c32
-rw-r--r--freebsd/sys/net/if_llatbl.c7
-rw-r--r--freebsd/sys/net/if_loop.c3
-rw-r--r--freebsd/sys/net/if_media.h168
-rw-r--r--freebsd/sys/net/if_stf.c42
-rw-r--r--freebsd/sys/net/if_tun.c2
-rw-r--r--freebsd/sys/net/if_var.h31
-rw-r--r--freebsd/sys/net/if_vlan.c17
-rw-r--r--freebsd/sys/net/if_vlan_var.h3
-rw-r--r--freebsd/sys/net/iflib.h10
-rw-r--r--freebsd/sys/net/netisr.c2
-rw-r--r--freebsd/sys/net/pfvar.h120
-rw-r--r--freebsd/sys/net/radix.c2
-rw-r--r--freebsd/sys/net/radix.h26
-rw-r--r--freebsd/sys/net/radix_mpath.c3
-rw-r--r--freebsd/sys/net/route.c9
-rw-r--r--freebsd/sys/net/route.h6
-rw-r--r--freebsd/sys/net/route_var.h19
-rw-r--r--freebsd/sys/net/rtsock.c14
-rw-r--r--freebsd/sys/net/vnet.h20
-rw-r--r--freebsd/sys/net80211/ieee80211.c23
-rw-r--r--freebsd/sys/net80211/ieee80211_hwmp.c2
-rw-r--r--freebsd/sys/net80211/ieee80211_node.c19
-rw-r--r--freebsd/sys/netinet/cc/cc_newreno.c8
-rw-r--r--freebsd/sys/netinet/if_ether.c12
-rw-r--r--freebsd/sys/netinet/igmp.c26
-rw-r--r--freebsd/sys/netinet/in.c9
-rw-r--r--freebsd/sys/netinet/in_fib.c4
-rw-r--r--freebsd/sys/netinet/in_gif.c311
-rw-r--r--freebsd/sys/netinet/in_gif.h45
-rw-r--r--freebsd/sys/netinet/in_kdtrace.h23
-rw-r--r--freebsd/sys/netinet/in_mcast.c39
-rw-r--r--freebsd/sys/netinet/in_pcb.c589
-rw-r--r--freebsd/sys/netinet/in_pcb.h167
-rw-r--r--freebsd/sys/netinet/ip.h5
-rw-r--r--freebsd/sys/netinet/ip6.h1
-rw-r--r--freebsd/sys/netinet/ip_carp.c46
-rw-r--r--freebsd/sys/netinet/ip_divert.c49
-rw-r--r--freebsd/sys/netinet/ip_encap.c470
-rw-r--r--freebsd/sys/netinet/ip_encap.h47
-rw-r--r--freebsd/sys/netinet/ip_fastfwd.c26
-rw-r--r--freebsd/sys/netinet/ip_fw.h2
-rw-r--r--freebsd/sys/netinet/ip_gre.c243
-rw-r--r--freebsd/sys/netinet/ip_icmp.c24
-rw-r--r--freebsd/sys/netinet/ip_id.c20
-rw-r--r--freebsd/sys/netinet/ip_input.c10
-rw-r--r--freebsd/sys/netinet/ip_mroute.c86
-rw-r--r--freebsd/sys/netinet/ip_options.c4
-rw-r--r--freebsd/sys/netinet/ip_output.c71
-rw-r--r--freebsd/sys/netinet/ip_reass.c203
-rw-r--r--freebsd/sys/netinet/libalias/alias.c12
-rw-r--r--freebsd/sys/netinet/libalias/alias_irc.c3
-rw-r--r--freebsd/sys/netinet/libalias/alias_mod.h8
-rw-r--r--freebsd/sys/netinet/pim_var.h2
-rw-r--r--freebsd/sys/netinet/raw_ip.c102
-rw-r--r--freebsd/sys/netinet/sctp.h2
-rw-r--r--freebsd/sys/netinet/sctp_asconf.c39
-rw-r--r--freebsd/sys/netinet/sctp_asconf.h4
-rw-r--r--freebsd/sys/netinet/sctp_auth.c8
-rw-r--r--freebsd/sys/netinet/sctp_auth.h40
-rw-r--r--freebsd/sys/netinet/sctp_bsd_addr.c2
-rw-r--r--freebsd/sys/netinet/sctp_cc_functions.c10
-rw-r--r--freebsd/sys/netinet/sctp_constants.h12
-rw-r--r--freebsd/sys/netinet/sctp_dtrace_define.h2
-rw-r--r--freebsd/sys/netinet/sctp_header.h26
-rw-r--r--freebsd/sys/netinet/sctp_indata.c66
-rw-r--r--freebsd/sys/netinet/sctp_indata.h3
-rw-r--r--freebsd/sys/netinet/sctp_input.c154
-rw-r--r--freebsd/sys/netinet/sctp_input.h2
-rw-r--r--freebsd/sys/netinet/sctp_os_bsd.h2
-rw-r--r--freebsd/sys/netinet/sctp_output.c145
-rw-r--r--freebsd/sys/netinet/sctp_output.h10
-rw-r--r--freebsd/sys/netinet/sctp_pcb.c67
-rw-r--r--freebsd/sys/netinet/sctp_pcb.h14
-rw-r--r--freebsd/sys/netinet/sctp_peeloff.c6
-rw-r--r--freebsd/sys/netinet/sctp_structs.h57
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.c2
-rw-r--r--freebsd/sys/netinet/sctp_timer.c23
-rw-r--r--freebsd/sys/netinet/sctp_uio.h71
-rw-r--r--freebsd/sys/netinet/sctp_usrreq.c71
-rw-r--r--freebsd/sys/netinet/sctp_var.h4
-rw-r--r--freebsd/sys/netinet/sctputil.c112
-rw-r--r--freebsd/sys/netinet/sctputil.h31
-rw-r--r--freebsd/sys/netinet/tcp_hostcache.c4
-rw-r--r--freebsd/sys/netinet/tcp_hpts.h4
-rw-r--r--freebsd/sys/netinet/tcp_input.c167
-rw-r--r--freebsd/sys/netinet/tcp_log_buf.h6
-rw-r--r--freebsd/sys/netinet/tcp_output.c245
-rw-r--r--freebsd/sys/netinet/tcp_reass.c975
-rw-r--r--freebsd/sys/netinet/tcp_subr.c165
-rw-r--r--freebsd/sys/netinet/tcp_syncache.c92
-rw-r--r--freebsd/sys/netinet/tcp_syncache.h1
-rw-r--r--freebsd/sys/netinet/tcp_timer.c197
-rw-r--r--freebsd/sys/netinet/tcp_timer.h17
-rw-r--r--freebsd/sys/netinet/tcp_timewait.c116
-rw-r--r--freebsd/sys/netinet/tcp_usrreq.c238
-rw-r--r--freebsd/sys/netinet/tcp_var.h56
-rw-r--r--freebsd/sys/netinet/udp_usrreq.c105
-rw-r--r--freebsd/sys/netinet/udplite.h11
-rw-r--r--freebsd/sys/netinet6/frag6.c331
-rw-r--r--freebsd/sys/netinet6/icmp6.c11
-rw-r--r--freebsd/sys/netinet6/in6.c3
-rw-r--r--freebsd/sys/netinet6/in6_fib.c4
-rw-r--r--freebsd/sys/netinet6/in6_gif.c337
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.c2
-rw-r--r--freebsd/sys/netinet6/in6_mcast.c46
-rw-r--r--freebsd/sys/netinet6/in6_pcb.c189
-rw-r--r--freebsd/sys/netinet6/in6_proto.c37
-rw-r--r--freebsd/sys/netinet6/in6_rmx.c4
-rw-r--r--freebsd/sys/netinet6/in6_src.c6
-rw-r--r--freebsd/sys/netinet6/in6_var.h2
-rw-r--r--freebsd/sys/netinet6/ip6_input.c4
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.c55
-rw-r--r--freebsd/sys/netinet6/ip6_output.c57
-rw-r--r--freebsd/sys/netinet6/ip6_var.h8
-rw-r--r--freebsd/sys/netinet6/mld6.c43
-rw-r--r--freebsd/sys/netinet6/nd6.c6
-rw-r--r--freebsd/sys/netinet6/nd6_nbr.c8
-rw-r--r--freebsd/sys/netinet6/nd6_rtr.c2
-rw-r--r--freebsd/sys/netinet6/pim6_var.h4
-rw-r--r--freebsd/sys/netinet6/raw_ip6.c7
-rw-r--r--freebsd/sys/netinet6/scope6.c4
-rw-r--r--freebsd/sys/netinet6/scope6_var.h2
-rw-r--r--freebsd/sys/netinet6/sctp6_usrreq.c9
-rw-r--r--freebsd/sys/netinet6/sctp6_var.h4
-rw-r--r--freebsd/sys/netinet6/udp6_usrreq.c450
-rw-r--r--freebsd/sys/netipsec/ipsec.c17
-rw-r--r--freebsd/sys/netipsec/ipsec.h2
-rw-r--r--freebsd/sys/netipsec/key.c78
-rw-r--r--freebsd/sys/netipsec/key_debug.c87
-rw-r--r--freebsd/sys/netipsec/keydb.h3
-rw-r--r--freebsd/sys/netipsec/keysock.c2
-rw-r--r--freebsd/sys/netipsec/xform.h2
-rw-r--r--freebsd/sys/netipsec/xform_ah.c83
-rw-r--r--freebsd/sys/netipsec/xform_esp.c21
-rw-r--r--freebsd/sys/netipsec/xform_ipcomp.c87
-rw-r--r--freebsd/sys/netipsec/xform_tcp.c16
-rw-r--r--freebsd/sys/netpfil/pf/if_pflog.c2
-rw-r--r--freebsd/sys/netpfil/pf/if_pfsync.c10
-rw-r--r--freebsd/sys/netpfil/pf/pf.c206
-rw-r--r--freebsd/sys/netpfil/pf/pf_altq.h145
-rw-r--r--freebsd/sys/netpfil/pf/pf_if.c41
-rw-r--r--freebsd/sys/netpfil/pf/pf_ioctl.c310
-rw-r--r--freebsd/sys/netpfil/pf/pf_norm.c16
-rw-r--r--freebsd/sys/netpfil/pf/pf_osfp.c2
-rw-r--r--freebsd/sys/netpfil/pf/pf_table.c10
-rw-r--r--freebsd/sys/opencrypto/_cryptodev.h8
-rw-r--r--freebsd/sys/opencrypto/crypto.c174
-rw-r--r--freebsd/sys/opencrypto/cryptodev.c55
-rw-r--r--freebsd/sys/opencrypto/cryptodev.h64
-rw-r--r--freebsd/sys/opencrypto/cryptosoft.c335
-rw-r--r--freebsd/sys/opencrypto/cryptosoft.h5
-rw-r--r--freebsd/sys/opencrypto/xform_auth.h7
-rw-r--r--freebsd/sys/opencrypto/xform_md5.c4
-rw-r--r--freebsd/sys/opencrypto/xform_poly1305.c93
-rw-r--r--freebsd/sys/opencrypto/xform_poly1305.h16
-rw-r--r--freebsd/sys/opencrypto/xform_rmd160.c4
-rw-r--r--freebsd/sys/opencrypto/xform_sha1.c16
-rw-r--r--freebsd/sys/opencrypto/xform_sha2.c81
-rw-r--r--freebsd/sys/powerpc/include/machine/spr.h4
-rw-r--r--freebsd/sys/sys/bus.h9
-rw-r--r--freebsd/sys/sys/cpu.h2
-rw-r--r--freebsd/sys/sys/file.h13
-rw-r--r--freebsd/sys/sys/interrupt.h13
-rw-r--r--freebsd/sys/sys/jail.h5
-rw-r--r--freebsd/sys/sys/libkern.h23
-rw-r--r--freebsd/sys/sys/linker.h3
-rw-r--r--freebsd/sys/sys/malloc.h76
-rw-r--r--freebsd/sys/sys/mbuf.h7
-rw-r--r--freebsd/sys/sys/module.h5
-rw-r--r--freebsd/sys/sys/mouse.h15
-rw-r--r--freebsd/sys/sys/mutex.h2
-rw-r--r--freebsd/sys/sys/nv.h8
-rw-r--r--freebsd/sys/sys/pciio.h19
-rw-r--r--freebsd/sys/sys/pcpu.h59
-rw-r--r--freebsd/sys/sys/proc.h21
-rw-r--r--freebsd/sys/sys/random.h42
-rw-r--r--freebsd/sys/sys/reboot.h9
-rw-r--r--freebsd/sys/sys/sglist.h113
-rw-r--r--freebsd/sys/sys/sockbuf.h8
-rw-r--r--freebsd/sys/sys/socketvar.h17
-rw-r--r--freebsd/sys/sys/sockopt.h1
-rw-r--r--freebsd/sys/sys/sx.h4
-rw-r--r--freebsd/sys/sys/sysproto.h4
-rw-r--r--freebsd/sys/sys/systm.h121
-rw-r--r--freebsd/sys/sys/unpcb.h23
-rw-r--r--freebsd/sys/sys/vmmeter.h7
-rw-r--r--freebsd/sys/vm/uma.h23
-rw-r--r--freebsd/sys/vm/uma_core.c326
-rw-r--r--freebsd/sys/vm/uma_int.h53
-rw-r--r--freebsd/sys/vm/vm_extern.h9
323 files changed, 12076 insertions, 8299 deletions
diff --git a/freebsd/sys/arm/at91/at91_mci.c b/freebsd/sys/arm/at91/at91_mci.c
deleted file mode 100644
index c25983b7..00000000
--- a/freebsd/sys/arm/at91/at91_mci.c
+++ /dev/null
@@ -1,1713 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh.
- * Copyright (c) 2010 Greg Ansley. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <rtems/bsd/local/opt_platform.h>
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/endian.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
-#include <rtems/bsd/sys/resource.h>
-#include <sys/rman.h>
-#include <sys/sysctl.h>
-
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <machine/intr.h>
-
-#include <arm/at91/at91var.h>
-#include <arm/at91/at91_mcireg.h>
-#include <arm/at91/at91_pdcreg.h>
-
-#include <dev/mmc/bridge.h>
-#include <dev/mmc/mmcbrvar.h>
-
-#ifdef FDT
-#include <dev/ofw/ofw_bus.h>
-#include <dev/ofw/ofw_bus_subr.h>
-#endif
-
-#include <rtems/bsd/local/mmcbr_if.h>
-
-#include <rtems/bsd/local/opt_at91.h>
-
-#ifdef __rtems__
-#include <bsp.h>
-#endif /* __rtems__ */
-#if defined(__rtems__) && defined(LIBBSP_ARM_ATSAM_BSP_H)
-#ifdef __rtems__
-#include <rtems/irq-extension.h>
-#include <libchip/chip.h>
-
-#define AT91_MCI_HAS_4WIRE 1
-
-#define at91_master_clock BOARD_MCK
-
-static sXdmad *pXdmad = &XDMAD_Instance;
-#endif /* __rtems__ */
-/*
- * About running the MCI bus above 25MHz
- *
- * Historically, the MCI bus has been run at 30MHz on systems with a 60MHz
- * master clock, in part due to a bug in dev/mmc.c making it always request
- * 30MHz, and in part overclocking the bus because 15MHz was too slow.
- * Fixing that bug causes the mmc driver to request a 25MHz clock (as it
- * should) and the logic in at91_mci_update_ios() picks the highest speed that
- * doesn't exceed that limit. With a 60MHz MCK that would be 15MHz, and
- * that's a real performance buzzkill when you've been getting away with 30MHz
- * all along.
- *
- * By defining AT91_MCI_ALLOW_OVERCLOCK (or setting the allow_overclock=1
- * device hint or sysctl) you can enable logic in at91_mci_update_ios() to
- * overclock the SD bus a little by running it at MCK / 2 when the requested
- * speed is 25MHz and the next highest speed is 15MHz or less. This appears
- * to work on virtually all SD cards, since it is what this driver has been
- * doing prior to the introduction of this option, where the overclocking vs
- * underclocking decision was automatically "overclock". Modern SD cards can
- * run at 45mhz/1-bit in standard mode (high speed mode enable commands not
- * sent) without problems.
- *
- * Speaking of high-speed mode, the rm9200 manual says the MCI device supports
- * the SD v1.0 specification and can run up to 50MHz. This is interesting in
- * that the SD v1.0 spec caps the speed at 25MHz; high speed mode was added in
- * the v1.10 spec. Furthermore, high speed mode doesn't just crank up the
- * clock, it alters the signal timing. The rm9200 MCI device doesn't support
- * these altered timings. So while speeds over 25MHz may work, they only work
- * in what the SD spec calls "default" speed mode, and it amounts to violating
- * the spec by overclocking the bus.
- *
- * If you also enable 4-wire mode it's possible transfers faster than 25MHz
- * will fail. On the AT91RM9200, due to bugs in the bus contention logic,
- * transfers will fail if you have the USB host device and OHCI driver enabled.
- * Even underclocking to 15MHz, intermittent overrun and underrun errors occur.
- * Note that you don't even need to have usb devices attached to the system,
- * the errors begin to occur as soon as the OHCI driver sets the register bit
- * to enable periodic transfers. It appears (based on brief investigation)
- * that the usb host controller uses so much ASB bandwidth that sometimes the
- * DMA for MCI transfers doesn't get a bus grant in time and data gets
- * dropped. Adding even a modicum of network activity changes the symptom
- * from intermittent to very frequent. Members of the AT91SAM9 family have
- * corrected this problem, or are at least better about their use of the bus.
- */
-#ifndef AT91_MCI_ALLOW_OVERCLOCK
-#define AT91_MCI_ALLOW_OVERCLOCK 1
-#endif
-
-/*
- * Allocate 2 bounce buffers we'll use to endian-swap the data due to the rm9200
- * erratum. We use a pair of buffers because when reading that lets us begin
- * endian-swapping the data in the first buffer while the DMA is reading into
- * the second buffer. (We can't use the same trick for writing because we might
- * not get all the data in the 2nd buffer swapped before the hardware needs it;
- * dealing with that would add complexity to the driver.)
- *
- * The buffers are sized at 16K each due to the way the busdma cache sync
- * operations work on arm. A dcache_inv_range() operation on a range larger
- * than 16K gets turned into a dcache_wbinv_all(). That needlessly flushes the
- * entire data cache, impacting overall system performance.
- */
-#ifndef __rtems__
-#define BBCOUNT 2
-#define BBSIZE (32*1024)
-#define MAX_BLOCKS ((BBSIZE)/512)
-/* FIXME: It would be better to split the DMA up in that case like in the
- * original driver. But that would need some rework. */
-#else /* __rtems__ */
-#define MAX_BLOCKS 256
-#endif /* __rtems__ */
-
-#ifndef __rtems__
-static int mci_debug;
-#else /* __rtems__ */
-#define mci_debug 0
-#endif /* __rtems__ */
-
-struct at91_mci_softc {
- void *intrhand; /* Interrupt handle */
- device_t dev;
- int sc_cap;
-#define CAP_HAS_4WIRE 1 /* Has 4 wire bus */
-#define CAP_NEEDS_BYTESWAP 2 /* broken hardware needing bounce */
-#define CAP_MCI1_REV2XX 4 /* MCI 1 rev 2.x */
- int flags;
-#define PENDING_CMD 0x01
-#define PENDING_STOP 0x02
-#define CMD_MULTIREAD 0x10
-#define CMD_MULTIWRITE 0x20
- int has_4wire;
- int allow_overclock;
- struct resource *irq_res; /* IRQ resource */
- struct resource *mem_res; /* Memory resource */
- struct mtx sc_mtx;
-#ifdef __rtems__
- RTEMS_INTERRUPT_LOCK_MEMBER(sc_lock)
-#endif /* __rtems__ */
-#ifndef __rtems__
- bus_dma_tag_t dmatag;
-#endif /* __rtems__ */
- struct mmc_host host;
- int bus_busy;
- struct mmc_request *req;
- struct mmc_command *curcmd;
-#ifndef __rtems__
- bus_dmamap_t bbuf_map[BBCOUNT];
- char * bbuf_vaddr[BBCOUNT]; /* bounce bufs in KVA space */
- uint32_t bbuf_len[BBCOUNT]; /* len currently queued for bounce buf */
- uint32_t bbuf_curidx; /* which bbuf is the active DMA buffer */
- uint32_t xfer_offset; /* offset so far into caller's buf */
-#else /* __rtems__ */
- LinkedListDescriporView1 xdma_desc;
- uint32_t xdma_tx_channel;
- uint32_t xdma_rx_channel;
- uint8_t xdma_tx_perid;
- uint8_t xdma_rx_perid;
- sXdmadCfg xdma_tx_cfg;
- sXdmadCfg xdma_rx_cfg;
-#endif /* __rtems__ */
-};
-
-/* bus entry points */
-static int at91_mci_probe(device_t dev);
-static int at91_mci_attach(device_t dev);
-static int at91_mci_detach(device_t dev);
-static void at91_mci_intr(void *);
-
-/* helper routines */
-static int at91_mci_activate(device_t dev);
-static void at91_mci_deactivate(device_t dev);
-static int at91_mci_is_mci1rev2xx(void);
-#ifndef __rtems__
-static void at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr);
-#endif /* __rtems__ */
-static void at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr);
-
-#ifndef __rtems__
-#define AT91_MCI_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
-#define AT91_MCI_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
-#define AT91_MCI_LOCK_INIT(_sc) \
- mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \
- "mci", MTX_DEF)
-#define AT91_MCI_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx);
-#define AT91_MCI_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED);
-#define AT91_MCI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED);
-#else /* __rtems__ */
-#define AT91_MCI_LOCK(_sc) \
- rtems_interrupt_lock_context at91_mci_lock_context; \
- rtems_interrupt_lock_acquire(&(_sc)->sc_lock, &at91_mci_lock_context)
-#define AT91_MCI_UNLOCK(_sc) \
- rtems_interrupt_lock_release(&(_sc)->sc_lock, &at91_mci_lock_context)
-#define AT91_MCI_LOCK_INIT(_sc) \
- rtems_interrupt_lock_initialize(&(_sc)->sc_lock, \
- device_get_nameunit((_sc)->dev))
-/*
- * Destroy the interrupt lock that AT91_MCI_LOCK_INIT() initialized. That is
- * sc_lock; sc_mtx is the separate bus mutex set up by AT91_MCI_BUS_LOCK_INIT().
- */
-#define AT91_MCI_LOCK_DESTROY(_sc) \
- rtems_interrupt_lock_destroy(&(_sc)->sc_lock)
-#define AT91_MCI_BUS_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
-#define AT91_MCI_BUS_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
-#define AT91_MCI_BUS_LOCK_INIT(_sc) \
- mtx_init(&_sc->sc_mtx, device_get_nameunit((_sc)->dev), \
- "mci", MTX_DEF)
-#endif /* __rtems__ */
-
-static inline uint32_t
-RD4(struct at91_mci_softc *sc, bus_size_t off)
-{
- return (bus_read_4(sc->mem_res, off));
-}
-
-static inline void
-WR4(struct at91_mci_softc *sc, bus_size_t off, uint32_t val)
-{
- bus_write_4(sc->mem_res, off, val);
-}
-
-#ifndef __rtems__
-/*
- * Copy memsize bytes from sptr to dptr, byte-swapping every 32-bit word when
- * the controller is flagged CAP_NEEDS_BYTESWAP; otherwise do a plain memcpy().
- *
- * sc      - softc; only sc_cap is consulted.
- * dptr    - destination buffer.
- * sptr    - source buffer.
- * memsize - number of bytes to copy.
- *
- * The swapping path works in whole 32-bit words, so a trailing partial word
- * (memsize not a multiple of 4) is still transferred as one full word.
- */
-static void
-at91_bswap_buf(struct at91_mci_softc *sc, void * dptr, void * sptr, uint32_t memsize)
-{
- uint32_t * dst = (uint32_t *)dptr;
- uint32_t * src = (uint32_t *)sptr;
- uint32_t i;
-
- /*
- * If the hardware doesn't need byte-swapping, let bcopy() do the
- * work. Use bounce buffer even if we don't need byteswap, since
- * buffer may straddle a page boundary, and we don't handle
- * multi-segment transfers in hardware. Seen from 'bsdlabel -w' which
- * uses raw geom access to the volume. Greg Ansley (gja (at)
- * ansley.com)
- */
- if (!(sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
- memcpy(dptr, sptr, memsize);
- return;
- }
-
- /*
- * Nice performance boost for slightly unrolling this loop.
- * (But very little extra boost for further unrolling it.)
- *
- * Only complete 16-byte groups are handled here. Counting groups
- * (rather than testing i < memsize) keeps the loop from running one
- * group past the end when memsize is not a multiple of 16.
- */
- for (i = memsize / 16; i > 0; i--) {
- *dst++ = bswap32(*src++);
- *dst++ = bswap32(*src++);
- *dst++ = bswap32(*src++);
- *dst++ = bswap32(*src++);
- }
-
- /* Mop up the last 1-3 words, if any. */
- for (i = 0; i < (memsize & 0x0F); i += 4) {
- *dst++ = bswap32(*src++);
- }
-}
-
-static void
-at91_mci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
-{
- if (error != 0)
- return;
- *(bus_addr_t *)arg = segs[0].ds_addr;
-}
-#endif /* __rtems__ */
-
-static void
-at91_mci_pdc_disable(struct at91_mci_softc *sc)
-{
-#ifndef __rtems__
- WR4(sc, PDC_PTCR, PDC_PTCR_TXTDIS | PDC_PTCR_RXTDIS);
- WR4(sc, PDC_RPR, 0);
- WR4(sc, PDC_RCR, 0);
- WR4(sc, PDC_RNPR, 0);
- WR4(sc, PDC_RNCR, 0);
- WR4(sc, PDC_TPR, 0);
- WR4(sc, PDC_TCR, 0);
- WR4(sc, PDC_TNPR, 0);
- WR4(sc, PDC_TNCR, 0);
-#else /* __rtems__ */
- /* On SAMV71 there is no PDC but a DMAC */
- XDMAD_StopTransfer(pXdmad, sc->xdma_rx_channel);
- XDMAD_StopTransfer(pXdmad, sc->xdma_tx_channel);
- WR4(sc, MCI_DMA, 0);
-#endif /* __rtems__ */
-}
-
-/*
- * Reset the controller, then restore most of the current state.
- *
- * This is called after detecting an error. It's also called after stopping a
- * multi-block write, to un-wedge the device so that it will handle the NOTBUSY
- * signal correctly. See comments in at91_mci_stop_done() for more details.
- *
- * DMA is stopped first through at91_mci_pdc_disable(). The values carried
- * across the reset are MCI_IMR (written back through MCI_IER), MCI_MR (masked
- * with 0x7fff on non-RTEMS builds, kept whole on RTEMS), MCI_SDCR and
- * MCI_DTOR. All interrupts are disabled through MCI_IDR before the software
- * reset is issued, and the controller is re-enabled with power-save
- * (MCI_CR_MCIEN | MCI_CR_PWSEN) before the saved registers are rewritten.
- */
-static void at91_mci_reset(struct at91_mci_softc *sc)
-{
- uint32_t mr;
- uint32_t sdcr;
- uint32_t dtor;
- uint32_t imr;
-
- at91_mci_pdc_disable(sc);
-
- /* save current state */
-
- imr = RD4(sc, MCI_IMR);
-#ifndef __rtems__
- mr = RD4(sc, MCI_MR) & 0x7fff;
-#else /* __rtems__ */
- mr = RD4(sc, MCI_MR);
-#endif /* __rtems__ */
- sdcr = RD4(sc, MCI_SDCR);
- dtor = RD4(sc, MCI_DTOR);
-
- /* reset the controller */
-
- WR4(sc, MCI_IDR, 0xffffffff);
- WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST);
-
- /* restore state */
-
- WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
- WR4(sc, MCI_MR, mr);
- WR4(sc, MCI_SDCR, sdcr);
- WR4(sc, MCI_DTOR, dtor);
- WR4(sc, MCI_IER, imr);
-
- /*
- * Make sure sdio interrupts will fire. Not sure why reading
- * SR ensures that, but this is in the linux driver.
- */
-
- RD4(sc, MCI_SR);
-}
-
-static void
-at91_mci_init(device_t dev)
-{
- struct at91_mci_softc *sc = device_get_softc(dev);
- uint32_t val;
-
- WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
- WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */
- WR4(sc, MCI_DTOR, MCI_DTOR_DTOMUL_1M | 1);
-#ifndef __rtems__
- val = MCI_MR_PDCMODE;
-#else /* __rtems__ */
- val = 0;
- val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
-#endif /* __rtems__ */
- val |= 0x34a; /* PWSDIV = 3; CLKDIV = 74 */
-// if (sc->sc_cap & CAP_MCI1_REV2XX)
-// val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
- WR4(sc, MCI_MR, val);
-#ifndef AT91_MCI_SLOT_B
- WR4(sc, MCI_SDCR, 0); /* SLOT A, 1 bit bus */
-#else
- /*
- * XXX Really should add second "unit" but nobody is using
- * a two slot card that we know of. XXX
- */
- WR4(sc, MCI_SDCR, 1); /* SLOT B, 1 bit bus */
-#endif
- /*
- * Enable controller, including power-save. The slower clock
- * of the power-save mode is only in effect when there is no
- * transfer in progress, so it can be left in this mode all
- * the time.
- */
- WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
-}
-
-static void
-at91_mci_fini(device_t dev)
-{
- struct at91_mci_softc *sc = device_get_softc(dev);
-
- WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */
- at91_mci_pdc_disable(sc);
- WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
-}
-
-static int
-at91_mci_probe(device_t dev)
-{
-#ifdef FDT
- if (!ofw_bus_is_compatible(dev, "atmel,hsmci"))
- return (ENXIO);
-#endif
- device_set_desc(dev, "MCI mmc/sd host bridge");
- return (0);
-}
-
-static int
-at91_mci_attach(device_t dev)
-{
- struct at91_mci_softc *sc = device_get_softc(dev);
- struct sysctl_ctx_list *sctx;
- struct sysctl_oid *soid;
- device_t child;
-#ifndef __rtems__
- int err, i;
-#else /* __rtems__ */
- int err;
-#endif /* __rtems__ */
-
-#ifdef __rtems__
-#ifdef LIBBSP_ARM_ATSAM_BSP_H
- PMC_EnablePeripheral(ID_HSMCI);
- sc->xdma_tx_channel = XDMAD_ALLOC_FAILED;
- sc->xdma_rx_channel = XDMAD_ALLOC_FAILED;
-#endif /* LIBBSP_ARM_ATSAM_BSP_H */
-#endif /* __rtems__ */
- sctx = device_get_sysctl_ctx(dev);
- soid = device_get_sysctl_tree(dev);
-
- sc->dev = dev;
- sc->sc_cap = 0;
-#ifndef __rtems__
- if (at91_is_rm92())
- sc->sc_cap |= CAP_NEEDS_BYTESWAP;
-#endif /* __rtems__ */
- /*
- * MCI1 Rev 2 controllers need some workarounds, flag if so.
- */
- if (at91_mci_is_mci1rev2xx())
- sc->sc_cap |= CAP_MCI1_REV2XX;
-
- err = at91_mci_activate(dev);
- if (err)
- goto out;
-
-#ifdef __rtems__
- eXdmadRC rc;
-
- /* Prepare some configurations so they don't have to be fetched on every
- * setup */
- sc->xdma_rx_perid = XDMAIF_Get_ChannelNumber(ID_HSMCI,
- XDMAD_TRANSFER_RX);
- sc->xdma_tx_perid = XDMAIF_Get_ChannelNumber(ID_HSMCI,
- XDMAD_TRANSFER_TX);
- memset(&sc->xdma_rx_cfg, 0, sizeof(sc->xdma_rx_cfg));
- sc->xdma_rx_cfg.mbr_cfg = XDMAC_CC_TYPE_PER_TRAN |
- XDMAC_CC_MBSIZE_SINGLE | XDMAC_CC_DSYNC_PER2MEM |
- XDMAC_CC_SWREQ_HWR_CONNECTED | XDMAC_CC_MEMSET_NORMAL_MODE |
- XDMAC_CC_CSIZE_CHK_1 | XDMAC_CC_DWIDTH_WORD |
- XDMAC_CC_SIF_AHB_IF1 | XDMAC_CC_DIF_AHB_IF1 |
- XDMAC_CC_SAM_FIXED_AM | XDMAC_CC_DAM_INCREMENTED_AM |
- XDMAC_CC_PERID(
- XDMAIF_Get_ChannelNumber(ID_HSMCI,XDMAD_TRANSFER_RX));
- memset(&sc->xdma_tx_cfg, 0, sizeof(sc->xdma_tx_cfg));
- sc->xdma_tx_cfg.mbr_cfg = XDMAC_CC_TYPE_PER_TRAN |
- XDMAC_CC_MBSIZE_SINGLE | XDMAC_CC_DSYNC_MEM2PER |
- XDMAC_CC_SWREQ_HWR_CONNECTED | XDMAC_CC_MEMSET_NORMAL_MODE |
- XDMAC_CC_CSIZE_CHK_1 | XDMAC_CC_DWIDTH_WORD |
- XDMAC_CC_SIF_AHB_IF1 | XDMAC_CC_DIF_AHB_IF1 |
- XDMAC_CC_SAM_INCREMENTED_AM | XDMAC_CC_DAM_FIXED_AM |
- XDMAC_CC_PERID(
- XDMAIF_Get_ChannelNumber(ID_HSMCI,XDMAD_TRANSFER_TX));
-
- sc->xdma_tx_channel = XDMAD_AllocateChannel(pXdmad,
- XDMAD_TRANSFER_MEMORY, ID_HSMCI);
- if (sc->xdma_tx_channel == XDMAD_ALLOC_FAILED)
- goto out;
-
- /* FIXME: The two DMA channels are not really necessary for the driver.
- * But the XDMAD interface does not allow to allocate one and use it
- * into two directions. The current (2017-07-11) implementation of
- * the XDMAD interface should work with it. So we might could try it. */
- sc->xdma_rx_channel = XDMAD_AllocateChannel(pXdmad, ID_HSMCI,
- XDMAD_TRANSFER_MEMORY);
- if (sc->xdma_rx_channel == XDMAD_ALLOC_FAILED)
- goto out;
-
- rc = XDMAD_PrepareChannel(pXdmad, sc->xdma_rx_channel);
- if (rc != XDMAD_OK)
- goto out;
-
- rc = XDMAD_PrepareChannel(pXdmad, sc->xdma_tx_channel);
- if (rc != XDMAD_OK)
- goto out;
-
- AT91_MCI_BUS_LOCK_INIT(sc);
-#endif /* __rtems__ */
- AT91_MCI_LOCK_INIT(sc);
-
- at91_mci_fini(dev);
- at91_mci_init(dev);
-
-#ifndef __rtems__
- /*
- * Allocate DMA tags and maps and bounce buffers.
- *
- * The parms in the tag_create call cause the dmamem_alloc call to
- * create each bounce buffer as a single contiguous buffer of BBSIZE
- * bytes aligned to a 4096 byte boundary.
- *
- * Do not use DMA_COHERENT for these buffers because that maps the
- * memory as non-cachable, which prevents cache line burst fills/writes,
- * which is something we need since we're trying to overlap the
- * byte-swapping with the DMA operations.
- */
- err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0,
- BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
- BBSIZE, 1, BBSIZE, 0, NULL, NULL, &sc->dmatag);
- if (err != 0)
- goto out;
-
- for (i = 0; i < BBCOUNT; ++i) {
- err = bus_dmamem_alloc(sc->dmatag, (void **)&sc->bbuf_vaddr[i],
- BUS_DMA_NOWAIT, &sc->bbuf_map[i]);
- if (err != 0)
- goto out;
- }
-
- /*
- * Activate the interrupt
- */
- err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE,
- NULL, at91_mci_intr, sc, &sc->intrhand);
-#else /* __rtems__ */
- err = rtems_interrupt_handler_install(rman_get_start(sc->irq_res),
- device_get_nameunit(dev), RTEMS_INTERRUPT_SHARED, at91_mci_intr,
- sc);
-#endif /* __rtems__ */
- if (err) {
- AT91_MCI_LOCK_DESTROY(sc);
- goto out;
- }
-
- /*
- * Allow 4-wire to be initially set via #define.
- * Allow a device hint to override that.
- * Allow a sysctl to override that.
- */
-#if defined(AT91_MCI_HAS_4WIRE) && AT91_MCI_HAS_4WIRE != 0
- sc->has_4wire = 1;
-#endif
- resource_int_value(device_get_name(dev), device_get_unit(dev),
- "4wire", &sc->has_4wire);
- SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "4wire",
- CTLFLAG_RW, &sc->has_4wire, 0, "has 4 wire SD Card bus");
- if (sc->has_4wire)
- sc->sc_cap |= CAP_HAS_4WIRE;
-
- sc->allow_overclock = AT91_MCI_ALLOW_OVERCLOCK;
- resource_int_value(device_get_name(dev), device_get_unit(dev),
- "allow_overclock", &sc->allow_overclock);
- SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "allow_overclock",
- CTLFLAG_RW, &sc->allow_overclock, 0,
- "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less.");
-
-#ifndef __rtems__
- SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "debug",
- CTLFLAG_RWTUN, &mci_debug, 0, "enable debug output");
-#endif /* __rtems__ */
-
- /*
- * Our real min freq is master_clock/512, but upper driver layers are
- * going to set the min speed during card discovery, and the right speed
- * for that is 400kHz, so advertise a safe value just under that.
- *
- * For max speed, while the rm9200 manual says the max is 50mhz, it also
- * says it supports only the SD v1.0 spec, which means the real limit is
- * 25mhz. On the other hand, historical use has been to slightly violate
- * the standard by running the bus at 30MHz. For more information on
- * that, see the comments at the top of this file.
- */
- sc->host.f_min = 375000;
- sc->host.f_max = at91_master_clock / 2;
- if (sc->host.f_max > 25000000)
- sc->host.f_max = 25000000;
- sc->host.host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340;
- sc->host.caps = 0;
- if (sc->sc_cap & CAP_HAS_4WIRE)
- sc->host.caps |= MMC_CAP_4_BIT_DATA;
-
- child = device_add_child(dev, "mmc", 0);
-#ifdef __rtems__
- (void)child;
-#endif /* __rtems__ */
- device_set_ivars(dev, &sc->host);
- err = bus_generic_attach(dev);
-out:
- if (err)
- at91_mci_deactivate(dev);
- return (err);
-}
-
-static int
-at91_mci_detach(device_t dev)
-{
-#ifndef __rtems__
- struct at91_mci_softc *sc = device_get_softc(dev);
-#endif /* __rtems__ */
-
- at91_mci_fini(dev);
- at91_mci_deactivate(dev);
-
-#ifndef __rtems__
- bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[0], sc->bbuf_map[0]);
- bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[1], sc->bbuf_map[1]);
- bus_dma_tag_destroy(sc->dmatag);
-#endif /* __rtems__ */
-
- return (EBUSY); /* XXX */
-}
-
-static int
-at91_mci_activate(device_t dev)
-{
- struct at91_mci_softc *sc;
- int rid;
-
- sc = device_get_softc(dev);
- rid = 0;
- sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
- RF_ACTIVE);
- if (sc->mem_res == NULL)
- goto errout;
-
- rid = 0;
- sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
- RF_ACTIVE);
- if (sc->irq_res == NULL)
- goto errout;
-
- return (0);
-errout:
- at91_mci_deactivate(dev);
- return (ENOMEM);
-}
-
-static void
-at91_mci_deactivate(device_t dev)
-{
- struct at91_mci_softc *sc;
-
- sc = device_get_softc(dev);
- if (sc->intrhand)
- bus_teardown_intr(dev, sc->irq_res, sc->intrhand);
- sc->intrhand = NULL;
- bus_generic_detach(sc->dev);
- if (sc->mem_res)
- bus_release_resource(dev, SYS_RES_MEMORY,
- rman_get_rid(sc->mem_res), sc->mem_res);
- sc->mem_res = NULL;
- if (sc->irq_res)
- bus_release_resource(dev, SYS_RES_IRQ,
- rman_get_rid(sc->irq_res), sc->irq_res);
- sc->irq_res = NULL;
-#ifdef __rtems__
- if (sc->xdma_rx_channel != XDMAD_ALLOC_FAILED) {
- XDMAD_FreeChannel(pXdmad, sc->xdma_rx_channel);
- }
- if (sc->xdma_tx_channel != XDMAD_ALLOC_FAILED) {
- XDMAD_FreeChannel(pXdmad, sc->xdma_tx_channel);
- }
-#endif /* __rtems__ */
- return;
-}
-
-static int
-at91_mci_is_mci1rev2xx(void)
-{
-
-#ifndef __rtems__
- switch (soc_info.type) {
- case AT91_T_SAM9260:
- case AT91_T_SAM9263:
- case AT91_T_CAP9:
- case AT91_T_SAM9G10:
- case AT91_T_SAM9G20:
- case AT91_T_SAM9RL:
- return(1);
- default:
- return (0);
- }
-#else /* __rtems__ */
- /* Currently only supports the SAM V71 */
- return (1);
-#endif /* __rtems__ */
-}
-
-static int
-at91_mci_update_ios(device_t brdev, device_t reqdev)
-{
- struct at91_mci_softc *sc;
- struct mmc_ios *ios;
- uint32_t clkdiv;
- uint32_t freq;
-
- sc = device_get_softc(brdev);
- ios = &sc->host.ios;
-
- /*
- * Calculate our closest available clock speed that doesn't exceed the
- * requested speed.
- *
- * When overclocking is allowed, the requested clock is 25MHz, the
- * computed frequency is 15MHz or smaller and clockdiv is 1, use
- * clockdiv of 0 to double that. If less than 12.5MHz, double
- * regardless of the overclocking setting.
- *
- * Whatever we come up with, store it back into ios->clock so that the
- * upper layer drivers can report the actual speed of the bus.
- */
- if (ios->clock == 0) {
- WR4(sc, MCI_CR, MCI_CR_MCIDIS);
- clkdiv = 0;
- } else {
- WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
- if ((at91_master_clock % (ios->clock * 2)) == 0)
- clkdiv = ((at91_master_clock / ios->clock) / 2) - 1;
- else
- clkdiv = (at91_master_clock / ios->clock) / 2;
- freq = at91_master_clock / ((clkdiv+1) * 2);
- if (clkdiv == 1 && ios->clock == 25000000 && freq <= 15000000) {
- if (sc->allow_overclock || freq <= 12500000) {
- clkdiv = 0;
- freq = at91_master_clock / ((clkdiv+1) * 2);
- }
- }
- ios->clock = freq;
- }
- if (ios->bus_width == bus_width_4)
- WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) | MCI_SDCR_SDCBUS);
- else
- WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) & ~MCI_SDCR_SDCBUS);
- WR4(sc, MCI_MR, (RD4(sc, MCI_MR) & ~MCI_MR_CLKDIV) | clkdiv);
- /* Do we need a settle time here? */
- /* XXX We need to turn the device on/off here with a GPIO pin */
- return (0);
-}
-
-#ifdef __rtems__
-static void
-at91_mci_setup_xdma(struct at91_mci_softc *sc, bool read, void *data,
- uint32_t len)
-{
- const uint32_t xdma_cndc = XDMAC_CNDC_NDVIEW_NDV1 |
- XDMAC_CNDC_NDE_DSCR_FETCH_EN |
- XDMAC_CNDC_NDSUP_SRC_PARAMS_UPDATED |
- XDMAC_CNDC_NDDUP_DST_PARAMS_UPDATED;
- const uint32_t xdma_interrupt = XDMAC_CIE_BIE | XDMAC_CIE_DIE |
- XDMAC_CIE_FIE | XDMAC_CIE_RBIE | XDMAC_CIE_WBIE | XDMAC_CIE_ROIE;
- sXdmadCfg *xdma_cfg;
- uint32_t xdma_channel;
- eXdmadRC rc;
-
- if (len % 4 != 0)
- panic("invalid XDMA transfer length");
-
- if (read) {
- xdma_cfg = &sc->xdma_rx_cfg;
- xdma_channel = sc->xdma_rx_channel;
- sc->xdma_desc.mbr_sa = (uint32_t)(sc->mem_res->r_bushandle +
- MCI_RDR);
- sc->xdma_desc.mbr_da = (uint32_t)data;
- rtems_cache_invalidate_multiple_data_lines(data, len);
- } else {
- xdma_cfg = &sc->xdma_tx_cfg;
- xdma_channel = sc->xdma_tx_channel;
- sc->xdma_desc.mbr_sa = (uint32_t)data;
- sc->xdma_desc.mbr_da = (uint32_t)(sc->mem_res->r_bushandle +
- MCI_TDR);
- rtems_cache_flush_multiple_data_lines(data, len);
- }
-
- sc->xdma_desc.mbr_ubc = XDMA_UBC_NVIEW_NDV1 |
- XDMA_UBC_NDEN_UPDATED | (len / 4);
- sc->xdma_desc.mbr_ubc |= XDMA_UBC_NDE_FETCH_DIS;
- sc->xdma_desc.mbr_nda = 0;
-
- rc = XDMAD_ConfigureTransfer(pXdmad, xdma_channel, xdma_cfg, xdma_cndc,
- (uint32_t)&sc->xdma_desc, xdma_interrupt);
- if (rc != XDMAD_OK)
- panic("configure XDMA failed: %d", rc);
-
- rtems_cache_flush_multiple_data_lines(&sc->xdma_desc, sizeof(sc->xdma_desc));
-
- rc = XDMAD_StartTransfer(pXdmad, xdma_channel);
- if (rc != XDMAD_OK)
- panic("start XDMA failed: %d", rc);
-}
-#endif /* __rtems__ */
-static void
-at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd)
-{
- uint32_t cmdr, mr;
- struct mmc_data *data;
-#ifdef __rtems__
- uint32_t block_count;
- uint32_t block_size;
-#endif /* __rtems__ */
-
- sc->curcmd = cmd;
- data = cmd->data;
-
- /* XXX Upper layers don't always set this */
- cmd->mrq = sc->req;
-
- /* Begin setting up command register. */
-
- cmdr = cmd->opcode;
-
- if (sc->host.ios.bus_mode == opendrain)
- cmdr |= MCI_CMDR_OPDCMD;
-
- /* Set up response handling. Allow max timeout for responses. */
-
- if (MMC_RSP(cmd->flags) == MMC_RSP_NONE)
- cmdr |= MCI_CMDR_RSPTYP_NO;
- else {
- cmdr |= MCI_CMDR_MAXLAT;
- if (cmd->flags & MMC_RSP_136)
- cmdr |= MCI_CMDR_RSPTYP_136;
- else
- cmdr |= MCI_CMDR_RSPTYP_48;
- }
-
- /*
- * If there is no data transfer, just set up the right interrupt mask
- * and start the command.
- *
- * The interrupt mask needs to be CMDRDY plus all non-data-transfer
- * errors. It's important to leave the transfer-related errors out, to
- * avoid spurious timeout or crc errors on a STOP command following a
- * multiblock read. When a multiblock read is in progress, sending a
- * STOP in the middle of a block occasionally triggers such errors, but
- * we're totally disinterested in them because we've already gotten all
- * the data we wanted without error before sending the STOP command.
- */
-
- if (data == NULL) {
- uint32_t ier = MCI_SR_CMDRDY |
- MCI_SR_RTOE | MCI_SR_RENDE |
- MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE;
-
- at91_mci_pdc_disable(sc);
-
- if (cmd->opcode == MMC_STOP_TRANSMISSION)
- cmdr |= MCI_CMDR_TRCMD_STOP;
-
- /* Ignore response CRC on CMD2 and ACMD41, per standard. */
-
- if (cmd->opcode == MMC_SEND_OP_COND ||
- cmd->opcode == ACMD_SD_SEND_OP_COND)
- ier &= ~MCI_SR_RCRCE;
-
- if (mci_debug)
- printf("CMDR %x (opcode %d) ARGR %x no data\n",
- cmdr, cmd->opcode, cmd->arg);
-
- WR4(sc, MCI_ARGR, cmd->arg);
- WR4(sc, MCI_CMDR, cmdr);
- WR4(sc, MCI_IDR, 0xffffffff);
- WR4(sc, MCI_IER, ier);
- return;
- }
-
- /* There is data, set up the transfer-related parts of the command. */
-
- if (data->flags & MMC_DATA_READ)
- cmdr |= MCI_CMDR_TRDIR;
-
- if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE))
- cmdr |= MCI_CMDR_TRCMD_START;
-
- if (data->flags & MMC_DATA_STREAM)
- cmdr |= MCI_CMDR_TRTYP_STREAM;
- else if (data->flags & MMC_DATA_MULTI) {
- cmdr |= MCI_CMDR_TRTYP_MULTIPLE;
- sc->flags |= (data->flags & MMC_DATA_READ) ?
- CMD_MULTIREAD : CMD_MULTIWRITE;
- }
-
- /*
- * Disable PDC until we're ready.
- *
- * Set block size and turn on PDC mode for dma xfer.
- * Note that the block size is the smaller of the amount of data to be
- * transferred, or 512 bytes. The 512 size is fixed by the standard;
- * smaller blocks are possible, but never larger.
- */
-
-#ifndef __rtems__
- WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
-
- mr = RD4(sc,MCI_MR) & ~MCI_MR_BLKLEN;
- mr |= min(data->len, 512) << 16;
- WR4(sc, MCI_MR, mr | MCI_MR_PDCMODE|MCI_MR_PDCPADV);
-
- /*
- * Set up DMA.
- *
- * Use bounce buffers even if we don't need to byteswap, because doing
- * multi-block IO with large DMA buffers is way fast (compared to
- * single-block IO), even after incurring the overhead of also copying
- * from/to the caller's buffers (which may be in non-contiguous physical
- * pages).
- *
- * In an ideal non-byteswap world we could create a dma tag that allows
- * for discontiguous segments and do the IO directly from/to the
- * caller's buffer(s), using ENDRX/ENDTX interrupts to chain the
- * discontiguous buffers through the PDC. Someday.
- *
- * If a read is bigger than 2k, split it in half so that we can start
- * byte-swapping the first half while the second half is on the wire.
- * It would be best if we could split it into 8k chunks, but we can't
- * always keep up with the byte-swapping due to other system activity,
- * and if an RXBUFF interrupt happens while we're still handling the
- * byte-swap from the prior buffer (IE, we haven't returned from
- * handling the prior interrupt yet), then data will get dropped on the
- * floor and we can't easily recover from that. The right fix for that
- * would be to have the interrupt handling only keep the DMA flowing and
- * enqueue filled buffers to be byte-swapped in a non-interrupt context.
- * Even that won't work on the write side of things though; in that
- * context we have to have all the data ready to go before starting the
- * dma.
- *
- * XXX what about stream transfers?
- */
- sc->xfer_offset = 0;
- sc->bbuf_curidx = 0;
-#else /* __rtems__ */
- mr = RD4(sc,MCI_MR);
- WR4(sc, MCI_MR, mr | MCI_MR_PDCPADV);
-
- WR4(sc, MCI_DMA, MCI_DMA_DMAEN | MCI_DMA_CHKSIZE_1);
-
- block_size = min(data->len, 512);
- block_count = data->len / block_size;
- WR4(sc, MCI_BLKR, (block_size << 16) | block_count);
-#endif /* __rtems__ */
-
- if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) {
-#ifndef __rtems__
- uint32_t len;
- uint32_t remaining = data->len;
- bus_addr_t paddr;
- int err;
-
- if (remaining > (BBCOUNT*BBSIZE))
- panic("IO read size exceeds MAXDATA\n");
-#endif /* __rtems__ */
-
- if (data->flags & MMC_DATA_READ) {
-#ifndef __rtems__
- if (remaining > 2048) // XXX
- len = remaining / 2;
- else
- len = remaining;
- err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
- sc->bbuf_vaddr[0], len, at91_mci_getaddr,
- &paddr, BUS_DMA_NOWAIT);
- if (err != 0)
- panic("IO read dmamap_load failed\n");
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
- BUS_DMASYNC_PREREAD);
- WR4(sc, PDC_RPR, paddr);
- WR4(sc, PDC_RCR, len / 4);
- sc->bbuf_len[0] = len;
- remaining -= len;
- if (remaining == 0) {
- sc->bbuf_len[1] = 0;
- } else {
- len = remaining;
- err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
- sc->bbuf_vaddr[1], len, at91_mci_getaddr,
- &paddr, BUS_DMA_NOWAIT);
- if (err != 0)
- panic("IO read dmamap_load failed\n");
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
- BUS_DMASYNC_PREREAD);
- WR4(sc, PDC_RNPR, paddr);
- WR4(sc, PDC_RNCR, len / 4);
- sc->bbuf_len[1] = len;
- remaining -= len;
- }
- WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN);
-#else /* __rtems__ */
- at91_mci_setup_xdma(sc, true, data->data, data->len);
-#endif /* __rtems__ */
- } else {
-#ifndef __rtems__
- len = min(BBSIZE, remaining);
- at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len);
- err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
- sc->bbuf_vaddr[0], len, at91_mci_getaddr,
- &paddr, BUS_DMA_NOWAIT);
- if (err != 0)
- panic("IO write dmamap_load failed\n");
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
- BUS_DMASYNC_PREWRITE);
- /*
- * Erratum workaround: PDC transfer length on a write
- * must not be smaller than 12 bytes (3 words); only
- * blklen bytes (set above) are actually transferred.
- */
- WR4(sc, PDC_TPR,paddr);
- WR4(sc, PDC_TCR, (len < 12) ? 3 : len / 4);
- sc->bbuf_len[0] = len;
- remaining -= len;
- if (remaining == 0) {
- sc->bbuf_len[1] = 0;
- } else {
- len = remaining;
- at91_bswap_buf(sc, sc->bbuf_vaddr[1],
- ((char *)data->data)+BBSIZE, len);
- err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
- sc->bbuf_vaddr[1], len, at91_mci_getaddr,
- &paddr, BUS_DMA_NOWAIT);
- if (err != 0)
- panic("IO write dmamap_load failed\n");
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
- BUS_DMASYNC_PREWRITE);
- WR4(sc, PDC_TNPR, paddr);
- WR4(sc, PDC_TNCR, (len < 12) ? 3 : len / 4);
- sc->bbuf_len[1] = len;
- remaining -= len;
- }
- /* do not enable PDC xfer until CMDRDY asserted */
-#else /* __rtems__ */
- at91_mci_setup_xdma(sc, false, data->data, data->len);
-#endif /* __rtems__ */
- }
- data->xfer_len = 0; /* XXX what's this? appears to be unused. */
- }
-
- if (mci_debug)
- printf("CMDR %x (opcode %d) ARGR %x with data len %d\n",
- cmdr, cmd->opcode, cmd->arg, cmd->data->len);
-
- WR4(sc, MCI_ARGR, cmd->arg);
- WR4(sc, MCI_CMDR, cmdr);
- WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_CMDRDY);
-}
-
-static void
-at91_mci_next_operation(struct at91_mci_softc *sc)
-{
- struct mmc_request *req;
-
- req = sc->req;
- if (req == NULL)
- return;
-
- if (sc->flags & PENDING_CMD) {
- sc->flags &= ~PENDING_CMD;
- at91_mci_start_cmd(sc, req->cmd);
- return;
- } else if (sc->flags & PENDING_STOP) {
- sc->flags &= ~PENDING_STOP;
- at91_mci_start_cmd(sc, req->stop);
- return;
- }
-
- WR4(sc, MCI_IDR, 0xffffffff);
- sc->req = NULL;
- sc->curcmd = NULL;
- //printf("req done\n");
- req->done(req);
-}
-
-static int
-at91_mci_request(device_t brdev, device_t reqdev, struct mmc_request *req)
-{
- struct at91_mci_softc *sc = device_get_softc(brdev);
-
- AT91_MCI_LOCK(sc);
- if (sc->req != NULL) {
- AT91_MCI_UNLOCK(sc);
- return (EBUSY);
- }
- //printf("new req\n");
- sc->req = req;
- sc->flags = PENDING_CMD;
- if (sc->req->stop)
- sc->flags |= PENDING_STOP;
- at91_mci_next_operation(sc);
- AT91_MCI_UNLOCK(sc);
- return (0);
-}
-
-static int
-at91_mci_get_ro(device_t brdev, device_t reqdev)
-{
- return (0);
-}
-
-static int
-at91_mci_acquire_host(device_t brdev, device_t reqdev)
-{
- struct at91_mci_softc *sc = device_get_softc(brdev);
- int err = 0;
-
-#ifndef __rtems__
- AT91_MCI_LOCK(sc);
-#else /* __rtems__ */
- AT91_MCI_BUS_LOCK(sc);
-#endif /* __rtems__ */
- while (sc->bus_busy)
- msleep(sc, &sc->sc_mtx, PZERO, "mciah", hz / 5);
- sc->bus_busy++;
-#ifndef __rtems__
- AT91_MCI_UNLOCK(sc);
-#else /* __rtems__ */
- AT91_MCI_BUS_UNLOCK(sc);
-#endif /* __rtems__ */
- return (err);
-}
-
-static int
-at91_mci_release_host(device_t brdev, device_t reqdev)
-{
- struct at91_mci_softc *sc = device_get_softc(brdev);
-
-#ifndef __rtems__
- AT91_MCI_LOCK(sc);
-#else /* __rtems__ */
- AT91_MCI_BUS_LOCK(sc);
-#endif /* __rtems__ */
- sc->bus_busy--;
- wakeup(sc);
-#ifndef __rtems__
- AT91_MCI_UNLOCK(sc);
-#else /* __rtems__ */
- AT91_MCI_BUS_UNLOCK(sc);
-#endif /* __rtems__ */
- return (0);
-}
-
-#ifndef __rtems__
-static void
-at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr)
-{
- struct mmc_command *cmd = sc->curcmd;
- char * dataptr = (char *)cmd->data->data;
- uint32_t curidx = sc->bbuf_curidx;
- uint32_t len = sc->bbuf_len[curidx];
-
- /*
- * We arrive here when a DMA transfer for a read is done, whether it's
- * a single or multi-block read.
- *
- * We byte-swap the buffer that just completed, and if that is the
- * last buffer that's part of this read then we move on to the next
- * operation, otherwise we wait for another ENDRX for the next bufer.
- */
-
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[curidx], BUS_DMASYNC_POSTREAD);
- bus_dmamap_unload(sc->dmatag, sc->bbuf_map[curidx]);
-
- at91_bswap_buf(sc, dataptr + sc->xfer_offset, sc->bbuf_vaddr[curidx], len);
-
- if (mci_debug) {
- printf("read done sr %x curidx %d len %d xfer_offset %d\n",
- sr, curidx, len, sc->xfer_offset);
- }
-
- sc->xfer_offset += len;
- sc->bbuf_curidx = !curidx; /* swap buffers */
-
- /*
- * If we've transferred all the data, move on to the next operation.
- *
- * If we're still transferring the last buffer, RNCR is already zero but
- * we have to write a zero anyway to clear the ENDRX status so we don't
- * re-interrupt until the last buffer is done.
- */
- if (sc->xfer_offset == cmd->data->len) {
- WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
- } else {
- WR4(sc, PDC_RNCR, 0);
- WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_ENDRX);
- }
-}
-#endif /* __rtems__ */
-
-static void
-at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr)
-{
- struct mmc_command *cmd = sc->curcmd;
-
- /*
- * We arrive here when the entire DMA transfer for a write is done,
- * whether it's a single or multi-block write. If it's multi-block we
- * have to immediately move on to the next operation which is to send
- * the stop command. If it's a single-block transfer we need to wait
- * for NOTBUSY, but if that's already asserted we can avoid another
- * interrupt and just move on to completing the request right away.
- */
-
- WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
-
-#ifndef __rtems__
- bus_dmamap_sync(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx],
- BUS_DMASYNC_POSTWRITE);
- bus_dmamap_unload(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx]);
-#endif /* __rtems__ */
-
- if ((cmd->data->flags & MMC_DATA_MULTI) || (sr & MCI_SR_NOTBUSY)) {
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
- } else {
- WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
- }
-}
-
-static void
-at91_mci_notbusy(struct at91_mci_softc *sc)
-{
- struct mmc_command *cmd = sc->curcmd;
-
- /*
- * We arrive here by either completion of a single-block write, or
- * completion of the stop command that ended a multi-block write (and,
- * I suppose, after a card-select or erase, but I haven't tested
- * those). Anyway, we're done and it's time to move on to the next
- * command.
- */
-
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
-}
-
-static void
-at91_mci_stop_done(struct at91_mci_softc *sc, uint32_t sr)
-{
- struct mmc_command *cmd = sc->curcmd;
-
- /*
- * We arrive here after receiving CMDRDY for a MMC_STOP_TRANSMISSION
- * command. Depending on the operation being stopped, we may have to
- * do some unusual things to work around hardware bugs.
- */
-
- /*
- * This is known to be true of at91rm9200 hardware; it may or may not
- * apply to more recent chips:
- *
- * After stopping a multi-block write, the NOTBUSY bit in MCI_SR does
- * not properly reflect the actual busy state of the card as signaled
- * on the DAT0 line; it always claims the card is not-busy. If we
- * believe that and let operations continue, following commands will
- * fail with response timeouts (except of course MMC_SEND_STATUS -- it
- * indicates the card is busy in the PRG state, which was the smoking
- * gun that showed MCI_SR NOTBUSY was not tracking DAT0 correctly).
- *
- * The atmel docs are emphatic: "This flag [NOTBUSY] must be used only
- * for Write Operations." I guess technically since we sent a stop
- * it's not a write operation anymore. But then just what did they
- * think it meant for the stop command to have "...an optional busy
- * signal transmitted on the data line" according to the SD spec?
- *
- * I tried a variety of things to un-wedge the MCI and get the status
- * register to reflect NOTBUSY correctly again, but the only thing
- * that worked was a full device reset. It feels like an awfully big
- * hammer, but doing a full reset after every multiblock write is
- * still faster than doing single-block IO (by almost two orders of
- * magnitude: 20KB/sec improves to about 1.8MB/sec best case).
- *
- * After doing the reset, wait for a NOTBUSY interrupt before
- * continuing with the next operation.
- *
- * This workaround breaks multiwrite on the rev2xx parts, but some other
- * workaround is needed.
- */
- if ((sc->flags & CMD_MULTIWRITE) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
- at91_mci_reset(sc);
- WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
- return;
- }
-
- /*
- * This is known to be true of at91rm9200 hardware; it may or may not
- * apply to more recent chips:
- *
- * After stopping a multi-block read, loop to read and discard any
- * data that coasts in after we sent the stop command. The docs don't
- * say anything about it, but empirical testing shows that 1-3
- * additional words of data get buffered up in some unmentioned
- * internal fifo and if we don't read and discard them here they end
- * up on the front of the next read DMA transfer we do.
- *
- * This appears to be unnecessary for rev2xx parts.
- */
- if ((sc->flags & CMD_MULTIREAD) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
- uint32_t sr;
- int count = 0;
-
- do {
- sr = RD4(sc, MCI_SR);
- if (sr & MCI_SR_RXRDY) {
- RD4(sc, MCI_RDR);
- ++count;
- }
- } while (sr & MCI_SR_RXRDY);
- at91_mci_reset(sc);
- }
-
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
-
-}
-
-static void
-at91_mci_cmdrdy(struct at91_mci_softc *sc, uint32_t sr)
-{
- struct mmc_command *cmd = sc->curcmd;
- int i;
-
- if (cmd == NULL)
- return;
-
- /*
- * We get here at the end of EVERY command. We retrieve the command
- * response (if any) then decide what to do next based on the command.
- */
-
- if (cmd->flags & MMC_RSP_PRESENT) {
- for (i = 0; i < ((cmd->flags & MMC_RSP_136) ? 4 : 1); i++) {
- cmd->resp[i] = RD4(sc, MCI_RSPR + i * 4);
- if (mci_debug)
- printf("RSPR[%d] = %x sr=%x\n", i, cmd->resp[i], sr);
- }
- }
-
- /*
- * If this was a stop command, go handle the various special
- * conditions (read: bugs) that have to be dealt with following a stop.
- */
- if (cmd->opcode == MMC_STOP_TRANSMISSION) {
- at91_mci_stop_done(sc, sr);
- return;
- }
-
- /*
- * If this command can continue to assert BUSY beyond the response then
- * we need to wait for NOTBUSY before the command is really done.
- *
- * Note that this may not work properly on the at91rm9200. It certainly
- * doesn't work for the STOP command that follows a multi-block write,
- * so post-stop CMDRDY is handled separately; see the special handling
- * in at91_mci_stop_done().
- *
- * Beside STOP, there are other R1B-type commands that use the busy
- * signal after CMDRDY: CMD7 (card select), CMD28-29 (write protect),
- * CMD38 (erase). I haven't tested any of them, but I rather expect
- * them all to have the same sort of problem with MCI_SR not actually
- * reflecting the state of the DAT0-line busy indicator. So this code
- * may need to grow some sort of special handling for them too. (This
- * just in: CMD7 isn't a problem right now because dev/mmc.c incorrectly
- * sets the response flags to R1 rather than R1B.) XXX
- */
- if ((cmd->flags & MMC_RSP_BUSY)) {
- WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
- return;
- }
-
- /*
- * If there is a data transfer with this command, then...
- * - If it's a read, we need to wait for ENDRX.
- * - If it's a write, now is the time to enable the PDC, and we need
- * to wait for a BLKE that follows a TXBUFE, because if we're doing
- * a split transfer we get a BLKE after the first half (when TPR/TCR
- * get loaded from TNPR/TNCR). So first we wait for the TXBUFE, and
- * the handling for that interrupt will then invoke the wait for the
- * subsequent BLKE which indicates actual completion.
- */
- if (cmd->data) {
- uint32_t ier;
-#ifndef __rtems__
- if (cmd->data->flags & MMC_DATA_READ) {
- ier = MCI_SR_ENDRX;
- } else {
- ier = MCI_SR_TXBUFE;
- WR4(sc, PDC_PTCR, PDC_PTCR_TXTEN);
- }
-#else /* __rtems__ */
- ier = MCI_SR_XFRDONE;
-#endif /* __rtems__ */
- WR4(sc, MCI_IER, MCI_SR_ERROR | ier);
- return;
- }
-
- /*
- * If we made it to here, we don't need to wait for anything more for
- * the current command, move on to the next command (will complete the
- * request if there is no next command).
- */
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
-}
-
-static void
-at91_mci_intr(void *arg)
-{
- struct at91_mci_softc *sc = (struct at91_mci_softc*)arg;
- struct mmc_command *cmd = sc->curcmd;
- uint32_t sr, isr;
-
- AT91_MCI_LOCK(sc);
-
- sr = RD4(sc, MCI_SR);
- isr = sr & RD4(sc, MCI_IMR);
-
- if (mci_debug)
- printf("i 0x%x sr 0x%x\n", isr, sr);
-
- /*
- * All interrupts are one-shot; disable it now.
- * The next operation will re-enable whatever interrupts it wants.
- */
- WR4(sc, MCI_IDR, isr);
- if (isr & MCI_SR_ERROR) {
- if (isr & (MCI_SR_RTOE | MCI_SR_DTOE))
- cmd->error = MMC_ERR_TIMEOUT;
- else if (isr & (MCI_SR_RCRCE | MCI_SR_DCRCE))
- cmd->error = MMC_ERR_BADCRC;
- else if (isr & (MCI_SR_OVRE | MCI_SR_UNRE))
- cmd->error = MMC_ERR_FIFO;
- else
- cmd->error = MMC_ERR_FAILED;
- /*
- * CMD8 is used to probe for SDHC cards, a standard SD card
- * will get a response timeout; don't report it because it's a
- * normal and expected condition. One might argue that all
- * error reporting should be left to higher levels, but when
- * they report at all it's always EIO, which isn't very
- * helpful. XXX bootverbose?
- */
- if (cmd->opcode != 8) {
- device_printf(sc->dev,
- "IO error; status MCI_SR = 0x%b cmd opcode = %d%s\n",
- sr, MCI_SR_BITSTRING, cmd->opcode,
- (cmd->opcode != 12) ? "" :
- (sc->flags & CMD_MULTIREAD) ? " after read" : " after write");
- /* XXX not sure RTOE needs a full reset, just a retry */
- at91_mci_reset(sc);
- }
- at91_mci_next_operation(sc);
- } else {
-#ifndef __rtems__
- if (isr & MCI_SR_TXBUFE) {
-// printf("TXBUFE\n");
- /*
- * We need to wait for a BLKE that follows TXBUFE
- * (intermediate BLKEs might happen after ENDTXes if
- * we're chaining multiple buffers). If BLKE is also
- * asserted at the time we get TXBUFE, we can avoid
- * another interrupt and process it right away, below.
- */
- if (sr & MCI_SR_BLKE)
- isr |= MCI_SR_BLKE;
- else
- WR4(sc, MCI_IER, MCI_SR_BLKE);
- }
- if (isr & MCI_SR_RXBUFF) {
-// printf("RXBUFF\n");
- }
- if (isr & MCI_SR_ENDTX) {
-// printf("ENDTX\n");
- }
- if (isr & MCI_SR_ENDRX) {
-// printf("ENDRX\n");
- at91_mci_read_done(sc, sr);
- }
-#else /* __rtems__ */
- if (isr & MCI_SR_XFRDONE) {
- if (cmd->data->flags & MMC_DATA_READ) {
- WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS |
- PDC_PTCR_TXTDIS);
- cmd->error = MMC_ERR_NONE;
- at91_mci_next_operation(sc);
- } else {
- if (sr & MCI_SR_BLKE)
- isr |= MCI_SR_BLKE;
- else
- WR4(sc, MCI_IER, MCI_SR_BLKE);
- }
- }
-#endif /* __rtems__ */
- if (isr & MCI_SR_NOTBUSY) {
-// printf("NOTBUSY\n");
- at91_mci_notbusy(sc);
- }
- if (isr & MCI_SR_DTIP) {
-// printf("Data transfer in progress\n");
- }
- if (isr & MCI_SR_BLKE) {
-// printf("Block transfer end\n");
- at91_mci_write_done(sc, sr);
- }
- if (isr & MCI_SR_TXRDY) {
-// printf("Ready to transmit\n");
- }
- if (isr & MCI_SR_RXRDY) {
-// printf("Ready to receive\n");
- }
- if (isr & MCI_SR_CMDRDY) {
-// printf("Command ready\n");
- at91_mci_cmdrdy(sc, sr);
- }
- }
- AT91_MCI_UNLOCK(sc);
-}
-
-static int
-at91_mci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result)
-{
- struct at91_mci_softc *sc = device_get_softc(bus);
-
- switch (which) {
- default:
- return (EINVAL);
- case MMCBR_IVAR_BUS_MODE:
- *(int *)result = sc->host.ios.bus_mode;
- break;
- case MMCBR_IVAR_BUS_WIDTH:
- *(int *)result = sc->host.ios.bus_width;
- break;
- case MMCBR_IVAR_CHIP_SELECT:
- *(int *)result = sc->host.ios.chip_select;
- break;
- case MMCBR_IVAR_CLOCK:
- *(int *)result = sc->host.ios.clock;
- break;
- case MMCBR_IVAR_F_MIN:
- *(int *)result = sc->host.f_min;
- break;
- case MMCBR_IVAR_F_MAX:
- *(int *)result = sc->host.f_max;
- break;
- case MMCBR_IVAR_HOST_OCR:
- *(int *)result = sc->host.host_ocr;
- break;
- case MMCBR_IVAR_MODE:
- *(int *)result = sc->host.mode;
- break;
- case MMCBR_IVAR_OCR:
- *(int *)result = sc->host.ocr;
- break;
- case MMCBR_IVAR_POWER_MODE:
- *(int *)result = sc->host.ios.power_mode;
- break;
- case MMCBR_IVAR_VDD:
- *(int *)result = sc->host.ios.vdd;
- break;
- case MMCBR_IVAR_CAPS:
- if (sc->has_4wire) {
- sc->sc_cap |= CAP_HAS_4WIRE;
- sc->host.caps |= MMC_CAP_4_BIT_DATA;
- } else {
- sc->sc_cap &= ~CAP_HAS_4WIRE;
- sc->host.caps &= ~MMC_CAP_4_BIT_DATA;
- }
- *(int *)result = sc->host.caps;
- break;
-#ifdef __rtems__
- case MMCBR_IVAR_TIMING:
- *result = sc->host.ios.timing;
- break;
-#endif /* __rtems__ */
- case MMCBR_IVAR_MAX_DATA:
- /*
- * Something is wrong with the 2x parts and multiblock, so
- * just do 1 block at a time for now, which really kills
- * performance.
- */
- if (sc->sc_cap & CAP_MCI1_REV2XX)
- *(int *)result = 1;
- else
- *(int *)result = MAX_BLOCKS;
- break;
- }
- return (0);
-}
-
-static int
-at91_mci_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
-{
- struct at91_mci_softc *sc = device_get_softc(bus);
-
- switch (which) {
- default:
- return (EINVAL);
- case MMCBR_IVAR_BUS_MODE:
- sc->host.ios.bus_mode = value;
- break;
- case MMCBR_IVAR_BUS_WIDTH:
- sc->host.ios.bus_width = value;
- break;
- case MMCBR_IVAR_CHIP_SELECT:
- sc->host.ios.chip_select = value;
- break;
- case MMCBR_IVAR_CLOCK:
- sc->host.ios.clock = value;
- break;
- case MMCBR_IVAR_MODE:
- sc->host.mode = value;
- break;
- case MMCBR_IVAR_OCR:
- sc->host.ocr = value;
- break;
- case MMCBR_IVAR_POWER_MODE:
- sc->host.ios.power_mode = value;
- break;
- case MMCBR_IVAR_VDD:
- sc->host.ios.vdd = value;
- break;
-#ifdef __rtems__
- case MMCBR_IVAR_TIMING:
- sc->host.ios.timing = value;
- break;
-#endif /* __rtems__ */
- /* These are read-only */
- case MMCBR_IVAR_CAPS:
- case MMCBR_IVAR_HOST_OCR:
- case MMCBR_IVAR_F_MIN:
- case MMCBR_IVAR_F_MAX:
- case MMCBR_IVAR_MAX_DATA:
- return (EINVAL);
- }
- return (0);
-}
-
-static device_method_t at91_mci_methods[] = {
- /* device_if */
- DEVMETHOD(device_probe, at91_mci_probe),
- DEVMETHOD(device_attach, at91_mci_attach),
- DEVMETHOD(device_detach, at91_mci_detach),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, at91_mci_read_ivar),
- DEVMETHOD(bus_write_ivar, at91_mci_write_ivar),
-
- /* mmcbr_if */
- DEVMETHOD(mmcbr_update_ios, at91_mci_update_ios),
- DEVMETHOD(mmcbr_request, at91_mci_request),
- DEVMETHOD(mmcbr_get_ro, at91_mci_get_ro),
- DEVMETHOD(mmcbr_acquire_host, at91_mci_acquire_host),
- DEVMETHOD(mmcbr_release_host, at91_mci_release_host),
-
- DEVMETHOD_END
-};
-
-static driver_t at91_mci_driver = {
- "at91_mci",
- at91_mci_methods,
- sizeof(struct at91_mci_softc),
-};
-
-static devclass_t at91_mci_devclass;
-
-#ifndef __rtems__
-#ifdef FDT
-DRIVER_MODULE(at91_mci, simplebus, at91_mci_driver, at91_mci_devclass, NULL,
- NULL);
-#else
-DRIVER_MODULE(at91_mci, atmelarm, at91_mci_driver, at91_mci_devclass, NULL,
- NULL);
-#endif
-
-MMC_DECLARE_BRIDGE(at91_mci);
-#else /* __rtems__ */
-DRIVER_MODULE(at91_mci, nexus, at91_mci_driver, at91_mci_devclass, NULL, NULL);
-#endif /* __rtems__ */
-DRIVER_MODULE(mmc, at91_mci, mmc_driver, mmc_devclass, NULL, NULL);
-MODULE_DEPEND(at91_mci, mmc, 1, 1, 1);
-#endif /* __rtems__ && LIBBSP_ARM_ATSAM_BSP_H */
diff --git a/freebsd/sys/arm/at91/at91_mcireg.h b/freebsd/sys/arm/at91/at91_mcireg.h
deleted file mode 100644
index 80acf48d..00000000
--- a/freebsd/sys/arm/at91/at91_mcireg.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2006 Berndt Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* $FreeBSD$ */
-
-#ifndef ARM_AT91_AT91_MCIREG_H
-#define ARM_AT91_AT91_MCIREG_H
-
-#define MMC_MAX 30
-
-#define MCI_CR 0x00 /* MCI Control Register */
-#define MCI_MR 0x04 /* MCI Mode Register */
-#define MCI_DTOR 0x08 /* MCI Data Timeout Register */
-#define MCI_SDCR 0x0c /* MCI SD Card Register */
-#define MCI_ARGR 0x10 /* MCI Argument Register */
-#define MCI_CMDR 0x14 /* MCI Command Register */
-#ifdef __rtems__
-#define MCI_BLKR 0x18 /* MCI Block Register */
-#endif /* __rtems__ */
-#define MCI_RSPR 0x20 /* MCI Response Registers - 4 of them */
-#define MCI_RDR 0x30 /* MCI Receive Data Register */
-#define MCI_TDR 0x34 /* MCI Transmit Data Register */
-#define MCI_SR 0x40 /* MCI Status Register */
-#define MCI_IER 0x44 /* MCI Interrupt Enable Register */
-#define MCI_IDR 0x48 /* MCI Interrupt Disable Register */
-#define MCI_IMR 0x4c /* MCI Interrupt Mask Register */
-#ifdef __rtems__
-#define MCI_DMA 0x50 /* MCI DMA Control Register */
-#endif /* __rtems__ */
-
-/* -------- MCI_CR : (MCI Offset: 0x0) MCI Control Register -------- */
-#define MCI_CR_MCIEN (0x1u << 0) /* (MCI) Multimedia Interface Enable */
-#define MCI_CR_MCIDIS (0x1u << 1) /* (MCI) Multimedia Interface Disable */
-#define MCI_CR_PWSEN (0x1u << 2) /* (MCI) Power Save Mode Enable */
-#define MCI_CR_PWSDIS (0x1u << 3) /* (MCI) Power Save Mode Disable */
-#define MCI_CR_SWRST (0x1u << 7) /* (MCI) Software Reset */
-/* -------- MCI_MR : (MCI Offset: 0x4) MCI Mode Register -------- */
-#define MCI_MR_CLKDIV (0xffu << 0) /* (MCI) Clock Divider */
-#define MCI_MR_PWSDIV (0x3fu << 8) /* (MCI) Power Saving Divider */
-#define MCI_MR_RDPROOF (0x1u << 11) /* (MCI) Read Proof Enable */
-#define MCI_MR_WRPROOF (0x1u << 12) /* (MCI) Write Proof Enable */
-#define MCI_MR_PDCFBYTE (0x1u << 13) /* (MCI) PDC Force Byte Transfer */
-#define MCI_MR_PDCPADV (0x1u << 14) /* (MCI) PDC Padding Value */
-#define MCI_MR_PDCMODE (0x1u << 15) /* (MCI) PDC Oriented Mode */
-#define MCI_MR_CLKODD (0x1u << 16) /* (MCI) Clock Divider is Odd */
-#define MCI_MR_BLKLEN 0x3fff0000ul /* (MCI) Data Block Length */
-/* -------- MCI_DTOR : (MCI Offset: 0x8) MCI Data Timeout Register -------- */
-#define MCI_DTOR_DTOCYC (0xfu << 0) /* (MCI) Data Timeout Cycle Number */
-#define MCI_DTOR_DTOMUL (0x7u << 4) /* (MCI) Data Timeout Multiplier */
-#define MCI_DTOR_DTOMUL_1 (0x0u << 4) /* (MCI) DTOCYC x 1 */
-#define MCI_DTOR_DTOMUL_16 (0x1u << 4) /* (MCI) DTOCYC x 16 */
-#define MCI_DTOR_DTOMUL_128 (0x2u << 4) /* (MCI) DTOCYC x 128 */
-#define MCI_DTOR_DTOMUL_256 (0x3u << 4) /* (MCI) DTOCYC x 256 */
-#define MCI_DTOR_DTOMUL_1k (0x4u << 4) /* (MCI) DTOCYC x 1024 */
-#define MCI_DTOR_DTOMUL_4k (0x5u << 4) /* (MCI) DTOCYC x 4096 */
-#define MCI_DTOR_DTOMUL_64k (0x6u << 4) /* (MCI) DTOCYC x 65536 */
-#define MCI_DTOR_DTOMUL_1M (0x7u << 4) /* (MCI) DTOCYC x 1048576 */
-/* -------- MCI_SDCR : (MCI Offset: 0xc) MCI SD Card Register -------- */
-#define MCI_SDCR_SDCSEL (0x1u << 0) /* (MCI) SD Card Selector */
-#define MCI_SDCR_SDCBUS (0x1u << 7) /* (MCI) SD Card Bus Width */
-/* -------- MCI_CMDR : (MCI Offset: 0x14) MCI Command Register -------- */
-#define MCI_CMDR_CMDNB (0x1Fu << 0) /* (MCI) Command Number */
-#define MCI_CMDR_RSPTYP (0x3u << 6) /* (MCI) Response Type */
-#define MCI_CMDR_RSPTYP_NO (0x0u << 6) /* (MCI) No response */
-#define MCI_CMDR_RSPTYP_48 (0x1u << 6) /* (MCI) 48-bit response */
-#define MCI_CMDR_RSPTYP_136 (0x2u << 6) /* (MCI) 136-bit response */
-#define MCI_CMDR_SPCMD (0x7u << 8) /* (MCI) Special CMD */
-#define MCI_CMDR_SPCMD_NONE (0x0u << 8) /* (MCI) Not a special CMD */
-#define MCI_CMDR_SPCMD_INIT (0x1u << 8) /* (MCI) Initialization CMD */
-#define MCI_CMDR_SPCMD_SYNC (0x2u << 8) /* (MCI) Synchronized CMD */
-#define MCI_CMDR_SPCMD_IT_CMD (0x4u << 8) /* (MCI) Interrupt command */
-#define MCI_CMDR_SPCMD_IT_REP (0x5u << 8) /* (MCI) Interrupt response */
-#define MCI_CMDR_OPDCMD (0x1u << 11) /* (MCI) Open Drain Command */
-#define MCI_CMDR_MAXLAT (0x1u << 12) /* (MCI) Maximum Latency for Command to respond */
-#define MCI_CMDR_TRCMD (0x3u << 16) /* (MCI) Transfer CMD */
-#define MCI_CMDR_TRCMD_NO (0x0u << 16) /* (MCI) No transfer */
-#define MCI_CMDR_TRCMD_START (0x1u << 16) /* (MCI) Start transfer */
-#define MCI_CMDR_TRCMD_STOP (0x2u << 16) /* (MCI) Stop transfer */
-#define MCI_CMDR_TRDIR (0x1u << 18) /* (MCI) Transfer Direction */
-#define MCI_CMDR_TRTYP (0x3u << 19) /* (MCI) Transfer Type */
-#define MCI_CMDR_TRTYP_BLOCK (0x0u << 19) /* (MCI) Block Transfer type */
-#define MCI_CMDR_TRTYP_MULTIPLE (0x1u << 19) /* (MCI) Multiple Block transfer type */
-#define MCI_CMDR_TRTYP_STREAM (0x2u << 19) /* (MCI) Stream transfer type */
-#ifdef __rtems__
-/* -------- MCI_BLKR : (MCI Offset: 0x18) MCI Block Register -------- */
-#define MCI_BLKR_BCNT (0xFFFFu << 0)
-#define MCI_BLKR_BLKLEN (0xFFFFu << 16)
-#endif /* __rtems__ */
-/* -------- MCI_SR : (MCI Offset: 0x40) MCI Status Register -------- */
-#define MCI_SR_CMDRDY (0x1u << 0) /* (MCI) Command Ready flag */
-#define MCI_SR_RXRDY (0x1u << 1) /* (MCI) RX Ready flag */
-#define MCI_SR_TXRDY (0x1u << 2) /* (MCI) TX Ready flag */
-#define MCI_SR_BLKE (0x1u << 3) /* (MCI) Data Block Transfer Ended flag */
-#define MCI_SR_DTIP (0x1u << 4) /* (MCI) Data Transfer in Progress flag */
-#define MCI_SR_NOTBUSY (0x1u << 5) /* (MCI) Data Line Not Busy flag */
-#define MCI_SR_ENDRX (0x1u << 6) /* (MCI) End of RX Buffer flag */
-#define MCI_SR_ENDTX (0x1u << 7) /* (MCI) End of TX Buffer flag */
-#define MCI_SR_RXBUFF (0x1u << 14) /* (MCI) RX Buffer Full flag */
-#define MCI_SR_TXBUFE (0x1u << 15) /* (MCI) TX Buffer Empty flag */
-#define MCI_SR_RINDE (0x1u << 16) /* (MCI) Response Index Error flag */
-#define MCI_SR_RDIRE (0x1u << 17) /* (MCI) Response Direction Error flag */
-#define MCI_SR_RCRCE (0x1u << 18) /* (MCI) Response CRC Error flag */
-#define MCI_SR_RENDE (0x1u << 19) /* (MCI) Response End Bit Error flag */
-#define MCI_SR_RTOE (0x1u << 20) /* (MCI) Response Time-out Error flag */
-#define MCI_SR_DCRCE (0x1u << 21) /* (MCI) data CRC Error flag */
-#define MCI_SR_DTOE (0x1u << 22) /* (MCI) Data timeout Error flag */
-#define MCI_SR_OVRE (0x1u << 30) /* (MCI) Overrun flag */
-#define MCI_SR_UNRE (0x1u << 31) /* (MCI) Underrun flag */
-#ifdef __rtems__
-#define MCI_SR_XFRDONE (0x1u << 27) /* (MCI) Underrun flag */
-#endif /* __rtems__ */
-#ifdef __rtems__
-/* -------- MCI_DMA : (MCI Offset: 0x50) MCI DMA Control Register -------- */
-#define MCI_DMA_DMAEN (0x1u << 8)
-#define MCI_DMA_CHKSIZE (0x7u << 4)
-#define MCI_DMA_CHKSIZE_1 (0x0u << 4)
-#define MCI_DMA_CHKSIZE_2 (0x1u << 4)
-#define MCI_DMA_CHKSIZE_4 (0x2u << 4)
-#define MCI_DMA_CHKSIZE_8 (0x3u << 4)
-#define MCI_DMA_CHKSIZE_16 (0x4u << 4)
-#endif /* __rtems__ */
-
-/* TXRDY,DTIP,ENDTX,TXBUFE,RTOE */
-
-#define MCI_SR_BITSTRING \
- "\020" \
- "\001CMDRDY" \
- "\002RXRDY" \
- "\003TXRDY" \
- "\004BLKE" \
- "\005DTIP" \
- "\006NOTBUSY" \
- "\007ENDRX" \
- "\010ENDTX" \
- "\017RXBUFF" \
- "\020TXBUFE" \
- "\021RINDE" \
- "\022RDIRE" \
- "\023RCRCE" \
- "\024RENDE" \
- "\025RTOE" \
- "\026DCRCE" \
- "\027DTOE" \
- "\037OVRE" \
- "\040UNRE"
-
-/* -------- MCI_IER : (MCI Offset: 0x44) MCI Interrupt Enable Register -------- */
-/* -------- MCI_IDR : (MCI Offset: 0x48) MCI Interrupt Disable Register -------- */
-/* -------- MCI_IMR : (MCI Offset: 0x4c) MCI Interrupt Mask Register -------- */
-
-#define MCI_SR_ERROR (MCI_SR_UNRE | MCI_SR_OVRE | MCI_SR_DTOE | \
- MCI_SR_DCRCE | MCI_SR_RTOE | MCI_SR_RENDE | \
- MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE)
-
-#define AT91C_BUS_WIDTH_1BIT 0x00
-#define AT91C_BUS_WIDTH_4BITS 0x02
-
-#endif /* ARM_AT91_AT91_MCIREG_H */
diff --git a/freebsd/sys/arm/at91/at91_pdcreg.h b/freebsd/sys/arm/at91/at91_pdcreg.h
deleted file mode 100644
index 659804bd..00000000
--- a/freebsd/sys/arm/at91/at91_pdcreg.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2006 M. Warner Losh.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* $FreeBSD$ */
-
-#ifndef ARM_AT91_AT91_PDCREG_H
-#define ARM_AT91_AT91_PDCREG_H
-
-#define PDC_RPR 0x100 /* PDC Receive Pointer Register */
-#define PDC_RCR 0x104 /* PDC Receive Counter Register */
-#define PDC_TPR 0x108 /* PDC Transmit Pointer Register */
-#define PDC_TCR 0x10c /* PDC Transmit Counter Register */
-#define PDC_RNPR 0x110 /* PDC Receive Next Pointer Register */
-#define PDC_RNCR 0x114 /* PDC Receive Next Counter Register */
-#define PDC_TNPR 0x118 /* PDC Transmit Next Pointer Reg */
-#define PDC_TNCR 0x11c /* PDC Transmit Next Counter Reg */
-#define PDC_PTCR 0x120 /* PDC Transfer Control Register */
-#define PDC_PTSR 0x124 /* PDC Transfer Status Register */
-
-/* PTCR/PTSR */
-#define PDC_PTCR_RXTEN (1UL << 0) /* RXTEN: Receiver Transfer Enable */
-#define PDC_PTCR_RXTDIS (1UL << 1) /* RXTDIS: Receiver Transfer Disable */
-#define PDC_PTCR_TXTEN (1UL << 8) /* TXTEN: Transmitter Transfer En */
-#define PDC_PTCR_TXTDIS (1UL << 9) /* TXTDIS: Transmitter Transmit Dis */
-
-#endif /* ARM_AT91_AT91_PDCREG_H */
diff --git a/freebsd/sys/arm/at91/at91reg.h b/freebsd/sys/arm/at91/at91reg.h
deleted file mode 100644
index f5791bd7..00000000
--- a/freebsd/sys/arm/at91/at91reg.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009 Greg Ansley All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifndef _AT91REG_H_
-#define _AT91REG_H_
-
-#include <rtems/bsd/local/opt_at91.h>
-
-/* Where builtin peripherals start in KVM */
-#define AT91_BASE 0xd0000000
-
-/* Where builtin peripherals start PA */
-#define AT91_PA_BASE 0xf0000000
-
-/* A few things that we count on being the same
- * throughout the whole family of SOCs */
-
-/* SYSC System Controller */
-/* System Registers */
-#define AT91_SYS_BASE 0xffff000
-#define AT91_SYS_SIZE 0x1000
-
-#define AT91_DBGU0 0x0ffff200 /* Most */
-#define AT91_DBGU1 0x0fffee00 /* SAM9263, CAP9, and SAM9G45 */
-
-#define AT91_DBGU_SIZE 0x200
-#define DBGU_C1R (64) /* Chip ID1 Register */
-#define DBGU_C2R (68) /* Chip ID2 Register */
-#define DBGU_FNTR (72) /* Force NTRST Register */
-
-#define AT91_CPU_VERSION_MASK 0x0000001f
-#define AT91_CPU_FAMILY_MASK 0x0ff00000
-
-#define AT91_CPU_RM9200 0x09290780
-#define AT91_CPU_SAM9260 0x019803a0
-#define AT91_CPU_SAM9261 0x019703a0
-#define AT91_CPU_SAM9263 0x019607a0
-#define AT91_CPU_SAM9G10 0x819903a0
-#define AT91_CPU_SAM9G20 0x019905a0
-#define AT91_CPU_SAM9G45 0x819b05a0
-#define AT91_CPU_SAM9N12 0x819a07a0
-#define AT91_CPU_SAM9RL64 0x019b03a0
-#define AT91_CPU_SAM9X5 0x819a05a0
-
-#define AT91_CPU_SAM9XE128 0x329973a0
-#define AT91_CPU_SAM9XE256 0x329a93a0
-#define AT91_CPU_SAM9XE512 0x329aa3a0
-
-#define AT91_CPU_CAP9 0x039a03a0
-
-#define AT91_EXID_SAM9M11 0x00000001
-#define AT91_EXID_SAM9M10 0x00000002
-#define AT91_EXID_SAM9G46 0x00000003
-#define AT91_EXID_SAM9G45 0x00000004
-
-#define AT91_EXID_SAM9G15 0x00000000
-#define AT91_EXID_SAM9G35 0x00000001
-#define AT91_EXID_SAM9X35 0x00000002
-#define AT91_EXID_SAM9G25 0x00000003
-#define AT91_EXID_SAM9X25 0x00000004
-
-#define AT91_IRQ_SYSTEM 1
-
-#endif /* _AT91REG_H_ */
diff --git a/freebsd/sys/arm/at91/at91var.h b/freebsd/sys/arm/at91/at91var.h
deleted file mode 100644
index 84c898fb..00000000
--- a/freebsd/sys/arm/at91/at91var.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2005 Olivier Houchard. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* $FreeBSD$ */
-
-#ifndef _AT91VAR_H_
-#define _AT91VAR_H_
-
-#include <sys/bus.h>
-#include <sys/rman.h>
-
-#include <arm/at91/at91reg.h>
-
-struct at91_softc {
- device_t dev;
- bus_space_tag_t sc_st;
- bus_space_handle_t sc_sh;
- bus_space_handle_t sc_aic_sh;
- struct rman sc_irq_rman;
- struct rman sc_mem_rman;
-};
-
-struct at91_ivar {
- struct resource_list resources;
-};
-
-struct cpu_devs
-{
- const char *name;
- int unit;
- bus_addr_t mem_base;
- bus_size_t mem_len;
- int irq0;
- int irq1;
- int irq2;
- const char *parent_clk;
-};
-
-enum at91_soc_type {
- AT91_T_NONE = 0,
- AT91_T_CAP9,
- AT91_T_RM9200,
- AT91_T_SAM9260,
- AT91_T_SAM9261,
- AT91_T_SAM9263,
- AT91_T_SAM9G10,
- AT91_T_SAM9G20,
- AT91_T_SAM9G45,
- AT91_T_SAM9N12,
- AT91_T_SAM9RL,
- AT91_T_SAM9X5,
-};
-
-enum at91_soc_subtype {
- AT91_ST_ANY = -1, /* Match any type */
- AT91_ST_NONE = 0,
- /* AT91RM9200 */
- AT91_ST_RM9200_BGA,
- AT91_ST_RM9200_PQFP,
- /* AT91SAM9260 */
- AT91_ST_SAM9XE,
- /* AT91SAM9G45 */
- AT91_ST_SAM9G45,
- AT91_ST_SAM9M10,
- AT91_ST_SAM9G46,
- AT91_ST_SAM9M11,
- /* AT91SAM9X5 */
- AT91_ST_SAM9G15,
- AT91_ST_SAM9G25,
- AT91_ST_SAM9G35,
- AT91_ST_SAM9X25,
- AT91_ST_SAM9X35,
-};
-
-enum at91_soc_family {
- AT91_FAMILY_SAM9 = 0x19,
- AT91_FAMILY_SAM9XE = 0x29,
- AT91_FAMILY_RM92 = 0x92,
-};
-
-#define AT91_SOC_NAME_MAX 50
-
-typedef void (*DELAY_t)(int);
-typedef void (*cpu_reset_t)(void);
-typedef void (*clk_init_t)(void);
-
-struct at91_soc_data {
- DELAY_t soc_delay; /* SoC specific delay function */
- cpu_reset_t soc_reset; /* SoC specific reset function */
- clk_init_t soc_clock_init; /* SoC specific clock init function */
- const int *soc_irq_prio; /* SoC specific IRQ priorities */
- const struct cpu_devs *soc_children; /* SoC specific children list */
- const uint32_t *soc_pio_base; /* SoC specific PIO base registers */
- size_t soc_pio_count; /* Count of PIO units (not pins) in SoC */
-};
-
-struct at91_soc_info {
- enum at91_soc_type type;
- enum at91_soc_subtype subtype;
- enum at91_soc_family family;
- uint32_t cidr;
- uint32_t exid;
- char name[AT91_SOC_NAME_MAX];
- uint32_t dbgu_base;
- struct at91_soc_data *soc_data;
-};
-
-extern struct at91_soc_info soc_info;
-
-static inline int at91_is_rm92(void);
-static inline int at91_is_sam9(void);
-static inline int at91_is_sam9xe(void);
-static inline int at91_cpu_is(u_int cpu);
-
-static inline int
-at91_is_rm92(void)
-{
-
- return (soc_info.type == AT91_T_RM9200);
-}
-
-static inline int
-at91_is_sam9(void)
-{
-
- return (soc_info.family == AT91_FAMILY_SAM9);
-}
-
-static inline int
-at91_is_sam9xe(void)
-{
-
- return (soc_info.family == AT91_FAMILY_SAM9XE);
-}
-
-static inline int
-at91_cpu_is(u_int cpu)
-{
-
- return (soc_info.type == cpu);
-}
-
-void at91_add_child(device_t dev, int prio, const char *name, int unit,
- bus_addr_t addr, bus_size_t size, int irq0, int irq1, int irq2);
-
-extern uint32_t at91_irq_system;
-extern uint32_t at91_master_clock;
-void at91_pmc_init_clock(void);
-void at91_soc_id(void);
-
-#endif /* _AT91VAR_H_ */
diff --git a/freebsd/sys/arm/include/machine/cpufunc.h b/freebsd/sys/arm/include/machine/cpufunc.h
index e3ada92b..9dba8043 100644
--- a/freebsd/sys/arm/include/machine/cpufunc.h
+++ b/freebsd/sys/arm/include/machine/cpufunc.h
@@ -46,8 +46,8 @@
#ifndef _MACHINE_CPUFUNC_H_
#define _MACHINE_CPUFUNC_H_
-#ifdef _KERNEL
#ifndef __rtems__
+#ifdef _KERNEL
#include <sys/types.h>
#include <machine/armreg.h>
@@ -55,7 +55,7 @@
static __inline void
breakpoint(void)
{
- __asm(".word 0xe7ffffff");
+ __asm("udf 0xffff");
}
struct cpu_functions {
@@ -209,58 +209,12 @@ int set_cpufuncs (void);
#define ARCHITECTURE_NOT_SUPPORTED 2 /* not known */
void cpufunc_nullop (void);
-u_int cpu_ident (void);
u_int cpufunc_control (u_int clear, u_int bic);
void cpu_domains (u_int domains);
-u_int cpu_faultstatus (void);
-u_int cpu_faultaddress (void);
-u_int cpu_get_control (void);
-u_int cpu_pfr (int);
-
-#if defined(CPU_FA526)
-void fa526_setup (void);
-void fa526_setttb (u_int ttb);
-void fa526_context_switch (void);
-void fa526_cpu_sleep (int);
-void fa526_tlb_flushID_SE (u_int);
-
-void fa526_icache_sync_range(vm_offset_t start, vm_size_t end);
-void fa526_dcache_wbinv_all (void);
-void fa526_dcache_wbinv_range(vm_offset_t start, vm_size_t end);
-void fa526_dcache_inv_range (vm_offset_t start, vm_size_t end);
-void fa526_dcache_wb_range (vm_offset_t start, vm_size_t end);
-void fa526_idcache_wbinv_all(void);
-void fa526_idcache_wbinv_range(vm_offset_t start, vm_size_t end);
-#endif
-
-#if defined(CPU_ARM9) || defined(CPU_ARM9E)
-void arm9_setttb (u_int);
+#if defined(CPU_ARM9E)
void arm9_tlb_flushID_SE (u_int va);
void arm9_context_switch (void);
-#endif
-
-#if defined(CPU_ARM9)
-void arm9_icache_sync_range (vm_offset_t, vm_size_t);
-
-void arm9_dcache_wbinv_all (void);
-void arm9_dcache_wbinv_range (vm_offset_t, vm_size_t);
-void arm9_dcache_inv_range (vm_offset_t, vm_size_t);
-void arm9_dcache_wb_range (vm_offset_t, vm_size_t);
-
-void arm9_idcache_wbinv_all (void);
-void arm9_idcache_wbinv_range (vm_offset_t, vm_size_t);
-
-void arm9_setup (void);
-
-extern unsigned arm9_dcache_sets_max;
-extern unsigned arm9_dcache_sets_inc;
-extern unsigned arm9_dcache_index_max;
-extern unsigned arm9_dcache_index_inc;
-#endif
-
-#if defined(CPU_ARM9E)
-void arm10_setup (void);
u_int sheeva_control_ext (u_int, u_int);
void sheeva_cpu_sleep (int);
@@ -276,26 +230,14 @@ void sheeva_l2cache_wb_range (vm_offset_t, vm_size_t);
void sheeva_l2cache_wbinv_all (void);
#endif
-#if defined(CPU_MV_PJ4B)
-void armv6_idcache_wbinv_all (void);
-#endif
#if defined(CPU_CORTEXA) || defined(CPU_MV_PJ4B) || defined(CPU_KRAIT)
-void armv7_idcache_wbinv_all (void);
void armv7_cpu_sleep (int);
-void armv7_setup (void);
-void armv7_drain_writebuf (void);
-
-void cortexa_setup (void);
#endif
#if defined(CPU_MV_PJ4B)
void pj4b_config (void);
-void pj4bv7_setup (void);
#endif
#if defined(CPU_ARM1176)
-void arm11_drain_writebuf (void);
-
-void arm11x6_setup (void);
void arm11x6_sleep (int); /* no ref. for errata */
#endif
@@ -311,12 +253,6 @@ void armv5_ec_dcache_wb_range(vm_offset_t, vm_size_t);
void armv5_ec_idcache_wbinv_all(void);
void armv5_ec_idcache_wbinv_range(vm_offset_t, vm_size_t);
-#endif
-
-#if defined(CPU_ARM9) || defined(CPU_ARM9E) || \
- defined(CPU_FA526) || \
- defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
- defined(CPU_XSCALE_81342)
void armv4_tlb_flushID (void);
void armv4_tlb_flushD (void);
@@ -326,72 +262,6 @@ void armv4_drain_writebuf (void);
void armv4_idcache_inv_all (void);
#endif
-#if defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
- defined(CPU_XSCALE_81342)
-void xscale_cpwait (void);
-
-void xscale_cpu_sleep (int mode);
-
-u_int xscale_control (u_int clear, u_int bic);
-
-void xscale_setttb (u_int ttb);
-
-void xscale_tlb_flushID_SE (u_int va);
-
-void xscale_cache_flushID (void);
-void xscale_cache_flushI (void);
-void xscale_cache_flushD (void);
-void xscale_cache_flushD_SE (u_int entry);
-
-void xscale_cache_cleanID (void);
-void xscale_cache_cleanD (void);
-void xscale_cache_cleanD_E (u_int entry);
-
-void xscale_cache_clean_minidata (void);
-
-void xscale_cache_purgeID (void);
-void xscale_cache_purgeID_E (u_int entry);
-void xscale_cache_purgeD (void);
-void xscale_cache_purgeD_E (u_int entry);
-
-void xscale_cache_syncI (void);
-void xscale_cache_cleanID_rng (vm_offset_t start, vm_size_t end);
-void xscale_cache_cleanD_rng (vm_offset_t start, vm_size_t end);
-void xscale_cache_purgeID_rng (vm_offset_t start, vm_size_t end);
-void xscale_cache_purgeD_rng (vm_offset_t start, vm_size_t end);
-void xscale_cache_syncI_rng (vm_offset_t start, vm_size_t end);
-void xscale_cache_flushD_rng (vm_offset_t start, vm_size_t end);
-
-void xscale_context_switch (void);
-
-void xscale_setup (void);
-#endif /* CPU_XSCALE_PXA2X0 || CPU_XSCALE_IXP425 */
-
-#ifdef CPU_XSCALE_81342
-
-void xscalec3_l2cache_purge (void);
-void xscalec3_cache_purgeID (void);
-void xscalec3_cache_purgeD (void);
-void xscalec3_cache_cleanID (void);
-void xscalec3_cache_cleanD (void);
-void xscalec3_cache_syncI (void);
-
-void xscalec3_cache_purgeID_rng (vm_offset_t start, vm_size_t end);
-void xscalec3_cache_purgeD_rng (vm_offset_t start, vm_size_t end);
-void xscalec3_cache_cleanID_rng (vm_offset_t start, vm_size_t end);
-void xscalec3_cache_cleanD_rng (vm_offset_t start, vm_size_t end);
-void xscalec3_cache_syncI_rng (vm_offset_t start, vm_size_t end);
-
-void xscalec3_l2cache_flush_rng (vm_offset_t, vm_size_t);
-void xscalec3_l2cache_clean_rng (vm_offset_t start, vm_size_t end);
-void xscalec3_l2cache_purge_rng (vm_offset_t start, vm_size_t end);
-
-
-void xscalec3_setttb (u_int ttb);
-void xscalec3_context_switch (void);
-
-#endif /* CPU_XSCALE_81342 */
-
/*
* Macros for manipulating CPU interrupts
*/
@@ -462,12 +332,6 @@ void set_stackptr (u_int mode, u_int address);
u_int get_stackptr (u_int mode);
/*
- * Miscellany
- */
-
-int get_pc_str_offset (void);
-
-/*
* CPU functions from locore.S
*/
@@ -496,8 +360,21 @@ extern u_int arm_cache_level;
extern u_int arm_cache_loc;
extern u_int arm_cache_type[14];
-#endif /* __rtems__ */
+#else /* !_KERNEL */
+
+static __inline void
+breakpoint(void)
+{
+
+ /*
+ * This matches the instruction used by GDB for software
+ * breakpoints.
+ */
+ __asm("udf 0xfdee");
+}
+
#endif /* _KERNEL */
+#endif /* __rtems__ */
#endif /* _MACHINE_CPUFUNC_H_ */
/* End of cpufunc.h */
diff --git a/freebsd/sys/cam/cam_ccb.h b/freebsd/sys/cam/cam_ccb.h
index 642e7862..9119468d 100644
--- a/freebsd/sys/cam/cam_ccb.h
+++ b/freebsd/sys/cam/cam_ccb.h
@@ -1372,48 +1372,6 @@ cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
u_int32_t flags, u_int8_t tag_action,
u_int8_t *data_ptr, u_int32_t dxfer_len,
u_int8_t sense_len, u_int8_t cdb_len,
- u_int32_t timeout);
-
-static __inline void
-cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *),
- u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len,
- u_int32_t timeout);
-
-static __inline void
-cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *),
- u_int32_t flags, u_int tag_action, u_int tag_id,
- u_int init_id, u_int scsi_status, u_int8_t *data_ptr,
- u_int32_t dxfer_len, u_int32_t timeout);
-
-static __inline void
-cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *),
- u_int32_t flags, u_int tag_action,
- u_int8_t *data_ptr, u_int32_t dxfer_len,
- u_int32_t timeout);
-
-static __inline void
-cam_fill_smpio(struct ccb_smpio *smpio, uint32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags,
- uint8_t *smp_request, int smp_request_len,
- uint8_t *smp_response, int smp_response_len,
- uint32_t timeout);
-
-static __inline void
-cam_fill_mmcio(struct ccb_mmcio *mmcio, uint32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags,
- uint32_t mmc_opcode, uint32_t mmc_arg, uint32_t mmc_flags,
- struct mmc_data *mmc_d,
- uint32_t timeout);
-
-static __inline void
-cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
- void (*cbfcnp)(struct cam_periph *, union ccb *),
- u_int32_t flags, u_int8_t tag_action,
- u_int8_t *data_ptr, u_int32_t dxfer_len,
- u_int8_t sense_len, u_int8_t cdb_len,
u_int32_t timeout)
{
csio->ccb_h.func_code = XPT_SCSI_IO;
diff --git a/freebsd/sys/cam/cam_xpt.h b/freebsd/sys/cam/cam_xpt.h
index fb49c893..52af71fc 100644
--- a/freebsd/sys/cam/cam_xpt.h
+++ b/freebsd/sys/cam/cam_xpt.h
@@ -148,6 +148,7 @@ void xpt_release_path(struct cam_path *path);
const char * xpt_action_name(uint32_t action);
void xpt_pollwait(union ccb *start_ccb, uint32_t timeout);
uint32_t xpt_poll_setup(union ccb *start_ccb);
+void xpt_sim_poll(struct cam_sim *sim);
/*
* Perform a path inquiry at the request priority. The bzero may be
diff --git a/freebsd/sys/cam/mmc/mmc.h b/freebsd/sys/cam/mmc/mmc.h
index 9b9659fe..44c4aad0 100644
--- a/freebsd/sys/cam/mmc/mmc.h
+++ b/freebsd/sys/cam/mmc/mmc.h
@@ -94,4 +94,12 @@ struct mmc_params {
uint8_t sdio_func_count;
} __packed;
+/*
+ * Only one MMC card on bus is supported now.
+ * If we ever want to support multiple MMC cards on the same bus,
+ * mmc_xpt needs to be extended to issue new RCAs based on number
+ * of already probed cards. Furthermore, retuning and high-speed
+ * settings should also take all cards into account.
+ */
+#define MMC_PROPOSED_RCA 2
#endif
diff --git a/freebsd/sys/contrib/ck/include/ck_pr.h b/freebsd/sys/contrib/ck/include/ck_pr.h
index 7fa57a8e..2a8c5398 100644
--- a/freebsd/sys/contrib/ck/include/ck_pr.h
+++ b/freebsd/sys/contrib/ck/include/ck_pr.h
@@ -619,8 +619,8 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,)
}
#define CK_PR_UNARY_Z(K, S, M, T, P, C, Z) \
- CK_CC_INLINE static void \
- ck_pr_##K##_##S##_zero(M *target, bool *zero) \
+ CK_CC_INLINE static bool \
+ ck_pr_##K##_##S##_is_zero(M *target) \
{ \
T previous; \
C punt; \
@@ -631,12 +631,21 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,)
(C)(previous P 1), \
&previous) == false) \
ck_pr_stall(); \
- *zero = previous == (T)Z; \
+ return previous == (T)Z; \
+ }
+
+#define CK_PR_UNARY_Z_STUB(K, S, M) \
+ CK_CC_INLINE static void \
+ ck_pr_##K##_##S##_zero(M *target, bool *zero) \
+ { \
+ *zero = ck_pr_##K##_##S##_is_zero(target); \
return; \
}
#define CK_PR_UNARY_S(K, X, S, M) CK_PR_UNARY(K, X, S, M, M)
-#define CK_PR_UNARY_Z_S(K, S, M, P, Z) CK_PR_UNARY_Z(K, S, M, M, P, M, Z)
+#define CK_PR_UNARY_Z_S(K, S, M, P, Z) \
+ CK_PR_UNARY_Z(K, S, M, M, P, M, Z) \
+ CK_PR_UNARY_Z_STUB(K, S, M)
#if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE)
@@ -648,6 +657,8 @@ CK_PR_UNARY_S(inc, add, char, char)
#ifndef CK_F_PR_INC_CHAR_ZERO
#define CK_F_PR_INC_CHAR_ZERO
CK_PR_UNARY_Z_S(inc, char, char, +, -1)
+#else
+CK_PR_UNARY_Z_STUB(inc, char, char)
#endif /* CK_F_PR_INC_CHAR_ZERO */
#ifndef CK_F_PR_DEC_CHAR
@@ -658,6 +669,8 @@ CK_PR_UNARY_S(dec, sub, char, char)
#ifndef CK_F_PR_DEC_CHAR_ZERO
#define CK_F_PR_DEC_CHAR_ZERO
CK_PR_UNARY_Z_S(dec, char, char, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, char, char)
#endif /* CK_F_PR_DEC_CHAR_ZERO */
#endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */
@@ -672,6 +685,8 @@ CK_PR_UNARY_S(inc, add, int, int)
#ifndef CK_F_PR_INC_INT_ZERO
#define CK_F_PR_INC_INT_ZERO
CK_PR_UNARY_Z_S(inc, int, int, +, -1)
+#else
+CK_PR_UNARY_Z_STUB(inc, int, int)
#endif /* CK_F_PR_INC_INT_ZERO */
#ifndef CK_F_PR_DEC_INT
@@ -682,6 +697,8 @@ CK_PR_UNARY_S(dec, sub, int, int)
#ifndef CK_F_PR_DEC_INT_ZERO
#define CK_F_PR_DEC_INT_ZERO
CK_PR_UNARY_Z_S(dec, int, int, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, int, int)
#endif /* CK_F_PR_DEC_INT_ZERO */
#endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */
@@ -711,6 +728,8 @@ CK_PR_UNARY_S(inc, add, uint, unsigned int)
#ifndef CK_F_PR_INC_UINT_ZERO
#define CK_F_PR_INC_UINT_ZERO
CK_PR_UNARY_Z_S(inc, uint, unsigned int, +, UINT_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, uint, unsigned int)
#endif /* CK_F_PR_INC_UINT_ZERO */
#ifndef CK_F_PR_DEC_UINT
@@ -721,6 +740,8 @@ CK_PR_UNARY_S(dec, sub, uint, unsigned int)
#ifndef CK_F_PR_DEC_UINT_ZERO
#define CK_F_PR_DEC_UINT_ZERO
CK_PR_UNARY_Z_S(dec, uint, unsigned int, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, uint, unsigned int)
#endif /* CK_F_PR_DEC_UINT_ZERO */
#endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */
@@ -735,6 +756,8 @@ CK_PR_UNARY(inc, add, ptr, void, uintptr_t)
#ifndef CK_F_PR_INC_PTR_ZERO
#define CK_F_PR_INC_PTR_ZERO
CK_PR_UNARY_Z(inc, ptr, void, uintptr_t, +, void *, UINT_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, ptr, void)
#endif /* CK_F_PR_INC_PTR_ZERO */
#ifndef CK_F_PR_DEC_PTR
@@ -745,6 +768,8 @@ CK_PR_UNARY(dec, sub, ptr, void, uintptr_t)
#ifndef CK_F_PR_DEC_PTR_ZERO
#define CK_F_PR_DEC_PTR_ZERO
CK_PR_UNARY_Z(dec, ptr, void, uintptr_t, -, void *, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, ptr, void)
#endif /* CK_F_PR_DEC_PTR_ZERO */
#endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */
@@ -759,6 +784,8 @@ CK_PR_UNARY_S(inc, add, 64, uint64_t)
#ifndef CK_F_PR_INC_64_ZERO
#define CK_F_PR_INC_64_ZERO
CK_PR_UNARY_Z_S(inc, 64, uint64_t, +, UINT64_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 64, uint64_t)
#endif /* CK_F_PR_INC_64_ZERO */
#ifndef CK_F_PR_DEC_64
@@ -769,6 +796,8 @@ CK_PR_UNARY_S(dec, sub, 64, uint64_t)
#ifndef CK_F_PR_DEC_64_ZERO
#define CK_F_PR_DEC_64_ZERO
CK_PR_UNARY_Z_S(dec, 64, uint64_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 64, uint64_t)
#endif /* CK_F_PR_DEC_64_ZERO */
#endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */
@@ -783,6 +812,8 @@ CK_PR_UNARY_S(inc, add, 32, uint32_t)
#ifndef CK_F_PR_INC_32_ZERO
#define CK_F_PR_INC_32_ZERO
CK_PR_UNARY_Z_S(inc, 32, uint32_t, +, UINT32_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 32, uint32_t)
#endif /* CK_F_PR_INC_32_ZERO */
#ifndef CK_F_PR_DEC_32
@@ -793,6 +824,8 @@ CK_PR_UNARY_S(dec, sub, 32, uint32_t)
#ifndef CK_F_PR_DEC_32_ZERO
#define CK_F_PR_DEC_32_ZERO
CK_PR_UNARY_Z_S(dec, 32, uint32_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 32, uint32_t)
#endif /* CK_F_PR_DEC_32_ZERO */
#endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */
@@ -807,6 +840,8 @@ CK_PR_UNARY_S(inc, add, 16, uint16_t)
#ifndef CK_F_PR_INC_16_ZERO
#define CK_F_PR_INC_16_ZERO
CK_PR_UNARY_Z_S(inc, 16, uint16_t, +, UINT16_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 16, uint16_t)
#endif /* CK_F_PR_INC_16_ZERO */
#ifndef CK_F_PR_DEC_16
@@ -817,6 +852,8 @@ CK_PR_UNARY_S(dec, sub, 16, uint16_t)
#ifndef CK_F_PR_DEC_16_ZERO
#define CK_F_PR_DEC_16_ZERO
CK_PR_UNARY_Z_S(dec, 16, uint16_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 16, uint16_t)
#endif /* CK_F_PR_DEC_16_ZERO */
#endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */
@@ -831,6 +868,8 @@ CK_PR_UNARY_S(inc, add, 8, uint8_t)
#ifndef CK_F_PR_INC_8_ZERO
#define CK_F_PR_INC_8_ZERO
CK_PR_UNARY_Z_S(inc, 8, uint8_t, +, UINT8_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 8, uint8_t)
#endif /* CK_F_PR_INC_8_ZERO */
#ifndef CK_F_PR_DEC_8
@@ -841,6 +880,8 @@ CK_PR_UNARY_S(dec, sub, 8, uint8_t)
#ifndef CK_F_PR_DEC_8_ZERO
#define CK_F_PR_DEC_8_ZERO
CK_PR_UNARY_Z_S(dec, 8, uint8_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 8, uint8_t)
#endif /* CK_F_PR_DEC_8_ZERO */
#endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */
diff --git a/freebsd/sys/contrib/ck/include/ck_queue.h b/freebsd/sys/contrib/ck/include/ck_queue.h
index faf96a17..3f503aa6 100644
--- a/freebsd/sys/contrib/ck/include/ck_queue.h
+++ b/freebsd/sys/contrib/ck/include/ck_queue.h
@@ -180,8 +180,14 @@ struct { \
ck_pr_store_ptr(&(head)->cslh_first, elm); \
} while (0)
+#define CK_SLIST_INSERT_PREVPTR(prevp, slistelm, elm, field) do { /* link elm in front of slistelm through the link pointer prevp */ \
+ (elm)->field.csle_next = (slistelm); /* elm's successor becomes slistelm */ \
+ ck_pr_fence_store(); /* store fence between the two stores: next pointer first, link second */ \
+ ck_pr_store_ptr(prevp, elm); /* finally store elm through prevp */ \
+} while (0)
+
#define CK_SLIST_REMOVE_AFTER(elm, field) do { \
- ck_pr_store_ptr(&(elm)->field.csle_next, \
+ ck_pr_store_ptr(&(elm)->field.csle_next, \
(elm)->field.csle_next->field.csle_next); \
} while (0)
@@ -190,7 +196,7 @@ struct { \
CK_SLIST_REMOVE_HEAD((head), field); \
} else { \
struct type *curelm = (head)->cslh_first; \
- while (curelm->field.csle_next != (elm)) \
+ while (curelm->field.csle_next != (elm)) \
curelm = curelm->field.csle_next; \
CK_SLIST_REMOVE_AFTER(curelm, field); \
} \
@@ -201,6 +207,10 @@ struct { \
(head)->cslh_first->field.csle_next); \
} while (0)
+/*
+ * Unlink elm by storing its successor through prevp, the link pointer
+ * that currently refers to elm (mirrors CK_SLIST_INSERT_PREVPTR).
+ *
+ * prevp: pointer to the link slot to overwrite.
+ * elm:   element being removed; only its field.csle_next is read.
+ * field: name of the list-entry member inside elm.
+ */
+#define CK_SLIST_REMOVE_PREVPTR(prevp, elm, field) do { \
+ ck_pr_store_ptr(prevp, (elm)->field.csle_next); /* was "prevptr", which is not a macro parameter */ \
+} while (0)
+
#define CK_SLIST_MOVE(head1, head2, field) do { \
ck_pr_store_ptr(&(head1)->cslh_first, (head2)->cslh_first); \
} while (0)
diff --git a/freebsd/sys/contrib/ck/include/gcc/ppc/ck_pr.h b/freebsd/sys/contrib/ck/include/gcc/ppc/ck_pr.h
index cd7935dd..73f0cb78 100644
--- a/freebsd/sys/contrib/ck/include/gcc/ppc/ck_pr.h
+++ b/freebsd/sys/contrib/ck/include/gcc/ppc/ck_pr.h
@@ -67,21 +67,29 @@ ck_pr_stall(void)
__asm__ __volatile__(I ::: "memory"); \
}
-CK_PR_FENCE(atomic, "lwsync")
-CK_PR_FENCE(atomic_store, "lwsync")
+#ifdef CK_MD_PPC32_LWSYNC
+#define CK_PR_LWSYNCOP "lwsync" /* instruction used by the fences below that previously hard-coded "lwsync" */
+#else /* !CK_MD_PPC32_LWSYNC: substitute "sync" in those fences */
+#define CK_PR_LWSYNCOP "sync"
+#endif /* CK_MD_PPC32_LWSYNC */
+
+CK_PR_FENCE(atomic, CK_PR_LWSYNCOP)
+CK_PR_FENCE(atomic_store, CK_PR_LWSYNCOP)
CK_PR_FENCE(atomic_load, "sync")
-CK_PR_FENCE(store_atomic, "lwsync")
-CK_PR_FENCE(load_atomic, "lwsync")
-CK_PR_FENCE(store, "lwsync")
+CK_PR_FENCE(store_atomic, CK_PR_LWSYNCOP)
+CK_PR_FENCE(load_atomic, CK_PR_LWSYNCOP)
+CK_PR_FENCE(store, CK_PR_LWSYNCOP)
CK_PR_FENCE(store_load, "sync")
-CK_PR_FENCE(load, "lwsync")
-CK_PR_FENCE(load_store, "lwsync")
+CK_PR_FENCE(load, CK_PR_LWSYNCOP)
+CK_PR_FENCE(load_store, CK_PR_LWSYNCOP)
CK_PR_FENCE(memory, "sync")
-CK_PR_FENCE(acquire, "lwsync")
-CK_PR_FENCE(release, "lwsync")
-CK_PR_FENCE(acqrel, "lwsync")
-CK_PR_FENCE(lock, "lwsync")
-CK_PR_FENCE(unlock, "lwsync")
+CK_PR_FENCE(acquire, CK_PR_LWSYNCOP)
+CK_PR_FENCE(release, CK_PR_LWSYNCOP)
+CK_PR_FENCE(acqrel, CK_PR_LWSYNCOP)
+CK_PR_FENCE(lock, CK_PR_LWSYNCOP)
+CK_PR_FENCE(unlock, CK_PR_LWSYNCOP)
+
+#undef CK_PR_LWSYNCOP
#undef CK_PR_FENCE
diff --git a/freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h b/freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h
index 3e36376f..e678e830 100644
--- a/freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h
+++ b/freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h
@@ -233,18 +233,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
}
#define CK_PR_UNARY_V(K, S, T, C, I) \
- CK_CC_INLINE static void \
- ck_pr_##K##_##S##_zero(T *target, bool *r) \
+ CK_CC_INLINE static bool \
+ ck_pr_##K##_##S##_is_zero(T *target) \
{ \
+ bool ret; /* receives the zero flag left by instruction I (written by setz) */ \
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \
: "+m" (*(C *)target), \
- "=m" (*r) \
+ "=rm" (ret) \
: \
: "memory", "cc"); \
- return; \
+ return ret; /* the flag is now returned instead of stored through a bool pointer */ \
}
-
#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
#define CK_PR_GENERATE(K) \
diff --git a/freebsd/sys/contrib/ck/include/gcc/x86_64/ck_pr.h b/freebsd/sys/contrib/ck/include/gcc/x86_64/ck_pr.h
index 4de13329..fb2804e8 100644
--- a/freebsd/sys/contrib/ck/include/gcc/x86_64/ck_pr.h
+++ b/freebsd/sys/contrib/ck/include/gcc/x86_64/ck_pr.h
@@ -332,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
}
#define CK_PR_UNARY_V(K, S, T, C, I) \
- CK_CC_INLINE static void \
- ck_pr_##K##_##S##_zero(T *target, bool *r) \
+ CK_CC_INLINE static bool \
+ ck_pr_##K##_##S##_is_zero(T *target) \
{ \
+ bool ret; /* receives the zero flag left by instruction I (written by setz) */ \
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \
: "+m" (*(C *)target), \
- "=m" (*r) \
+ "=rm" (ret) \
: \
: "memory", "cc"); \
- return; \
+ return ret; /* the flag is now returned instead of stored through a bool pointer */ \
}
-
#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
#define CK_PR_GENERATE(K) \
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c
new file mode 100644
index 00000000..f7b0c244
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c
@@ -0,0 +1,126 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+
+#include "poly1305_donna.h"
+#include "crypto_verify_16.h"
+#include "private/common.h"
+#include "utils.h"
+
+#ifdef HAVE_TI_MODE
+#include "poly1305_donna64.h"
+#else
+#include "poly1305_donna32.h"
+#endif
+#include "../onetimeauth_poly1305.h"
+/*
+ * Absorb `bytes` bytes starting at m into the streaming state st.
+ *
+ * Whole blocks of poly1305_block_size bytes are handed to
+ * poly1305_blocks(); a trailing partial block is kept in st->buffer with
+ * its length in st->leftover until a later call completes it.
+ */
+static void
+poly1305_update(poly1305_state_internal_t *st, const unsigned char *m,
+ unsigned long long bytes)
+{
+ unsigned long long i;
+
+ /* first top up a partial block buffered by an earlier call */
+ if (st->leftover) {
+ unsigned long long want = (poly1305_block_size - st->leftover);
+
+ if (want > bytes) {
+ want = bytes;
+ }
+ for (i = 0; i < want; i++) {
+ st->buffer[st->leftover + i] = m[i];
+ }
+ bytes -= want;
+ m += want;
+ st->leftover += want;
+ if (st->leftover < poly1305_block_size) {
+ return; /* input exhausted before the buffer filled up */
+ }
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
+ st->leftover = 0;
+ }
+
+ /* process full blocks directly from the caller's buffer */
+ if (bytes >= poly1305_block_size) {
+ unsigned long long want = (bytes & ~(poly1305_block_size - 1)); /* bytes rounded down to a whole number of blocks */
+
+ poly1305_blocks(st, m, want);
+ m += want;
+ bytes -= want;
+ }
+
+ /* buffer whatever is left (less than one block) for the next call */
+ if (bytes) {
+ for (i = 0; i < bytes; i++) {
+ st->buffer[st->leftover + i] = m[i];
+ }
+ st->leftover += bytes;
+ }
+}
+/*
+ * One-shot authenticator: initialise a local state from key, absorb the
+ * inlen bytes at m and write the resulting tag to out.
+ * Always returns 0.
+ */
+static int
+crypto_onetimeauth_poly1305_donna(unsigned char *out, const unsigned char *m,
+ unsigned long long inlen,
+ const unsigned char *key)
+{
+ CRYPTO_ALIGN(64) poly1305_state_internal_t state;
+
+ poly1305_init(&state, key);
+ poly1305_update(&state, m, inlen);
+ poly1305_finish(&state, out);
+
+ return 0;
+}
+/*
+ * Streaming API, step 1: initialise the caller's opaque state from key.
+ * The opaque state is reinterpreted as poly1305_state_internal_t; the
+ * compile-time assertion checks that it is large enough to hold one.
+ * Always returns 0.
+ */
+static int
+crypto_onetimeauth_poly1305_donna_init(crypto_onetimeauth_poly1305_state *state,
+ const unsigned char *key)
+{
+ COMPILER_ASSERT(sizeof(crypto_onetimeauth_poly1305_state) >=
+ sizeof(poly1305_state_internal_t));
+ poly1305_init((poly1305_state_internal_t *) (void *) state, key);
+
+ return 0;
+}
+/*
+ * Streaming API, step 2: absorb inlen bytes at in into the opaque state
+ * by forwarding to poly1305_update(). Always returns 0.
+ */
+static int
+crypto_onetimeauth_poly1305_donna_update(
+ crypto_onetimeauth_poly1305_state *state, const unsigned char *in,
+ unsigned long long inlen)
+{
+ poly1305_update((poly1305_state_internal_t *) (void *) state, in, inlen);
+
+ return 0;
+}
+/*
+ * Streaming API, step 3: finish the computation and write the tag to out
+ * by forwarding to poly1305_finish(). Always returns 0.
+ */
+static int
+crypto_onetimeauth_poly1305_donna_final(
+ crypto_onetimeauth_poly1305_state *state, unsigned char *out)
+{
+ poly1305_finish((poly1305_state_internal_t *) (void *) state, out);
+
+ return 0;
+}
+/*
+ * Recompute the tag of the inlen bytes at in under key k and compare it
+ * with the 16-byte tag h using crypto_verify_16().
+ * Returns the result of crypto_verify_16().
+ */
+static int
+crypto_onetimeauth_poly1305_donna_verify(const unsigned char *h,
+ const unsigned char *in,
+ unsigned long long inlen,
+ const unsigned char *k)
+{
+ unsigned char correct[16];
+
+ crypto_onetimeauth_poly1305_donna(correct, in, inlen, k);
+
+ return crypto_verify_16(h, correct);
+}
+/*
+ * Function table exposing the donna routines of this file through the
+ * crypto_onetimeauth_poly1305_implementation interface. The initialisers
+ * are listed in the order of the struct members; SODIUM_C99() wraps the
+ * designators.
+ */
+struct crypto_onetimeauth_poly1305_implementation
+ crypto_onetimeauth_poly1305_donna_implementation = {
+ SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_donna,
+ SODIUM_C99(.onetimeauth_verify =)
+ crypto_onetimeauth_poly1305_donna_verify,
+ SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_donna_init,
+ SODIUM_C99(.onetimeauth_update =)
+ crypto_onetimeauth_poly1305_donna_update,
+ SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_donna_final
+ };
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h
new file mode 100644
index 00000000..d6474b3a
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h
@@ -0,0 +1,12 @@
+#ifndef poly1305_donna_H
+#define poly1305_donna_H
+
+#include <stddef.h>
+
+#include "../onetimeauth_poly1305.h"
+#include "crypto_onetimeauth_poly1305.h"
+/* Function table of the donna Poly1305 implementation (defined in poly1305_donna.c). */
+extern struct crypto_onetimeauth_poly1305_implementation
+ crypto_onetimeauth_poly1305_donna_implementation;
+
+#endif /* poly1305_donna_H */
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h
new file mode 100644
index 00000000..cef64480
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h
@@ -0,0 +1,235 @@
+/*
+ poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
+ and 64 bit addition
+*/
+/*
+ * POLY1305_NOINLINE: compiler-specific "do not inline" annotation
+ * (MSVC, clang/GCC); expands to nothing on any other compiler.
+ */
+#if defined(_MSC_VER)
+# define POLY1305_NOINLINE __declspec(noinline)
+#elif defined(__clang__) || defined(__GNUC__)
+# define POLY1305_NOINLINE __attribute__((noinline))
+#else
+# define POLY1305_NOINLINE
+#endif
+
+#include "private/common.h"
+
+#define poly1305_block_size 16
+
+/*
+ * Streaming Poly1305 state of the 32-bit implementation.
+ * Member sizes add up to
+ * 17 + sizeof(unsigned long long) + 14*sizeof(unsigned long)
+ * (17 = 16-byte buffer + final flag; 14 = 5 + 5 + 4 limbs/words).
+ */
+typedef struct poly1305_state_internal_t {
+ unsigned long r[5]; /* masked key bytes 0..15, split into five 26-bit limbs by poly1305_init() */
+ unsigned long h[5]; /* accumulator, same five-limb layout */
+ unsigned long pad[4]; /* key bytes 16..31 as four little-endian 32-bit words, added in poly1305_finish() */
+ unsigned long long leftover; /* number of bytes currently held in buffer */
+ unsigned char buffer[poly1305_block_size]; /* partial input block awaiting completion */
+ unsigned char final; /* set by poly1305_finish() so poly1305_blocks() omits the 2^128 bit */
+} poly1305_state_internal_t;
+/*
+ * Initialise st from the 32-byte key: bytes 0..15 become the multiplier r
+ * (masked and split into 26-bit limbs), bytes 16..31 become the pad that
+ * poly1305_finish() adds at the end. The accumulator and buffering state
+ * are reset.
+ */
+static void
+poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
+{
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff - wiped after finalization */
+ /* limb k is loaded from byte offset 3*k and shifted right by 2*k, i.e. it starts at bit 26*k */
+ st->r[0] = (LOAD32_LE(&key[0])) & 0x3ffffff;
+ st->r[1] = (LOAD32_LE(&key[3]) >> 2) & 0x3ffff03;
+ st->r[2] = (LOAD32_LE(&key[6]) >> 4) & 0x3ffc0ff;
+ st->r[3] = (LOAD32_LE(&key[9]) >> 6) & 0x3f03fff;
+ st->r[4] = (LOAD32_LE(&key[12]) >> 8) & 0x00fffff;
+
+ /* h = 0 */
+ st->h[0] = 0;
+ st->h[1] = 0;
+ st->h[2] = 0;
+ st->h[3] = 0;
+ st->h[4] = 0;
+
+ /* save pad for later */
+ st->pad[0] = LOAD32_LE(&key[16]);
+ st->pad[1] = LOAD32_LE(&key[20]);
+ st->pad[2] = LOAD32_LE(&key[24]);
+ st->pad[3] = LOAD32_LE(&key[28]);
+
+ st->leftover = 0;
+ st->final = 0;
+}
+/*
+ * Absorb whole 16-byte blocks from m into the accumulator st->h:
+ * for each block, h = (h + block + hibit) * r, partially reduced.
+ *
+ * Only complete blocks are consumed; the loop stops once fewer than
+ * poly1305_block_size bytes remain, so callers pass a multiple of the
+ * block size. When st->final is set the extra high bit is not added.
+ */
+static void
+poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
+ unsigned long long bytes)
+{
+ const unsigned long hibit = (st->final) ? 0UL : (1UL << 24); /* 1 << 128: limb 4 starts at bit 104 */
+ unsigned long r0, r1, r2, r3, r4;
+ unsigned long s1, s2, s3, s4;
+ unsigned long h0, h1, h2, h3, h4;
+ unsigned long long d0, d1, d2, d3, d4;
+ unsigned long c;
+
+ r0 = st->r[0];
+ r1 = st->r[1];
+ r2 = st->r[2];
+ r3 = st->r[3];
+ r4 = st->r[4];
+
+ /* 5 * r limbs, used for the product terms that wrap past the top limb */
+ s1 = r1 * 5;
+ s2 = r2 * 5;
+ s3 = r3 * 5;
+ s4 = r4 * 5;
+
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ while (bytes >= poly1305_block_size) {
+ /* h += m[i] (block split into 26-bit limbs, same layout as r) */
+ h0 += (LOAD32_LE(m + 0)) & 0x3ffffff;
+ h1 += (LOAD32_LE(m + 3) >> 2) & 0x3ffffff;
+ h2 += (LOAD32_LE(m + 6) >> 4) & 0x3ffffff;
+ h3 += (LOAD32_LE(m + 9) >> 6) & 0x3ffffff;
+ h4 += (LOAD32_LE(m + 12) >> 8) | hibit;
+
+ /* h *= r (each d_k collects every limb product landing on limb k) */
+ d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) +
+ ((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) +
+ ((unsigned long long) h4 * s1);
+ d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) +
+ ((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) +
+ ((unsigned long long) h4 * s2);
+ d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) +
+ ((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) +
+ ((unsigned long long) h4 * s3);
+ d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) +
+ ((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) +
+ ((unsigned long long) h4 * s4);
+ d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) +
+ ((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) +
+ ((unsigned long long) h4 * r0);
+
+ /* (partial) h %= p: carry each limb into the next, keeping 26 bits */
+ c = (unsigned long) (d0 >> 26);
+ h0 = (unsigned long) d0 & 0x3ffffff;
+ d1 += c;
+ c = (unsigned long) (d1 >> 26);
+ h1 = (unsigned long) d1 & 0x3ffffff;
+ d2 += c;
+ c = (unsigned long) (d2 >> 26);
+ h2 = (unsigned long) d2 & 0x3ffffff;
+ d3 += c;
+ c = (unsigned long) (d3 >> 26);
+ h3 = (unsigned long) d3 & 0x3ffffff;
+ d4 += c;
+ c = (unsigned long) (d4 >> 26);
+ h4 = (unsigned long) d4 & 0x3ffffff;
+ h0 += c * 5; /* carry out of the top limb wraps to limb 0 multiplied by 5 */
+ c = (h0 >> 26);
+ h0 = h0 & 0x3ffffff;
+ h1 += c; /* h1 may keep a small excess; poly1305_finish() carries fully */
+
+ m += poly1305_block_size;
+ bytes -= poly1305_block_size;
+ }
+
+ st->h[0] = h0;
+ st->h[1] = h1;
+ st->h[2] = h2;
+ st->h[3] = h3;
+ st->h[4] = h4;
+}
+/*
+ * Finish the computation and write the 16-byte tag to mac.
+ *
+ * Any buffered partial block is padded with a single 1 byte followed by
+ * zeros and absorbed with st->final set. The accumulator is then fully
+ * carried, reduced to its canonical value with a branch-free select,
+ * packed into four 32-bit words, added to the pad and stored
+ * little-endian. The whole state is zeroed before returning, so st must
+ * be re-initialised before it is used again.
+ */
+static POLY1305_NOINLINE void
+poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
+{
+ unsigned long h0, h1, h2, h3, h4, c;
+ unsigned long g0, g1, g2, g3, g4;
+ unsigned long long f;
+ unsigned long mask;
+
+ /* process the remaining block */
+ if (st->leftover) {
+ unsigned long long i = st->leftover;
+
+ st->buffer[i++] = 1; /* terminating 1 byte right after the data */
+ for (; i < poly1305_block_size; i++) {
+ st->buffer[i] = 0;
+ }
+ st->final = 1; /* makes poly1305_blocks() skip the 2^128 bit for this block */
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
+ }
+
+ /* fully carry h */
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ c = h1 >> 26;
+ h1 = h1 & 0x3ffffff;
+ h2 += c;
+ c = h2 >> 26;
+ h2 = h2 & 0x3ffffff;
+ h3 += c;
+ c = h3 >> 26;
+ h3 = h3 & 0x3ffffff;
+ h4 += c;
+ c = h4 >> 26;
+ h4 = h4 & 0x3ffffff;
+ h0 += c * 5;
+ c = h0 >> 26;
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ /* compute h + -p, i.e. g = h + 5 - 2^130 */
+ g0 = h0 + 5;
+ c = g0 >> 26;
+ g0 &= 0x3ffffff;
+ g1 = h1 + c;
+ c = g1 >> 26;
+ g1 &= 0x3ffffff;
+ g2 = h2 + c;
+ c = g2 >> 26;
+ g2 &= 0x3ffffff;
+ g3 = h3 + c;
+ c = g3 >> 26;
+ g3 &= 0x3ffffff;
+ g4 = h4 + c - (1UL << 26); /* wraps (top bit set) exactly when h < p */
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; /* 0 if g4 wrapped, all ones otherwise */
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128): repack the 26-bit limbs into four 32-bit words */
+ h0 = ((h0) | (h1 << 26)) & 0xffffffff;
+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+ /* mac = (h + pad) % (2^128), carrying through f >> 32 */
+ f = (unsigned long long) h0 + st->pad[0];
+ h0 = (unsigned long) f;
+ f = (unsigned long long) h1 + st->pad[1] + (f >> 32);
+ h1 = (unsigned long) f;
+ f = (unsigned long long) h2 + st->pad[2] + (f >> 32);
+ h2 = (unsigned long) f;
+ f = (unsigned long long) h3 + st->pad[3] + (f >> 32);
+ h3 = (unsigned long) f;
+
+ STORE32_LE(mac + 0, (uint32_t) h0);
+ STORE32_LE(mac + 4, (uint32_t) h1);
+ STORE32_LE(mac + 8, (uint32_t) h2);
+ STORE32_LE(mac + 12, (uint32_t) h3);
+
+ /* zero out the state */
+ sodium_memzero((void *) st, sizeof *st);
+}
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c
new file mode 100644
index 00000000..2aed7762
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c
@@ -0,0 +1,92 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+
+#include "onetimeauth_poly1305.h"
+#include "crypto_onetimeauth_poly1305.h"
+#include "private/common.h"
+#include "private/implementations.h"
+#include "randombytes.h"
+#include "runtime.h"
+
+#include "donna/poly1305_donna.h"
+#if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
+# include "sse2/poly1305_sse2.h"
+#endif
+/*
+ * Backend used by every public entry point in this file. Starts out as
+ * the donna implementation and is reassigned by
+ * _crypto_onetimeauth_poly1305_pick_best_implementation().
+ */
+static const crypto_onetimeauth_poly1305_implementation *implementation =
+ &crypto_onetimeauth_poly1305_donna_implementation;
+/*
+ * One-shot authenticator: forwards to the selected backend, which writes
+ * the tag for the inlen bytes at in under key k to out.
+ * Returns the backend's return value.
+ */
+int
+crypto_onetimeauth_poly1305(unsigned char *out, const unsigned char *in,
+ unsigned long long inlen, const unsigned char *k)
+{
+ return implementation->onetimeauth(out, in, inlen, k);
+}
+/*
+ * Check tag h against the inlen bytes at in under key k by forwarding to
+ * the selected backend. Returns the backend's return value.
+ */
+int
+crypto_onetimeauth_poly1305_verify(const unsigned char *h,
+ const unsigned char *in,
+ unsigned long long inlen,
+ const unsigned char *k)
+{
+ return implementation->onetimeauth_verify(h, in, inlen, k);
+}
+/*
+ * Streaming API: initialise state from key via the selected backend.
+ * Returns the backend's return value.
+ */
+int
+crypto_onetimeauth_poly1305_init(crypto_onetimeauth_poly1305_state *state,
+ const unsigned char *key)
+{
+ return implementation->onetimeauth_init(state, key);
+}
+/*
+ * Streaming API: absorb inlen bytes at in into state via the selected
+ * backend. Returns the backend's return value.
+ */
+int
+crypto_onetimeauth_poly1305_update(crypto_onetimeauth_poly1305_state *state,
+ const unsigned char *in,
+ unsigned long long inlen)
+{
+ return implementation->onetimeauth_update(state, in, inlen);
+}
+/*
+ * Streaming API: finish the computation and write the tag to out via the
+ * selected backend. Returns the backend's return value.
+ */
+int
+crypto_onetimeauth_poly1305_final(crypto_onetimeauth_poly1305_state *state,
+ unsigned char *out)
+{
+ return implementation->onetimeauth_final(state, out);
+}
+/* Run-time accessor for crypto_onetimeauth_poly1305_BYTES. */
+size_t
+crypto_onetimeauth_poly1305_bytes(void)
+{
+ return crypto_onetimeauth_poly1305_BYTES;
+}
+/* Run-time accessor for crypto_onetimeauth_poly1305_KEYBYTES. */
+size_t
+crypto_onetimeauth_poly1305_keybytes(void)
+{
+ return crypto_onetimeauth_poly1305_KEYBYTES;
+}
+/* Size in bytes of the opaque crypto_onetimeauth_poly1305_state structure. */
+size_t
+crypto_onetimeauth_poly1305_statebytes(void)
+{
+ return sizeof(crypto_onetimeauth_poly1305_state);
+}
+/* Fill k with crypto_onetimeauth_poly1305_KEYBYTES bytes obtained from randombytes_buf(). */
+void
+crypto_onetimeauth_poly1305_keygen(
+ unsigned char k[crypto_onetimeauth_poly1305_KEYBYTES])
+{
+ randombytes_buf(k, crypto_onetimeauth_poly1305_KEYBYTES);
+}
+/*
+ * Select the backend used by the public entry points: the donna
+ * implementation by default, or the SSE2 one when it was compiled in
+ * (HAVE_TI_MODE and HAVE_EMMINTRIN_H) and sodium_runtime_has_sse2()
+ * reports support. Always returns 0.
+ */
+int
+_crypto_onetimeauth_poly1305_pick_best_implementation(void)
+{
+ implementation = &crypto_onetimeauth_poly1305_donna_implementation;
+#if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
+ if (sodium_runtime_has_sse2()) {
+ implementation = &crypto_onetimeauth_poly1305_sse2_implementation;
+ }
+#endif
+ return 0;
+}
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h
new file mode 100644
index 00000000..243eadd5
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h
@@ -0,0 +1,21 @@
+
+#ifndef onetimeauth_poly1305_H
+#define onetimeauth_poly1305_H
+
+#include "crypto_onetimeauth_poly1305.h"
+/*
+ * Function table implemented by each Poly1305 backend; the public
+ * crypto_onetimeauth_poly1305_*() functions dispatch through it.
+ */
+typedef struct crypto_onetimeauth_poly1305_implementation {
+ int (*onetimeauth)(unsigned char *out, const unsigned char *in, /* one-shot: tag of in[0..inlen) under k */
+ unsigned long long inlen, const unsigned char *k);
+ int (*onetimeauth_verify)(const unsigned char *h, const unsigned char *in, /* one-shot: check tag h */
+ unsigned long long inlen, const unsigned char *k);
+ int (*onetimeauth_init)(crypto_onetimeauth_poly1305_state *state, /* streaming: start with key */
+ const unsigned char * key);
+ int (*onetimeauth_update)(crypto_onetimeauth_poly1305_state *state, /* streaming: absorb input */
+ const unsigned char * in,
+ unsigned long long inlen);
+ int (*onetimeauth_final)(crypto_onetimeauth_poly1305_state *state, /* streaming: emit tag */
+ unsigned char * out);
+} crypto_onetimeauth_poly1305_implementation;
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/crypto_verify/sodium/verify.c b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_verify/sodium/verify.c
new file mode 100644
index 00000000..64d9d11e
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/crypto_verify/sodium/verify.c
@@ -0,0 +1,100 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "crypto_verify_16.h"
+#include "crypto_verify_32.h"
+#include "crypto_verify_64.h"
+/* Run-time accessor for crypto_verify_16_BYTES. */
+size_t
+crypto_verify_16_bytes(void)
+{
+ return crypto_verify_16_BYTES;
+}
+/* Run-time accessor for crypto_verify_32_BYTES. */
+size_t
+crypto_verify_32_bytes(void)
+{
+ return crypto_verify_32_BYTES;
+}
+/* Run-time accessor for crypto_verify_64_BYTES. */
+size_t
+crypto_verify_64_bytes(void)
+{
+ return crypto_verify_64_BYTES;
+}
+
+#if defined(HAVE_EMMINTRIN_H) && defined(__SSE2__)
+
+# ifdef __GNUC__
+# pragma GCC target("sse2")
+# endif
+# include <emmintrin.h>
+/*
+ * SSE2 comparison of the first n bytes of x_ and y_, 16 bytes at a time,
+ * without an early exit on the first difference.
+ *
+ * The first 16-byte chunk is always read and n / 16 chunks are compared
+ * in total, so n is expected to be a non-zero multiple of 16 (the callers
+ * in this file pass 16, 32 or 64).
+ * Returns 0 when all compared bytes are equal, -1 otherwise.
+ */
+static inline int
+crypto_verify_n(const unsigned char *x_, const unsigned char *y_,
+ const int n)
+{
+ const __m128i zero = _mm_setzero_si128();
+ volatile __m128i v1, v2, z;
+ volatile int m;
+ int i;
+
+ const volatile __m128i *volatile x =
+ (const volatile __m128i *volatile) (const void *) x_;
+ const volatile __m128i *volatile y =
+ (const volatile __m128i *volatile) (const void *) y_;
+ v1 = _mm_loadu_si128((const __m128i *) &x[0]);
+ v2 = _mm_loadu_si128((const __m128i *) &y[0]);
+ z = _mm_xor_si128(v1, v2); /* z accumulates the XOR of every chunk pair */
+ for (i = 1; i < n / 16; i++) {
+ v1 = _mm_loadu_si128((const __m128i *) &x[i]);
+ v2 = _mm_loadu_si128((const __m128i *) &y[i]);
+ z = _mm_or_si128(z, _mm_xor_si128(v1, v2));
+ }
+ m = _mm_movemask_epi8(_mm_cmpeq_epi32(z, zero)); /* 0xffff only if z is entirely zero */
+ v1 = zero; v2 = zero; z = zero; /* overwrite the temporaries that held input data */
+
+ return (int) (((uint32_t) m + 1U) >> 16) - 1; /* m == 0xffff -> 0, anything else -> -1 */
+}
+
+#else
+/*
+ * Portable comparison of the first n bytes of x_ and y_. Every byte pair
+ * is XORed into an accumulator, so the loop always runs n iterations
+ * regardless of where the inputs differ.
+ * Returns 0 when all n bytes are equal, -1 otherwise.
+ */
+static inline int
+crypto_verify_n(const unsigned char *x_, const unsigned char *y_,
+ const int n)
+{
+ const volatile unsigned char *volatile x =
+ (const volatile unsigned char *volatile) x_;
+ const volatile unsigned char *volatile y =
+ (const volatile unsigned char *volatile) y_;
+ volatile uint_fast16_t d = 0U;
+ int i;
+
+ for (i = 0; i < n; i++) {
+ d |= x[i] ^ y[i]; /* d stays 0 only if every byte pair matches; otherwise 1..255 */
+ }
+ return (1 & ((d - 1) >> 8)) - 1; /* d == 0: d - 1 wraps, bit 8 is set -> 0; d in 1..255: bit 8 clear -> -1 */
+}
+
+#endif
+/* Compare the 16-byte buffers x and y with crypto_verify_n(); returns its result (0 if equal, -1 otherwise). */
+int
+crypto_verify_16(const unsigned char *x, const unsigned char *y)
+{
+ return crypto_verify_n(x, y, crypto_verify_16_BYTES);
+}
+/* Compare the 32-byte buffers x and y with crypto_verify_n(); returns its result (0 if equal, -1 otherwise). */
+int
+crypto_verify_32(const unsigned char *x, const unsigned char *y)
+{
+ return crypto_verify_n(x, y, crypto_verify_32_BYTES);
+}
+/* Compare the 64-byte buffers x and y with crypto_verify_n(); returns its result (0 if equal, -1 otherwise). */
+int
+crypto_verify_64(const unsigned char *x, const unsigned char *y)
+{
+ return crypto_verify_n(x, y, crypto_verify_64_BYTES);
+}
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h
new file mode 100644
index 00000000..4b89c4f0
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h
@@ -0,0 +1,67 @@
+#ifndef crypto_onetimeauth_poly1305_H
+#define crypto_onetimeauth_poly1305_H
+
+#ifdef __cplusplus
+# ifdef __GNUC__
+# pragma GCC diagnostic ignored "-Wlong-long"
+# endif
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+
+#include "export.h"
+/* Opaque, 16-byte aligned storage for the streaming state; backends overlay their own state type on it. */
+typedef struct CRYPTO_ALIGN(16) crypto_onetimeauth_poly1305_state {
+ unsigned char opaque[256];
+} crypto_onetimeauth_poly1305_state;
+/* Returns sizeof(crypto_onetimeauth_poly1305_state). */
+SODIUM_EXPORT
+size_t crypto_onetimeauth_poly1305_statebytes(void);
+/* Tag length in bytes, and its run-time accessor. */
+#define crypto_onetimeauth_poly1305_BYTES 16U
+SODIUM_EXPORT
+size_t crypto_onetimeauth_poly1305_bytes(void);
+/* Key length in bytes, and its run-time accessor. */
+#define crypto_onetimeauth_poly1305_KEYBYTES 32U
+SODIUM_EXPORT
+size_t crypto_onetimeauth_poly1305_keybytes(void);
+/* One-shot: write the tag of the inlen bytes at in under key k to out. */
+SODIUM_EXPORT
+int crypto_onetimeauth_poly1305(unsigned char *out,
+ const unsigned char *in,
+ unsigned long long inlen,
+ const unsigned char *k);
+/* One-shot: check tag h against the inlen bytes at in under key k; the result must not be ignored. */
+SODIUM_EXPORT
+int crypto_onetimeauth_poly1305_verify(const unsigned char *h,
+ const unsigned char *in,
+ unsigned long long inlen,
+ const unsigned char *k)
+ __attribute__ ((warn_unused_result));
+/* Streaming: initialise state from key. */
+SODIUM_EXPORT
+int crypto_onetimeauth_poly1305_init(crypto_onetimeauth_poly1305_state *state,
+ const unsigned char *key);
+/* Streaming: absorb inlen bytes at in into state. */
+SODIUM_EXPORT
+int crypto_onetimeauth_poly1305_update(crypto_onetimeauth_poly1305_state *state,
+ const unsigned char *in,
+ unsigned long long inlen);
+/* Streaming: finish the computation and write the tag to out. */
+SODIUM_EXPORT
+int crypto_onetimeauth_poly1305_final(crypto_onetimeauth_poly1305_state *state,
+ unsigned char *out);
+/* Fill k with a freshly generated key of crypto_onetimeauth_poly1305_KEYBYTES bytes. */
+SODIUM_EXPORT
+void crypto_onetimeauth_poly1305_keygen(unsigned char k[crypto_onetimeauth_poly1305_KEYBYTES]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_16.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_16.h
new file mode 100644
index 00000000..5e9eeabe
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_16.h
@@ -0,0 +1,23 @@
+#ifndef crypto_verify_16_H
+#define crypto_verify_16_H
+
+#include <stddef.h>
+#include "export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Number of bytes compared by crypto_verify_16(), and its run-time accessor. */
+#define crypto_verify_16_BYTES 16U
+SODIUM_EXPORT
+size_t crypto_verify_16_bytes(void);
+/* Compare 16 bytes at x and y; the result must not be ignored. */
+SODIUM_EXPORT
+int crypto_verify_16(const unsigned char *x, const unsigned char *y)
+ __attribute__ ((warn_unused_result));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_32.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_32.h
new file mode 100644
index 00000000..281b5a1b
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_32.h
@@ -0,0 +1,23 @@
+#ifndef crypto_verify_32_H
+#define crypto_verify_32_H
+
+#include <stddef.h>
+#include "export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Number of bytes compared by crypto_verify_32(), and its run-time accessor. */
+#define crypto_verify_32_BYTES 32U
+SODIUM_EXPORT
+size_t crypto_verify_32_bytes(void);
+/* Compare 32 bytes at x and y; the result must not be ignored. */
+SODIUM_EXPORT
+int crypto_verify_32(const unsigned char *x, const unsigned char *y)
+ __attribute__ ((warn_unused_result));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_64.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_64.h
new file mode 100644
index 00000000..0dc7c304
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/crypto_verify_64.h
@@ -0,0 +1,23 @@
+#ifndef crypto_verify_64_H
+#define crypto_verify_64_H
+
+#include <stddef.h>
+#include "export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Number of bytes compared by crypto_verify_64(), and its run-time accessor. */
+#define crypto_verify_64_BYTES 64U
+SODIUM_EXPORT
+size_t crypto_verify_64_bytes(void);
+/* Compare 64 bytes at x and y; the result must not be ignored. */
+SODIUM_EXPORT
+int crypto_verify_64(const unsigned char *x, const unsigned char *y)
+ __attribute__ ((warn_unused_result));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/export.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/export.h
new file mode 100644
index 00000000..a0074fc9
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/export.h
@@ -0,0 +1,57 @@
+
+#ifndef sodium_export_H
+#define sodium_export_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+/*
+ * On compilers other than clang and GCC, make __attribute__(...) expand
+ * to nothing so annotated declarations still compile.
+ */
+#if !defined(__clang__) && !defined(__GNUC__)
+# ifdef __attribute__
+# undef __attribute__
+# endif
+# define __attribute__(a)
+#endif
+/*
+ * SODIUM_EXPORT / SODIUM_EXPORT_WEAK: symbol visibility annotations.
+ * Both are empty for static builds (SODIUM_STATIC). Otherwise
+ * SODIUM_EXPORT is chosen per compiler, and SODIUM_EXPORT_WEAK adds the
+ * weak attribute on ELF targets unless SODIUM_DISABLE_WEAK_FUNCTIONS is
+ * defined.
+ *
+ * NOTE(review): "_MSG_VER" and "__GNU_C__" below look like misspellings
+ * of _MSC_VER and __GNUC__; as spelled, these tests are presumably never
+ * true - confirm before relying on those branches.
+ */
+#ifdef SODIUM_STATIC
+# define SODIUM_EXPORT
+# define SODIUM_EXPORT_WEAK
+#else
+# if defined(_MSC_VER)
+# ifdef SODIUM_DLL_EXPORT
+# define SODIUM_EXPORT __declspec(dllexport)
+# else
+# define SODIUM_EXPORT __declspec(dllimport)
+# endif
+# else
+# if defined(__SUNPRO_C)
+# ifndef __GNU_C__
+# define SODIUM_EXPORT __attribute__ (visibility(__global))
+# else
+# define SODIUM_EXPORT __attribute__ __global
+# endif
+# elif defined(_MSG_VER)
+# define SODIUM_EXPORT extern __declspec(dllexport)
+# else
+# define SODIUM_EXPORT __attribute__ ((visibility ("default")))
+# endif
+# endif
+# if defined(__ELF__) && !defined(SODIUM_DISABLE_WEAK_FUNCTIONS)
+# define SODIUM_EXPORT_WEAK SODIUM_EXPORT __attribute__((weak))
+# else
+# define SODIUM_EXPORT_WEAK SODIUM_EXPORT
+# endif
+#endif
+/*
+ * CRYPTO_ALIGN(x): request x-byte alignment for a declaration, using
+ * __declspec(align) on the Intel compiler and MSVC and the aligned
+ * attribute elsewhere. A definition supplied earlier is left in place.
+ */
+#ifndef CRYPTO_ALIGN
+# if defined(__INTEL_COMPILER) || defined(_MSC_VER)
+# define CRYPTO_ALIGN(x) __declspec(align(x))
+# else
+# define CRYPTO_ALIGN(x) __attribute__ ((aligned(x)))
+# endif
+#endif
+/* SODIUM_MIN evaluates its arguments more than once; SODIUM_SIZE_MAX is the smaller of UINT64_MAX and SIZE_MAX. */
+#define SODIUM_MIN(A, B) ((A) < (B) ? (A) : (B))
+#define SODIUM_SIZE_MAX SODIUM_MIN(UINT64_MAX, SIZE_MAX)
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/common.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/common.h
new file mode 100644
index 00000000..632fc8a7
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/common.h
@@ -0,0 +1,246 @@
+#ifndef common_H
+#define common_H 1
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+/*
+ * COMPILER_ASSERT(X): compile-time check. When the constant expression X is
+ * false the operand becomes char[-1], an invalid array type, so compilation
+ * fails; when it is true the statement is just a discarded sizeof.
+ */
+#define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 1 : -1])
+/*
+ * uint128_t: only provided when HAVE_TI_MODE is defined. Uses
+ * unsigned __int128 when the compiler defines __SIZEOF_INT128__, and an
+ * unsigned type with the TI mode attribute otherwise.
+ */
+#ifdef HAVE_TI_MODE
+# if defined(__SIZEOF_INT128__)
+typedef unsigned __int128 uint128_t;
+# else
+typedef unsigned uint128_t __attribute__((mode(TI)));
+# endif
+#endif
+/*
+ * ROTL32 / rotl32(): rotate the 32-bit value x left by b bits.
+ * NOTE(review): the expression shifts by b and by (32 - b), so it is only
+ * well defined for 0 < b < 32; b == 0 would shift right by the full width.
+ */
+#define ROTL32(X, B) rotl32((X), (B))
+static inline uint32_t
+rotl32(const uint32_t x, const int b)
+{
+ return (x << b) | (x >> (32 - b));
+}
+/*
+ * ROTL64 / rotl64(): rotate the 64-bit value x left by b bits.
+ * NOTE(review): the expression shifts by b and by (64 - b), so it is only
+ * well defined for 0 < b < 64; b == 0 would shift right by the full width.
+ */
+#define ROTL64(X, B) rotl64((X), (B))
+static inline uint64_t
+rotl64(const uint64_t x, const int b)
+{
+ return (x << b) | (x >> (64 - b));
+}
+/*
+ * ROTR32 / rotr32(): rotate the 32-bit value x right by b bits.
+ * NOTE(review): the expression shifts by b and by (32 - b), so it is only
+ * well defined for 0 < b < 32; b == 0 would shift left by the full width.
+ */
+#define ROTR32(X, B) rotr32((X), (B))
+static inline uint32_t
+rotr32(const uint32_t x, const int b)
+{
+ return (x >> b) | (x << (32 - b));
+}
+/*
+ * ROTR64 / rotr64(): rotate the 64-bit value x right by b bits.
+ * NOTE(review): the expression shifts by b and by (64 - b), so it is only
+ * well defined for 0 < b < 64; b == 0 would shift left by the full width.
+ */
+#define ROTR64(X, B) rotr64((X), (B))
+static inline uint64_t
+rotr64(const uint64_t x, const int b)
+{
+ return (x >> b) | (x << (64 - b));
+}
+/*
+ * LOAD64_LE / load64_le(): read the 8 bytes at src as a little-endian
+ * 64-bit value. With NATIVE_LITTLE_ENDIAN defined the bytes are copied
+ * with memcpy(); otherwise the value is assembled byte by byte with src[0]
+ * as the least significant byte.
+ */
+#define LOAD64_LE(SRC) load64_le(SRC)
+static inline uint64_t
+load64_le(const uint8_t src[8])
+{
+#ifdef NATIVE_LITTLE_ENDIAN
+ uint64_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ uint64_t w = (uint64_t) src[0];
+ w |= (uint64_t) src[1] << 8;
+ w |= (uint64_t) src[2] << 16;
+ w |= (uint64_t) src[3] << 24;
+ w |= (uint64_t) src[4] << 32;
+ w |= (uint64_t) src[5] << 40;
+ w |= (uint64_t) src[6] << 48;
+ w |= (uint64_t) src[7] << 56;
+ return w;
+#endif
+}
+/*
+ * STORE64_LE / store64_le(): write the 64-bit value w to the 8 bytes at dst
+ * in little-endian order. With NATIVE_LITTLE_ENDIAN defined w is copied
+ * with memcpy(); otherwise w is consumed 8 bits at a time, least
+ * significant byte first, into dst[0] .. dst[7].
+ */
+#define STORE64_LE(DST, W) store64_le((DST), (W))
+static inline void
+store64_le(uint8_t dst[8], uint64_t w)
+{
+#ifdef NATIVE_LITTLE_ENDIAN
+ memcpy(dst, &w, sizeof w);
+#else
+ dst[0] = (uint8_t) w; w >>= 8;
+ dst[1] = (uint8_t) w; w >>= 8;
+ dst[2] = (uint8_t) w; w >>= 8;
+ dst[3] = (uint8_t) w; w >>= 8;
+ dst[4] = (uint8_t) w; w >>= 8;
+ dst[5] = (uint8_t) w; w >>= 8;
+ dst[6] = (uint8_t) w; w >>= 8;
+ dst[7] = (uint8_t) w;
+#endif
+}
+/*
+ * LOAD32_LE / load32_le(): read the 4 bytes at src as a little-endian
+ * 32-bit value. With NATIVE_LITTLE_ENDIAN defined the bytes are copied
+ * with memcpy(); otherwise the value is assembled byte by byte with src[0]
+ * as the least significant byte.
+ */
+#define LOAD32_LE(SRC) load32_le(SRC)
+static inline uint32_t
+load32_le(const uint8_t src[4])
+{
+#ifdef NATIVE_LITTLE_ENDIAN
+ uint32_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ uint32_t w = (uint32_t) src[0];
+ w |= (uint32_t) src[1] << 8;
+ w |= (uint32_t) src[2] << 16;
+ w |= (uint32_t) src[3] << 24;
+ return w;
+#endif
+}
+/*
+ * STORE32_LE / store32_le(): write the 32-bit value w to the 4 bytes at dst
+ * in little-endian order. With NATIVE_LITTLE_ENDIAN defined w is copied
+ * with memcpy(); otherwise w is consumed 8 bits at a time, least
+ * significant byte first, into dst[0] .. dst[3].
+ */
+#define STORE32_LE(DST, W) store32_le((DST), (W))
+static inline void
+store32_le(uint8_t dst[4], uint32_t w)
+{
+#ifdef NATIVE_LITTLE_ENDIAN
+ memcpy(dst, &w, sizeof w);
+#else
+ dst[0] = (uint8_t) w; w >>= 8;
+ dst[1] = (uint8_t) w; w >>= 8;
+ dst[2] = (uint8_t) w; w >>= 8;
+ dst[3] = (uint8_t) w;
+#endif
+}
+
+/* ----- */
+/*
+ * LOAD64_BE / load64_be(): read the 8 bytes at src as a big-endian 64-bit
+ * value. With NATIVE_BIG_ENDIAN defined the bytes are copied with memcpy();
+ * otherwise the value is assembled byte by byte with src[7] as the least
+ * significant byte and src[0] as the most significant.
+ */
+#define LOAD64_BE(SRC) load64_be(SRC)
+static inline uint64_t
+load64_be(const uint8_t src[8])
+{
+#ifdef NATIVE_BIG_ENDIAN
+ uint64_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ uint64_t w = (uint64_t) src[7];
+ w |= (uint64_t) src[6] << 8;
+ w |= (uint64_t) src[5] << 16;
+ w |= (uint64_t) src[4] << 24;
+ w |= (uint64_t) src[3] << 32;
+ w |= (uint64_t) src[2] << 40;
+ w |= (uint64_t) src[1] << 48;
+ w |= (uint64_t) src[0] << 56;
+ return w;
+#endif
+}
+/*
+ * STORE64_BE / store64_be(): write the 64-bit value w to the 8 bytes at dst
+ * in big-endian order. With NATIVE_BIG_ENDIAN defined w is copied with
+ * memcpy(); otherwise w is consumed 8 bits at a time, least significant
+ * byte first, filling dst[7] down to dst[0].
+ */
+#define STORE64_BE(DST, W) store64_be((DST), (W))
+static inline void
+store64_be(uint8_t dst[8], uint64_t w)
+{
+#ifdef NATIVE_BIG_ENDIAN
+ memcpy(dst, &w, sizeof w);
+#else
+ dst[7] = (uint8_t) w; w >>= 8;
+ dst[6] = (uint8_t) w; w >>= 8;
+ dst[5] = (uint8_t) w; w >>= 8;
+ dst[4] = (uint8_t) w; w >>= 8;
+ dst[3] = (uint8_t) w; w >>= 8;
+ dst[2] = (uint8_t) w; w >>= 8;
+ dst[1] = (uint8_t) w; w >>= 8;
+ dst[0] = (uint8_t) w;
+#endif
+}
+/*
+ * LOAD32_BE / load32_be(): read the 4 bytes at src as a big-endian 32-bit
+ * value. With NATIVE_BIG_ENDIAN defined the bytes are copied with memcpy();
+ * otherwise the value is assembled byte by byte with src[3] as the least
+ * significant byte and src[0] as the most significant.
+ */
+#define LOAD32_BE(SRC) load32_be(SRC)
+static inline uint32_t
+load32_be(const uint8_t src[4])
+{
+#ifdef NATIVE_BIG_ENDIAN
+ uint32_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ uint32_t w = (uint32_t) src[3];
+ w |= (uint32_t) src[2] << 8;
+ w |= (uint32_t) src[1] << 16;
+ w |= (uint32_t) src[0] << 24;
+ return w;
+#endif
+}
+/*
+ * STORE32_BE / store32_be(): write the 32-bit value w to the 4 bytes at dst
+ * in big-endian order. With NATIVE_BIG_ENDIAN defined w is copied with
+ * memcpy(); otherwise w is consumed 8 bits at a time, least significant
+ * byte first, filling dst[3] down to dst[0].
+ */
+#define STORE32_BE(DST, W) store32_be((DST), (W))
+static inline void
+store32_be(uint8_t dst[4], uint32_t w)
+{
+#ifdef NATIVE_BIG_ENDIAN
+ memcpy(dst, &w, sizeof w);
+#else
+ dst[3] = (uint8_t) w; w >>= 8;
+ dst[2] = (uint8_t) w; w >>= 8;
+ dst[1] = (uint8_t) w; w >>= 8;
+ dst[0] = (uint8_t) w;
+#endif
+}
+/*
+ * XOR_BUF / xor_buf(): XOR the first n bytes of in into out, in place
+ * (out[i] ^= in[i] for i in 0 .. n-1). Nothing is touched when n is 0.
+ */
+#define XOR_BUF(OUT, IN, N) xor_buf((OUT), (IN), (N))
+static inline void
+xor_buf(unsigned char *out, const unsigned char *in, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ out[i] ^= in[i];
+ }
+}
+/*
+ * Compilers other than clang and GCC: drop any existing __attribute__ macro
+ * and redefine it to expand to nothing.
+ */
+#if !defined(__clang__) && !defined(__GNUC__)
+# ifdef __attribute__
+# undef __attribute__
+# endif
+# define __attribute__(a)
+#endif
+/*
+ * CRYPTO_ALIGN(x): alignment specifier with argument x, defined here only
+ * if no earlier definition exists. __declspec(align(x)) for the Intel
+ * compiler and MSVC, __attribute__ ((aligned(x))) otherwise.
+ */
+#ifndef CRYPTO_ALIGN
+# if defined(__INTEL_COMPILER) || defined(_MSC_VER)
+# define CRYPTO_ALIGN(x) __declspec(align(x))
+# else
+# define CRYPTO_ALIGN(x) __attribute__ ((aligned(x)))
+# endif
+#endif
+/*
+ * Intrinsics availability flags.
+ * MSVC targeting _M_X64, _M_AMD64 or _M_IX86: include <intrin.h> and define
+ * the HAVE_*INTRIN_H flags listed below; HAVE_WMMINTRIN_H additionally
+ * requires _MSC_VER >= 1600, and HAVE_AVX2INTRIN_H requires
+ * _MSC_VER >= 1700 together with _M_X64.
+ * Any other compiler: include <intrin.h> only when HAVE_INTRIN_H was
+ * already defined by the build.
+ */
+#if defined(_MSC_VER) && \
+ (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
+
+# include <intrin.h>
+
+# define HAVE_INTRIN_H 1
+# define HAVE_MMINTRIN_H 1
+# define HAVE_EMMINTRIN_H 1
+# define HAVE_PMMINTRIN_H 1
+# define HAVE_TMMINTRIN_H 1
+# define HAVE_SMMINTRIN_H 1
+# define HAVE_AVXINTRIN_H 1
+# if _MSC_VER >= 1600
+# define HAVE_WMMINTRIN_H 1
+# endif
+# if _MSC_VER >= 1700 && defined(_M_X64)
+# define HAVE_AVX2INTRIN_H 1
+# endif
+#elif defined(HAVE_INTRIN_H)
+# include <intrin.h>
+#endif
+/*
+ * POISON(X, L) / UNPOISON(X, L): with HAVE_LIBCTGRIND defined they forward
+ * to the external ct_poison() / ct_unpoison() functions; otherwise both
+ * expand to the no-op expression (void) 0 and their arguments are not
+ * evaluated.
+ */
+#ifdef HAVE_LIBCTGRIND
+extern void ct_poison (const void *, size_t);
+extern void ct_unpoison(const void *, size_t);
+# define POISON(X, L) ct_poison((X), (L))
+# define UNPOISON(X, L) ct_unpoison((X), (L))
+#else
+# define POISON(X, L) (void) 0
+# define UNPOISON(X, L) (void) 0
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/implementations.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/implementations.h
new file mode 100644
index 00000000..c7237f85
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/private/implementations.h
@@ -0,0 +1,11 @@
+#ifndef implementations_H
+#define implementations_H
+
+int _crypto_generichash_blake2b_pick_best_implementation(void);
+int _crypto_onetimeauth_poly1305_pick_best_implementation(void);
+int _crypto_pwhash_argon2_pick_best_implementation(void);
+int _crypto_scalarmult_curve25519_pick_best_implementation(void);
+int _crypto_stream_chacha20_pick_best_implementation(void);
+int _crypto_stream_salsa20_pick_best_implementation(void);
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/randombytes.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/randombytes.h
new file mode 100644
index 00000000..d19f684e
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/randombytes.h
@@ -0,0 +1,68 @@
+
+#ifndef randombytes_H
+#define randombytes_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <sys/types.h>
+
+#include "export.h"
+
+#ifdef __cplusplus
+# ifdef __GNUC__
+# pragma GCC diagnostic ignored "-Wlong-long"
+# endif
+extern "C" {
+#endif
+/*
+ * Table of function pointers describing one random-number backend; a
+ * pointer to such a table is what randombytes_set_implementation() takes.
+ * The per-member comments state which entries are required or optional.
+ */
+typedef struct randombytes_implementation {
+ const char *(*implementation_name)(void); /* required */
+ uint32_t (*random)(void); /* required */
+ void (*stir)(void); /* optional */
+ uint32_t (*uniform)(const uint32_t upper_bound); /* optional, a default implementation will be used if NULL */
+ void (*buf)(void * const buf, const size_t size); /* required */
+ int (*close)(void); /* optional */
+} randombytes_implementation;
+
+#define randombytes_BYTES_MAX SODIUM_MIN(SODIUM_SIZE_MAX, 0xffffffffUL)
+
+#define randombytes_SEEDBYTES 32U
+SODIUM_EXPORT
+size_t randombytes_seedbytes(void);
+
+SODIUM_EXPORT
+void randombytes_buf(void * const buf, const size_t size);
+
+SODIUM_EXPORT
+void randombytes_buf_deterministic(void * const buf, const size_t size,
+ const unsigned char seed[randombytes_SEEDBYTES]);
+
+SODIUM_EXPORT
+uint32_t randombytes_random(void);
+
+SODIUM_EXPORT
+uint32_t randombytes_uniform(const uint32_t upper_bound);
+
+SODIUM_EXPORT
+void randombytes_stir(void);
+
+SODIUM_EXPORT
+int randombytes_close(void);
+
+SODIUM_EXPORT
+int randombytes_set_implementation(randombytes_implementation *impl);
+
+SODIUM_EXPORT
+const char *randombytes_implementation_name(void);
+
+/* -- NaCl compatibility interface -- */
+
+SODIUM_EXPORT
+void randombytes(unsigned char * const buf, const unsigned long long buf_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/runtime.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/runtime.h
new file mode 100644
index 00000000..7f15d58e
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/runtime.h
@@ -0,0 +1,52 @@
+
+#ifndef sodium_runtime_H
+#define sodium_runtime_H
+
+#include "export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_neon(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_sse2(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_sse3(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_ssse3(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_sse41(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_avx(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_avx2(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_avx512f(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_pclmul(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_aesni(void);
+
+SODIUM_EXPORT_WEAK
+int sodium_runtime_has_rdrand(void);
+
+/* ------------------------------------------------------------------------- */
+
+int _sodium_runtime_get_cpu_features(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/utils.h b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/utils.h
new file mode 100644
index 00000000..46eb331c
--- /dev/null
+++ b/freebsd/sys/contrib/libsodium/src/libsodium/include/sodium/utils.h
@@ -0,0 +1,170 @@
+
+#ifndef sodium_utils_H
+#define sodium_utils_H
+
+#include <stddef.h>
+
+#include "export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * SODIUM_C99(X): expands to X only when compiling as C with
+ * __STDC_VERSION__ >= 199901L; expands to nothing for C++ or for a
+ * compiler that does not report C99 or later. An existing definition is
+ * kept.
+ */
+#ifndef SODIUM_C99
+# if defined(__cplusplus) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
+# define SODIUM_C99(X)
+# else
+# define SODIUM_C99(X) X
+# endif
+#endif
+
+SODIUM_EXPORT
+void sodium_memzero(void * const pnt, const size_t len);
+
+SODIUM_EXPORT
+void sodium_stackzero(const size_t len);
+
+/*
+ * WARNING: sodium_memcmp() must be used to verify if two secret keys
+ * are equal, in constant time.
+ * It returns 0 if the keys are equal, and -1 if they differ.
+ * This function is not designed for lexicographical comparisons.
+ */
+SODIUM_EXPORT
+int sodium_memcmp(const void * const b1_, const void * const b2_, size_t len)
+ __attribute__ ((warn_unused_result));
+
+/*
+ * sodium_compare() returns -1 if b1_ < b2_, 1 if b1_ > b2_ and 0 if b1_ == b2_
+ * It is suitable for lexicographical comparisons, or to compare nonces
+ * and counters stored in little-endian format.
+ * However, it is slower than sodium_memcmp().
+ */
+SODIUM_EXPORT
+int sodium_compare(const unsigned char *b1_, const unsigned char *b2_,
+ size_t len)
+ __attribute__ ((warn_unused_result));
+
+SODIUM_EXPORT
+int sodium_is_zero(const unsigned char *n, const size_t nlen);
+
+SODIUM_EXPORT
+void sodium_increment(unsigned char *n, const size_t nlen);
+
+SODIUM_EXPORT
+void sodium_add(unsigned char *a, const unsigned char *b, const size_t len);
+
+SODIUM_EXPORT
+char *sodium_bin2hex(char * const hex, const size_t hex_maxlen,
+ const unsigned char * const bin, const size_t bin_len);
+
+SODIUM_EXPORT
+int sodium_hex2bin(unsigned char * const bin, const size_t bin_maxlen,
+ const char * const hex, const size_t hex_len,
+ const char * const ignore, size_t * const bin_len,
+ const char ** const hex_end);
+
+#define sodium_base64_VARIANT_ORIGINAL 1
+#define sodium_base64_VARIANT_ORIGINAL_NO_PADDING 3
+#define sodium_base64_VARIANT_URLSAFE 5
+#define sodium_base64_VARIANT_URLSAFE_NO_PADDING 7
+
+/*
+ * Computes the required length to encode BIN_LEN bytes as a base64 string
+ * using the given variant. The computed length includes a trailing \0.
+ *
+ * With r = BIN_LEN - (BIN_LEN / 3) * 3 (the bytes left over after the whole
+ * 3-byte groups), the result is 4 characters per whole group, plus, only
+ * when r != 0, a final group of 4 characters if bit 1 of VARIANT is clear,
+ * or r + 1 characters if it is set (the NO_PADDING variants, 3 and 7),
+ * plus 1 for the terminator.
+ * BIN_LEN and VARIANT are each expanded several times, so arguments with
+ * side effects must be avoided.
+ */
+#define sodium_base64_ENCODED_LEN(BIN_LEN, VARIANT) \
+ (((BIN_LEN) / 3U) * 4U + \
+ ((((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) | (((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) >> 1)) & 1U) * \
+ (4U - (~((((VARIANT) & 2U) >> 1) - 1U) & (3U - ((BIN_LEN) - ((BIN_LEN) / 3U) * 3U)))) + 1U)
+
+SODIUM_EXPORT
+size_t sodium_base64_encoded_len(const size_t bin_len, const int variant);
+
+SODIUM_EXPORT
+char *sodium_bin2base64(char * const b64, const size_t b64_maxlen,
+ const unsigned char * const bin, const size_t bin_len,
+ const int variant);
+
+SODIUM_EXPORT
+int sodium_base642bin(unsigned char * const bin, const size_t bin_maxlen,
+ const char * const b64, const size_t b64_len,
+ const char * const ignore, size_t * const bin_len,
+ const char ** const b64_end, const int variant);
+
+SODIUM_EXPORT
+int sodium_mlock(void * const addr, const size_t len);
+
+SODIUM_EXPORT
+int sodium_munlock(void * const addr, const size_t len);
+
+/* WARNING: sodium_malloc() and sodium_allocarray() are not general-purpose
+ * allocation functions.
+ *
+ * They return a pointer to a region filled with 0xd0 bytes, immediately
+ * followed by a guard page.
+ * As a result, accessing a single byte after the requested allocation size
+ * will intentionally trigger a segmentation fault.
+ *
+ * A canary and an additional guard page placed before the beginning of the
+ * region may also kill the process if a buffer underflow is detected.
+ *
+ * The memory layout is:
+ * [unprotected region size (read only)][guard page (no access)][unprotected pages (read/write)][guard page (no access)]
+ * With the layout of the unprotected pages being:
+ * [optional padding][16-bytes canary][user region]
+ *
+ * However:
+ * - These functions are significantly slower than standard functions
+ * - Each allocation requires 3 or 4 additional pages
+ * - The returned address will not be aligned if the allocation size is not
+ * a multiple of the required alignment. For this reason, these functions
+ * are designed to store data, such as secret keys and messages.
+ *
+ * sodium_malloc() can be used to allocate any libsodium data structure.
+ *
+ * The crypto_generichash_state structure is packed and its length is
+ * either 357 or 361 bytes. For this reason, when using sodium_malloc() to
+ * allocate a crypto_generichash_state structure, padding must be added in
+ * order to ensure proper alignment. crypto_generichash_statebytes()
+ * returns the rounded up structure size, and should be preferred to sizeof():
+ * state = sodium_malloc(crypto_generichash_statebytes());
+ */
+
+SODIUM_EXPORT
+void *sodium_malloc(const size_t size)
+ __attribute__ ((malloc));
+
+SODIUM_EXPORT
+void *sodium_allocarray(size_t count, size_t size)
+ __attribute__ ((malloc));
+
+SODIUM_EXPORT
+void sodium_free(void *ptr);
+
+SODIUM_EXPORT
+int sodium_mprotect_noaccess(void *ptr);
+
+SODIUM_EXPORT
+int sodium_mprotect_readonly(void *ptr);
+
+SODIUM_EXPORT
+int sodium_mprotect_readwrite(void *ptr);
+
+SODIUM_EXPORT
+int sodium_pad(size_t *padded_buflen_p, unsigned char *buf,
+ size_t unpadded_buflen, size_t blocksize, size_t max_buflen);
+
+SODIUM_EXPORT
+int sodium_unpad(size_t *unpadded_buflen_p, const unsigned char *buf,
+ size_t padded_buflen, size_t blocksize);
+
+/* -------- */
+
+int _sodium_alloc_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/freebsd/sys/crypto/chacha20/chacha.c b/freebsd/sys/crypto/chacha20/chacha.c
index 74d7fe02..3ce5310a 100644
--- a/freebsd/sys/crypto/chacha20/chacha.c
+++ b/freebsd/sys/crypto/chacha20/chacha.c
@@ -16,7 +16,6 @@ __FBSDID("$FreeBSD$");
#include <crypto/chacha20/chacha.h>
-
typedef uint8_t u8;
typedef uint32_t u32;
@@ -59,7 +58,7 @@ typedef struct chacha_ctx chacha_ctx;
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
-void
+LOCAL void
#ifndef __rtems__
chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
#else /* __rtems__ */
@@ -88,7 +87,7 @@ chacha_keysetup(chacha_ctx *x,const u8 *k,u_int kbits)
x->input[3] = U8TO32_LITTLE(constants + 12);
}
-void
+LOCAL void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
@@ -97,7 +96,7 @@ chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
x->input[15] = U8TO32_LITTLE(iv + 4);
}
-void
+LOCAL void
#ifndef __rtems__
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
#else /* __rtems__ */
@@ -179,6 +178,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes)
x14 = PLUS(x14,j14);
x15 = PLUS(x15,j15);
+#ifndef KEYSTREAM_ONLY
x0 = XOR(x0,U8TO32_LITTLE(m + 0));
x1 = XOR(x1,U8TO32_LITTLE(m + 4));
x2 = XOR(x2,U8TO32_LITTLE(m + 8));
@@ -195,6 +195,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes)
x13 = XOR(x13,U8TO32_LITTLE(m + 52));
x14 = XOR(x14,U8TO32_LITTLE(m + 56));
x15 = XOR(x15,U8TO32_LITTLE(m + 60));
+#endif
j12 = PLUSONE(j12);
if (!j12) {
@@ -229,6 +230,8 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes)
}
bytes -= 64;
c += 64;
+#ifndef KEYSTREAM_ONLY
m += 64;
+#endif
}
}
diff --git a/freebsd/sys/crypto/chacha20/chacha.h b/freebsd/sys/crypto/chacha20/chacha.h
index 02106eaa..e67cc812 100644
--- a/freebsd/sys/crypto/chacha20/chacha.h
+++ b/freebsd/sys/crypto/chacha20/chacha.h
@@ -23,9 +23,15 @@ struct chacha_ctx {
#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN 64
-void chacha_keysetup(struct chacha_ctx *x, const u_char *k, u_int kbits);
-void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv, const u_char *ctr);
-void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
+#ifdef _KERNEL /* LOCAL is the storage-class prefix of the chacha_* prototypes below */
+#define LOCAL /* _KERNEL build: empty, so the functions keep external linkage */
+#else
+#define LOCAL static /* otherwise: the functions are file-local to the including source */
+#endif
+
+LOCAL void chacha_keysetup(struct chacha_ctx *x, const u_char *k, u_int kbits);
+LOCAL void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv, const u_char *ctr);
+LOCAL void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
u_char *c, u_int bytes);
#endif /* CHACHA_H */
diff --git a/freebsd/sys/crypto/libsodium/utils.c b/freebsd/sys/crypto/libsodium/utils.c
new file mode 100644
index 00000000..8c6dfc36
--- /dev/null
+++ b/freebsd/sys/crypto/libsodium/utils.c
@@ -0,0 +1,16 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* This file is in the public domain. */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <sodium/utils.h>
+/*
+ * Kernel-side sodium_memzero(): clears the n bytes starting at b by
+ * delegating to explicit_bzero().
+ */
+void
+sodium_memzero(void *b, size_t n)
+{
+ explicit_bzero(b, n);
+}
diff --git a/freebsd/sys/crypto/sha2/sha224.h b/freebsd/sys/crypto/sha2/sha224.h
new file mode 100644
index 00000000..c2833543
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha224.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA224_H_
+#define _SHA224_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define SHA224_BLOCK_LENGTH 64
+#define SHA224_DIGEST_LENGTH 28
+#define SHA224_DIGEST_STRING_LENGTH (SHA224_DIGEST_LENGTH * 2 + 1)
+/* Hashing context passed to the SHA224_* functions declared below. */
+typedef struct SHA224Context {
+ uint32_t state[8]; /* eight 32-bit hash state words */
+ uint64_t count; /* processed-length counter; NOTE(review): unit (bits vs. bytes) is not visible here -- confirm */
+ uint8_t buf[SHA224_BLOCK_LENGTH]; /* SHA224_BLOCK_LENGTH (64) bytes; presumably buffered partial input -- confirm */
+} SHA224_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+
+#ifndef SHA224_Init
+#define SHA224_Init _libmd_SHA224_Init
+#endif
+#ifndef SHA224_Update
+#define SHA224_Update _libmd_SHA224_Update
+#endif
+#ifndef SHA224_Final
+#define SHA224_Final _libmd_SHA224_Final
+#endif
+#ifndef SHA224_End
+#define SHA224_End _libmd_SHA224_End
+#endif
+#ifndef SHA224_Fd
+#define SHA224_Fd _libmd_SHA224_Fd
+#endif
+#ifndef SHA224_FdChunk
+#define SHA224_FdChunk _libmd_SHA224_FdChunk
+#endif
+#ifndef SHA224_File
+#define SHA224_File _libmd_SHA224_File
+#endif
+#ifndef SHA224_FileChunk
+#define SHA224_FileChunk _libmd_SHA224_FileChunk
+#endif
+#ifndef SHA224_Data
+#define SHA224_Data _libmd_SHA224_Data
+#endif
+
+#ifndef SHA224_version
+#define SHA224_version _libmd_SHA224_version
+#endif
+
+void SHA224_Init(SHA224_CTX *);
+void SHA224_Update(SHA224_CTX *, const void *, size_t);
+void SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)],
+ SHA224_CTX *);
+#ifndef _KERNEL
+char *SHA224_End(SHA224_CTX *, char *);
+char *SHA224_Data(const void *, unsigned int, char *);
+char *SHA224_Fd(int, char *);
+char *SHA224_FdChunk(int, char *, off_t, off_t);
+char *SHA224_File(const char *, char *);
+char *SHA224_FileChunk(const char *, char *, off_t, off_t);
+#endif
+__END_DECLS
+
+#endif /* !_SHA224_H_ */
diff --git a/freebsd/sys/crypto/sha2/sha256c.c b/freebsd/sys/crypto/sha2/sha256c.c
index ea389694..dbba6d35 100644
--- a/freebsd/sys/crypto/sha2/sha256c.c
+++ b/freebsd/sys/crypto/sha2/sha256c.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#endif
+#include "sha224.h"
#include "sha256.h"
#if BYTE_ORDER == BIG_ENDIAN
@@ -299,7 +300,57 @@ SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
/* Clear the context state */
- memset(ctx, 0, sizeof(*ctx));
+ explicit_bzero(ctx, sizeof(*ctx));
+}
+
+/*** SHA-224: *********************************************************/
+/*
+ * the SHA224 and SHA256 transforms are identical
+ */
+
+/*
+ * SHA-224 initialization. Begins a SHA-224 operation: resets ctx->count to
+ * zero and loads the eight initial state words into ctx->state. Nothing
+ * else in the context is written here.
+ */
+void
+SHA224_Init(SHA224_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0xC1059ED8;
+ ctx->state[1] = 0x367CD507;
+ ctx->state[2] = 0x3070DD17;
+ ctx->state[3] = 0xF70E5939;
+ ctx->state[4] = 0xFFC00B31;
+ ctx->state[5] = 0x68581511;
+ ctx->state[6] = 0x64f98FA7;
+ ctx->state[7] = 0xBEFA4FA4;
+}
+
+/*
+ * Add bytes into the SHA-224 hash: feeds len bytes from in to
+ * SHA256_Update(), passing ctx cast to SHA256_CTX *.
+ * NOTE(review): this relies on SHA224_CTX and SHA256_CTX having the same
+ * layout; SHA256_CTX is declared outside this hunk -- confirm.
+ */
+void
+SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len)
+{
+
+ SHA256_Update((SHA256_CTX *)ctx, in, len);
+}
+
+/*
+ * SHA-224 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ *
+ * Padding is done by SHA256_Pad() on the context cast to SHA256_CTX *.
+ * Only SHA224_DIGEST_LENGTH bytes of ctx->state are written to digest, and
+ * the whole context is then wiped with explicit_bzero(), so ctx must be
+ * re-initialized before it is used again.
+ */
+void
+SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx)
+{
+
+ /* Add padding */
+ SHA256_Pad((SHA256_CTX *)ctx);
+
+ /* Write the hash */
+ be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ explicit_bzero(ctx, sizeof(*ctx));
 }
#ifdef WEAK_REFS
@@ -315,4 +366,11 @@ __weak_reference(_libmd_SHA256_Update, SHA256_Update);
__weak_reference(_libmd_SHA256_Final, SHA256_Final);
#undef SHA256_Transform
__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
+
+#undef SHA224_Init
+__weak_reference(_libmd_SHA224_Init, SHA224_Init);
+#undef SHA224_Update
+__weak_reference(_libmd_SHA224_Update, SHA224_Update);
+#undef SHA224_Final
+__weak_reference(_libmd_SHA224_Final, SHA224_Final);
#endif
diff --git a/freebsd/sys/crypto/sha2/sha512c.c b/freebsd/sys/crypto/sha2/sha512c.c
index 7aa4bf52..4e7de3ee 100644
--- a/freebsd/sys/crypto/sha2/sha512c.c
+++ b/freebsd/sys/crypto/sha2/sha512c.c
@@ -333,7 +333,7 @@ SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH);
/* Clear the context state */
- memset(ctx, 0, sizeof(*ctx));
+ explicit_bzero(ctx, sizeof(*ctx));
}
/*** SHA-512t: *********************************************************/
@@ -376,7 +376,7 @@ SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH], SHA512_C
be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH);
/* Clear the context state */
- memset(ctx, 0, sizeof(*ctx));
+ explicit_bzero(ctx, sizeof(*ctx));
}
void
@@ -415,7 +415,7 @@ SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH], SHA512_C
be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH);
/* Clear the context state */
- memset(ctx, 0, sizeof(*ctx));
+ explicit_bzero(ctx, sizeof(*ctx));
}
/*** SHA-384: *********************************************************/
@@ -465,7 +465,7 @@ SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
/* Clear the context state */
- memset(ctx, 0, sizeof(*ctx));
+ explicit_bzero(ctx, sizeof(*ctx));
}
#ifdef WEAK_REFS
diff --git a/freebsd/sys/crypto/skein/skein.c b/freebsd/sys/crypto/skein/skein.c
index c13f9ad4..e444837a 100644
--- a/freebsd/sys/crypto/skein/skein.c
+++ b/freebsd/sys/crypto/skein/skein.c
@@ -814,6 +814,7 @@ SKEIN256_Final(unsigned char digest[static SKEIN_256_BLOCK_BYTES], SKEIN256_CTX
{
Skein_256_Final(ctx, digest);
+ explicit_bzero(ctx, sizeof(*ctx));
}
void
@@ -821,6 +822,7 @@ SKEIN512_Final(unsigned char digest[static SKEIN_512_BLOCK_BYTES], SKEIN512_CTX
{
Skein_512_Final(ctx, digest);
+ explicit_bzero(ctx, sizeof(*ctx));
}
void
@@ -828,6 +830,7 @@ SKEIN1024_Final(unsigned char digest[static SKEIN1024_BLOCK_BYTES], SKEIN1024_CT
{
Skein1024_Final(ctx, digest);
+ explicit_bzero(ctx, sizeof(*ctx));
}
#ifdef WEAK_REFS
diff --git a/freebsd/sys/dev/bfe/if_bfe.c b/freebsd/sys/dev/bfe/if_bfe.c
index c07d87fb..f1586d59 100644
--- a/freebsd/sys/dev/bfe/if_bfe.c
+++ b/freebsd/sys/dev/bfe/if_bfe.c
@@ -159,6 +159,8 @@ static driver_t bfe_driver = {
static devclass_t bfe_devclass;
DRIVER_MODULE(bfe, pci, bfe_driver, bfe_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bfe, bfe_devs,
+ sizeof(bfe_devs[0]), nitems(bfe_devs) - 1);
DRIVER_MODULE(miibus, bfe, miibus_driver, miibus_devclass, 0, 0);
/*
diff --git a/freebsd/sys/dev/bge/if_bge.c b/freebsd/sys/dev/bge/if_bge.c
index aba0b05d..260c6c23 100644
--- a/freebsd/sys/dev/bge/if_bge.c
+++ b/freebsd/sys/dev/bge/if_bge.c
@@ -549,6 +549,8 @@ static driver_t bge_driver = {
static devclass_t bge_devclass;
DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, bge, bge_devs,
+ sizeof(bge_devs[0]), nitems(bge_devs) - 1);
DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0);
static int bge_allow_asf = 1;
@@ -3212,6 +3214,14 @@ bge_can_use_msi(struct bge_softc *sc)
sc->bge_chiprev != BGE_CHIPREV_5750_BX)
can_use_msi = 1;
break;
+ case BGE_ASICREV_BCM5784:
+ /*
+ * Prevent infinite "watchdog timeout" errors
+ * in some MacBook Pro and make it work out-of-the-box.
+ */
+ if (sc->bge_chiprev == BGE_CHIPREV_5784_AX)
+ break;
+ /* FALLTHROUGH */
default:
if (BGE_IS_575X_PLUS(sc))
can_use_msi = 1;
@@ -6716,15 +6726,15 @@ bge_sysctl_mem_read(SYSCTL_HANDLER_ARGS)
static int
bge_get_eaddr_fw(struct bge_softc *sc, uint8_t ether_addr[])
{
-
+#ifdef __sparc64__
if (sc->bge_flags & BGE_FLAG_EADDR)
return (1);
-#ifdef __sparc64__
OF_getetheraddr(sc->bge_dev, ether_addr);
return (0);
-#endif
+#else
return (1);
+#endif
}
static int
diff --git a/freebsd/sys/dev/dc/if_dc.c b/freebsd/sys/dev/dc/if_dc.c
index ac34a20a..14023e54 100644
--- a/freebsd/sys/dev/dc/if_dc.c
+++ b/freebsd/sys/dev/dc/if_dc.c
@@ -361,6 +361,8 @@ static devclass_t dc_devclass;
DRIVER_MODULE_ORDERED(dc, pci, dc_driver, dc_devclass, NULL, NULL,
SI_ORDER_ANY);
+MODULE_PNP_INFO("W32:vendor/device;U8:revision;D:#", pci, dc, dc_devs,
+ sizeof(dc_devs[0]), nitems(dc_devs) - 1);
DRIVER_MODULE(miibus, dc, miibus_driver, miibus_devclass, NULL, NULL);
#define DC_SETBIT(sc, reg, x) \
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 72711a6e..4fe66dfc 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -412,7 +412,6 @@ static driver_t em_if_driver = {
#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
-#define M_TSO_LEN 66
#define MAX_INTS_PER_SEC 8000
#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
@@ -422,8 +421,6 @@ static driver_t em_if_driver = {
#define CSUM_TSO 0
#endif
-#define TSO_WORKAROUND 4
-
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
static int em_disable_crc_stripping = 0;
@@ -486,8 +483,10 @@ extern struct if_txrx lem_txrx;
static struct if_shared_ctx em_sctx_init = {
.isc_magic = IFLIB_MAGIC,
.isc_q_align = PAGE_SIZE,
- .isc_tx_maxsize = EM_TSO_SIZE,
+ .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header),
.isc_tx_maxsegsize = PAGE_SIZE,
+ .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header),
+ .isc_tso_maxsegsize = EM_TSO_SEG_SIZE,
.isc_rx_maxsize = MJUM9BYTES,
.isc_rx_nsegments = 1,
.isc_rx_maxsegsize = MJUM9BYTES,
@@ -510,12 +509,13 @@ static struct if_shared_ctx em_sctx_init = {
if_shared_ctx_t em_sctx = &em_sctx_init;
-
static struct if_shared_ctx igb_sctx_init = {
.isc_magic = IFLIB_MAGIC,
.isc_q_align = PAGE_SIZE,
- .isc_tx_maxsize = EM_TSO_SIZE,
+ .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header),
.isc_tx_maxsegsize = PAGE_SIZE,
+ .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header),
+ .isc_tso_maxsegsize = EM_TSO_SEG_SIZE,
.isc_rx_maxsize = MJUM9BYTES,
.isc_rx_nsegments = 1,
.isc_rx_maxsegsize = MJUM9BYTES,
@@ -699,16 +699,19 @@ em_set_num_queues(if_ctx_t ctx)
return (maxqueues);
}
+#define LEM_CAPS \
+ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
+ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER
-#define EM_CAPS \
- IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \
- IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | \
- IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU;
+#define EM_CAPS \
+ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
+ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \
+ IFCAP_LRO | IFCAP_VLAN_HWTSO
-#define IGB_CAPS \
- IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \
- IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \
- IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_TXCSUM_IPV6 | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU;
+#define IGB_CAPS \
+ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \
+ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \
+ IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6; /* NOTE(review): unlike LEM_CAPS and EM_CAPS this expansion ends in ';' and none of the three is parenthesized -- only safe as the whole right-hand side of an assignment */
/*********************************************************************
* Device initialization routine
@@ -775,18 +778,11 @@ em_if_attach_pre(if_ctx_t ctx)
/* Determine hardware and mac info */
em_identify_hardware(ctx);
- /* Set isc_msix_bar */
scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR);
scctx->isc_tx_nsegments = EM_MAX_SCATTER;
- scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments;
- scctx->isc_tx_tso_size_max = EM_TSO_SIZE;
- scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE;
scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx);
device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max);
- scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
-
-
if (adapter->hw.mac.type >= igb_mac_min) {
int try_second_bar;
@@ -795,9 +791,12 @@ em_if_attach_pre(if_ctx_t ctx)
scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc);
scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc);
scctx->isc_txrx = &igb_txrx;
- scctx->isc_capenable = IGB_CAPS;
- scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \
- | CSUM_IP6_UDP | CSUM_IP6_TCP;
+ scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER;
+ scctx->isc_tx_tso_size_max = EM_TSO_SIZE;
+ scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE;
+ scctx->isc_capabilities = scctx->isc_capenable = IGB_CAPS;
+ scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO |
+ CSUM_IP6_TCP | CSUM_IP6_UDP;
if (adapter->hw.mac.type != e1000_82575)
scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP;
@@ -809,26 +808,44 @@ em_if_attach_pre(if_ctx_t ctx)
try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4);
if (try_second_bar == 0)
scctx->isc_msix_bar += 4;
-
} else if (adapter->hw.mac.type >= em_mac_min) {
scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc);
scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended);
scctx->isc_txrx = &em_txrx;
- scctx->isc_capenable = EM_CAPS;
+ scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER;
+ scctx->isc_tx_tso_size_max = EM_TSO_SIZE;
+ scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE;
+ scctx->isc_capabilities = scctx->isc_capenable = EM_CAPS;
+ /*
+ * For EM-class devices, don't enable IFCAP_{TSO4,VLAN_HWTSO}
+ * by default as we don't have workarounds for all associated
+ * silicon errata. E.g., with several MACs such as 82573E,
+ * TSO only works at Gigabit speed and otherwise can cause the
+ * hardware to hang (which also would be next to impossible to
+ * work around given that already queued TSO-using descriptors
+ * would need to be flushed and vlan(4) reconfigured at runtime
+ * in case of a link speed change). Moreover, MACs like 82579
+ * still can hang at Gigabit even with all publicly documented
+ * TSO workarounds implemented. Generally, the penalty of
+ * these workarounds is rather high and may involve copying
+ * mbuf data around so advantages of TSO lapse. Still, TSO may
+ * work for a few MACs of this class - at least when sticking
+ * with Gigabit - in which case users may enable TSO manually.
+ */
+ scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
} else {
scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc);
scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc);
- scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
+ scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP;
scctx->isc_txrx = &lem_txrx;
- scctx->isc_capenable = EM_CAPS;
+ scctx->isc_capabilities = scctx->isc_capenable = LEM_CAPS;
if (adapter->hw.mac.type < e1000_82543)
scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM);
- scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
scctx->isc_msix_bar = 0;
}
@@ -1009,6 +1026,11 @@ em_if_attach_pre(if_ctx_t ctx)
*/
em_get_wakeup(ctx);
+ /* Enable only WOL MAGIC by default */
+ scctx->isc_capenable &= ~IFCAP_WOL;
+ if (adapter->wol != 0)
+ scctx->isc_capenable |= IFCAP_WOL_MAGIC;
+
iflib_set_mac(ctx, hw->mac.addr);
return (0);
@@ -1702,7 +1724,6 @@ em_if_update_admin_status(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
struct e1000_hw *hw = &adapter->hw;
- struct ifnet *ifp = iflib_get_ifp(ctx);
device_t dev = iflib_get_dev(ctx);
u32 link_check, thstat, ctrl;
@@ -1766,8 +1787,8 @@ em_if_update_admin_status(if_ctx_t ctx)
"Full Duplex" : "Half Duplex"));
adapter->link_active = 1;
adapter->smartspeed = 0;
- if_setbaudrate(ifp, adapter->link_speed * 1000000);
- if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
+ if ((ctrl & E1000_CTRL_EXT_LINK_MODE_MASK) ==
+ E1000_CTRL_EXT_LINK_MODE_GMII &&
(thstat & E1000_THSTAT_LINK_THROTTLE))
device_printf(dev, "Link: thermal downshift\n");
/* Delay Link Up for Phy update */
@@ -1782,17 +1803,15 @@ em_if_update_admin_status(if_ctx_t ctx)
adapter->flags |= IGB_MEDIA_RESET;
em_reset(ctx);
}
- iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate);
+ iflib_link_state_change(ctx, LINK_STATE_UP,
+ IF_Mbps(adapter->link_speed));
printf("Link state changed to up\n");
} else if (!link_check && (adapter->link_active == 1)) {
- if_setbaudrate(ifp, 0);
adapter->link_speed = 0;
adapter->link_duplex = 0;
- if (bootverbose)
- device_printf(dev, "Link is Down\n");
adapter->link_active = 0;
- iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate);
- printf("link state changed to down\n");
+ iflib_link_state_change(ctx, LINK_STATE_DOWN, 0);
+ printf("Link state changed to down\n");
}
em_update_stats_counters(adapter);
@@ -1965,7 +1984,6 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
vector = 0;
for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) {
- rid = vector + 1;
snprintf(buf, sizeof(buf), "txq%d", i);
tx_que = &adapter->tx_queues[i];
iflib_softirq_alloc_generic(ctx,
@@ -2304,7 +2322,7 @@ igb_init_dmac(struct adapter *adapter, u32 pba)
dmac = pba - 10;
reg = E1000_READ_REG(hw, E1000_DMACR);
reg &= ~E1000_DMACR_DMACTHR_MASK;
- reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
+ reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
& E1000_DMACR_DMACTHR_MASK);
/* transition to L0x or L1 if available..*/
@@ -2752,51 +2770,15 @@ em_setup_interface(if_ctx_t ctx)
struct ifnet *ifp = iflib_get_ifp(ctx);
struct adapter *adapter = iflib_get_softc(ctx);
if_softc_ctx_t scctx = adapter->shared;
- uint64_t cap = 0;
INIT_DEBUGOUT("em_setup_interface: begin");
- /* TSO parameters */
- if_sethwtsomax(ifp, IP_MAXPACKET);
- /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
- if_sethwtsomaxsegcount(ifp, EM_MAX_SCATTER - 5);
- if_sethwtsomaxsegsize(ifp, EM_TSO_SEG_SIZE);
-
/* Single Queue */
if (adapter->tx_num_queues == 1) {
if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1);
if_setsendqready(ifp);
}
- cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4;
- cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU;
-
- /*
- * Tell the upper layer(s) we
- * support full VLAN capability
- */
- if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
- if_setcapabilitiesbit(ifp, cap, 0);
-
- /*
- * Don't turn this on by default, if vlans are
- * created on another pseudo device (eg. lagg)
- * then vlan events are not passed thru, breaking
- * operation, but with HW FILTER off it works. If
- * using vlans directly on the em driver you can
- * enable this and get full hardware tag filtering.
- */
- if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
-
- /* Enable only WOL MAGIC by default */
- if (adapter->wol) {
- if_setcapenablebit(ifp, IFCAP_WOL_MAGIC,
- IFCAP_WOL_MCAST| IFCAP_WOL_UCAST);
- } else {
- if_setcapenablebit(ifp, 0, IFCAP_WOL_MAGIC |
- IFCAP_WOL_MCAST| IFCAP_WOL_UCAST);
- }
-
/*
* Specify the media types supported by this adapter and register
* callbacks to update media and link information
diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h
index 0c892947..26bdcb2e 100644
--- a/freebsd/sys/dev/e1000/if_em.h
+++ b/freebsd/sys/dev/e1000/if_em.h
@@ -250,11 +250,6 @@
#define IGB_LINK_ITR 2000
#define I210_LINK_DELAY 1000
-#define IGB_MAX_SCATTER 40
-#define IGB_VFTA_SIZE 128
-#define IGB_BR_SIZE 4096 /* ring buf size */
-#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
-#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */
#define IGB_TXPBSIZE 20408
#define IGB_HDR_BUF 128
#define IGB_PKTTYPE_MASK 0x0000FFF0
@@ -340,7 +335,7 @@
#define EM_MAX_SCATTER 40
#define EM_VFTA_SIZE 128
-#define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
+#define EM_TSO_SIZE 65535
#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
#define EM_MSIX_MASK 0x01F00000 /* For 82574 use */
#define EM_MSIX_LINK 0x01000000 /* For 82574 use */
diff --git a/freebsd/sys/dev/evdev/cdev.c b/freebsd/sys/dev/evdev/cdev.c
index 10f4e77e..5ae14fed 100644
--- a/freebsd/sys/dev/evdev/cdev.c
+++ b/freebsd/sys/dev/evdev/cdev.c
@@ -421,7 +421,7 @@ evdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
return (ENOTSUP);
ke = (struct input_keymap_entry *)data;
- evdev->ev_methods->ev_get_keycode(evdev, evdev->ev_softc, ke);
+ evdev->ev_methods->ev_get_keycode(evdev, ke);
return (0);
case EVIOCSKEYCODE:
@@ -434,7 +434,7 @@ evdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
return (ENOTSUP);
ke = (struct input_keymap_entry *)data;
- evdev->ev_methods->ev_set_keycode(evdev, evdev->ev_softc, ke);
+ evdev->ev_methods->ev_set_keycode(evdev, ke);
return (0);
case EVIOCGABS(0) ... EVIOCGABS(ABS_MAX):
diff --git a/freebsd/sys/dev/evdev/evdev.c b/freebsd/sys/dev/evdev/evdev.c
index 6c2976d7..a355ec50 100644
--- a/freebsd/sys/dev/evdev/evdev.c
+++ b/freebsd/sys/dev/evdev/evdev.c
@@ -343,6 +343,13 @@ evdev_set_methods(struct evdev_dev *evdev, void *softc,
evdev->ev_softc = softc;
}
+inline void *
+evdev_get_softc(struct evdev_dev *evdev)
+{
+
+ return (evdev->ev_softc);
+}
+
inline void
evdev_support_prop(struct evdev_dev *evdev, uint16_t prop)
{
@@ -804,8 +811,7 @@ evdev_inject_event(struct evdev_dev *evdev, uint16_t type, uint16_t code,
case EV_FF:
if (evdev->ev_methods != NULL &&
evdev->ev_methods->ev_event != NULL)
- evdev->ev_methods->ev_event(evdev, evdev->ev_softc,
- type, code, value);
+ evdev->ev_methods->ev_event(evdev, type, code, value);
/*
* Leds and driver repeats should be reported in ev_event
* method body to interoperate with kbdmux states and rates
@@ -848,7 +854,7 @@ evdev_register_client(struct evdev_dev *evdev, struct evdev_client *client)
evdev->ev_methods->ev_open != NULL) {
debugf(evdev, "calling ev_open() on device %s",
evdev->ev_shortname);
- ret = evdev->ev_methods->ev_open(evdev, evdev->ev_softc);
+ ret = evdev->ev_methods->ev_open(evdev);
}
if (ret == 0)
LIST_INSERT_HEAD(&evdev->ev_clients, client, ec_link);
@@ -866,7 +872,7 @@ evdev_dispose_client(struct evdev_dev *evdev, struct evdev_client *client)
if (LIST_EMPTY(&evdev->ev_clients)) {
if (evdev->ev_methods != NULL &&
evdev->ev_methods->ev_close != NULL)
- evdev->ev_methods->ev_close(evdev, evdev->ev_softc);
+ (void)evdev->ev_methods->ev_close(evdev);
if (evdev_event_supported(evdev, EV_REP) &&
bit_test(evdev->ev_flags, EVDEV_FLAG_SOFTREPEAT))
evdev_stop_repeat(evdev);
diff --git a/freebsd/sys/dev/evdev/evdev.h b/freebsd/sys/dev/evdev/evdev.h
index 31f0c2c3..b897a465 100644
--- a/freebsd/sys/dev/evdev/evdev.h
+++ b/freebsd/sys/dev/evdev/evdev.h
@@ -38,11 +38,10 @@
struct evdev_dev;
-typedef int (evdev_open_t)(struct evdev_dev *, void *);
-typedef void (evdev_close_t)(struct evdev_dev *, void *);
-typedef void (evdev_event_t)(struct evdev_dev *, void *, uint16_t,
- uint16_t, int32_t);
-typedef void (evdev_keycode_t)(struct evdev_dev *, void *,
+typedef int (evdev_open_t)(struct evdev_dev *);
+typedef int (evdev_close_t)(struct evdev_dev *);
+typedef void (evdev_event_t)(struct evdev_dev *, uint16_t, uint16_t, int32_t);
+typedef void (evdev_keycode_t)(struct evdev_dev *,
struct input_keymap_entry *);
/*
@@ -126,6 +125,7 @@ void evdev_support_sw(struct evdev_dev *, uint16_t);
void evdev_set_repeat_params(struct evdev_dev *, uint16_t, int);
int evdev_set_report_size(struct evdev_dev *, size_t);
void evdev_set_flag(struct evdev_dev *, uint16_t);
+void *evdev_get_softc(struct evdev_dev *);
/* Multitouch related functions: */
int32_t evdev_get_mt_slot_by_tracking_id(struct evdev_dev *, int32_t);
@@ -141,7 +141,6 @@ uint16_t evdev_scancode2key(int *, int);
void evdev_push_mouse_btn(struct evdev_dev *, int);
void evdev_push_leds(struct evdev_dev *, int);
void evdev_push_repeats(struct evdev_dev *, keyboard_t *);
-evdev_event_t evdev_ev_kbd_event;
/* Event reporting shortcuts: */
static __inline int
diff --git a/freebsd/sys/dev/evdev/evdev_utils.c b/freebsd/sys/dev/evdev/evdev_utils.c
index 9814ffeb..17a815fb 100644
--- a/freebsd/sys/dev/evdev/evdev_utils.c
+++ b/freebsd/sys/dev/evdev/evdev_utils.c
@@ -42,8 +42,6 @@
#include <dev/evdev/evdev.h>
#include <dev/evdev/input.h>
-#include <dev/kbd/kbdreg.h>
-
#define NONE KEY_RESERVED
static uint16_t evdev_usb_scancodes[256] = {
@@ -301,43 +299,3 @@ evdev_push_repeats(struct evdev_dev *evdev, keyboard_t *kbd)
evdev_push_event(evdev, EV_REP, REP_DELAY, kbd->kb_delay1);
evdev_push_event(evdev, EV_REP, REP_PERIOD, kbd->kb_delay2);
}
-
-void
-evdev_ev_kbd_event(struct evdev_dev *evdev, void *softc, uint16_t type,
- uint16_t code, int32_t value)
-{
- keyboard_t *kbd = (keyboard_t *)softc;
- int delay[2], leds, oleds;
- size_t i;
-
- if (type == EV_LED) {
- leds = oleds = KBD_LED_VAL(kbd);
- for (i = 0; i < nitems(evdev_led_codes); i++) {
- if (evdev_led_codes[i] == code) {
- if (value)
- leds |= 1 << i;
- else
- leds &= ~(1 << i);
- if (leds != oleds) {
- mtx_lock(&Giant);
- kbdd_ioctl(kbd, KDSETLED,
- (caddr_t)&leds);
- mtx_unlock(&Giant);
- }
- break;
- }
- }
- } else if (type == EV_REP && code == REP_DELAY) {
- delay[0] = value;
- delay[1] = kbd->kb_delay2;
- mtx_lock(&Giant);
- kbdd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
- mtx_unlock(&Giant);
- } else if (type == EV_REP && code == REP_PERIOD) {
- delay[0] = kbd->kb_delay1;
- delay[1] = value;
- mtx_lock(&Giant);
- kbdd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
- mtx_unlock(&Giant);
- }
-}
diff --git a/freebsd/sys/dev/evdev/input-event-codes.h b/freebsd/sys/dev/evdev/input-event-codes.h
index cc1528f6..61c9000b 100644
--- a/freebsd/sys/dev/evdev/input-event-codes.h
+++ b/freebsd/sys/dev/evdev/input-event-codes.h
@@ -421,6 +421,7 @@
#define BTN_TOOL_MOUSE 0x146
#define BTN_TOOL_LENS 0x147
#define BTN_TOOL_QUINTTAP 0x148 /* Five fingers on trackpad */
+#define BTN_STYLUS3 0x149
#define BTN_TOUCH 0x14a
#define BTN_STYLUS 0x14b
#define BTN_STYLUS2 0x14c
@@ -607,6 +608,7 @@
#define BTN_DPAD_RIGHT 0x223
#define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */
+#define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */
#define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */
#define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */
@@ -615,6 +617,7 @@
#define KEY_APPSELECT 0x244 /* AL Select Task/Application */
#define KEY_SCREENSAVER 0x245 /* AL Screen Saver */
#define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */
+#define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */
#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
@@ -626,6 +629,38 @@
#define KEY_KBDINPUTASSIST_ACCEPT 0x264
#define KEY_KBDINPUTASSIST_CANCEL 0x265
+/* Diagonal movement keys */
+#define KEY_RIGHT_UP 0x266
+#define KEY_RIGHT_DOWN 0x267
+#define KEY_LEFT_UP 0x268
+#define KEY_LEFT_DOWN 0x269
+
+#define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */
+/* Show Top Menu of the Media (e.g. DVD) */
+#define KEY_MEDIA_TOP_MENU 0x26b
+#define KEY_NUMERIC_11 0x26c
+#define KEY_NUMERIC_12 0x26d
+/*
+ * Toggle Audio Description: refers to an audio service that helps blind and
+ * visually impaired consumers understand the action in a program. Note: in
+ * some countries this is referred to as "Video Description".
+ */
+#define KEY_AUDIO_DESC 0x26e
+#define KEY_3D_MODE 0x26f
+#define KEY_NEXT_FAVORITE 0x270
+#define KEY_STOP_RECORD 0x271
+#define KEY_PAUSE_RECORD 0x272
+#define KEY_VOD 0x273 /* Video on Demand */
+#define KEY_UNMUTE 0x274
+#define KEY_FASTREVERSE 0x275
+#define KEY_SLOWREVERSE 0x276
+/*
+ * Control a data application associated with the currently viewed channel,
+ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.)
+ */
+#define KEY_DATA 0x277
+#define KEY_ONSCREEN_KEYBOARD 0x278
+
#define BTN_TRIGGER_HAPPY 0x2c0
#define BTN_TRIGGER_HAPPY1 0x2c0
#define BTN_TRIGGER_HAPPY2 0x2c1
@@ -763,6 +798,7 @@
#define SW_ROTATE_LOCK 0x0c /* set = rotate locked/disabled */
#define SW_LINEIN_INSERT 0x0d /* set = inserted */
#define SW_MUTE_DEVICE 0x0e /* set = device disabled */
+#define SW_PEN_INSERTED 0x0f /* set = pen inserted */
#define SW_MAX 0x0f
#define SW_CNT (SW_MAX+1)
diff --git a/freebsd/sys/dev/evdev/input.h b/freebsd/sys/dev/evdev/input.h
index 7639e0d6..947f4643 100644
--- a/freebsd/sys/dev/evdev/input.h
+++ b/freebsd/sys/dev/evdev/input.h
@@ -38,6 +38,9 @@
#include "input-event-codes.h"
+#define input_event_sec time.tv_sec
+#define input_event_usec time.tv_usec
+
struct input_event {
struct timeval time;
uint16_t type;
@@ -138,6 +141,9 @@ struct input_keymap_entry {
#define BUS_GSC 0x1A
#define BUS_ATARI 0x1B
#define BUS_SPI 0x1C
+#define BUS_RMI 0x1D
+#define BUS_CEC 0x1E
+#define BUS_INTEL_ISHTP 0x1F
/*
* MT_TOOL types
diff --git a/freebsd/sys/dev/evdev/uinput.c b/freebsd/sys/dev/evdev/uinput.c
index 3b332d1f..28d740cc 100644
--- a/freebsd/sys/dev/evdev/uinput.c
+++ b/freebsd/sys/dev/evdev/uinput.c
@@ -161,10 +161,10 @@ uinput_knl_assert_unlocked(void *arg)
}
static void
-uinput_ev_event(struct evdev_dev *evdev, void *softc, uint16_t type,
- uint16_t code, int32_t value)
+uinput_ev_event(struct evdev_dev *evdev, uint16_t type, uint16_t code,
+ int32_t value)
{
- struct uinput_cdev_state *state = softc;
+ struct uinput_cdev_state *state = evdev_get_softc(evdev);
if (type == EV_LED)
evdev_push_event(evdev, type, code, value);
diff --git a/freebsd/sys/dev/fxp/if_fxp.c b/freebsd/sys/dev/fxp/if_fxp.c
index ca933aa8..032246d8 100644
--- a/freebsd/sys/dev/fxp/if_fxp.c
+++ b/freebsd/sys/dev/fxp/if_fxp.c
@@ -309,6 +309,8 @@ static devclass_t fxp_devclass;
DRIVER_MODULE_ORDERED(fxp, pci, fxp_driver, fxp_devclass, NULL, NULL,
SI_ORDER_ANY);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, fxp, fxp_ident_table,
+ sizeof(fxp_ident_table[0]), nitems(fxp_ident_table) - 1);
DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, NULL, NULL);
static struct resource_spec fxp_res_spec_mem[] = {
diff --git a/freebsd/sys/dev/kbd/kbd.c b/freebsd/sys/dev/kbd/kbd.c
index c7512e89..b157e57e 100644
--- a/freebsd/sys/dev/kbd/kbd.c
+++ b/freebsd/sys/dev/kbd/kbd.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kbio.h>
+#include <dev/evdev/input-event-codes.h>
#include <dev/kbd/kbdreg.h>
#define KBD_INDEX(dev) dev2unit(dev)
@@ -1477,3 +1478,41 @@ genkbd_keyaction(keyboard_t *kbd, int keycode, int up, int *shiftstate,
}
/* NOT REACHED */
}
+
+void
+kbd_ev_event(keyboard_t *kbd, uint16_t type, uint16_t code, int32_t value)
+{
+ int delay[2], led = 0, leds, oleds;
+
+ if (type == EV_LED) {
+ leds = oleds = KBD_LED_VAL(kbd);
+ switch (code) {
+ case LED_CAPSL:
+ led = CLKED;
+ break;
+ case LED_NUML:
+ led = NLKED;
+ break;
+ case LED_SCROLLL:
+ led = SLKED;
+ break;
+ }
+
+ if (value)
+ leds |= led;
+ else
+ leds &= ~led;
+
+ if (leds != oleds)
+ kbdd_ioctl(kbd, KDSETLED, (caddr_t)&leds);
+
+ } else if (type == EV_REP && code == REP_DELAY) {
+ delay[0] = value;
+ delay[1] = kbd->kb_delay2;
+ kbdd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
+ } else if (type == EV_REP && code == REP_PERIOD) {
+ delay[0] = kbd->kb_delay1;
+ delay[1] = value;
+ kbdd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
+ }
+}
diff --git a/freebsd/sys/dev/kbd/kbdreg.h b/freebsd/sys/dev/kbd/kbdreg.h
index 5caafdda..07c4cfd9 100644
--- a/freebsd/sys/dev/kbd/kbdreg.h
+++ b/freebsd/sys/dev/kbd/kbdreg.h
@@ -253,6 +253,10 @@ keyboard_t *kbd_get_keyboard(int index);
int kbd_configure(int flags);
/* see `kb_config' above for flag bit definitions */
+/* evdev2kbd mappings */
+void kbd_ev_event(keyboard_t *kbd, uint16_t type,
+ uint16_t code, int32_t value);
+
#ifdef KBD_INSTALL_CDEV
/* virtual keyboard cdev driver functions */
diff --git a/freebsd/sys/dev/mii/icsphy.c b/freebsd/sys/dev/mii/icsphy.c
index bde90714..183814ae 100755
--- a/freebsd/sys/dev/mii/icsphy.c
+++ b/freebsd/sys/dev/mii/icsphy.c
@@ -115,9 +115,7 @@ static const struct mii_phydesc icsphys[] = {
MII_PHY_DESC(ICS, 1890),
MII_PHY_DESC(ICS, 1892),
MII_PHY_DESC(ICS, 1893),
-#ifdef __rtems__
MII_PHY_DESC(ICS, 1893C),
-#endif /* __rtems__ */
MII_PHY_END
};
diff --git a/freebsd/sys/dev/mmc/mmc.c b/freebsd/sys/dev/mmc/mmc.c
index 2c3cba3d..cfbce2a5 100644
--- a/freebsd/sys/dev/mmc/mmc.c
+++ b/freebsd/sys/dev/mmc/mmc.c
@@ -1600,10 +1600,13 @@ mmc_discover_cards(struct mmc_softc *sc)
uint32_t raw_cid[4];
struct mmc_ivars *ivar = NULL;
const struct mmc_quirk *quirk;
+ const uint8_t *ext_csd;
device_t child;
int err, host_caps, i, newcard;
uint32_t resp, sec_count, status;
uint16_t rca = 2;
+ int16_t rev;
+ uint8_t card_type;
host_caps = mmcbr_get_caps(sc->dev);
if (bootverbose || mmc_debug)
@@ -1791,6 +1794,7 @@ mmc_discover_cards(struct mmc_softc *sc)
goto free_ivar;
}
+ rev = -1;
/* Only MMC >= 4.x devices support EXT_CSD. */
if (ivar->csd.spec_vers >= 4) {
err = mmc_send_ext_csd(sc->dev, sc->dev,
@@ -1800,11 +1804,10 @@ mmc_discover_cards(struct mmc_softc *sc)
"Error reading EXT_CSD %d\n", err);
goto free_ivar;
}
+ ext_csd = ivar->raw_ext_csd;
+ rev = ext_csd[EXT_CSD_REV];
/* Handle extended capacity from EXT_CSD */
- sec_count = ivar->raw_ext_csd[EXT_CSD_SEC_CNT] +
- (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 1] << 8) +
- (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 2] << 16) +
- (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 3] << 24);
+ sec_count = le32dec(&ext_csd[EXT_CSD_SEC_CNT]);
if (sec_count != 0) {
ivar->sec_count = sec_count;
ivar->high_cap = 1;
@@ -1812,65 +1815,56 @@ mmc_discover_cards(struct mmc_softc *sc)
/* Find maximum supported bus width. */
ivar->bus_width = mmc_test_bus_width(sc);
/* Get device speeds beyond normal mode. */
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS_52) != 0) {
+ card_type = ext_csd[EXT_CSD_CARD_TYPE];
+ if ((card_type & EXT_CSD_CARD_TYPE_HS_52) != 0) {
setbit(&ivar->timings, bus_timing_hs);
ivar->hs_tran_speed = MMC_TYPE_HS_52_MAX;
- } else if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS_26) != 0) {
+ } else if ((card_type & EXT_CSD_CARD_TYPE_HS_26) != 0) {
setbit(&ivar->timings, bus_timing_hs);
ivar->hs_tran_speed = MMC_TYPE_HS_26_MAX;
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_DDR_52_1_2V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_DDR_52_1_2V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_120) != 0) {
setbit(&ivar->timings, bus_timing_mmc_ddr52);
setbit(&ivar->vccq_120, bus_timing_mmc_ddr52);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_DDR_52_1_8V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_DDR_52_1_8V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_180) != 0) {
setbit(&ivar->timings, bus_timing_mmc_ddr52);
setbit(&ivar->vccq_180, bus_timing_mmc_ddr52);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS200_1_2V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_120) != 0) {
setbit(&ivar->timings, bus_timing_mmc_hs200);
setbit(&ivar->vccq_120, bus_timing_mmc_hs200);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS200_1_8V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_180) != 0) {
setbit(&ivar->timings, bus_timing_mmc_hs200);
setbit(&ivar->vccq_180, bus_timing_mmc_hs200);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS400_1_2V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_120) != 0 &&
ivar->bus_width == bus_width_8) {
setbit(&ivar->timings, bus_timing_mmc_hs400);
setbit(&ivar->vccq_120, bus_timing_mmc_hs400);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS400_1_8V) != 0 &&
+ if ((card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) != 0 &&
(host_caps & MMC_CAP_SIGNALING_180) != 0 &&
ivar->bus_width == bus_width_8) {
setbit(&ivar->timings, bus_timing_mmc_hs400);
setbit(&ivar->vccq_180, bus_timing_mmc_hs400);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS400_1_2V) != 0 &&
- (ivar->raw_ext_csd[EXT_CSD_STROBE_SUPPORT] &
+ if ((card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) != 0 &&
+ (ext_csd[EXT_CSD_STROBE_SUPPORT] &
EXT_CSD_STROBE_SUPPORT_EN) != 0 &&
(host_caps & MMC_CAP_SIGNALING_120) != 0 &&
ivar->bus_width == bus_width_8) {
setbit(&ivar->timings, bus_timing_mmc_hs400es);
setbit(&ivar->vccq_120, bus_timing_mmc_hs400es);
}
- if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] &
- EXT_CSD_CARD_TYPE_HS400_1_8V) != 0 &&
- (ivar->raw_ext_csd[EXT_CSD_STROBE_SUPPORT] &
+ if ((card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) != 0 &&
+ (ext_csd[EXT_CSD_STROBE_SUPPORT] &
EXT_CSD_STROBE_SUPPORT_EN) != 0 &&
(host_caps & MMC_CAP_SIGNALING_180) != 0 &&
ivar->bus_width == bus_width_8) {
@@ -1882,13 +1876,13 @@ mmc_discover_cards(struct mmc_softc *sc)
* units of 10 ms), defaulting to 500 ms.
*/
ivar->cmd6_time = 500 * 1000;
- if (ivar->raw_ext_csd[EXT_CSD_REV] >= 6)
+ if (rev >= 6)
ivar->cmd6_time = 10 *
- ivar->raw_ext_csd[EXT_CSD_GEN_CMD6_TIME];
+ ext_csd[EXT_CSD_GEN_CMD6_TIME];
/* Handle HC erase sector size. */
- if (ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE] != 0) {
+ if (ext_csd[EXT_CSD_ERASE_GRP_SIZE] != 0) {
ivar->erase_sector = 1024 *
- ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE];
+ ext_csd[EXT_CSD_ERASE_GRP_SIZE];
err = mmc_switch(sc->dev, sc->dev, ivar->rca,
EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_ERASE_GRP_DEF,
@@ -1903,8 +1897,7 @@ mmc_discover_cards(struct mmc_softc *sc)
}
}
- mmc_decode_cid_mmc(ivar->raw_cid, &ivar->cid,
- ivar->raw_ext_csd[EXT_CSD_REV] >= 5);
+ mmc_decode_cid_mmc(ivar->raw_cid, &ivar->cid, rev >= 5);
child_common:
for (quirk = &mmc_quirks[0]; quirk->mid != 0x0; quirk++) {
diff --git a/freebsd/sys/dev/mmc/mmcbrvar.h b/freebsd/sys/dev/mmc/mmcbrvar.h
index 6e4bec70..acddd3a3 100644
--- a/freebsd/sys/dev/mmc/mmcbrvar.h
+++ b/freebsd/sys/dev/mmc/mmcbrvar.h
@@ -97,7 +97,6 @@ MMCBR_ACCESSOR(host_ocr, HOST_OCR, int)
MMCBR_ACCESSOR(mode, MODE, int)
MMCBR_ACCESSOR(ocr, OCR, int)
MMCBR_ACCESSOR(power_mode, POWER_MODE, int)
-MMCBR_ACCESSOR(retune_req, RETUNE_REQ, int)
MMCBR_ACCESSOR(vdd, VDD, int)
MMCBR_ACCESSOR(vccq, VCCQ, int)
MMCBR_ACCESSOR(caps, CAPS, int)
@@ -106,6 +105,20 @@ MMCBR_ACCESSOR(max_data, MAX_DATA, int)
MMCBR_ACCESSOR(max_busy_timeout, MAX_BUSY_TIMEOUT, u_int)
static int __inline
+mmcbr_get_retune_req(device_t dev)
+{
+ uintptr_t v;
+
+ if (__predict_false(BUS_READ_IVAR(device_get_parent(dev), dev,
+ MMCBR_IVAR_RETUNE_REQ, &v) != 0))
+ return (retune_req_none);
+ return ((int)v);
+}
+
+/*
+ * Convenience wrappers for the mmcbr interface
+ */
+static int __inline
mmcbr_update_ios(device_t dev)
{
diff --git a/freebsd/sys/dev/mmc/mmcsd.c b/freebsd/sys/dev/mmc/mmcsd.c
index 263da55d..e469c1d5 100644
--- a/freebsd/sys/dev/mmc/mmcsd.c
+++ b/freebsd/sys/dev/mmc/mmcsd.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
+#include <sys/endian.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
@@ -477,7 +478,7 @@ mmcsd_attach(device_t dev)
sc = device_get_softc(dev);
sc->dev = dev;
sc->mmcbus = mmcbus = device_get_parent(dev);
- sc->mode = mmcbr_get_mode(mmcbus);
+ sc->mode = mmc_get_card_type(dev);
/*
* Note that in principle with an SDHCI-like re-tuning implementation,
* the maximum data size can change at runtime due to a device removal/
@@ -542,10 +543,7 @@ mmcsd_attach(device_t dev)
* disabled.
*/
if (rev >= 6 && mmcsd_cache != 0) {
- size = ext_csd[EXT_CSD_CACHE_SIZE] |
- ext_csd[EXT_CSD_CACHE_SIZE + 1] << 8 |
- ext_csd[EXT_CSD_CACHE_SIZE + 2] << 16 |
- ext_csd[EXT_CSD_CACHE_SIZE + 3] << 24;
+ size = le32dec(&ext_csd[EXT_CSD_CACHE_SIZE]);
if (bootverbose)
device_printf(dev, "cache size %juKB\n", size);
if (size > 0) {
@@ -591,10 +589,8 @@ mmcsd_attach(device_t dev)
size *= erase_size * wp_size;
if (size != mmc_get_media_size(dev) * sector_size) {
sc->enh_size = size;
- sc->enh_base = (ext_csd[EXT_CSD_ENH_START_ADDR] +
- (ext_csd[EXT_CSD_ENH_START_ADDR + 1] << 8) +
- (ext_csd[EXT_CSD_ENH_START_ADDR + 2] << 16) +
- (ext_csd[EXT_CSD_ENH_START_ADDR + 3] << 24)) *
+ sc->enh_base =
+ le32dec(&ext_csd[EXT_CSD_ENH_START_ADDR]) *
(sc->high_cap == 0 ? MMC_SECTOR_SIZE : 1);
} else if (bootverbose)
device_printf(dev,
@@ -1578,7 +1574,7 @@ mmcsd_delete(struct mmcsd_part *part, struct bio *bp)
memset(&cmd, 0, sizeof(cmd));
cmd.mrq = &req;
req.cmd = &cmd;
- if (mmc_get_card_type(dev) == mode_sd)
+ if (sc->mode == mode_sd)
cmd.opcode = SD_ERASE_WR_BLK_START;
else
cmd.opcode = MMC_ERASE_GROUP_START;
@@ -1597,7 +1593,7 @@ mmcsd_delete(struct mmcsd_part *part, struct bio *bp)
memset(&req, 0, sizeof(req));
memset(&cmd, 0, sizeof(cmd));
req.cmd = &cmd;
- if (mmc_get_card_type(dev) == mode_sd)
+ if (sc->mode == mode_sd)
cmd.opcode = SD_ERASE_WR_BLK_END;
else
cmd.opcode = MMC_ERASE_GROUP_END;
diff --git a/freebsd/sys/dev/nvme/nvme.h b/freebsd/sys/dev/nvme/nvme.h
index 169f22d1..747767ce 100644
--- a/freebsd/sys/dev/nvme/nvme.h
+++ b/freebsd/sys/dev/nvme/nvme.h
@@ -110,13 +110,9 @@
/* Command field definitions */
-#define NVME_CMD_OPC_SHIFT (0)
-#define NVME_CMD_OPC_MASK (0xFF)
#define NVME_CMD_FUSE_SHIFT (8)
#define NVME_CMD_FUSE_MASK (0x3)
-#define NVME_CMD_SET_OPC(opc) (htole16(((uint16_t)(opc) & NVME_CMD_OPC_MASK) << NVME_CMD_OPC_SHIFT))
-
#define NVME_STATUS_P_SHIFT (0)
#define NVME_STATUS_P_MASK (0x1)
#define NVME_STATUS_SC_SHIFT (1)
@@ -428,7 +424,8 @@ _Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_regis
struct nvme_command
{
/* dword 0 */
- uint16_t opc_fuse; /* opcode, fused operation */
+ uint8_t opc; /* opcode */
+ uint8_t fuse; /* fused operation */
uint16_t cid; /* command identifier */
/* dword 1 */
@@ -1288,7 +1285,7 @@ static inline
void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
{
- cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FLUSH);
+ cmd->opc = NVME_OPC_FLUSH;
cmd->nsid = htole32(nsid);
}
@@ -1296,7 +1293,7 @@ static inline
void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
uint64_t lba, uint32_t count)
{
- cmd->opc_fuse = NVME_CMD_SET_OPC(rwcmd);
+ cmd->opc = rwcmd;
cmd->nsid = htole32(nsid);
cmd->cdw10 = htole32(lba & 0xffffffffu);
cmd->cdw11 = htole32(lba >> 32);
@@ -1321,7 +1318,7 @@ static inline
void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
uint32_t num_ranges)
{
- cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT);
+ cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
cmd->nsid = htole32(nsid);
cmd->cdw10 = htole32(num_ranges - 1);
cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE);
diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.c b/freebsd/sys/dev/ofw/ofw_bus_subr.c
index 5038bb03..5a76e555 100644
--- a/freebsd/sys/dev/ofw/ofw_bus_subr.c
+++ b/freebsd/sys/dev/ofw/ofw_bus_subr.c
@@ -93,6 +93,9 @@ ofw_bus_gen_child_pnpinfo_str(device_t cbdev, device_t child, char *buf,
{
*buf = '\0';
+ if (!ofw_bus_status_okay(child))
+ return (0);
+
if (ofw_bus_get_name(child) != NULL) {
strlcat(buf, "name=", buflen);
strlcat(buf, ofw_bus_get_name(child), buflen);
@@ -102,6 +105,7 @@ ofw_bus_gen_child_pnpinfo_str(device_t cbdev, device_t child, char *buf,
strlcat(buf, " compat=", buflen);
strlcat(buf, ofw_bus_get_compat(child), buflen);
}
+
return (0);
};
diff --git a/freebsd/sys/dev/ofw/ofw_fdt.c b/freebsd/sys/dev/ofw/ofw_fdt.c
index b1bbadee..d05a5dc1 100644
--- a/freebsd/sys/dev/ofw/ofw_fdt.c
+++ b/freebsd/sys/dev/ofw/ofw_fdt.c
@@ -287,8 +287,6 @@ ofw_fdt_getprop(ofw_t ofw, phandle_t package, const char *propname, void *buf,
/* Emulate the 'name' property */
name = fdt_get_name(fdtp, offset, &len);
strncpy(buf, name, buflen);
- if (len + 1 > buflen)
- len = buflen;
return (len + 1);
}
@@ -307,9 +305,8 @@ ofw_fdt_getprop(ofw_t ofw, phandle_t package, const char *propname, void *buf,
if (prop == NULL)
return (-1);
- if (len > buflen)
- len = buflen;
- bcopy(prop, buf, len);
+ bcopy(prop, buf, min(len, buflen));
+
return (len);
}
diff --git a/freebsd/sys/dev/ofw/ofw_subr.c b/freebsd/sys/dev/ofw/ofw_subr.c
index 8359485e..4a20727c 100644
--- a/freebsd/sys/dev/ofw/ofw_subr.c
+++ b/freebsd/sys/dev/ofw/ofw_subr.c
@@ -34,6 +34,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/boot.h>
#include <sys/bus.h>
#include <sys/libkern.h>
#include <sys/reboot.h>
@@ -63,7 +64,9 @@ get_addr_props(phandle_t node, uint32_t *addrp, uint32_t *sizep, int *pcip)
res = OF_getprop(node, "device_type", type, sizeof(type));
if (res != -1) {
type[sizeof(type) - 1] = '\0';
- pci = (strcmp(type, "pci") == 0) ? 1 : 0;
+ if (strcmp(type, "pci") == 0 ||
+ strcmp(type, "pciex")== 0)
+ pci = 1;
}
}
if (addrp != NULL)
@@ -172,7 +175,7 @@ ofw_reg_to_paddr(phandle_t dev, int regno, bus_addr_t *paddr,
}
KASSERT(addr <= BUS_SPACE_MAXADDR,
- ("Bus sddress is too large: %jx", (uintmax_t)addr));
+ ("Bus address is too large: %jx", (uintmax_t)addr));
KASSERT(size <= BUS_SPACE_MAXSIZE,
("Bus size is too large: %jx", (uintmax_t)size));
@@ -184,44 +187,6 @@ ofw_reg_to_paddr(phandle_t dev, int regno, bus_addr_t *paddr,
return (0);
}
-/* Parse cmd line args as env - copied from xlp_machdep. */
-/* XXX-BZ this should really be centrally provided for all (boot) code. */
-static void
-_parse_bootargs(char *cmdline)
-{
- char *n, *v;
-
- while ((v = strsep(&cmdline, " \n")) != NULL) {
- if (*v == '\0')
- continue;
- if (*v == '-') {
- while (*v != '\0') {
- v++;
- switch (*v) {
- case 'a': boothowto |= RB_ASKNAME; break;
- /* Someone should simulate that ;-) */
- case 'C': boothowto |= RB_CDROM; break;
- case 'd': boothowto |= RB_KDB; break;
- case 'D': boothowto |= RB_MULTIPLE; break;
- case 'm': boothowto |= RB_MUTE; break;
- case 'g': boothowto |= RB_GDB; break;
- case 'h': boothowto |= RB_SERIAL; break;
- case 'p': boothowto |= RB_PAUSE; break;
- case 'r': boothowto |= RB_DFLTROOT; break;
- case 's': boothowto |= RB_SINGLE; break;
- case 'v': boothowto |= RB_VERBOSE; break;
- }
- }
- } else {
- n = strsep(&v, "=");
- if (v == NULL)
- kern_setenv(n, "1");
- else
- kern_setenv(n, v);
- }
- }
-}
-
/*
* This is intended to be called early on, right after the OF system is
* initialized, so pmap may not be up yet.
@@ -238,7 +203,7 @@ ofw_parse_bootargs(void)
return (chosen);
if ((err = OF_getprop(chosen, "bootargs", buf, sizeof(buf))) != -1) {
- _parse_bootargs(buf);
+ boothowto |= boot_parse_cmdline(buf);
return (0);
}
diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c
index 55a21320..512e8636 100644
--- a/freebsd/sys/dev/pci/pci.c
+++ b/freebsd/sys/dev/pci/pci.c
@@ -401,6 +401,11 @@ static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
0, "Enable support for PCIe Alternative RID Interpretation");
+static int pci_clear_aer_on_attach = 0;
+SYSCTL_INT(_hw_pci, OID_AUTO, clear_aer_on_attach, CTLFLAG_RWTUN,
+ &pci_clear_aer_on_attach, 0,
+ "Clear port and device AER state on driver attach");
+
static int
pci_has_quirk(uint32_t devid, int quirk)
{
@@ -4214,17 +4219,98 @@ pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
}
#endif
+static void
+pci_add_child_clear_aer(device_t dev, struct pci_devinfo *dinfo)
+{
+ int aer;
+ uint32_t r;
+ uint16_t r2;
+
+ if (dinfo->cfg.pcie.pcie_location != 0 &&
+ dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT) {
+ r2 = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_ROOT_CTL, 2);
+ r2 &= ~(PCIEM_ROOT_CTL_SERR_CORR |
+ PCIEM_ROOT_CTL_SERR_NONFATAL | PCIEM_ROOT_CTL_SERR_FATAL);
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_ROOT_CTL, r2, 2);
+ }
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
+ r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
+ if (r != 0 && bootverbose) {
+ pci_printf(&dinfo->cfg,
+ "clearing AER UC 0x%08x -> 0x%08x\n",
+ r, pci_read_config(dev, aer + PCIR_AER_UC_STATUS,
+ 4));
+ }
+
+ r = pci_read_config(dev, aer + PCIR_AER_UC_MASK, 4);
+ r &= ~(PCIM_AER_UC_TRAINING_ERROR |
+ PCIM_AER_UC_DL_PROTOCOL_ERROR |
+ PCIM_AER_UC_SURPRISE_LINK_DOWN |
+ PCIM_AER_UC_POISONED_TLP |
+ PCIM_AER_UC_FC_PROTOCOL_ERROR |
+ PCIM_AER_UC_COMPLETION_TIMEOUT |
+ PCIM_AER_UC_COMPLETER_ABORT |
+ PCIM_AER_UC_UNEXPECTED_COMPLETION |
+ PCIM_AER_UC_RECEIVER_OVERFLOW |
+ PCIM_AER_UC_MALFORMED_TLP |
+ PCIM_AER_UC_ECRC_ERROR |
+ PCIM_AER_UC_UNSUPPORTED_REQUEST |
+ PCIM_AER_UC_ACS_VIOLATION |
+ PCIM_AER_UC_INTERNAL_ERROR |
+ PCIM_AER_UC_MC_BLOCKED_TLP |
+ PCIM_AER_UC_ATOMIC_EGRESS_BLK |
+ PCIM_AER_UC_TLP_PREFIX_BLOCKED);
+ pci_write_config(dev, aer + PCIR_AER_UC_MASK, r, 4);
+
+ r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
+ if (r != 0 && bootverbose) {
+ pci_printf(&dinfo->cfg,
+ "clearing AER COR 0x%08x -> 0x%08x\n",
+ r, pci_read_config(dev, aer + PCIR_AER_COR_STATUS,
+ 4));
+ }
+
+ r = pci_read_config(dev, aer + PCIR_AER_COR_MASK, 4);
+ r &= ~(PCIM_AER_COR_RECEIVER_ERROR |
+ PCIM_AER_COR_BAD_TLP |
+ PCIM_AER_COR_BAD_DLLP |
+ PCIM_AER_COR_REPLAY_ROLLOVER |
+ PCIM_AER_COR_REPLAY_TIMEOUT |
+ PCIM_AER_COR_ADVISORY_NF_ERROR |
+ PCIM_AER_COR_INTERNAL_ERROR |
+ PCIM_AER_COR_HEADER_LOG_OVFLOW);
+ pci_write_config(dev, aer + PCIR_AER_COR_MASK, r, 4);
+
+ r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, 2);
+ r |= PCIEM_CTL_COR_ENABLE | PCIEM_CTL_NFER_ENABLE |
+ PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE;
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, r, 2);
+ }
+}
+
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
- dinfo->cfg.dev = device_add_child(bus, NULL, -1);
- device_set_ivars(dinfo->cfg.dev, dinfo);
+ device_t dev;
+
+ dinfo->cfg.dev = dev = device_add_child(bus, NULL, -1);
+ device_set_ivars(dev, dinfo);
resource_list_init(&dinfo->resources);
- pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
- pci_cfg_restore(dinfo->cfg.dev, dinfo);
+ pci_cfg_save(dev, dinfo, 0);
+ pci_cfg_restore(dev, dinfo);
pci_print_verbose(dinfo);
- pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
+ pci_add_resources(bus, dev, 0, 0);
pci_child_added(dinfo->cfg.dev);
+
+ if (pci_clear_aer_on_attach)
+ pci_add_child_clear_aer(dev, dinfo);
+
EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
}
@@ -6263,3 +6349,164 @@ pcie_flr(device_t dev, u_int max_delay, bool force)
pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
return (true);
}
+
+const struct pci_device_table *
+pci_match_device(device_t child, const struct pci_device_table *id, size_t nelt)
+{
+ bool match;
+ uint16_t vendor, device, subvendor, subdevice, class, subclass, revid;
+
+ vendor = pci_get_vendor(child);
+ device = pci_get_device(child);
+ subvendor = pci_get_subvendor(child);
+ subdevice = pci_get_subdevice(child);
+ class = pci_get_class(child);
+ subclass = pci_get_subclass(child);
+ revid = pci_get_revid(child);
+ while (nelt-- > 0) {
+ match = true;
+ if (id->match_flag_vendor)
+ match &= vendor == id->vendor;
+ if (id->match_flag_device)
+ match &= device == id->device;
+ if (id->match_flag_subvendor)
+ match &= subvendor == id->subvendor;
+ if (id->match_flag_subdevice)
+ match &= subdevice == id->subdevice;
+ if (id->match_flag_class)
+ match &= class == id->class_id;
+ if (id->match_flag_subclass)
+ match &= subclass == id->subclass;
+ if (id->match_flag_revid)
+ match &= revid == id->revid;
+ if (match)
+ return (id);
+ id++;
+ }
+ return (NULL);
+}
+
+static void
+pci_print_faulted_dev_name(const struct pci_devinfo *dinfo)
+{
+ const char *dev_name;
+ device_t dev;
+
+ dev = dinfo->cfg.dev;
+ printf("pci%d:%d:%d:%d", dinfo->cfg.domain, dinfo->cfg.bus,
+ dinfo->cfg.slot, dinfo->cfg.func);
+ dev_name = device_get_name(dev);
+ if (dev_name != NULL)
+ printf(" (%s%d)", dev_name, device_get_unit(dev));
+}
+
+void
+pci_print_faulted_dev(void)
+{
+ struct pci_devinfo *dinfo;
+ device_t dev;
+ int aer, i;
+ uint32_t r1, r2;
+ uint16_t status;
+
+ STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
+ dev = dinfo->cfg.dev;
+ status = pci_read_config(dev, PCIR_STATUS, 2);
+ status &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
+ PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
+ PCIM_STATUS_SERR | PCIM_STATUS_PERR;
+ if (status != 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" error 0x%04x\n", status);
+ }
+ if (dinfo->cfg.pcie.pcie_location != 0) {
+ status = pci_read_config(dev,
+ dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, 2);
+ if ((status & (PCIEM_STA_CORRECTABLE_ERROR |
+ PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
+ PCIEM_STA_UNSUPPORTED_REQ)) != 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" PCIe DEVCTL 0x%04x DEVSTA 0x%04x\n",
+ pci_read_config(dev,
+ dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, 2),
+ status);
+ }
+ }
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
+ r1 = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ r2 = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ if (r1 != 0 || r2 != 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" AER UC 0x%08x Mask 0x%08x Svr 0x%08x\n"
+ " COR 0x%08x Mask 0x%08x Ctl 0x%08x\n",
+ r1, pci_read_config(dev, aer +
+ PCIR_AER_UC_MASK, 4),
+ pci_read_config(dev, aer +
+ PCIR_AER_UC_SEVERITY, 4),
+ r2, pci_read_config(dev, aer +
+ PCIR_AER_COR_MASK, 4),
+ pci_read_config(dev, aer +
+ PCIR_AER_CAP_CONTROL, 4));
+ for (i = 0; i < 4; i++) {
+ r1 = pci_read_config(dev, aer +
+ PCIR_AER_HEADER_LOG + i * 4, 4);
+ printf(" HL%d: 0x%08x\n", i, r1);
+ }
+ }
+ }
+ }
+}
+
+#ifdef DDB
+DB_SHOW_COMMAND(pcierr, pci_print_faulted_dev_db)
+{
+
+ pci_print_faulted_dev();
+}
+
+static void
+db_clear_pcie_errors(const struct pci_devinfo *dinfo)
+{
+ device_t dev;
+ int aer;
+ uint32_t r;
+
+ dev = dinfo->cfg.dev;
+ r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, 2);
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, r, 2);
+
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) != 0)
+ return;
+ r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ if (r != 0)
+ pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
+ r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ if (r != 0)
+ pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
+}
+
+DB_COMMAND(pci_clearerr, db_pci_clearerr)
+{
+ struct pci_devinfo *dinfo;
+ device_t dev;
+ uint16_t status, status1;
+
+ STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
+ dev = dinfo->cfg.dev;
+ status1 = status = pci_read_config(dev, PCIR_STATUS, 2);
+ status1 &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
+ PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
+ PCIM_STATUS_SERR | PCIM_STATUS_PERR;
+ if (status1 != 0) {
+ status &= ~status1;
+ pci_write_config(dev, PCIR_STATUS, status, 2);
+ }
+ if (dinfo->cfg.pcie.pcie_location != 0)
+ db_clear_pcie_errors(dinfo);
+ }
+}
+#endif
diff --git a/freebsd/sys/dev/pci/pci_user.c b/freebsd/sys/dev/pci/pci_user.c
index c9d500a8..b3a2e9e2 100644
--- a/freebsd/sys/dev/pci/pci_user.c
+++ b/freebsd/sys/dev/pci/pci_user.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bus.h> /* XXX trim includes */
+#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@@ -41,13 +42,19 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/kernel.h>
+#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/queue.h>
-#include <sys/types.h>
+#include <sys/rwlock.h>
+#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
#include <sys/bus.h>
#include <machine/bus.h>
@@ -441,6 +448,14 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches,
}
}
+/*
+ * Like PVE_NEXT but takes an explicit length since 'pve' is a user
+ * pointer that cannot be dereferenced.
+ */
+#define PVE_NEXT_LEN(pve, datalen) \
+ ((struct pci_vpd_element *)((char *)(pve) + \
+ sizeof(struct pci_vpd_element) + (datalen)))
+
static int
pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
{
@@ -489,7 +504,7 @@ pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
strlen(vpd->vpd_ident));
if (error)
return (error);
- vpd_user = PVE_NEXT(vpd_user);
+ vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen);
vpd_element.pve_flags = 0;
for (i = 0; i < vpd->vpd_rocnt; i++) {
vpd_element.pve_keyword[0] = vpd->vpd_ros[i].keyword[0];
@@ -502,7 +517,7 @@ pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
vpd->vpd_ros[i].len);
if (error)
return (error);
- vpd_user = PVE_NEXT(vpd_user);
+ vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen);
}
vpd_element.pve_flags = PVE_FLAG_RW;
for (i = 0; i < vpd->vpd_wcnt; i++) {
@@ -516,7 +531,7 @@ pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
vpd->vpd_w[i].len);
if (error)
return (error);
- vpd_user = PVE_NEXT(vpd_user);
+ vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen);
}
KASSERT((char *)vpd_user - (char *)lvio->plvi_data == len,
("length mismatch"));
@@ -698,6 +713,79 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup,
}
}
+#ifndef __rtems__
+static int
+pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm)
+{
+ vm_map_t map;
+ vm_object_t obj;
+ struct thread *td;
+ struct sglist *sg;
+ struct pci_map *pm;
+ vm_paddr_t pbase;
+ vm_size_t plen;
+ vm_offset_t addr;
+ vm_prot_t prot;
+ int error, flags;
+
+ td = curthread;
+ map = &td->td_proc->p_vmspace->vm_map;
+ if ((pbm->pbm_flags & ~(PCIIO_BAR_MMAP_FIXED | PCIIO_BAR_MMAP_EXCL |
+ PCIIO_BAR_MMAP_RW | PCIIO_BAR_MMAP_ACTIVATE)) != 0 ||
+ pbm->pbm_memattr != (vm_memattr_t)pbm->pbm_memattr ||
+ !pmap_is_valid_memattr(map->pmap, pbm->pbm_memattr))
+ return (EINVAL);
+
+ /* Fetch the BAR physical base and length. */
+ pm = pci_find_bar(pcidev, pbm->pbm_reg);
+ if (pm == NULL)
+ return (EINVAL);
+ if (!pci_bar_enabled(pcidev, pm))
+ return (EBUSY); /* XXXKIB enable if _ACTIVATE */
+ if (!PCI_BAR_MEM(pm->pm_value))
+ return (EIO);
+ pbase = trunc_page(pm->pm_value);
+ plen = round_page(pm->pm_value + ((pci_addr_t)1 << pm->pm_size)) -
+ pbase;
+ prot = VM_PROT_READ | (((pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0) ?
+ VM_PROT_WRITE : 0);
+
+ /* Create vm structures and mmap. */
+ sg = sglist_alloc(1, M_WAITOK);
+ error = sglist_append_phys(sg, pbase, plen);
+ if (error != 0)
+ goto out;
+ obj = vm_pager_allocate(OBJT_SG, sg, plen, prot, 0, td->td_ucred);
+ if (obj == NULL) {
+ error = EIO;
+ goto out;
+ }
+ obj->memattr = pbm->pbm_memattr;
+ flags = MAP_SHARED;
+ addr = 0;
+ if ((pbm->pbm_flags & PCIIO_BAR_MMAP_FIXED) != 0) {
+ addr = (uintptr_t)pbm->pbm_map_base;
+ flags |= MAP_FIXED;
+ }
+ if ((pbm->pbm_flags & PCIIO_BAR_MMAP_EXCL) != 0)
+ flags |= MAP_CHECK_EXCL;
+ error = vm_mmap_object(map, &addr, plen, prot, prot, flags, obj, 0,
+ FALSE, td);
+ if (error != 0) {
+ vm_object_deallocate(obj);
+ goto out;
+ }
+ pbm->pbm_map_base = (void *)addr;
+ pbm->pbm_map_length = plen;
+ pbm->pbm_bar_off = pm->pm_value - pbase;
+ pbm->pbm_bar_length = (pci_addr_t)1 << pm->pm_size;
+
+out:
+ sglist_free(sg);
+ return (error);
+}
+#endif /* __rtems__ */
+
static int
pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
@@ -711,6 +799,9 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
struct pci_list_vpd_io *lvio;
struct pci_match_conf *pattern_buf;
struct pci_map *pm;
+#ifndef __rtems__
+ struct pci_bar_mmap *pbm;
+#endif /* __rtems__ */
size_t confsz, iolen;
int error, ionum, i, num_patterns;
union pci_conf_union pcu;
@@ -1055,6 +1146,20 @@ getconfexit:
}
error = pci_list_vpd(pcidev, lvio);
break;
+
+#ifndef __rtems__
+ case PCIOCBARMMAP:
+ pbm = (struct pci_bar_mmap *)data;
+ if ((flag & FWRITE) == 0 &&
+ (pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0)
+ return (EPERM);
+ pcidev = pci_find_dbsf(pbm->pbm_sel.pc_domain,
+ pbm->pbm_sel.pc_bus, pbm->pbm_sel.pc_dev,
+ pbm->pbm_sel.pc_func);
+ error = pcidev == NULL ? ENODEV : pci_bar_mmap(pcidev, pbm);
+ break;
+#endif /* __rtems__ */
+
default:
error = ENOTTY;
break;
diff --git a/freebsd/sys/dev/pci/pcivar.h b/freebsd/sys/dev/pci/pcivar.h
index 492b7117..21d9bd03 100644
--- a/freebsd/sys/dev/pci/pcivar.h
+++ b/freebsd/sys/dev/pci/pcivar.h
@@ -259,6 +259,66 @@ typedef struct {
extern uint32_t pci_numdevs;
+struct pci_device_table {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint16_t
+ match_flag_vendor:1,
+ match_flag_device:1,
+ match_flag_subvendor:1,
+ match_flag_subdevice:1,
+ match_flag_class:1,
+ match_flag_subclass:1,
+ match_flag_revid:1,
+ match_flag_unused:9;
+#else
+ uint16_t
+ match_flag_unused:9,
+ match_flag_revid:1,
+ match_flag_subclass:1,
+ match_flag_class:1,
+ match_flag_subdevice:1,
+ match_flag_subvendor:1,
+ match_flag_device:1,
+ match_flag_vendor:1;
+#endif
+ uint16_t vendor;
+ uint16_t device;
+ uint16_t subvendor;
+ uint16_t subdevice;
+ uint16_t class_id;
+ uint16_t subclass;
+ uint16_t revid;
+ uint16_t unused;
+ uintptr_t driver_data;
+ char *descr;
+};
+
+#define PCI_DEV(v, d) \
+ .match_flag_vendor = 1, .vendor = (v), \
+ .match_flag_device = 1, .device = (d)
+#define PCI_SUBDEV(sv, sd) \
+ .match_flag_subvendor = 1, .subvendor = (sv), \
+ .match_flag_subdevice = 1, .subdevice = (sd)
+#define PCI_CLASS(x) \
+ .match_flag_class = 1, .class_id = (x)
+#define PCI_SUBCLASS(x) \
+ .match_flag_subclass = 1, .subclass = (x)
+#define PCI_REVID(x) \
+ .match_flag_revid = 1, .revid = (x)
+#define PCI_DESCR(x) \
+ .descr = (x)
+#define PCI_PNP_STR \
+ "M16:mask;U16:vendor;U16:device;U16:subvendor;U16:subdevice;" \
+ "U16:class;U16:subclass;U16:revid;"
+#define PCI_PNP_INFO(table) \
+ MODULE_PNP_INFO(PCI_PNP_STR, pci, table, table, sizeof(table[0]), \
+ sizeof(table) / sizeof(table[0]))
+
+const struct pci_device_table *pci_match_device(device_t child,
+ const struct pci_device_table *id, size_t nelt);
+#define PCI_MATCH(child, table) \
+ pci_match_device(child, (table), nitems(table));
+
/* Only if the prerequisites are present */
#if defined(_SYS_BUS_H_) && defined(_SYS_PCIIO_H_)
struct pci_devinfo {
@@ -416,7 +476,7 @@ pci_get_vpd_readonly(device_t dev, const char *kw, const char **vptr)
static __inline int
pci_is_vga_ioport_range(rman_res_t start, rman_res_t end)
{
-
+
return (((start >= 0x3b0 && end <= 0x3bb) ||
(start >= 0x3c0 && end <= 0x3df)) ? 1 : 0);
}
@@ -622,6 +682,8 @@ bool pcie_flr(device_t dev, u_int max_delay, bool force);
int pcie_get_max_completion_timeout(device_t dev);
bool pcie_wait_for_pending_transactions(device_t dev, u_int max_delay);
+void pci_print_faulted_dev(void);
+
#ifdef BUS_SPACE_MAXADDR
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
#define PCI_DMA_BOUNDARY 0x100000000
diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_reg.h b/freebsd/sys/dev/rtwn/rtl8812a/r12a_reg.h
index 41cc160f..581e7078 100644
--- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_reg.h
+++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_reg.h
@@ -58,6 +58,16 @@
/* Bits for R92C_LEDCFG2. */
#define R12A_LEDCFG2_ENA 0x20
+/* Bits for R12A_RXDMA_PRO. */
+#define R12A_DMA_MODE 0x02
+#define R12A_BURST_CNT_M 0x0c
+#define R12A_BURST_CNT_S 2
+#define R12A_BURST_SZ_M 0x30
+#define R12A_BURST_SZ_S 4
+#define R12A_BURST_SZ_USB3 0
+#define R12A_BURST_SZ_USB2 1
+#define R12A_BURST_SZ_USB1 2
+
/* Bits for R12A_CCK_CHECK. */
#define R12A_CCK_CHECK_BCN1 0x20
#define R12A_CCK_CHECK_5GHZ 0x80
diff --git a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au.h b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au.h
index 55d132c7..1ea08a2d 100644
--- a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au.h
+++ b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au.h
@@ -37,6 +37,7 @@
*/
/* r12au_init.c */
void r12au_init_rx_agg(struct rtwn_softc *);
+void r12au_init_burstlen_usb2(struct rtwn_softc *);
void r12au_init_burstlen(struct rtwn_softc *);
void r12au_init_ampdu_fwhw(struct rtwn_softc *);
void r12au_init_ampdu(struct rtwn_softc *);
diff --git a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_init.c b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
index 29016424..26a83ce9 100644
--- a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_init.c
@@ -74,19 +74,32 @@ r12au_init_rx_agg(struct rtwn_softc *sc)
}
void
+r12au_init_burstlen_usb2(struct rtwn_softc *sc)
+{
+ const uint8_t dma_count = R12A_DMA_MODE | SM(R12A_BURST_CNT, 3);
+
+ if ((rtwn_read_1(sc, R92C_USB_INFO) & 0x30) == 0) {
+ /* Set burst packet length to 512 B. */
+ rtwn_setbits_1(sc, R12A_RXDMA_PRO, R12A_BURST_SZ_M,
+ dma_count | SM(R12A_BURST_SZ, R12A_BURST_SZ_USB2));
+ } else {
+ /* Set burst packet length to 64 B. */
+ rtwn_setbits_1(sc, R12A_RXDMA_PRO, R12A_BURST_SZ_M,
+ dma_count | SM(R12A_BURST_SZ, R12A_BURST_SZ_USB1));
+ }
+}
+
+void
r12au_init_burstlen(struct rtwn_softc *sc)
{
- if (rtwn_read_1(sc, R92C_TYPE_ID + 3) & 0x80) {
- if ((rtwn_read_1(sc, R92C_USB_INFO) & 0x30) == 0) {
- /* Set burst packet length to 512 B. */
- rtwn_setbits_1(sc, R12A_RXDMA_PRO, 0x20, 0x1e);
- } else {
- /* Set burst packet length to 64 B. */
- rtwn_setbits_1(sc, R12A_RXDMA_PRO, 0x10, 0x2e);
- }
- } else { /* USB 3.0 */
+ const uint8_t dma_count = R12A_DMA_MODE | SM(R12A_BURST_CNT, 3);
+
+ if (rtwn_read_1(sc, R92C_TYPE_ID + 3) & 0x80)
+ r12au_init_burstlen_usb2(sc);
+ else { /* USB 3.0 */
/* Set burst packet length to 1 KB. */
- rtwn_setbits_1(sc, R12A_RXDMA_PRO, 0x30, 0x0e);
+ rtwn_setbits_1(sc, R12A_RXDMA_PRO, R12A_BURST_SZ_M,
+ dma_count | SM(R12A_BURST_SZ, R12A_BURST_SZ_USB3));
rtwn_setbits_1(sc, 0xf008, 0x18, 0);
}
diff --git a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au.h b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au.h
index 60aa476c..a327d2ad 100644
--- a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au.h
+++ b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au.h
@@ -37,7 +37,6 @@
*/
/* r21au_init.c */
void r21au_init_tx_agg(struct rtwn_softc *);
-void r21au_init_burstlen(struct rtwn_softc *);
/* r21au_dfs.c */
void r21au_chan_check(void *, int);
diff --git a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
index 5cf721c2..976582ae 100644
--- a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
+++ b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c
@@ -137,7 +137,7 @@ r21a_attach_private(struct rtwn_softc *sc)
rs->rs_fix_spur = rtwn_nop_softc_chan;
rs->rs_set_band_2ghz = r21a_set_band_2ghz;
rs->rs_set_band_5ghz = r21a_set_band_5ghz;
- rs->rs_init_burstlen = r21au_init_burstlen;
+ rs->rs_init_burstlen = r12au_init_burstlen_usb2;
rs->rs_init_ampdu_fwhw = r21a_init_ampdu_fwhw;
rs->rs_crystalcap_write = r21a_crystalcap_write;
#ifndef RTWN_WITHOUT_UCODE
diff --git a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_init.c b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_init.c
index 31366e59..838e1b50 100644
--- a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_init.c
@@ -72,14 +72,3 @@ r21au_init_tx_agg(struct rtwn_softc *sc)
rtwn_write_1(sc, R21A_DWBCN1_CTRL, uc->tx_agg_desc_num << 1);
}
-void
-r21au_init_burstlen(struct rtwn_softc *sc)
-{
- if ((rtwn_read_1(sc, R92C_USB_INFO) & 0x30) == 0) {
- /* Set burst packet length to 512 B. */
- rtwn_setbits_1(sc, R12A_RXDMA_PRO, 0x20, 0x1e);
- } else {
- /* Set burst packet length to 64 B. */
- rtwn_setbits_1(sc, R12A_RXDMA_PRO, 0x10, 0x2e);
- }
-}
diff --git a/freebsd/sys/dev/sdhci/sdhci.c b/freebsd/sys/dev/sdhci/sdhci.c
index 39b9dc91..ccbf5d85 100644
--- a/freebsd/sys/dev/sdhci/sdhci.c
+++ b/freebsd/sys/dev/sdhci/sdhci.c
@@ -92,27 +92,49 @@ SYSCTL_INT(_hw_sdhci, OID_AUTO, quirk_set, CTLFLAG_RWTUN, &sdhci_quirk_set, 0,
#define WR_MULTI_4(slot, off, ptr, count) \
SDHCI_WRITE_MULTI_4((slot)->bus, (slot), (off), (ptr), (count))
+static void sdhci_acmd_irq(struct sdhci_slot *slot, uint16_t acmd_err);
static void sdhci_card_poll(void *arg);
static void sdhci_card_task(void *arg, int pending);
+static void sdhci_cmd_irq(struct sdhci_slot *slot, uint32_t intmask);
+static void sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask);
static int sdhci_exec_tuning(struct sdhci_slot *slot, bool reset);
+static void sdhci_handle_card_present_locked(struct sdhci_slot *slot,
+ bool is_present);
+static void sdhci_finish_command(struct sdhci_slot *slot);
+static void sdhci_init(struct sdhci_slot *slot);
+static void sdhci_read_block_pio(struct sdhci_slot *slot);
+static void sdhci_req_done(struct sdhci_slot *slot);
static void sdhci_req_wakeup(struct mmc_request *req);
+static void sdhci_reset(struct sdhci_slot *slot, uint8_t mask);
static void sdhci_retune(void *arg);
static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock);
+static void sdhci_set_power(struct sdhci_slot *slot, u_char power);
+static void sdhci_set_transfer_mode(struct sdhci_slot *slot,
+ struct mmc_data *data);
static void sdhci_start(struct sdhci_slot *slot);
+static void sdhci_timeout(void *arg);
+static void sdhci_start_command(struct sdhci_slot *slot,
+ struct mmc_command *cmd);
static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data);
+static void sdhci_write_block_pio(struct sdhci_slot *slot);
+static void sdhci_transfer_pio(struct sdhci_slot *slot);
#ifdef MMCCAM
/* CAM-related */
-int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock);
-static int sdhci_cam_update_ios(struct sdhci_slot *slot);
-static int sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb);
static void sdhci_cam_action(struct cam_sim *sim, union ccb *ccb);
+static int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot,
+ int proposed_clock);
+static void sdhci_cam_handle_mmcio(struct cam_sim *sim, union ccb *ccb);
static void sdhci_cam_poll(struct cam_sim *sim);
+static int sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb);
static int sdhci_cam_settran_settings(struct sdhci_slot *slot, union ccb *ccb);
+static int sdhci_cam_update_ios(struct sdhci_slot *slot);
#endif
/* helper routines */
static void sdhci_dumpregs(struct sdhci_slot *slot);
+static void sdhci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs,
+ int error);
static int slot_printf(struct sdhci_slot *slot, const char * fmt, ...)
__printflike(2, 3);
static uint32_t sdhci_tuning_intmask(struct sdhci_slot *slot);
@@ -1562,20 +1584,19 @@ sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data)
return;
mode = SDHCI_TRNS_BLK_CNT_EN;
- if (data->len > 512)
+ if (data->len > 512) {
mode |= SDHCI_TRNS_MULTI;
- if (data->flags & MMC_DATA_READ)
- mode |= SDHCI_TRNS_READ;
+ if (__predict_true(
#ifdef MMCCAM
- struct ccb_mmcio *mmcio;
- mmcio = &slot->ccb->mmcio;
- if (mmcio->stop.opcode == MMC_STOP_TRANSMISSION
- && !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP))
- mode |= SDHCI_TRNS_ACMD12;
+ slot->ccb->mmcio.stop.opcode == MMC_STOP_TRANSMISSION &&
#else
- if (slot->req->stop && !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP))
- mode |= SDHCI_TRNS_ACMD12;
+ slot->req->stop != NULL &&
#endif
+ !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)))
+ mode |= SDHCI_TRNS_ACMD12;
+ }
+ if (data->flags & MMC_DATA_READ)
+ mode |= SDHCI_TRNS_READ;
if (slot->flags & SDHCI_USE_DMA)
mode |= SDHCI_TRNS_DMA;
@@ -2158,18 +2179,16 @@ done:
}
static void
-sdhci_acmd_irq(struct sdhci_slot *slot)
+sdhci_acmd_irq(struct sdhci_slot *slot, uint16_t acmd_err)
{
- uint16_t err;
- err = RD4(slot, SDHCI_ACMD12_ERR);
if (!slot->curcmd) {
slot_printf(slot, "Got AutoCMD12 error 0x%04x, but "
- "there is no active command.\n", err);
+ "there is no active command.\n", acmd_err);
sdhci_dumpregs(slot);
return;
}
- slot_printf(slot, "Got AutoCMD12 error 0x%04x\n", err);
+ slot_printf(slot, "Got AutoCMD12 error 0x%04x\n", acmd_err);
sdhci_reset(slot, SDHCI_RESET_CMD);
}
@@ -2177,6 +2196,7 @@ void
sdhci_generic_intr(struct sdhci_slot *slot)
{
uint32_t intmask, present;
+ uint16_t val16;
SDHCI_LOCK(slot);
/* Read slot interrupt status. */
@@ -2190,6 +2210,7 @@ sdhci_generic_intr(struct sdhci_slot *slot)
/* Handle tuning error interrupt. */
if (__predict_false(intmask & SDHCI_INT_TUNEERR)) {
+ WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_TUNEERR);
slot_printf(slot, "Tuning error indicated\n");
slot->retune_req |= SDHCI_RETUNE_REQ_RESET;
if (slot->curcmd) {
@@ -2227,8 +2248,10 @@ sdhci_generic_intr(struct sdhci_slot *slot)
}
/* Handle AutoCMD12 error interrupt. */
if (intmask & SDHCI_INT_ACMD12ERR) {
+ /* Clearing SDHCI_INT_ACMD12ERR may clear SDHCI_ACMD12_ERR. */
+ val16 = RD2(slot, SDHCI_ACMD12_ERR);
WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_ACMD12ERR);
- sdhci_acmd_irq(slot);
+ sdhci_acmd_irq(slot, val16);
}
/* Handle bus power interrupt. */
if (intmask & SDHCI_INT_BUS_POWER) {
@@ -2574,7 +2597,9 @@ sdhci_cam_poll(struct cam_sim *sim)
return;
}
-int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock) {
+static int
+sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock)
+{
int max_clock, clock, i;
if (proposed_clock == 0)
diff --git a/freebsd/sys/dev/usb/controller/usb_controller.c b/freebsd/sys/dev/usb/controller/usb_controller.c
index a2633d0d..81901063 100644
--- a/freebsd/sys/dev/usb/controller/usb_controller.c
+++ b/freebsd/sys/dev/usb/controller/usb_controller.c
@@ -135,7 +135,6 @@ DRIVER_MODULE(usbus, ehci, usb_driver, usb_devclass, 0, 0);
DRIVER_MODULE(usbus, xhci, usb_driver, usb_devclass, 0, 0);
/* Device Only Drivers */
-DRIVER_MODULE(usbus, at91_udp, usb_driver, usb_devclass, 0, 0);
DRIVER_MODULE(usbus, musbotg, usb_driver, usb_devclass, 0, 0);
DRIVER_MODULE(usbus, uss820dci, usb_driver, usb_devclass, 0, 0);
DRIVER_MODULE(usbus, octusb, usb_driver, usb_devclass, 0, 0);
diff --git a/freebsd/sys/dev/usb/input/uep.c b/freebsd/sys/dev/usb/input/uep.c
index 701c8550..247bfb9c 100644
--- a/freebsd/sys/dev/usb/input/uep.c
+++ b/freebsd/sys/dev/usb/input/uep.c
@@ -34,6 +34,8 @@
* http://www.eeti.com.tw/pdf/Software%20Programming%20Guide_v2.0.pdf
*/
+#include <rtems/bsd/local/opt_evdev.h>
+
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/callout.h>
@@ -51,9 +53,14 @@
#include <dev/usb/usbhid.h>
#include <rtems/bsd/local/usbdevs.h>
+#ifdef EVDEV_SUPPORT
+#include <dev/evdev/input.h>
+#include <dev/evdev/evdev.h>
+#else
#include <sys/ioccom.h>
#include <sys/fcntl.h>
#include <sys/tty.h>
+#endif
#define USB_DEBUG_VAR uep_debug
#include <dev/usb/usb_debug.h>
@@ -92,11 +99,15 @@ struct uep_softc {
struct mtx mtx;
struct usb_xfer *xfer[UEP_N_TRANSFER];
+#ifdef EVDEV_SUPPORT
+ struct evdev_dev *evdev;
+#else
struct usb_fifo_sc fifo;
u_int pollrate;
u_int state;
#define UEP_ENABLED 0x01
+#endif
/* Reassembling buffer. */
u_char buf[UEP_PACKET_LEN_MAX];
@@ -109,6 +120,18 @@ static device_probe_t uep_probe;
static device_attach_t uep_attach;
static device_detach_t uep_detach;
+#ifdef EVDEV_SUPPORT
+
+static evdev_open_t uep_ev_open;
+static evdev_close_t uep_ev_close;
+
+static const struct evdev_methods uep_evdev_methods = {
+ .ev_open = &uep_ev_open,
+ .ev_close = &uep_ev_close,
+};
+
+#else /* !EVDEV_SUPPORT */
+
static usb_fifo_cmd_t uep_start_read;
static usb_fifo_cmd_t uep_stop_read;
static usb_fifo_open_t uep_open;
@@ -123,6 +146,7 @@ static struct usb_fifo_methods uep_fifo_methods = {
.f_stop_read = &uep_stop_read,
.basename[0] = "uep",
};
+#endif /* !EVDEV_SUPPORT */
static int
get_pkt_len(u_char *buf)
@@ -156,6 +180,9 @@ static void
uep_process_pkt(struct uep_softc *sc, u_char *buf)
{
int32_t x, y;
+#ifdef EVDEV_SUPPORT
+ int touch;
+#endif
if ((buf[0] & 0xFE) != 0x80) {
DPRINTF("bad input packet format 0x%.2x\n", buf[0]);
@@ -188,7 +215,17 @@ uep_process_pkt(struct uep_softc *sc, u_char *buf)
DPRINTFN(2, "x %u y %u\n", x, y);
+#ifdef EVDEV_SUPPORT
+ touch = buf[0] & (1 << 0);
+ if (touch) {
+ evdev_push_abs(sc->evdev, ABS_X, x);
+ evdev_push_abs(sc->evdev, ABS_Y, y);
+ }
+ evdev_push_key(sc->evdev, BTN_TOUCH, touch);
+ evdev_sync(sc->evdev);
+#else
uep_put_queue(sc, buf);
+#endif
}
static void
@@ -263,12 +300,13 @@ uep_intr_callback(struct usb_xfer *xfer, usb_error_t error)
}
case USB_ST_SETUP:
tr_setup:
+#ifndef EVDEV_SUPPORT
/* check if we can put more data into the FIFO */
- if (usb_fifo_put_bytes_max(sc->fifo.fp[USB_FIFO_RX]) != 0) {
- usbd_xfer_set_frame_len(xfer, 0,
- usbd_xfer_max_len(xfer));
- usbd_transfer_submit(xfer);
- }
+ if (usb_fifo_put_bytes_max(sc->fifo.fp[USB_FIFO_RX]) == 0)
+ break;
+#endif
+ usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer));
+ usbd_transfer_submit(xfer);
break;
default:
@@ -332,6 +370,28 @@ uep_attach(device_t dev)
goto detach;
}
+#ifdef EVDEV_SUPPORT
+ sc->evdev = evdev_alloc();
+ evdev_set_name(sc->evdev, device_get_desc(dev));
+ evdev_set_phys(sc->evdev, device_get_nameunit(dev));
+ evdev_set_id(sc->evdev, BUS_USB, uaa->info.idVendor,
+ uaa->info.idProduct, 0);
+ evdev_set_serial(sc->evdev, usb_get_serial(uaa->device));
+ evdev_set_methods(sc->evdev, sc, &uep_evdev_methods);
+ evdev_support_prop(sc->evdev, INPUT_PROP_DIRECT);
+ evdev_support_event(sc->evdev, EV_SYN);
+ evdev_support_event(sc->evdev, EV_ABS);
+ evdev_support_event(sc->evdev, EV_KEY);
+ evdev_support_key(sc->evdev, BTN_TOUCH);
+ evdev_support_abs(sc->evdev, ABS_X, 0, 0, UEP_MAX_X, 0, 0, 0);
+ evdev_support_abs(sc->evdev, ABS_Y, 0, 0, UEP_MAX_Y, 0, 0, 0);
+
+ error = evdev_register_mtx(sc->evdev, &sc->mtx);
+ if (error) {
+ DPRINTF("evdev_register_mtx error=%s\n", usbd_errstr(error));
+ goto detach;
+ }
+#else /* !EVDEV_SUPPORT */
error = usb_fifo_attach(uaa->device, sc, &sc->mtx, &uep_fifo_methods,
&sc->fifo, device_get_unit(dev), -1, uaa->info.bIfaceIndex,
UID_ROOT, GID_OPERATOR, 0644);
@@ -340,6 +400,7 @@ uep_attach(device_t dev)
DPRINTF("usb_fifo_attach error=%s\n", usbd_errstr(error));
goto detach;
}
+#endif /* !EVDEV_SUPPORT */
sc->buf_len = 0;
@@ -356,7 +417,11 @@ uep_detach(device_t dev)
{
struct uep_softc *sc = device_get_softc(dev);
+#ifdef EVDEV_SUPPORT
+ evdev_free(sc->evdev);
+#else
usb_fifo_detach(&sc->fifo);
+#endif
usbd_transfer_unsetup(sc->xfer, UEP_N_TRANSFER);
@@ -365,6 +430,32 @@ uep_detach(device_t dev)
return (0);
}
+#ifdef EVDEV_SUPPORT
+/*
+ * evdev close method (installed as .ev_close in uep_evdev_methods).
+ * Stops the UEP_INTR_DT interrupt transfer of the softc attached to the
+ * evdev device. sc->mtx must be held on entry (asserted below).
+ * Always returns 0.
+ */
+static int
+uep_ev_close(struct evdev_dev *evdev)
+{
+ struct uep_softc *sc = evdev_get_softc(evdev);
+
+ mtx_assert(&sc->mtx, MA_OWNED);
+ usbd_transfer_stop(sc->xfer[UEP_INTR_DT]);
+
+ return (0);
+}
+/*
+ * evdev open method (installed as .ev_open in uep_evdev_methods).
+ * Starts the UEP_INTR_DT interrupt transfer of the softc attached to the
+ * evdev device. sc->mtx must be held on entry (asserted below).
+ * Always returns 0.
+ */
+static int
+uep_ev_open(struct evdev_dev *evdev)
+{
+ struct uep_softc *sc = evdev_get_softc(evdev);
+
+ mtx_assert(&sc->mtx, MA_OWNED);
+ usbd_transfer_start(sc->xfer[UEP_INTR_DT]);
+
+ return (0);
+}
+
+#else /* !EVDEV_SUPPORT */
+
static void
uep_start_read(struct usb_fifo *fifo)
{
@@ -426,6 +517,7 @@ uep_close(struct usb_fifo *fifo, int fflags)
usb_fifo_free_buffer(fifo);
}
}
+#endif /* !EVDEV_SUPPORT */
static devclass_t uep_devclass;
@@ -444,5 +536,8 @@ static driver_t uep_driver = {
DRIVER_MODULE(uep, uhub, uep_driver, uep_devclass, NULL, NULL);
MODULE_DEPEND(uep, usb, 1, 1, 1);
+#ifdef EVDEV_SUPPORT
+MODULE_DEPEND(uep, evdev, 1, 1, 1);
+#endif
MODULE_VERSION(uep, 1);
USB_PNP_HOST_INFO(uep_devs);
diff --git a/freebsd/sys/dev/usb/input/ukbd.c b/freebsd/sys/dev/usb/input/ukbd.c
index 76fe76b6..8fb450bc 100644
--- a/freebsd/sys/dev/usb/input/ukbd.c
+++ b/freebsd/sys/dev/usb/input/ukbd.c
@@ -367,8 +367,10 @@ static device_detach_t ukbd_detach;
static device_resume_t ukbd_resume;
#ifdef EVDEV_SUPPORT
+static evdev_event_t ukbd_ev_event;
+
static const struct evdev_methods ukbd_evdev_methods = {
- .ev_event = evdev_ev_kbd_event,
+ .ev_event = ukbd_ev_event,
};
#endif
@@ -1474,6 +1476,22 @@ ukbd_resume(device_t dev)
return (0);
}
+#ifdef EVDEV_SUPPORT
+static void
+ukbd_ev_event(struct evdev_dev *evdev, uint16_t type, uint16_t code,
+ int32_t value)
+{
+ keyboard_t *kbd = evdev_get_softc(evdev);
+ /*
+ * evdev event method (installed as .ev_event in ukbd_evdev_methods).
+ * Only EV_LED and EV_REP events are forwarded to kbd_ev_event(), and
+ * only while the EVDEV_RCPT_HW_KBD bit is set in evdev_rcpt_mask; all
+ * other events are ignored. The forwarding call is made with Giant
+ * held.
+ * NOTE(review): presumably Giant is taken because the keyboard layer
+ * expects it -- confirm against kbd_ev_event().
+ */
+ if (evdev_rcpt_mask & EVDEV_RCPT_HW_KBD &&
+ (type == EV_LED || type == EV_REP)) {
+ mtx_lock(&Giant);
+ kbd_ev_event(kbd, type, code, value);
+ mtx_unlock(&Giant);
+ }
+}
+#endif
+
/* early keyboard probe, not supported */
static int
ukbd_configure(int flags)
diff --git a/freebsd/sys/dev/usb/input/ums.c b/freebsd/sys/dev/usb/input/ums.c
index 79bc2291..4a0d1f34 100644
--- a/freebsd/sys/dev/usb/input/ums.c
+++ b/freebsd/sys/dev/usb/input/ums.c
@@ -952,9 +952,9 @@ ums_reset_buf(struct ums_softc *sc)
#ifdef EVDEV_SUPPORT
static int
-ums_ev_open(struct evdev_dev *evdev, void *ev_softc)
+ums_ev_open(struct evdev_dev *evdev)
{
- struct ums_softc *sc = (struct ums_softc *)ev_softc;
+ struct ums_softc *sc = evdev_get_softc(evdev);
mtx_assert(&sc->sc_mtx, MA_OWNED);
@@ -968,10 +968,10 @@ ums_ev_open(struct evdev_dev *evdev, void *ev_softc)
return (0);
}
-static void
-ums_ev_close(struct evdev_dev *evdev, void *ev_softc)
+static int
+ums_ev_close(struct evdev_dev *evdev)
{
- struct ums_softc *sc = (struct ums_softc *)ev_softc;
+ struct ums_softc *sc = evdev_get_softc(evdev);
mtx_assert(&sc->sc_mtx, MA_OWNED);
@@ -979,6 +979,8 @@ ums_ev_close(struct evdev_dev *evdev, void *ev_softc)
if (sc->sc_fflags == 0)
ums_stop_rx(sc);
+
+ return (0);
}
#endif
diff --git a/freebsd/sys/dev/usb/net/if_ure.c b/freebsd/sys/dev/usb/net/if_ure.c
index 8a88feae..bcae02cb 100644
--- a/freebsd/sys/dev/usb/net/if_ure.c
+++ b/freebsd/sys/dev/usb/net/if_ure.c
@@ -70,6 +70,7 @@ SYSCTL_INT(_hw_usb_ure, OID_AUTO, debug, CTLFLAG_RWTUN, &ure_debug, 0,
static const STRUCT_USB_HOST_ID ure_devs[] = {
#define URE_DEV(v,p,i) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, i) }
URE_DEV(LENOVO, RTL8153, 0),
+ URE_DEV(NVIDIA, RTL8153, 0),
URE_DEV(REALTEK, RTL8152, URE_FLAG_8152),
URE_DEV(REALTEK, RTL8153, 0),
URE_DEV(TPLINK, RTL8153, 0),
diff --git a/freebsd/sys/dev/usb/serial/u3g.c b/freebsd/sys/dev/usb/serial/u3g.c
index 6f1cfdb1..8d72ef49 100644
--- a/freebsd/sys/dev/usb/serial/u3g.c
+++ b/freebsd/sys/dev/usb/serial/u3g.c
@@ -210,6 +210,7 @@ static const STRUCT_USB_HOST_ID u3g_devs[] = {
U3G_DEV(ALINK, 3G, 0),
U3G_DEV(ALINK, 3GU, 0),
U3G_DEV(ALINK, DWM652U5, 0),
+ U3G_DEV(ALINK, SIM7600E, 0),
U3G_DEV(AMOI, H01, 0),
U3G_DEV(AMOI, H01A, 0),
U3G_DEV(AMOI, H02, 0),
diff --git a/freebsd/sys/dev/usb/usb_hid.c b/freebsd/sys/dev/usb/usb_hid.c
index c2d93f3a..7ae052b7 100644
--- a/freebsd/sys/dev/usb/usb_hid.c
+++ b/freebsd/sys/dev/usb/usb_hid.c
@@ -76,6 +76,7 @@ static uint8_t hid_get_byte(struct hid_data *s, const uint16_t wSize);
#define MAXUSAGE 64
#define MAXPUSH 4
#define MAXID 16
+#define MAXLOCCNT 1024
struct hid_pos_data {
int32_t rid;
@@ -93,10 +94,10 @@ struct hid_data {
int32_t usage_last; /* last seen usage */
uint32_t loc_size; /* last seen size */
uint32_t loc_count; /* last seen count */
+ uint32_t ncount; /* end usage item count */
+ uint32_t icount; /* current usage item count */
uint8_t kindset; /* we have 5 kinds so 8 bits are enough */
uint8_t pushlevel; /* current pushlevel */
- uint8_t ncount; /* end usage item count */
- uint8_t icount; /* current usage item count */
uint8_t nusage; /* end "usages_min/max" index */
uint8_t iusage; /* current "usages_min/max" index */
uint8_t ousage; /* current "usages_min/max" offset */
@@ -349,18 +350,19 @@ hid_get_item(struct hid_data *s, struct hid_item *h)
switch (bTag) {
case 8: /* Input */
c->kind = hid_input;
- c->flags = dval;
ret:
+ c->flags = dval;
c->loc.count = s->loc_count;
c->loc.size = s->loc_size;
if (c->flags & HIO_VARIABLE) {
/* range check usage count */
- if (c->loc.count > 255) {
+ if (c->loc.count > MAXLOCCNT) {
DPRINTFN(0, "Number of "
- "items(%u) truncated to 255\n",
- (unsigned)(c->loc.count));
- s->ncount = 255;
+ "items(%u) truncated to %u\n",
+ (unsigned)(c->loc.count),
+ MAXLOCCNT);
+ s->ncount = MAXLOCCNT;
} else
s->ncount = c->loc.count;
@@ -376,7 +378,6 @@ hid_get_item(struct hid_data *s, struct hid_item *h)
case 9: /* Output */
c->kind = hid_output;
- c->flags = dval;
goto ret;
case 10: /* Collection */
c->kind = hid_collection;
@@ -387,7 +388,6 @@ hid_get_item(struct hid_data *s, struct hid_item *h)
return (1);
case 11: /* Feature */
c->kind = hid_feature;
- c->flags = dval;
goto ret;
case 12: /* End collection */
c->kind = hid_endcollection;
diff --git a/freebsd/sys/dev/usb/usb_request.c b/freebsd/sys/dev/usb/usb_request.c
index cb69ce0e..d2a15f3c 100644
--- a/freebsd/sys/dev/usb/usb_request.c
+++ b/freebsd/sys/dev/usb/usb_request.c
@@ -992,7 +992,7 @@ usbd_req_get_desc(struct usb_device *udev,
uint8_t retries)
{
struct usb_device_request req;
- uint8_t *buf;
+ uint8_t *buf = desc;
usb_error_t err;
DPRINTFN(4, "id=%d, type=%d, index=%d, max_len=%d\n",
@@ -1014,6 +1014,32 @@ usbd_req_get_desc(struct usb_device *udev,
err = usbd_do_request_flags(udev, mtx, &req,
desc, 0, NULL, 500 /* ms */);
+ if (err != 0 && err != USB_ERR_TIMEOUT &&
+ min_len != max_len) {
+ /* clear descriptor data */
+ memset(desc, 0, max_len);
+
+ /* try to read full descriptor length */
+ USETW(req.wLength, max_len);
+
+ err = usbd_do_request_flags(udev, mtx, &req,
+ desc, USB_SHORT_XFER_OK, NULL, 500 /* ms */);
+
+ if (err == 0) {
+ /* verify length */
+ if (buf[0] > max_len)
+ buf[0] = max_len;
+ else if (buf[0] < 2)
+ err = USB_ERR_INVAL;
+
+ min_len = buf[0];
+
+ /* enforce descriptor type */
+ buf[1] = type;
+ goto done;
+ }
+ }
+
if (err) {
if (!retries) {
goto done;
@@ -1024,7 +1050,6 @@ usbd_req_get_desc(struct usb_device *udev,
continue;
}
- buf = desc;
if (min_len == max_len) {
diff --git a/freebsd/sys/dev/usb/wlan/if_run.c b/freebsd/sys/dev/usb/wlan/if_run.c
index 41f97ba4..3bd247e3 100644
--- a/freebsd/sys/dev/usb/wlan/if_run.c
+++ b/freebsd/sys/dev/usb/wlan/if_run.c
@@ -210,6 +210,7 @@ static const STRUCT_USB_HOST_ID run_devs[] = {
RUN_DEV(CYBERTAN, RT2870),
RUN_DEV(DLINK, RT2870),
RUN_DEV(DLINK, RT3072),
+ RUN_DEV(DLINK, DWA125A3),
RUN_DEV(DLINK, DWA127),
RUN_DEV(DLINK, DWA140B3),
RUN_DEV(DLINK, DWA160B2),
@@ -303,6 +304,7 @@ static const STRUCT_USB_HOST_ID run_devs[] = {
RUN_DEV(RALINK, RT3572),
RUN_DEV(RALINK, RT3573),
RUN_DEV(RALINK, RT5370),
+ RUN_DEV(RALINK, RT5372),
RUN_DEV(RALINK, RT5572),
RUN_DEV(RALINK, RT8070),
RUN_DEV(SAMSUNG, WIS09ABGN),
diff --git a/freebsd/sys/i386/include/machine/cpufunc.h b/freebsd/sys/i386/include/machine/cpufunc.h
index 0147abe9..c640b569 100644
--- a/freebsd/sys/i386/include/machine/cpufunc.h
+++ b/freebsd/sys/i386/include/machine/cpufunc.h
@@ -374,6 +374,15 @@ rdtsc(void)
return (rv);
}
+static __inline uint64_t
+rdtscp(void)
+{
+ uint64_t rv;
+ /*
+ * Execute the RDTSCP instruction and return its 64-bit result.
+ * The =A output constraint binds rv to the EDX:EAX register pair on
+ * i386. ECX is declared as clobbered because the instruction also
+ * writes that register (the IA32_TSC_AUX value per the Intel SDM);
+ * that value is discarded here.
+ */
+ __asm __volatile("rdtscp" : "=A" (rv) : : "ecx");
+ return (rv);
+}
+
static __inline uint32_t
rdtsc32(void)
{
diff --git a/freebsd/sys/i386/include/machine/intr_machdep.h b/freebsd/sys/i386/include/machine/intr_machdep.h
index 1ac8f489..a0b28387 100644
--- a/freebsd/sys/i386/include/machine/intr_machdep.h
+++ b/freebsd/sys/i386/include/machine/intr_machdep.h
@@ -1,190 +1,6 @@
/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
+ * This file is in the public domain.
*/
+/* $FreeBSD$ */
-#ifndef __MACHINE_INTR_MACHDEP_H__
-#define __MACHINE_INTR_MACHDEP_H__
-
-#ifdef _KERNEL
-
-/*
- * The maximum number of I/O interrupts we allow. This number is rather
- * arbitrary as it is just the maximum IRQ resource value. The interrupt
- * source for a given IRQ maps that I/O interrupt to device interrupt
- * source whether it be a pin on an interrupt controller or an MSI interrupt.
- * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device
- * interrupts allocate IDT vectors on demand. Currently we have 191 IDT
- * vectors available for device interrupts. On many systems with I/O APICs,
- * a lot of the IRQs are not used, so this number can be much larger than
- * 191 and still be safe since only interrupt sources in actual use will
- * allocate IDT vectors.
- *
- * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
- * IRQ values from 256 to 767 are used by MSI. When running under the Xen
- * Hypervisor, IRQ values from 768 to 4863 are available for binding to
- * event channel events. We leave 255 unused to avoid confusion since 255 is
- * used in PCI to indicate an invalid IRQ.
- */
-#define NUM_MSI_INTS 512
-#define FIRST_MSI_INT 256
-#ifdef XENHVM
-#include <xen/xen-os.h>
-#include <xen/interface/event_channel.h>
-#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
-#define FIRST_EVTCHN_INT \
- (FIRST_MSI_INT + NUM_MSI_INTS)
-#define LAST_EVTCHN_INT \
- (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#else /* !XENHVM */
-#define NUM_EVTCHN_INTS 0
-#endif
-#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)
-
-/*
- * Default base address for MSI messages on x86 platforms.
- */
-#define MSI_INTEL_ADDR_BASE 0xfee00000
-
-/*
- * - 1 ??? dummy counter.
- * - 2 counters for each I/O interrupt.
- * - 1 counter for each CPU for lapic timer.
- * - 9 counters for each CPU for IPI counters for SMP.
- */
-#ifdef SMP
-#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU)
-#else
-#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1)
-#endif
-
-#ifndef LOCORE
-
-typedef void inthand_t(void);
-
-#define IDTVEC(name) __CONCAT(X,name)
-
-struct intsrc;
-
-/*
- * Methods that a PIC provides to mask/unmask a given interrupt source,
- * "turn on" the interrupt on the CPU side by setting up an IDT entry, and
- * return the vector associated with this source.
- */
-struct pic {
- void (*pic_enable_source)(struct intsrc *);
- void (*pic_disable_source)(struct intsrc *, int);
- void (*pic_eoi_source)(struct intsrc *);
- void (*pic_enable_intr)(struct intsrc *);
- void (*pic_disable_intr)(struct intsrc *);
- int (*pic_vector)(struct intsrc *);
- int (*pic_source_pending)(struct intsrc *);
- void (*pic_suspend)(struct pic *);
- void (*pic_resume)(struct pic *, bool suspend_cancelled);
- int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
- enum intr_polarity);
- int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
- void (*pic_reprogram_pin)(struct intsrc *);
- TAILQ_ENTRY(pic) pics;
-};
-
-/* Flags for pic_disable_source() */
-enum {
- PIC_EOI,
- PIC_NO_EOI,
-};
-
-/*
- * An interrupt source. The upper-layer code uses the PIC methods to
- * control a given source. The lower-layer PIC drivers can store additional
- * private data in a given interrupt source such as an interrupt pin number
- * or an I/O APIC pointer.
- */
-struct intsrc {
- struct pic *is_pic;
- struct intr_event *is_event;
- u_long *is_count;
- u_long *is_straycount;
- u_int is_index;
- u_int is_handlers;
- u_int is_domain;
- u_int is_cpu;
-};
-
-struct trapframe;
-
-#ifdef SMP
-extern cpuset_t intr_cpus;
-#endif
-extern struct mtx icu_lock;
-extern int elcr_found;
-#ifdef SMP
-extern int msix_disable_migration;
-#endif
-
-#ifndef DEV_ATPIC
-void atpic_reset(void);
-#endif
-/* XXX: The elcr_* prototypes probably belong somewhere else. */
-int elcr_probe(void);
-enum intr_trigger elcr_read_trigger(u_int irq);
-void elcr_resume(void);
-void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
-#ifdef SMP
-void intr_add_cpu(u_int cpu);
-#endif
-int intr_add_handler(const char *name, int vector, driver_filter_t filter,
- driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
- int domain);
-#ifdef SMP
-int intr_bind(u_int vector, u_char cpu);
-#endif
-int intr_config_intr(int vector, enum intr_trigger trig,
- enum intr_polarity pol);
-int intr_describe(u_int vector, void *ih, const char *descr);
-void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int intr_next_cpu(int domain);
-struct intsrc *intr_lookup_source(int vector);
-int intr_register_pic(struct pic *pic);
-int intr_register_source(struct intsrc *isrc);
-int intr_remove_handler(void *cookie);
-void intr_resume(bool suspend_cancelled);
-void intr_suspend(void);
-void intr_reprogram(void);
-void intrcnt_add(const char *name, u_long **countp);
-void nexus_add_irq(u_long irq);
-int msi_alloc(device_t dev, int count, int maxcount, int *irqs);
-void msi_init(void);
-int msi_map(int irq, uint64_t *addr, uint32_t *data);
-int msi_release(int* irqs, int count);
-int msix_alloc(device_t dev, int *irq);
-int msix_release(int irq);
-
-#endif /* !LOCORE */
-#endif /* _KERNEL */
-#endif /* !__MACHINE_INTR_MACHDEP_H__ */
+#include <x86/intr_machdep.h>
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index 42afff5e..fa4951d8 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -122,6 +122,18 @@ int bootverbose = BOOTVERBOSE;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
"Control the output of verbose kernel messages");
+#ifdef VERBOSE_SYSINIT
+/*
+ * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to
+ * dictate the default VERBOSE_SYSINIT behavior. Significant values for this
+ * option and associated tunable are:
+ * - 0, 'compiled in but silent by default'
+ * - 1, 'compiled in but verbose by default' (default)
+ */
+int verbose_sysinit = VERBOSE_SYSINIT;
+TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit);
+#endif
+
#ifdef INVARIANTS
FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
#endif
@@ -287,7 +299,7 @@ restart:
continue;
#if defined(VERBOSE_SYSINIT)
- if ((*sipp)->subsystem > last) {
+ if ((*sipp)->subsystem > last && verbose_sysinit != 0) {
verbose = 1;
last = (*sipp)->subsystem;
printf("subsystem %x\n", last);
@@ -526,6 +538,7 @@ proc0_init(void *dummy __unused)
p->p_peers = 0;
p->p_leader = p;
p->p_reaper = p;
+ p->p_treeflag |= P_TREE_REAPER;
LIST_INIT(&p->p_reaplist);
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
@@ -642,17 +655,23 @@ proc0_post(void *dummy __unused)
*/
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
microuptime(&p->p_stats->p_start);
PROC_STATLOCK(p);
rufetch(p, &ru); /* Clears thread stats */
- PROC_STATUNLOCK(p);
p->p_rux.rux_runtime = 0;
p->p_rux.rux_uticks = 0;
p->p_rux.rux_sticks = 0;
p->p_rux.rux_iticks = 0;
+ PROC_STATUNLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
td->td_runtime = 0;
}
+ PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
PCPU_SET(switchtime, cpu_ticks());
@@ -857,7 +876,6 @@ create_init(const void *udata __unused)
PROC_LOCK(initproc);
initproc->p_flag |= P_SYSTEM | P_INMEM;
initproc->p_treeflag |= P_TREE_REAPER;
- LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
oldcred = initproc->p_ucred;
crcopy(newcred, oldcred);
#ifdef MAC
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 33fca549..25a9518f 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -179,6 +179,10 @@ static int filt_fileattach(struct knote *kn);
static void filt_timerexpire(void *knx);
static int filt_timerattach(struct knote *kn);
static void filt_timerdetach(struct knote *kn);
+static void filt_timerstart(struct knote *kn, sbintime_t to);
+static void filt_timertouch(struct knote *kn, struct kevent *kev,
+ u_long type);
+static int filt_timervalidate(struct knote *kn, sbintime_t *to);
static int filt_timer(struct knote *kn, long hint);
static int filt_userattach(struct knote *kn);
static void filt_userdetach(struct knote *kn);
@@ -209,6 +213,7 @@ static struct filterops timer_filtops = {
.f_attach = filt_timerattach,
.f_detach = filt_timerdetach,
.f_event = filt_timer,
+ .f_touch = filt_timertouch,
};
static struct filterops user_filtops = {
.f_attach = filt_userattach,
@@ -738,29 +743,44 @@ filt_timerexpire(void *knx)
* data contains amount of time to sleep
*/
static int
-filt_timerattach(struct knote *kn)
+filt_timervalidate(struct knote *kn, sbintime_t *to)
{
- struct kq_timer_cb_data *kc;
struct bintime bt;
- sbintime_t to, sbt;
- unsigned int ncallouts;
+ sbintime_t sbt;
if (kn->kn_sdata < 0)
return (EINVAL);
if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
kn->kn_sdata = 1;
- /* Only precision unit are supported in flags so far */
+ /*
+ * The only fflags values supported are the timer unit
+ * (precision) and the absolute time indicator.
+ */
if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0)
return (EINVAL);
- to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
+ *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
getboottimebin(&bt);
sbt = bttosbt(bt);
- to -= sbt;
+ *to -= sbt;
}
- if (to < 0)
+ if (*to < 0)
return (EINVAL);
+ return (0);
+}
+
+static int
+filt_timerattach(struct knote *kn)
+{
+ struct kq_timer_cb_data *kc;
+ sbintime_t to;
+ unsigned int ncallouts;
+ int error;
+
+ error = filt_timervalidate(kn, &to);
+ if (error != 0)
+ return (error);
do {
ncallouts = kq_ncallouts;
@@ -773,6 +793,17 @@ filt_timerattach(struct knote *kn)
kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
callout_init(&kc->c, 1);
+ filt_timerstart(kn, to);
+
+ return (0);
+}
+
+static void
+filt_timerstart(struct knote *kn, sbintime_t to)
+{
+ struct kq_timer_cb_data *kc;
+
+ kc = kn->kn_ptr.p_v;
if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
kc->next = to;
kc->to = 0;
@@ -782,8 +813,6 @@ filt_timerattach(struct knote *kn)
}
callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
PCPU_GET(cpuid), C_ABSOLUTE);
-
- return (0);
}
static void
@@ -800,6 +829,73 @@ filt_timerdetach(struct knote *kn)
kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */
}
+static void
+filt_timertouch(struct knote *kn, struct kevent *kev, u_long type)
+{
+ struct kq_timer_cb_data *kc;
+ struct kqueue *kq;
+ sbintime_t to;
+ int error;
+ /*
+ * f_touch method of timer_filtops.
+ *
+ * kn: the timer knote being touched.
+ * kev: for EVENT_REGISTER, the caller-supplied event carrying the
+ * new flags/fflags/data; for EVENT_PROCESS, the output slot
+ * that receives the event being delivered.
+ * type: EVENT_REGISTER or EVENT_PROCESS; any other value panics.
+ */
+ switch (type) {
+ case EVENT_REGISTER:
+ /* Handle re-added timers that update data/fflags */
+ if (kev->flags & EV_ADD) {
+ kc = kn->kn_ptr.p_v;
+
+ /* Drain any existing callout. */
+ callout_drain(&kc->c);
+
+ /* Throw away any existing undelivered record
+ * of the timer expiration. This is done under
+ * the presumption that if a process is
+ * re-adding this timer with new parameters,
+ * it is no longer interested in what may have
+ * happened under the old parameters. If it is
+ * interested, it can wait for the expiration,
+ * delete the old timer definition, and then
+ * add the new one.
+ *
+ * This has to be done while the kq is locked:
+ * - if enqueued, dequeue
+ * - make it no longer active
+ * - clear the count of expiration events
+ */
+ kq = kn->kn_kq;
+ KQ_LOCK(kq);
+ if (kn->kn_status & KN_QUEUED)
+ knote_dequeue(kn);
+
+ kn->kn_status &= ~KN_ACTIVE;
+ kn->kn_data = 0;
+ KQ_UNLOCK(kq);
+
+ /* Reschedule timer based on new data/fflags */
+ kn->kn_sfflags = kev->fflags;
+ kn->kn_sdata = kev->data;
+ error = filt_timervalidate(kn, &to);
+ if (error != 0) {
+ /*
+ * The new parameters were rejected: flag the knote
+ * with EV_ERROR and store the error code in kn_data.
+ * The timer is not restarted in this case.
+ */
+ kn->kn_flags |= EV_ERROR;
+ kn->kn_data = error;
+ } else
+ filt_timerstart(kn, to);
+ }
+ break;
+ /*
+ * Delivery: copy the knote event out to the caller, then reset
+ * the accumulated data and fflags if the knote is EV_CLEAR.
+ */
+ case EVENT_PROCESS:
+ *kev = kn->kn_kevent;
+ if (kn->kn_flags & EV_CLEAR) {
+ kn->kn_data = 0;
+ kn->kn_fflags = 0;
+ }
+ break;
+ /* Any other touch type is treated as a programming error. */
+ default:
+ panic("filt_timertouch() - invalid type (%ld)", type);
+ break;
+ }
+}
+
static int
filt_timer(struct knote *kn, long hint)
{
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 8f6c2a6d..04914e93 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -175,12 +175,13 @@ ithread_update(struct intr_thread *ithd)
ie = ithd->it_event;
td = ithd->it_thread;
+ mtx_assert(&ie->ie_lock, MA_OWNED);
/* Determine the overall priority of this event. */
- if (TAILQ_EMPTY(&ie->ie_handlers))
+ if (CK_SLIST_EMPTY(&ie->ie_handlers))
pri = PRI_MAX_ITHD;
else
- pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri;
+ pri = CK_SLIST_FIRST(&ie->ie_handlers)->ih_pri;
/* Update name and priority. */
#ifndef __rtems__
@@ -218,7 +219,7 @@ intr_event_update(struct intr_event *ie)
space = 1;
/* Run through all the handlers updating values. */
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
sizeof(ie->ie_fullname)) {
strcat(ie->ie_fullname, " ");
@@ -280,7 +281,7 @@ intr_event_create(struct intr_event **event, void *source, int flags, int irq,
ie->ie_flags = flags;
ie->ie_irq = irq;
ie->ie_cpu = NOCPU;
- TAILQ_INIT(&ie->ie_handlers);
+ CK_SLIST_INIT(&ie->ie_handlers);
mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
va_start(ap, fmt);
@@ -402,7 +403,7 @@ intr_lookup(int irq)
TAILQ_FOREACH(ie, &event_list, ie_list)
if (ie->ie_irq == irq &&
(ie->ie_flags & IE_SOFT) == 0 &&
- TAILQ_FIRST(&ie->ie_handlers) != NULL)
+ CK_SLIST_FIRST(&ie->ie_handlers) != NULL)
break;
mtx_unlock(&event_lock);
return (ie);
@@ -498,7 +499,7 @@ intr_event_destroy(struct intr_event *ie)
mtx_lock(&event_lock);
mtx_lock(&ie->ie_lock);
- if (!TAILQ_EMPTY(&ie->ie_handlers)) {
+ if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
mtx_unlock(&ie->ie_lock);
mtx_unlock(&event_lock);
return (EBUSY);
@@ -532,7 +533,7 @@ ithread_create(const char *name)
error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
&td, RFSTOPPED | RFHIGHPID,
- 0, "intr", "%s", name);
+ 0, "intr", "%s", name);
if (error)
panic("kproc_create() failed with %d", error);
thread_lock(td);
@@ -573,6 +574,7 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
enum intr_type flags, void **cookiep)
{
struct intr_handler *ih, *temp_ih;
+ struct intr_handler **prevptr;
struct intr_thread *it;
if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
@@ -595,9 +597,9 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
/* We can only have one exclusive handler in a event. */
mtx_lock(&ie->ie_lock);
- if (!TAILQ_EMPTY(&ie->ie_handlers)) {
+ if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
if ((flags & INTR_EXCL) ||
- (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
+ (CK_SLIST_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
mtx_unlock(&ie->ie_lock);
free(ih, M_ITHREAD);
return (EINVAL);
@@ -622,14 +624,12 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
}
/* Add the new handler to the event in priority order. */
- TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH_PREVPTR(temp_ih, prevptr, &ie->ie_handlers, ih_next) {
if (temp_ih->ih_pri > ih->ih_pri)
break;
}
- if (temp_ih == NULL)
- TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
- else
- TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
+ CK_SLIST_INSERT_PREVPTR(prevptr, temp_ih, ih, ih_next);
+
intr_event_update(ie);
CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
@@ -656,7 +656,7 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie,
mtx_lock(&ie->ie_lock);
#ifdef INVARIANTS
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (ih == cookie)
break;
}
@@ -718,6 +718,45 @@ intr_handler_source(void *cookie)
}
/*
+ * If intr_event_handle() is running in the ISR context at the time of the call,
+ * then wait for it to complete.
+ */
+static void
+intr_event_barrier(struct intr_event *ie)
+{
+ int phase;
+ /*
+ * The caller must hold ie_lock (asserted below). The local variable
+ * phase records the phase that was current on entry; the function
+ * flips ie_phase to the other value and then busy-waits, using
+ * cpu_spinwait(), until the active counter of the recorded phase
+ * drops to zero.
+ */
+ mtx_assert(&ie->ie_lock, MA_OWNED);
+ phase = ie->ie_phase;
+
+ /*
+ * Switch phase to direct future interrupts to the other active counter.
+ * Make sure that any preceding stores are visible before the switch.
+ */
+ KASSERT(ie->ie_active[!phase] == 0, ("idle phase has activity"));
+ atomic_store_rel_int(&ie->ie_phase, !phase);
+
+ /*
+ * This code cooperates with wait-free iteration of ie_handlers
+ * in intr_event_handle.
+ * Make sure that the removal and the phase update are not reordered
+ * with the active count check.
+ * Note that no combination of acquire and release fences can provide
+ * that guarantee as Store->Load sequences can always be reordered.
+ */
+ atomic_thread_fence_seq_cst();
+
+ /*
+ * Now wait on the inactive phase.
+ * The acquire fence is needed so that all post-barrier accesses
+ * are after the check.
+ */
+ while (ie->ie_active[phase] > 0)
+ cpu_spinwait();
+ atomic_thread_fence_acq();
+}
+
+/*
* Sleep until an ithread finishes executing an interrupt handler.
*
* XXX Doesn't currently handle interrupt filters or fast interrupt
@@ -757,16 +796,14 @@ _intr_drain(int irq)
}
#endif /* __rtems__ */
-
#ifndef __rtems__
int
intr_event_remove_handler(void *cookie)
{
struct intr_handler *handler = (struct intr_handler *)cookie;
struct intr_event *ie;
-#ifdef INVARIANTS
struct intr_handler *ih;
-#endif
+ struct intr_handler **prevptr;
#ifdef notyet
int dead;
#endif
@@ -777,60 +814,48 @@ intr_event_remove_handler(void *cookie)
KASSERT(ie != NULL,
("interrupt handler \"%s\" has a NULL interrupt event",
handler->ih_name));
+
mtx_lock(&ie->ie_lock);
CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
ie->ie_name);
-#ifdef INVARIANTS
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
+ CK_SLIST_FOREACH_PREVPTR(ih, prevptr, &ie->ie_handlers, ih_next) {
if (ih == handler)
- goto ok;
- mtx_unlock(&ie->ie_lock);
- panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
- ih->ih_name, ie->ie_name);
-ok:
-#endif
+ break;
+ }
+ if (ih == NULL) {
+ panic("interrupt handler \"%s\" not found in "
+ "interrupt event \"%s\"", handler->ih_name, ie->ie_name);
+ }
+
/*
- * If there is no ithread, then just remove the handler and return.
- * XXX: Note that an INTR_FAST handler might be running on another
- * CPU!
+ * If there is no ithread, then directly remove the handler. Note that
+ * intr_event_handle() iterates ie_handlers in a lock-less fashion, so
+ * care needs to be taken to keep ie_handlers consistent and to free
+ * the removed handler only when ie_handlers is quiescent.
*/
if (ie->ie_thread == NULL) {
- TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
+ CK_SLIST_REMOVE_PREVPTR(prevptr, ih, ih_next);
+ intr_event_barrier(ie);
+ intr_event_update(ie);
mtx_unlock(&ie->ie_lock);
free(handler, M_ITHREAD);
return (0);
}
/*
- * If the interrupt thread is already running, then just mark this
- * handler as being dead and let the ithread do the actual removal.
- *
- * During a cold boot while cold is set, msleep() does not sleep,
- * so we have to remove the handler here rather than letting the
- * thread do it.
+ * Let the interrupt thread do the job.
+ * The interrupt source is disabled when the interrupt thread is
+ * running, so it does not have to worry about interaction with
+ * intr_event_handle().
*/
- thread_lock(ie->ie_thread->it_thread);
- if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) {
- handler->ih_flags |= IH_DEAD;
-
- /*
- * Ensure that the thread will process the handler list
- * again and remove this handler if it has already passed
- * it on the list.
- *
- * The release part of the following store ensures
- * that the update of ih_flags is ordered before the
- * it_need setting. See the comment before
- * atomic_cmpset_acq(&ithd->it_need, ...) operation in
- * the ithread_execute_handlers().
- */
- atomic_store_rel_int(&ie->ie_thread->it_need, 1);
- } else
- TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
- thread_unlock(ie->ie_thread->it_thread);
+ KASSERT((handler->ih_flags & IH_DEAD) == 0,
+ ("duplicate handle remove"));
+ handler->ih_flags |= IH_DEAD;
+ intr_event_schedule_thread(ie);
while (handler->ih_flags & IH_DEAD)
msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
intr_event_update(ie);
+
#ifdef notyet
/*
* XXX: This could be bad in the case of ppbus(8). Also, I think
@@ -838,8 +863,8 @@ ok:
* interrupt.
*/
dead = 1;
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
- if (!(ih->ih_flags & IH_FAST)) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ if (ih->ih_handler != NULL) {
dead = 0;
break;
}
@@ -866,7 +891,7 @@ intr_event_schedule_thread(struct intr_event *ie)
/*
* If no ithread or no handlers, then we have a stray interrupt.
*/
- if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) ||
+ if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers) ||
ie->ie_thread == NULL)
return (EINVAL);
@@ -881,7 +906,7 @@ intr_event_schedule_thread(struct intr_event *ie)
if (ie->ie_flags & IE_ENTROPY) {
entropy.event = (uintptr_t)ie;
entropy.td = ctd;
- random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT);
+ random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT);
}
#ifndef __rtems__
@@ -981,7 +1006,7 @@ swi_sched(void *cookie, int flags)
entropy.event = (uintptr_t)ih;
entropy.td = curthread;
- random_harvest_queue(&entropy, sizeof(entropy), 1, RANDOM_SWI);
+ random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
/*
* Set ih_need for this handler so that if the ithread is already
@@ -1012,32 +1037,37 @@ swi_remove(void *cookie)
return (intr_event_remove_handler(cookie));
}
-
-
#endif /* __rtems__ */
-/*
- * This is a public function for use by drivers that mux interrupt
- * handlers for child devices from their interrupt handler.
- */
-void
+
+static void
intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
{
- struct intr_handler *ih, *ihn;
+ struct intr_handler *ih, *ihn, *ihp;
- TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
+ ihp = NULL;
+ CK_SLIST_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
/*
* If this handler is marked for death, remove it from
* the list of handlers and wake up the sleeper.
*/
if (ih->ih_flags & IH_DEAD) {
mtx_lock(&ie->ie_lock);
- TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
+ if (ihp == NULL)
+ CK_SLIST_REMOVE_HEAD(&ie->ie_handlers, ih_next);
+ else
+ CK_SLIST_REMOVE_AFTER(ihp, ih_next);
ih->ih_flags &= ~IH_DEAD;
wakeup(ih);
mtx_unlock(&ie->ie_lock);
continue;
}
+ /*
+ * Now that we know that the current element won't be removed
+ * update the previous element.
+ */
+ ihp = ih;
+
/* Skip filter only handlers */
if (ih->ih_handler == NULL)
continue;
@@ -1226,6 +1256,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
struct trapframe *oldframe;
struct thread *td;
int ret, thread;
+ int phase;
td = curthread;
@@ -1234,7 +1265,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
#endif
/* An interrupt with no event or handlers is a stray interrupt. */
- if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
+ if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers))
return (EINVAL);
/*
@@ -1249,7 +1280,17 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
critical_enter();
oldframe = td->td_intr_frame;
td->td_intr_frame = frame;
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+
+ phase = ie->ie_phase;
+ atomic_add_int(&ie->ie_active[phase], 1);
+
+ /*
+ * This fence is required to ensure that no later loads are
+ * re-ordered before the ie_active store.
+ */
+ atomic_thread_fence_seq_cst();
+
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (ih->ih_filter == NULL) {
thread = 1;
continue;
@@ -1286,6 +1327,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
thread = 1;
}
}
+ atomic_add_rel_int(&ie->ie_active[phase], -1);
+
td->td_intr_frame = oldframe;
if (thread) {
@@ -1295,7 +1338,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
if (ie->ie_post_filter != NULL)
ie->ie_post_filter(ie->ie_source);
}
-
+
/* Schedule the ithread if needed. */
if (thread) {
int error __unused;
@@ -1441,7 +1484,7 @@ db_dump_intr_event(struct intr_event *ie, int handlers)
db_printf("\n");
if (handlers)
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next)
db_dump_intrhand(ih);
}
@@ -1456,7 +1499,7 @@ DB_SHOW_COMMAND(intr, db_show_intr)
verbose = strchr(modif, 'v') != NULL;
all = strchr(modif, 'a') != NULL;
TAILQ_FOREACH(ie, &event_list, ie_list) {
- if (!all && TAILQ_EMPTY(&ie->ie_handlers))
+ if (!all && CK_SLIST_EMPTY(&ie->ie_handlers))
continue;
db_dump_intr_event(ie, verbose);
if (db_pager_quit)
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index b4e9711f..3baea2e4 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -195,13 +195,8 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
char path[96];
ssize_t rem = sizeof(path);
ssize_t len;
- uint8_t val_8;
- uint16_t val_16;
- uint32_t val_32;
- int val_int;
- long val_long;
- int64_t val_64;
- quad_t val_quad;
+ uint8_t data[512] __aligned(sizeof(uint64_t));
+ int size;
int error;
path[--rem] = 0;
@@ -229,85 +224,88 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
switch (oidp->oid_kind & CTLTYPE) {
case CTLTYPE_INT:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int), GETENV_SIGNED) == 0)
return;
- req.newlen = sizeof(val_int);
- req.newptr = &val_int;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_UINT:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int), GETENV_UNSIGNED) == 0)
return;
- req.newlen = sizeof(val_int);
- req.newptr = &val_int;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_LONG:
- if (getenv_long(path + rem, &val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(long), GETENV_SIGNED) == 0)
return;
- req.newlen = sizeof(val_long);
- req.newptr = &val_long;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_ULONG:
- if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(long), GETENV_UNSIGNED) == 0)
return;
- req.newlen = sizeof(val_long);
- req.newptr = &val_long;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S8:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int8_t), GETENV_SIGNED) == 0)
return;
- val_8 = val_int;
- req.newlen = sizeof(val_8);
- req.newptr = &val_8;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S16:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int16_t), GETENV_SIGNED) == 0)
return;
- val_16 = val_int;
- req.newlen = sizeof(val_16);
- req.newptr = &val_16;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S32:
- if (getenv_long(path + rem, &val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int32_t), GETENV_SIGNED) == 0)
return;
- val_32 = val_long;
- req.newlen = sizeof(val_32);
- req.newptr = &val_32;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S64:
- if (getenv_quad(path + rem, &val_quad) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int64_t), GETENV_SIGNED) == 0)
return;
- val_64 = val_quad;
- req.newlen = sizeof(val_64);
- req.newptr = &val_64;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U8:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint8_t), GETENV_UNSIGNED) == 0)
return;
- val_8 = val_int;
- req.newlen = sizeof(val_8);
- req.newptr = &val_8;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U16:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint16_t), GETENV_UNSIGNED) == 0)
return;
- val_16 = val_int;
- req.newlen = sizeof(val_16);
- req.newptr = &val_16;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U32:
- if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint32_t), GETENV_UNSIGNED) == 0)
return;
- val_32 = val_long;
- req.newlen = sizeof(val_32);
- req.newptr = &val_32;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U64:
- /* XXX there is no getenv_uquad() */
- if (getenv_quad(path + rem, &val_quad) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint64_t), GETENV_UNSIGNED) == 0)
return;
- val_64 = val_quad;
- req.newlen = sizeof(val_64);
- req.newptr = &val_64;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_STRING:
penv = kern_getenv(path + rem);
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index a9c0547a..74b144cb 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -288,6 +288,8 @@ get_process_cputime(struct proc *targetp, struct timespec *ats)
PROC_STATLOCK(targetp);
rufetch(targetp, &ru);
runtime = targetp->p_rux.rux_runtime;
+ if (curthread->td_proc == targetp)
+ runtime += cpu_ticks() - PCPU_GET(switchtime);
PROC_STATUNLOCK(targetp);
cputick2timespec(runtime, ats);
}
@@ -1577,7 +1579,7 @@ realtimer_settime(struct itimer *it, int flags,
if ((flags & TIMER_ABSTIME) == 0) {
/* Convert to absolute time. */
timespecadd(&it->it_time.it_value, &cts,
- &it->it_time.it_value);
+ &it->it_time.it_value);
} else {
timespecsub(&ts, &cts, &ts);
/*
diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c
index a7d78d86..e5b40e62 100644
--- a/freebsd/sys/kern/subr_blist.c
+++ b/freebsd/sys/kern/subr_blist.c
@@ -226,17 +226,19 @@ blist_create(daddr_t blocks, int flags)
u_daddr_t nodes, radix, skip;
int digit;
+ if (blocks == 0)
+ panic("invalid block count");
+
/*
- * Calculate the radix and node count used for scanning. Find the last
- * block that is followed by a terminator.
+ * Calculate the radix and node count used for scanning.
*/
last_block = blocks - 1;
radix = BLIST_BMAP_RADIX;
while (radix < blocks) {
if (((last_block / radix + 1) & BLIST_META_MASK) != 0)
/*
- * A terminator will be added. Update last_block to the
- * position just before that terminator.
+ * We must widen the blist to avoid partially
+ * filled nodes.
*/
last_block |= radix - 1;
radix *= BLIST_META_RADIX;
@@ -246,7 +248,9 @@ blist_create(daddr_t blocks, int flags)
* Count the meta-nodes in the expanded tree, including the final
* terminator, from the bottom level up to the root.
*/
- nodes = (last_block >= blocks) ? 2 : 1;
+ nodes = 1;
+ if (radix - blocks >= BLIST_BMAP_RADIX)
+ nodes++;
last_block /= BLIST_BMAP_RADIX;
while (last_block > 0) {
nodes += last_block + 1;
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index 0626ec0a..391b2ed6 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <sys/random.h>
#include <sys/rman.h>
+#include <sys/sbuf.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
@@ -60,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
-#include <sys/interrupt.h>
#include <sys/cpuset.h>
#include <net/vnet.h>
@@ -84,6 +84,8 @@ struct driverlink {
kobj_class_t driver;
TAILQ_ENTRY(driverlink) link; /* list of drivers in devclass */
int pass;
+ int flags;
+#define DL_DEFERRED_PROBE 1 /* Probe deferred on this */
TAILQ_ENTRY(driverlink) passlink;
};
@@ -155,6 +157,9 @@ EVENTHANDLER_LIST_DEFINE(device_detach);
EVENTHANDLER_LIST_DEFINE(dev_lookup);
static void devctl2_init(void);
+static bool device_frozen;
+#else /* __rtems__ */
+#define device_frozen false
#endif /* __rtems__ */
#define DRIVERNAME(d) ((d)? d->name : "no driver")
@@ -885,27 +890,18 @@ sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
* Strings are always terminated with a NUL, but may be truncated if longer
* than @p len bytes after quotes.
*
- * @param dst Buffer to hold the string. Must be at least @p len bytes long
+ * @param sb sbuf to place the characters into
* @param src Original buffer.
- * @param len Length of buffer pointed to by @dst, including trailing NUL
*/
void
-devctl_safe_quote(char *dst, const char *src, size_t len)
+devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
- char *walker = dst, *ep = dst + len - 1;
- if (len == 0)
- return;
- while (src != NULL && walker < ep)
- {
- if (*src == '"' || *src == '\\') {
- if (ep - walker < 2)
- break;
- *walker++ = '\\';
- }
- *walker++ = *src++;
+ while (*src != '\0') {
+ if (*src == '"' || *src == '\\')
+ sbuf_putc(sb, '\\');
+ sbuf_putc(sb, *src++);
}
- *walker = '\0';
}
/* End of /dev/devctl code */
@@ -1204,7 +1200,11 @@ devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp)
dl->pass = pass;
driver_register_pass(dl);
- devclass_driver_added(dc, driver);
+ if (device_frozen) {
+ dl->flags |= DL_DEFERRED_PROBE;
+ } else {
+ devclass_driver_added(dc, driver);
+ }
bus_data_generation_update();
return (0);
}
@@ -1244,6 +1244,9 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
* Note that since a driver can be in multiple devclasses, we
* should not detach devices which are not children of devices in
* the affected devclass.
+ *
+ * If we're frozen, we don't generate NOMATCH events. Mark to
+ * generate later.
*/
for (i = 0; i < dc->maxunit; i++) {
if (dc->devices[i]) {
@@ -1252,9 +1255,14 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
dev->parent->devclass == busclass) {
if ((error = device_detach(dev)) != 0)
return (error);
- BUS_PROBE_NOMATCH(dev->parent, dev);
- devnomatch(dev);
- dev->flags |= DF_DONENOMATCH;
+ if (device_frozen) {
+ dev->flags &= ~DF_DONENOMATCH;
+ dev->flags |= DF_NEEDNOMATCH;
+ } else {
+ BUS_PROBE_NOMATCH(dev->parent, dev);
+ devnomatch(dev);
+ dev->flags |= DF_DONENOMATCH;
+ }
}
}
}
@@ -2958,6 +2966,7 @@ int
device_attach(device_t dev)
{
uint64_t attachtime;
+ uint16_t attachentropy;
int error;
#ifndef __rtems__
@@ -2985,19 +2994,12 @@ device_attach(device_t dev)
dev->state = DS_NOTPRESENT;
return (error);
}
- attachtime = get_cyclecount() - attachtime;
- /*
- * 4 bits per device is a reasonable value for desktop and server
- * hardware with good get_cyclecount() implementations, but WILL
- * need to be adjusted on other platforms.
+ dev->flags |= DF_ATTACHED_ONCE;
+ /* We only need the low bits of this time, but ranges from tens to thousands
+ * have been seen, so keep 2 bytes' worth.
*/
-#define RANDOM_PROBE_BIT_GUESS 4
- if (bootverbose)
- printf("random: harvesting attach, %zu bytes (%d bits) from %s%d\n",
- sizeof(attachtime), RANDOM_PROBE_BIT_GUESS,
- dev->driver->name, dev->unit);
- random_harvest_direct(&attachtime, sizeof(attachtime),
- RANDOM_PROBE_BIT_GUESS, RANDOM_ATTACH);
+ attachentropy = (uint16_t)(get_cyclecount() - attachtime);
+ random_harvest_direct(&attachentropy, sizeof(attachentropy), RANDOM_ATTACH);
device_sysctl_update(dev);
if (dev->busy)
dev->state = DS_BUSY;
@@ -5474,6 +5476,53 @@ driver_exists(device_t bus, const char *driver)
return (false);
}
+static void
+device_gen_nomatch(device_t dev)
+{
+ device_t child;
+
+ if (dev->flags & DF_NEEDNOMATCH &&
+ dev->state == DS_NOTPRESENT) {
+ BUS_PROBE_NOMATCH(dev->parent, dev);
+ devnomatch(dev);
+ dev->flags |= DF_DONENOMATCH;
+ }
+ dev->flags &= ~DF_NEEDNOMATCH;
+ TAILQ_FOREACH(child, &dev->children, link) {
+ device_gen_nomatch(child);
+ }
+}
+
+static void
+device_do_deferred_actions(void)
+{
+ devclass_t dc;
+ driverlink_t dl;
+
+ /*
+ * Walk through the devclasses to find all the drivers we've tagged as
+ * deferred during the freeze and call the driver added routines. They
+ * have already been added to the lists in the background, so the driver
+ * added routines that trigger a probe will have all the right bidders
+ * for the probe auction.
+ */
+ TAILQ_FOREACH(dc, &devclasses, link) {
+ TAILQ_FOREACH(dl, &dc->drivers, link) {
+ if (dl->flags & DL_DEFERRED_PROBE) {
+ devclass_driver_added(dc, dl->driver);
+ dl->flags &= ~DL_DEFERRED_PROBE;
+ }
+ }
+ }
+
+ /*
+ * We also defer no-match events during a freeze. Walk the tree and
+ * generate all the pent-up events that are still relevant.
+ */
+ device_gen_nomatch(root_bus);
+ bus_data_generation_update();
+}
+
static int
devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
@@ -5500,6 +5549,10 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
if (error == 0)
error = find_device(req, &dev);
break;
+ case DEV_FREEZE:
+ case DEV_THAW:
+ error = priv_check(td, PRIV_DRIVER);
+ break;
default:
error = ENOTTY;
break;
@@ -5703,7 +5756,23 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = device_delete_child(parent, dev);
break;
}
+#ifndef __rtems__
+ case DEV_FREEZE:
+ if (device_frozen)
+ error = EBUSY;
+ else
+ device_frozen = true;
+ break;
+ case DEV_THAW:
+ if (!device_frozen)
+ error = EBUSY;
+ else {
+ device_do_deferred_actions();
+ device_frozen = false;
+ }
+ break;
}
+#endif /* __rtems__ */
mtx_unlock(&Giant);
return (error);
}
diff --git a/freebsd/sys/kern/subr_counter.c b/freebsd/sys/kern/subr_counter.c
index e4c98fae..66cda02b 100644
--- a/freebsd/sys/kern/subr_counter.c
+++ b/freebsd/sys/kern/subr_counter.c
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#define IN_SUBR_COUNTER_C
#include <sys/counter.h>
-
+
void
counter_u64_zero(counter_u64_t c)
{
@@ -62,20 +62,15 @@ counter_u64_fetch(counter_u64_t c)
counter_u64_t
counter_u64_alloc(int flags)
{
- counter_u64_t r;
-
- r = uma_zalloc(pcpu_zone_64, flags);
- if (r != NULL)
- counter_u64_zero(r);
- return (r);
+ return (uma_zalloc_pcpu(pcpu_zone_64, flags | M_ZERO));
}
void
counter_u64_free(counter_u64_t c)
{
- uma_zfree(pcpu_zone_64, c);
+ uma_zfree_pcpu(pcpu_zone_64, c);
}
int
@@ -142,7 +137,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit)
val = cr->cr_over;
now = ticks;
- if (now - cr->cr_ticks >= hz) {
+ if ((u_int)(now - cr->cr_ticks) >= hz) {
/*
* Time to clear the structure, we are in the next second.
* First try unlocked read, and then proceed with atomic.
@@ -153,7 +148,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit)
* Check if other thread has just went through the
* reset sequence before us.
*/
- if (now - cr->cr_ticks >= hz) {
+ if ((u_int)(now - cr->cr_ticks) >= hz) {
val = counter_u64_fetch(cr->cr_rate);
counter_u64_zero(cr->cr_rate);
cr->cr_over = 0;
diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c
index aa5c922d..e56e90d7 100644
--- a/freebsd/sys/kern/subr_gtaskqueue.c
+++ b/freebsd/sys/kern/subr_gtaskqueue.c
@@ -921,6 +921,24 @@ taskqgroup_bind(struct taskqgroup *qgroup)
}
}
+static void
+taskqgroup_config_init(void *arg)
+{
+ struct taskqgroup *qgroup = qgroup_config;
+ LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
+
+ LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
+ grouptask, gt_list);
+ qgroup->tqg_queue[0].tgc_cnt = 0;
+ taskqgroup_cpu_create(qgroup, 0, 0);
+
+ qgroup->tqg_cnt = 1;
+ qgroup->tqg_stride = 1;
+}
+
+SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
+ taskqgroup_config_init, NULL);
+
static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
diff --git a/freebsd/sys/kern/subr_hints.c b/freebsd/sys/kern/subr_hints.c
index 982059c3..78ab1b4e 100644
--- a/freebsd/sys/kern/subr_hints.c
+++ b/freebsd/sys/kern/subr_hints.c
@@ -33,228 +33,257 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
+#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/bus.h>
-/*
- * Access functions for device resources.
- */
-
#ifndef __rtems__
-static int checkmethod = 1;
-static int use_kenv;
-static char *hintp;
-#else /* __rtems__ */
-#define hintmode 1
-#define hintp static_hints
-#define use_kenv 0
-static char __used default_static_hints[] = "";
-__weak_reference(default_static_hints, static_hints);
-#endif /* __rtems__ */
+#define FBACK_MDENV 0 /* MD env (e.g. loader.conf) */
+#define FBACK_STENV 1 /* Static env */
+#define FBACK_STATIC 2 /* static_hints */
-#ifndef __rtems__
/*
- * Define kern.hintmode sysctl, which only accept value 2, that cause to
- * switch from Static KENV mode to Dynamic KENV. So systems that have hints
- * compiled into kernel will be able to see/modify KENV (and hints too).
+ * We'll use hintenv_merged to indicate that the dynamic environment has been
+ * properly prepared for hint usage. This implies that the dynamic environment
+ * has already been setup (dynamic_kenv) and that we have added any supplied
+ * static_hints to the dynamic environment.
+ */
+static bool hintenv_merged;
+/* Static environment and static hints cannot change, so we'll skip known bad */
+static bool stenv_skip;
+static bool sthints_skip;
+/*
+ * Access functions for device resources.
*/
-static int
-sysctl_hintmode(SYSCTL_HANDLER_ARGS)
+static void
+static_hints_to_env(void *data __unused)
{
const char *cp;
char *line, *eq;
- int eqidx, error, from_kenv, i, value;
-
- from_kenv = 0;
- cp = kern_envp;
- value = hintmode;
-
- /* Fetch candidate for new hintmode value */
- error = sysctl_handle_int(oidp, &value, 0, req);
- if (error || req->newptr == NULL)
- return (error);
-
- if (value != 2)
- /* Only accept swithing to hintmode 2 */
- return (EINVAL);
-
- /* Migrate from static to dynamic hints */
- switch (hintmode) {
- case 0:
- if (dynamic_kenv) {
- /*
- * Already here. But assign hintmode to 2, to not
- * check it in the future.
- */
- hintmode = 2;
- return (0);
- }
- from_kenv = 1;
- cp = kern_envp;
- break;
- case 1:
- cp = static_hints;
- break;
- case 2:
- /* Nothing to do, hintmode already 2 */
- return (0);
- }
+ int eqidx, i;
- while (cp) {
- i = strlen(cp);
- if (i == 0)
- break;
- if (from_kenv) {
- if (strncmp(cp, "hint.", 5) != 0)
- /* kenv can have not only hints */
- continue;
- }
+ cp = static_hints;
+ while (cp && *cp != '\0') {
eq = strchr(cp, '=');
if (eq == NULL)
/* Bad hint value */
continue;
eqidx = eq - cp;
- line = malloc(i+1, M_TEMP, M_WAITOK);
+ i = strlen(cp);
+ line = malloc(i + 1, M_TEMP, M_WAITOK);
strcpy(line, cp);
- line[eqidx] = '\0';
- kern_setenv(line, line + eqidx + 1);
+ line[eqidx] = line[i] = '\0';
+ /*
+ * Before adding a hint to the dynamic environment, check if
+ * another value for said hint has already been added. This is
+ * needed because static environment overrides static hints and
+ * dynamic environment overrides all.
+ */
+ if (testenv(line) == 0)
+ kern_setenv(line, line + eqidx + 1);
free(line, M_TEMP);
cp += i + 1;
}
-
- hintmode = value;
- use_kenv = 1;
- return (0);
+ hintenv_merged = true;
}
-SYSCTL_PROC(_kern, OID_AUTO, hintmode, CTLTYPE_INT|CTLFLAG_RW,
- &hintmode, 0, sysctl_hintmode, "I", "Get/set current hintmode");
+/* Any time after dynamic env is setup */
+SYSINIT(hintenv, SI_SUB_KMEM + 1, SI_ORDER_SECOND, static_hints_to_env, NULL);
+#else /* __rtems__ */
+#define sthints_skip false
+
+static char __used default_static_hints[] = "";
+__weak_reference(default_static_hints, static_hints);
#endif /* __rtems__ */
/*
+ * Checks the environment to see if we even have any hints. If it has no hints,
+ * then res_find can take the hint that there's no point in searching it and
+ * either move on to the next environment or fail early.
+ */
+static bool
+_res_checkenv(char *envp)
+{
+ char *cp;
+
+ cp = envp;
+ while (cp) {
+ if (strncmp(cp, "hint.", 5) == 0)
+ return (true);
+ while (*cp != '\0')
+ cp++;
+ cp++;
+ if (*cp == '\0')
+ break;
+ }
+ return (false);
+}
+
+/*
* Evil wildcarding resource string lookup.
* This walks the supplied env string table and returns a match.
* The start point can be remembered for incremental searches.
*/
static int
-res_find(int *line, int *startln,
+res_find(char **hintp_cookie, int *line, int *startln,
const char *name, int *unit, const char *resname, const char *value,
const char **ret_name, int *ret_namelen, int *ret_unit,
const char **ret_resname, int *ret_resnamelen, const char **ret_value)
{
- int n = 0, hit, i = 0;
+#ifndef __rtems__
+ int fbacklvl = FBACK_MDENV, i = 0, n = 0;
+#else /* __rtems__ */
+ int n = 0;
+#endif /* __rtems__ */
char r_name[32];
int r_unit;
char r_resname[32];
char r_value[128];
const char *s, *cp;
- char *p;
-
+ char *hintp, *p;
#ifndef __rtems__
- if (checkmethod) {
- hintp = NULL;
+ bool dyn_used = false;
- switch (hintmode) {
- case 0: /* loader hints in environment only */
- break;
- case 1: /* static hints only */
- hintp = static_hints;
- checkmethod = 0;
- break;
- case 2: /* fallback mode */
- if (dynamic_kenv) {
- mtx_lock(&kenv_lock);
- cp = kenvp[0];
- for (i = 0; cp != NULL; cp = kenvp[++i]) {
- if (!strncmp(cp, "hint.", 5)) {
- use_kenv = 1;
- checkmethod = 0;
- break;
- }
+
+ /*
+ * We are expecting that the caller will pass us a hintp_cookie that
+ * they are tracking. Upon entry, if *hintp_cookie is *not* set, this
+ * indicates to us that we should be figuring out based on the current
+ * environment where to search. This keeps us sane throughout the
+ * entirety of a single search.
+ */
+ if (*hintp_cookie == NULL) {
+ hintp = NULL;
+ if (hintenv_merged) {
+ /*
+ * static_hints, if it was previously used, has
+ * already been folded in to the environment
+ * by this point.
+ */
+ mtx_lock(&kenv_lock);
+ cp = kenvp[0];
+ for (i = 0; cp != NULL; cp = kenvp[++i]) {
+ if (!strncmp(cp, "hint.", 5)) {
+ hintp = kenvp[0];
+ break;
}
- mtx_unlock(&kenv_lock);
- } else {
- cp = kern_envp;
- while (cp) {
- if (strncmp(cp, "hint.", 5) == 0) {
- cp = NULL;
- hintp = kern_envp;
- break;
- }
- while (*cp != '\0')
- cp++;
- cp++;
- if (*cp == '\0') {
- cp = NULL;
- hintp = static_hints;
- break;
- }
+ }
+ mtx_unlock(&kenv_lock);
+ dyn_used = true;
+ } else {
+ /*
+ * We'll have a chance to keep coming back here until
+ * we've actually exhausted all of our possibilities.
+ * We might have chosen the MD/Static env because it
+ * had some kind of hints, but perhaps it didn't have
+ * the hint we are looking for. We don't provide any
+ * fallback when searching the dynamic environment.
+ */
+fallback:
+ if (dyn_used || fbacklvl >= FBACK_STATIC)
+ return (ENOENT);
+
+ switch (fbacklvl) {
+ case FBACK_MDENV:
+ fbacklvl++;
+ if (_res_checkenv(md_envp)) {
+ hintp = md_envp;
+ break;
}
+
+ /* FALLTHROUGH */
+ case FBACK_STENV:
+ fbacklvl++;
+ if (!stenv_skip && _res_checkenv(kern_envp)) {
+ hintp = kern_envp;
+ break;
+ } else
+ stenv_skip = true;
+
+ /* FALLTHROUGH */
+ case FBACK_STATIC:
+ fbacklvl++;
+#else /* __rtems__ */
+ hintp = NULL;
+#endif /* __rtems__ */
+ /* We'll fallback to static_hints if needed/can */
+ if (!sthints_skip &&
+ _res_checkenv(static_hints))
+ hintp = static_hints;
+#ifndef __rtems__
+ else
+ sthints_skip = true;
+
+ break;
+ default:
+ return (ENOENT);
}
- break;
- default:
- break;
- }
- if (hintp == NULL) {
- if (dynamic_kenv) {
- use_kenv = 1;
- checkmethod = 0;
- } else
- hintp = kern_envp;
}
+#endif /* __rtems__ */
+
+ if (hintp == NULL)
+ return (ENOENT);
+ *hintp_cookie = hintp;
+#ifndef __rtems__
+ } else {
+ hintp = *hintp_cookie;
+ if (hintenv_merged && hintp == kenvp[0])
+ dyn_used = true;
+ else
+ /*
+ * If we aren't using the dynamic environment, we need
+ * to run through the proper fallback procedure again.
+ * This is so that we do continuations right if we're
+ * working with *line and *startln.
+ */
+ goto fallback;
}
- if (use_kenv) {
+ if (dyn_used) {
mtx_lock(&kenv_lock);
i = 0;
- cp = kenvp[0];
- if (cp == NULL) {
- mtx_unlock(&kenv_lock);
- return (ENOENT);
- }
- } else
+ }
#endif /* __rtems__ */
- cp = hintp;
+
+ cp = hintp;
while (cp) {
- hit = 1;
(*line)++;
if (strncmp(cp, "hint.", 5) != 0)
- hit = 0;
- else
- n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s",
- r_name, &r_unit, r_resname, r_value);
- if (hit && n != 4) {
+ goto nexthint;
+ n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s", r_name, &r_unit,
+ r_resname, r_value);
+ if (n != 4) {
printf("CONFIG: invalid hint '%s'\n", cp);
p = strchr(cp, 'h');
*p = 'H';
- hit = 0;
+ goto nexthint;
}
- if (hit && startln && *startln >= 0 && *line < *startln)
- hit = 0;
- if (hit && name && strcmp(name, r_name) != 0)
- hit = 0;
- if (hit && unit && *unit != r_unit)
- hit = 0;
- if (hit && resname && strcmp(resname, r_resname) != 0)
- hit = 0;
- if (hit && value && strcmp(value, r_value) != 0)
- hit = 0;
- if (hit)
- break;
- if (use_kenv) {
+ if (startln && *startln >= 0 && *line < *startln)
+ goto nexthint;
+ if (name && strcmp(name, r_name) != 0)
+ goto nexthint;
+ if (unit && *unit != r_unit)
+ goto nexthint;
+ if (resname && strcmp(resname, r_resname) != 0)
+ goto nexthint;
+ if (value && strcmp(value, r_value) != 0)
+ goto nexthint;
+ /* Successfully found a hint matching all criteria */
+ break;
+nexthint:
#ifndef __rtems__
+ if (dyn_used) {
cp = kenvp[++i];
if (cp == NULL)
break;
-#else /* __rtems__ */
(void) i;
-#endif /* __rtems__ */
} else {
+#endif /* __rtems__ */
while (*cp != '\0')
cp++;
cp++;
@@ -262,14 +291,20 @@ res_find(int *line, int *startln,
cp = NULL;
break;
}
+#ifndef __rtems__
}
+#endif /* __rtems__ */
}
#ifndef __rtems__
- if (use_kenv)
+ if (dyn_used)
mtx_unlock(&kenv_lock);
#endif /* __rtems__ */
if (cp == NULL)
- return ENOENT;
+#ifndef __rtems__
+ goto fallback;
+#else /* __rtems__ */
+ return (ENOENT);
+#endif /* __rtems__ */
s = cp;
/* This is a bit of a hack, but at least is reentrant */
@@ -307,11 +342,13 @@ resource_find(int *line, int *startln,
{
int i;
int un;
+ char *hintp;
*line = 0;
+ hintp = NULL;
/* Search for exact unit matches first */
- i = res_find(line, startln, name, unit, resname, value,
+ i = res_find(&hintp, line, startln, name, unit, resname, value,
ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen,
ret_value);
if (i == 0)
@@ -320,7 +357,7 @@ resource_find(int *line, int *startln,
return ENOENT;
/* If we are still here, search for wildcard matches */
un = -1;
- i = res_find(line, startln, name, &un, resname, value,
+ i = res_find(&hintp, line, startln, name, &un, resname, value,
ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen,
ret_value);
if (i == 0)
diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c
index d8d42653..21b2754c 100644
--- a/freebsd/sys/kern/subr_module.c
+++ b/freebsd/sys/kern/subr_module.c
@@ -35,6 +35,9 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/linker.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
/*
* Preloaded module support
*/
@@ -206,29 +209,42 @@ preload_search_info(caddr_t mod, int inf)
void
preload_delete_name(const char *name)
{
- caddr_t curp;
- uint32_t *hdr;
+ caddr_t addr, curp;
+ uint32_t *hdr, sz;
int next;
int clearing;
+
+ addr = 0;
+ sz = 0;
if (preload_metadata != NULL) {
-
+
clearing = 0;
curp = preload_metadata;
for (;;) {
hdr = (uint32_t *)curp;
- if (hdr[0] == 0 && hdr[1] == 0)
- break;
-
- /* Search for a MODINFO_NAME field */
- if (hdr[0] == MODINFO_NAME) {
+ if (hdr[0] == MODINFO_NAME || (hdr[0] == 0 && hdr[1] == 0)) {
+ /* Free memory used to store the file. */
+ if (addr != 0 && sz != 0)
+ kmem_bootstrap_free((vm_offset_t)addr, sz);
+ addr = 0;
+ sz = 0;
+
+ if (hdr[0] == 0)
+ break;
if (!strcmp(name, curp + sizeof(uint32_t) * 2))
clearing = 1; /* got it, start clearing */
- else if (clearing)
+ else if (clearing) {
clearing = 0; /* at next one now.. better stop */
+ }
}
- if (clearing)
+ if (clearing) {
+ if (hdr[0] == MODINFO_ADDR)
+ addr = *(caddr_t *)(curp + sizeof(uint32_t) * 2);
+ else if (hdr[0] == MODINFO_SIZE)
+ sz = *(uint32_t *)(curp + sizeof(uint32_t) * 2);
hdr[0] = MODINFO_EMPTY;
+ }
/* skip to next field */
next = sizeof(uint32_t) * 2 + hdr[1];
diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c
index 1b866e3a..0ab77996 100644
--- a/freebsd/sys/kern/subr_pcpu.c
+++ b/freebsd/sys/kern/subr_pcpu.c
@@ -75,7 +75,7 @@ struct dpcpu_free {
TAILQ_ENTRY(dpcpu_free) df_link;
};
-static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
+DPCPU_DEFINE_STATIC(char, modspace[DPCPU_MODMIN] __aligned(__alignof(void *)));
static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
static struct sx dpcpu_lock;
uintptr_t dpcpu_off[MAXCPU];
diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c
index 4c45bcfe..6e719897 100644
--- a/freebsd/sys/kern/subr_prf.c
+++ b/freebsd/sys/kern/subr_prf.c
@@ -135,10 +135,22 @@ static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper);
static void snprintf_func(int ch, void *arg);
#ifndef __rtems__
-static int msgbufmapped; /* Set when safe to use msgbuf */
+static bool msgbufmapped; /* Set when safe to use msgbuf */
int msgbuftrigger;
struct msgbuf *msgbufp;
+#ifndef BOOT_TAG_SZ
+#define BOOT_TAG_SZ 32
+#endif
+#ifndef BOOT_TAG
+/* Tag used to mark the start of a boot in dmesg */
+#define BOOT_TAG "---<<BOOT>>---"
+#endif
+
+static char current_boot_tag[BOOT_TAG_SZ + 1] = BOOT_TAG;
+SYSCTL_STRING(_kern, OID_AUTO, boot_tag, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ current_boot_tag, 0, "Tag added to dmesg at start of boot");
+
static int log_console_output = 1;
SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RWTUN,
&log_console_output, 0, "Duplicate console output to the syslog");
@@ -743,6 +755,7 @@ reswitch: switch (ch = (u_char)*fmt++) {
padc = '0';
goto reswitch;
}
+ /* FALLTHROUGH */
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
for (n = 0;; ++fmt) {
@@ -1057,14 +1070,22 @@ msgbufinit(void *ptr, int size)
{
char *cp;
static struct msgbuf *oldp = NULL;
+ bool print_boot_tag;
size -= sizeof(*msgbufp);
cp = (char *)ptr;
+ print_boot_tag = !msgbufmapped;
+ /* Attempt to fetch kern.boot_tag tunable on first mapping */
+ if (!msgbufmapped)
+ TUNABLE_STR_FETCH("kern.boot_tag", current_boot_tag,
+ sizeof(current_boot_tag));
msgbufp = (struct msgbuf *)(cp + size);
msgbuf_reinit(msgbufp, cp, size);
if (msgbufmapped && oldp != msgbufp)
msgbuf_copy(oldp, msgbufp);
- msgbufmapped = 1;
+ msgbufmapped = true;
+ if (print_boot_tag && *current_boot_tag != '\0')
+ printf("%s\n", current_boot_tag);
oldp = msgbufp;
}
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index e527495a..8eb0aad9 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -572,9 +572,7 @@ pipe(int fildes[2])
* If it fails it will return ENOMEM.
*/
static int
-pipespace_new(cpipe, size)
- struct pipe *cpipe;
- int size;
+pipespace_new(struct pipe *cpipe, int size)
{
caddr_t buffer;
int error, cnt, firstseg;
@@ -646,9 +644,7 @@ retry:
* Wrapper for pipespace_new() that performs locking assertions.
*/
static int
-pipespace(cpipe, size)
- struct pipe *cpipe;
- int size;
+pipespace(struct pipe *cpipe, int size)
{
KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
@@ -660,9 +656,7 @@ pipespace(cpipe, size)
* lock a pipe for I/O, blocking other access
*/
static __inline int
-pipelock(cpipe, catch)
- struct pipe *cpipe;
- int catch;
+pipelock(struct pipe *cpipe, int catch)
{
int error;
@@ -683,8 +677,7 @@ pipelock(cpipe, catch)
* unlock a pipe I/O lock
*/
static __inline void
-pipeunlock(cpipe)
- struct pipe *cpipe;
+pipeunlock(struct pipe *cpipe)
{
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
@@ -698,8 +691,7 @@ pipeunlock(cpipe)
}
void
-pipeselwakeup(cpipe)
- struct pipe *cpipe;
+pipeselwakeup(struct pipe *cpipe)
{
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
@@ -720,9 +712,7 @@ pipeselwakeup(cpipe)
* will start out zero'd from the ctor, so we just manage the kmem.
*/
static void
-pipe_create(pipe, backing)
- struct pipe *pipe;
- int backing;
+pipe_create(struct pipe *pipe, int backing)
{
if (backing) {
@@ -744,12 +734,8 @@ pipe_create(pipe, backing)
/* ARGSUSED */
static int
-pipe_read(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- struct thread *td;
- int flags;
+pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
struct pipe *rpipe;
int error;
@@ -995,9 +981,7 @@ rtems_bsd_pipe_readv(rtems_libio_t *iop, const struct iovec *iov,
* This is similar to a physical write operation.
*/
static int
-pipe_build_write_buffer(wpipe, uio)
- struct pipe *wpipe;
- struct uio *uio;
+pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
u_int size;
int i;
@@ -1041,8 +1025,7 @@ pipe_build_write_buffer(wpipe, uio)
* unmap and unwire the process buffer
*/
static void
-pipe_destroy_write_buffer(wpipe)
- struct pipe *wpipe;
+pipe_destroy_write_buffer(struct pipe *wpipe)
{
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
@@ -1056,8 +1039,7 @@ pipe_destroy_write_buffer(wpipe)
* pages can be freed without loss of data.
*/
static void
-pipe_clone_write_buffer(wpipe)
- struct pipe *wpipe;
+pipe_clone_write_buffer(struct pipe *wpipe)
{
struct uio uio;
struct iovec iov;
@@ -1096,9 +1078,7 @@ pipe_clone_write_buffer(wpipe)
* the pipe buffer. Then the direct mapping write is set-up.
*/
static int
-pipe_direct_write(wpipe, uio)
- struct pipe *wpipe;
- struct uio *uio;
+pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
int error;
@@ -1197,12 +1177,8 @@ error1:
#endif
static int
-pipe_write(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- struct thread *td;
- int flags;
+pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
int error = 0;
int desiredsize;
@@ -1553,11 +1529,8 @@ rtems_bsd_pipe_writev(rtems_libio_t *iop, const struct iovec *iov,
/* ARGSUSED */
#ifndef __rtems__
static int
-pipe_truncate(fp, length, active_cred, td)
- struct file *fp;
- off_t length;
- struct ucred *active_cred;
- struct thread *td;
+pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *cpipe;
int error;
@@ -1575,12 +1548,8 @@ pipe_truncate(fp, length, active_cred, td)
* we implement a very minimal set of ioctls for compatibility with sockets.
*/
static int
-pipe_ioctl(fp, cmd, data, active_cred, td)
- struct file *fp;
- u_long cmd;
- void *data;
- struct ucred *active_cred;
- struct thread *td;
+pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *mpipe = fp->f_data;
int error;
@@ -1672,11 +1641,8 @@ rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer)
#endif /* __rtems__ */
static int
-pipe_poll(fp, events, active_cred, td)
- struct file *fp;
- int events;
- struct ucred *active_cred;
- struct thread *td;
+pipe_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *rpipe;
struct pipe *wpipe;
@@ -1786,11 +1752,8 @@ rtems_bsd_pipe_poll(rtems_libio_t *iop, int events)
*/
#ifndef __rtems__
static int
-pipe_stat(fp, ub, active_cred, td)
- struct file *fp;
- struct stat *ub;
- struct ucred *active_cred;
- struct thread *td;
+pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *pipe;
#else /* __rtems__ */
@@ -1889,9 +1852,7 @@ rtems_bsd_pipe_stat(
/* ARGSUSED */
static int
-pipe_close(fp, td)
- struct file *fp;
- struct thread *td;
+pipe_close(struct file *fp, struct thread *td)
{
#ifndef __rtems__
@@ -1922,12 +1883,8 @@ pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct threa
}
static int
-pipe_chown(fp, uid, gid, active_cred, td)
- struct file *fp;
- uid_t uid;
- gid_t gid;
- struct ucred *active_cred;
- struct thread *td;
+pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *cpipe;
int error;
@@ -1957,8 +1914,7 @@ pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
#endif /* __rtems__ */
static void
-pipe_free_kmem(cpipe)
- struct pipe *cpipe;
+pipe_free_kmem(struct pipe *cpipe)
{
KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
@@ -1988,8 +1944,7 @@ pipe_free_kmem(cpipe)
* shutdown the pipe
*/
static void
-pipeclose(cpipe)
- struct pipe *cpipe;
+pipeclose(struct pipe *cpipe)
{
struct pipepair *pp;
struct pipe *ppipe;
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index ec493c04..cf99c615 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -961,23 +961,14 @@ sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
return (retval);
}
-int
+void
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control)
{
- struct mbuf *m, *n, *mlast;
- int space;
-
- SOCKBUF_LOCK_ASSERT(sb);
+ struct mbuf *m, *mlast;
- if (control == NULL)
- panic("sbappendcontrol_locked");
- space = m_length(control, &n) + m_length(m0, NULL);
-
- if (space > sbspace(sb))
- return (0);
m_clrprotoflags(m0);
- n->m_next = m0; /* concatenate data to control */
+ m_last(control)->m_next = m0;
SBLASTRECORDCHK(sb);
@@ -991,18 +982,15 @@ sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
SBLASTMBUFCHK(sb);
SBLASTRECORDCHK(sb);
- return (1);
}
-int
+void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
- int retval;
SOCKBUF_LOCK(sb);
- retval = sbappendcontrol_locked(sb, m0, control);
+ sbappendcontrol_locked(sb, m0, control);
SOCKBUF_UNLOCK(sb);
- return (retval);
}
/*
@@ -1289,6 +1277,63 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
return (ret);
}
+struct mbuf *
+#ifndef __rtems__
+sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
+#else /* __rtems__ */
+sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff)
+#endif /* __rtems__ */
+{
+ struct mbuf *m;
+
+ KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
+ *moff = off;
+ if (sb->sb_sndptr == NULL) {
+ sb->sb_sndptr = sb->sb_mb;
+ sb->sb_sndptroff = 0;
+ }
+ return (sb->sb_mb);
+ } else {
+ m = sb->sb_sndptr;
+ off -= sb->sb_sndptroff;
+ }
+ *moff = off;
+ return (m);
+}
+
+void
+#ifndef __rtems__
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
+#else /* __rtems__ */
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len)
+#endif /* __rtems__ */
+{
+ /*
+ * A small copy was done; advance sb_sndptr forward to cover
+ * it.
+ */
+ struct mbuf *m;
+
+ if (mb != sb->sb_sndptr) {
+ /* The copy did not start at the sb_sndptr mbuf */
+ return;
+ }
+ m = mb;
+ while (m && (len > 0)) {
+ if (len >= m->m_len) {
+ len -= m->m_len;
+ if (m->m_next) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ m = m->m_next;
+ } else {
+ len = 0;
+ }
+ }
+}
+
/*
* Return the first mbuf and the mbuf data offset for the provided
* send offset without changing the "sb_sndptroff" field.
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index e82642e4..3143a392 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -1126,6 +1126,8 @@ soclose(struct socket *so)
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
+ if (so->so_dtor != NULL)
+ so->so_dtor(so);
SOCK_LOCK(so);
if ((listening = (so->so_options & SO_ACCEPTCONN))) {
@@ -2191,7 +2193,6 @@ release:
/*
* Optimized version of soreceive() for stream (TCP) sockets.
- * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
*/
int
soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
@@ -2206,12 +2207,12 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
return (EINVAL);
if (psa != NULL)
*psa = NULL;
- if (controlp != NULL)
- return (EINVAL);
if (flagsp != NULL)
flags = *flagsp &~ MSG_EOR;
else
flags = 0;
+ if (controlp != NULL)
+ *controlp = NULL;
if (flags & MSG_OOB)
return (soreceive_rcvoob(so, uio, flags));
if (mp0 != NULL)
@@ -2815,6 +2816,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_BROADCAST:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_OOBINLINE:
case SO_TIMESTAMP:
case SO_BINTIME:
@@ -3035,6 +3037,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
case SO_KEEPALIVE:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_ACCEPTCONN:
@@ -3046,6 +3049,10 @@ integer:
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+ case SO_DOMAIN:
+ optval = so->so_proto->pr_domain->dom_family;
+ goto integer;
+
case SO_TYPE:
optval = so->so_type;
goto integer;
@@ -3867,6 +3874,17 @@ sodupsockaddr(const struct sockaddr *sa, int mflags)
}
/*
+ * Register per-socket destructor.
+ */
+void
+sodtor_set(struct socket *so, so_dtor_t *func)
+{
+
+ SOCK_LOCK_ASSERT(so);
+ so->so_dtor = func;
+}
+
+/*
* Register per-socket buffer upcalls.
*/
void
@@ -4027,12 +4045,12 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
{
xso->xso_len = sizeof *xso;
- xso->xso_so = so;
+ xso->xso_so = (uintptr_t)so;
xso->so_type = so->so_type;
xso->so_options = so->so_options;
xso->so_linger = so->so_linger;
xso->so_state = so->so_state;
- xso->so_pcb = so->so_pcb;
+ xso->so_pcb = (uintptr_t)so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
xso->so_timeo = so->so_timeo;
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 0872aa62..9c4c52e4 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -60,6 +60,8 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -831,6 +833,15 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol,
error = soconnect2(so2, so1);
if (error != 0)
goto free4;
+ } else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) {
+ struct unpcb *unp, *unp2;
+ unp = sotounpcb(so1);
+ unp2 = sotounpcb(so2);
+ /*
+ * No need to lock the unps, because the sockets are brand-new.
+ * No other threads can be using them yet.
+ */
+ unp_copy_peercred(td, unp, unp2, unp);
}
finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
&socketops);
@@ -1260,7 +1271,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
{
struct uio auio;
struct iovec *iov;
- struct mbuf *m, *control = NULL;
+ struct mbuf *control, *m;
caddr_t ctlbuf;
struct file *fp;
struct socket *so;
@@ -1307,6 +1318,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
if (KTRPOINT(td, KTR_GENIO))
ktruio = cloneuio(&auio);
#endif
+ control = NULL;
len = auio.uio_resid;
error = soreceive(so, &fromsa, &auio, NULL,
(mp->msg_control || controlp) ? &control : NULL,
@@ -1370,30 +1382,22 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
control->m_data += sizeof (struct cmsghdr);
}
#endif
+ ctlbuf = mp->msg_control;
len = mp->msg_controllen;
- m = control;
mp->msg_controllen = 0;
- ctlbuf = mp->msg_control;
-
- while (m && len > 0) {
- unsigned int tocopy;
-
- if (len >= m->m_len)
- tocopy = m->m_len;
- else {
- mp->msg_flags |= MSG_CTRUNC;
- tocopy = len;
- }
-
- if ((error = copyout(mtod(m, caddr_t),
- ctlbuf, tocopy)) != 0)
+ for (m = control; m != NULL && len >= m->m_len; m = m->m_next) {
+ if ((error = copyout(mtod(m, caddr_t), ctlbuf,
+ m->m_len)) != 0)
goto out;
- ctlbuf += tocopy;
- len -= tocopy;
- m = m->m_next;
+ ctlbuf += m->m_len;
+ len -= m->m_len;
+ mp->msg_controllen += m->m_len;
+ }
+ if (m != NULL) {
+ mp->msg_flags |= MSG_CTRUNC;
+ m_dispose_extcontrolm(m);
}
- mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
}
out:
fdrop(fp, td);
@@ -1405,8 +1409,11 @@ out:
if (error == 0 && controlp != NULL)
*controlp = control;
- else if (control)
+ else if (control != NULL) {
+ if (error != 0)
+ m_dispose_extcontrolm(control);
m_freem(control);
+ }
return (error);
}
@@ -2134,3 +2141,51 @@ getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
return (0);
#endif /* __rtems__ */
}
+
+/*
+ * Dispose of externalized rights from an SCM_RIGHTS message. This function
+ * should be used in error or truncation cases to avoid leaking file descriptors
+ * into the recipient's (the current thread's) table.
+ */
+void
+m_dispose_extcontrolm(struct mbuf *m)
+{
+ struct cmsghdr *cm;
+ struct file *fp;
+ struct thread *td;
+ socklen_t clen, datalen;
+ int error, fd, *fds, nfd;
+
+ td = curthread;
+ for (; m != NULL; m = m->m_next) {
+ if (m->m_type != MT_EXTCONTROL)
+ continue;
+ cm = mtod(m, struct cmsghdr *);
+ clen = m->m_len;
+ while (clen > 0) {
+ if (clen < sizeof(*cm))
+ panic("%s: truncated mbuf %p", __func__, m);
+ datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0));
+ if (clen < datalen)
+ panic("%s: truncated mbuf %p", __func__, m);
+
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_RIGHTS) {
+ fds = (int *)CMSG_DATA(cm);
+ nfd = (cm->cmsg_len - CMSG_SPACE(0)) /
+ sizeof(int);
+
+ while (nfd-- > 0) {
+ fd = *fds++;
+ error = fget(td, fd, &cap_no_rights,
+ &fp);
+ if (error == 0)
+ fdclose(td, fp, fd);
+ }
+ }
+ clen -= datalen;
+ cm = (struct cmsghdr *)((uint8_t *)cm + datalen);
+ }
+ m_chtype(m, MT_CONTROL);
+ }
+}
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index 688682d4..c1885ed6 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -376,33 +376,32 @@ unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2)
}
static __noinline void
-unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, int *freed)
-
+unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p,
+ int *freed)
{
struct unpcb *unp2;
unp2 = *unp2p;
- unp_pcb_hold((unp2));
- UNP_PCB_UNLOCK((unp));
- UNP_PCB_LOCK((unp2));
- UNP_PCB_LOCK((unp));
- *freed = unp_pcb_rele((unp2));
+ unp_pcb_hold(unp2);
+ UNP_PCB_UNLOCK(unp);
+ UNP_PCB_LOCK(unp2);
+ UNP_PCB_LOCK(unp);
+ *freed = unp_pcb_rele(unp2);
if (*freed)
*unp2p = NULL;
}
-#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
- freed = 0; \
- UNP_PCB_LOCK_ASSERT((unp)); \
- UNP_PCB_UNLOCK_ASSERT((unp2)); \
- MPASS(unp != unp2); \
- if (__predict_true(UNP_PCB_TRYLOCK((unp2)))) \
- break; \
- else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \
- UNP_PCB_LOCK((unp2)); \
- else { \
- unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
- } \
+#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
+ freed = 0; \
+ UNP_PCB_LOCK_ASSERT(unp); \
+ UNP_PCB_UNLOCK_ASSERT(unp2); \
+ MPASS((unp) != (unp2)); \
+ if (__predict_true(UNP_PCB_TRYLOCK(unp2))) \
+ break; \
+ else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \
+ UNP_PCB_LOCK(unp2); \
+ else \
+ unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
} while (0)
@@ -992,21 +991,19 @@ uipc_disconnect(struct socket *so)
UNP_PCB_UNLOCK(unp);
return (0);
}
- if (unp == unp2) {
- if (unp_pcb_rele(unp) == 0)
+ if (__predict_true(unp != unp2)) {
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ if (__predict_false(freed)) {
UNP_PCB_UNLOCK(unp);
+ return (0);
+ }
+ unp_pcb_hold(unp2);
}
- unp_pcb_owned_lock2(unp, unp2, freed);
- if (__predict_false(freed)) {
- UNP_PCB_UNLOCK(unp);
- return (0);
- }
- unp_pcb_hold(unp2);
unp_pcb_hold(unp);
unp_disconnect(unp, unp2);
if (unp_pcb_rele(unp) == 0)
UNP_PCB_UNLOCK(unp);
- if (unp_pcb_rele(unp2) == 0)
+ if ((unp != unp2) && unp_pcb_rele(unp2) == 0)
UNP_PCB_UNLOCK(unp2);
return (0);
}
@@ -1305,16 +1302,22 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
control = unp_addsockcred(td, control);
#endif /* __rtems__ */
}
+
/*
- * Send to paired receive port, and then reduce send buffer
- * hiwater marks to maintain backpressure. Wake up readers.
+ * Send to paired receive port and wake up readers. Don't
+ * check for space available in the receive buffer if we're
+ * attaching ancillary data; Unix domain sockets only check
+ * for space in the sending sockbuf, and that check is
+ * performed one level up the stack. At that level we cannot
+ * precisely account for the amount of buffer space used
+ * (e.g., because control messages are not yet internalized).
*/
switch (so->so_type) {
case SOCK_STREAM:
if (control != NULL) {
- if (sbappendcontrol_locked(&so2->so_rcv, m,
- control))
- control = NULL;
+ sbappendcontrol_locked(&so2->so_rcv, m,
+ control);
+ control = NULL;
} else
sbappend_locked(&so2->so_rcv, m, flags);
break;
@@ -1323,14 +1326,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
const struct sockaddr *from;
from = &sun_noname;
- /*
- * Don't check for space available in so2->so_rcv.
- * Unix domain sockets only check for space in the
- * sending sockbuf, and that check is performed one
- * level up the stack.
- */
if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
- from, m, control))
+ from, m, control))
control = NULL;
break;
}
@@ -1396,14 +1393,21 @@ uipc_ready(struct socket *so, struct mbuf *m, int count)
unp = sotounpcb(so);
- UNP_LINK_RLOCK();
+ UNP_PCB_LOCK(unp);
if ((unp2 = unp->unp_conn) == NULL) {
- UNP_LINK_RUNLOCK();
- for (int i = 0; i < count; i++)
- m = m_free(m);
- return (ECONNRESET);
+ UNP_PCB_UNLOCK(unp);
+ goto error;
+ }
+ if (unp != unp2) {
+ if (UNP_PCB_TRYLOCK(unp2) == 0) {
+ unp_pcb_hold(unp2);
+ UNP_PCB_UNLOCK(unp);
+ UNP_PCB_LOCK(unp2);
+ if (unp_pcb_rele(unp2))
+ goto error;
+ } else
+ UNP_PCB_UNLOCK(unp);
}
- UNP_PCB_LOCK(unp2);
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_rcv);
@@ -1413,9 +1417,12 @@ uipc_ready(struct socket *so, struct mbuf *m, int count)
SOCKBUF_UNLOCK(&so2->so_rcv);
UNP_PCB_UNLOCK(unp2);
- UNP_LINK_RUNLOCK();
return (error);
+ error:
+ for (int i = 0; i < count; i++)
+ m = m_free(m);
+ return (ECONNRESET);
}
static int
@@ -1778,24 +1785,8 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
sa = NULL;
}
- /*
- * The connector's (client's) credentials are copied from its
- * process structure at the time of connect() (which is now).
- */
- cru2x(td->td_ucred, &unp3->unp_peercred);
- unp3->unp_flags |= UNP_HAVEPC;
+ unp_copy_peercred(td, unp3, unp, unp2);
- /*
- * The receiver's (server's) credentials are copied from the
- * unp_peercred member of socket on which the former called
- * listen(); uipc_listen() cached that process's credentials
- * at that time so we can use them now.
- */
- memcpy(&unp->unp_peercred, &unp2->unp_peercred,
- sizeof(unp->unp_peercred));
- unp->unp_flags |= UNP_HAVEPC;
- if (unp2->unp_flags & UNP_WANTCRED)
- unp3->unp_flags |= UNP_WANTCRED;
UNP_PCB_UNLOCK(unp2);
unp2 = unp3;
unp_pcb_owned_lock2(unp2, unp, freed);
@@ -1838,6 +1829,27 @@ bad:
return (error);
}
+/*
+ * Set socket peer credentials at connection time.
+ *
+ * The client's PCB credentials are copied from its process structure. The
+ * server's PCB credentials are copied from the socket on which it called
+ * listen(2). uipc_listen cached that process's credentials at the time.
+ */
+void
+unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
+ struct unpcb *server_unp, struct unpcb *listen_unp)
+{
+ cru2x(td->td_ucred, &client_unp->unp_peercred);
+ client_unp->unp_flags |= UNP_HAVEPC;
+
+ memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
+ sizeof(server_unp->unp_peercred));
+ server_unp->unp_flags |= UNP_HAVEPC;
+ if (listen_unp->unp_flags & UNP_WANTCRED)
+ client_unp->unp_flags |= UNP_WANTCRED;
+}
+
static int
unp_connect2(struct socket *so, struct socket *so2, int req)
{
@@ -2026,7 +2038,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
if (freeunp == 0 && unp->unp_gencnt <= gencnt) {
xu->xu_len = sizeof *xu;
- xu->xu_unpp = unp;
+ xu->xu_unpp = (uintptr_t)unp;
/*
* XXX - need more locking here to protect against
* connect/disconnect races for SMP.
@@ -2043,10 +2055,10 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
unp->unp_conn->unp_addr->sun_len);
else
bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
- xu->unp_vnode = unp->unp_vnode;
- xu->unp_conn = unp->unp_conn;
- xu->xu_firstref = LIST_FIRST(&unp->unp_refs);
- xu->xu_nextref = LIST_NEXT(unp, unp_reflink);
+ xu->unp_vnode = (uintptr_t)unp->unp_vnode;
+ xu->unp_conn = (uintptr_t)unp->unp_conn;
+ xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
+ xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
xu->unp_gencnt = unp->unp_gencnt;
sotoxsocket(unp->unp_socket, &xu->xu_socket);
UNP_PCB_UNLOCK(unp);
@@ -2220,6 +2232,13 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
&fdep[i]->fde_caps);
unp_externalize_fp(fdep[i]->fde_file);
}
+
+ /*
+ * The new type indicates that the mbuf data refers to
+ * kernel resources that may need to be released before
+ * the mbuf is freed.
+ */
+ m_chtype(*controlp, MT_EXTCONTROL);
FILEDESC_XUNLOCK(fdesc);
free(fdep[0], M_FILECAPS);
} else {
diff --git a/freebsd/sys/mips/include/machine/cpuregs.h b/freebsd/sys/mips/include/machine/cpuregs.h
index b9978bd0..096f322d 100644
--- a/freebsd/sys/mips/include/machine/cpuregs.h
+++ b/freebsd/sys/mips/include/machine/cpuregs.h
@@ -60,6 +60,10 @@
#ifndef _MIPS_CPUREGS_H_
#define _MIPS_CPUREGS_H_
+#ifndef _KVM_MINIDUMP
+#include <machine/cca.h>
+#endif
+
/*
* Address space.
* 32-bit mips CPUS partition their 32-bit address space into four segments:
@@ -105,96 +109,6 @@
#define MIPS_IS_VALID_PTR(x) (MIPS_IS_KSEG0_ADDR(x) || \
MIPS_IS_KSEG1_ADDR(x))
-/*
- * Cache Coherency Attributes:
- * UC: Uncached.
- * UA: Uncached accelerated.
- * C: Cacheable, coherency unspecified.
- * CNC: Cacheable non-coherent.
- * CC: Cacheable coherent.
- * CCS: Cacheable coherent, shared read.
- * CCE: Cacheable coherent, exclusive read.
- * CCEW: Cacheable coherent, exclusive write.
- * CCUOW: Cacheable coherent, update on write.
- *
- * Note that some bits vary in meaning across implementations (and that the
- * listing here is no doubt incomplete) and that the optimal cached mode varies
- * between implementations. 0x02 is required to be UC and 0x03 is required to
- * be a least C.
- *
- * We define the following logical bits:
- * UNCACHED:
- * The optimal uncached mode for the target CPU type. This must
- * be suitable for use in accessing memory-mapped devices.
- * CACHED: The optional cached mode for the target CPU type.
- */
-
-#define MIPS_CCA_UC 0x02 /* Uncached. */
-#define MIPS_CCA_C 0x03 /* Cacheable, coherency unspecified. */
-
-#if defined(CPU_R4000) || defined(CPU_R10000)
-#define MIPS_CCA_CNC 0x03
-#define MIPS_CCA_CCE 0x04
-#define MIPS_CCA_CCEW 0x05
-
-#ifdef CPU_R4000
-#define MIPS_CCA_CCUOW 0x06
-#endif
-
-#ifdef CPU_R10000
-#define MIPS_CCA_UA 0x07
-#endif
-
-#define MIPS_CCA_CACHED MIPS_CCA_CCEW
-#endif /* defined(CPU_R4000) || defined(CPU_R10000) */
-
-#if defined(CPU_SB1)
-#define MIPS_CCA_CC 0x05 /* Cacheable Coherent. */
-#endif
-
-#if defined(CPU_MIPS74K)
-#define MIPS_CCA_UNCACHED 0x02
-#define MIPS_CCA_CACHED 0x03
-#endif
-
-/*
- * 1004K and 1074K cores, as well as interAptiv and proAptiv cores, support
- * Cacheable Coherent CCAs 0x04 and 0x05, as well as Cacheable non-Coherent
- * CCA 0x03 and Uncached Accelerated CCA 0x07
- */
-#if defined(CPU_MIPS1004K) || defined(CPU_MIPS1074K) || \
- defined(CPU_INTERAPTIV) || defined(CPU_PROAPTIV)
-#define MIPS_CCA_CNC 0x03
-#define MIPS_CCA_CCE 0x04
-#define MIPS_CCA_CCS 0x05
-#define MIPS_CCA_UA 0x07
-
-/* We use shared read CCA for CACHED CCA */
-#define MIPS_CCA_CACHED MIPS_CCA_CCS
-#endif
-
-#if defined(CPU_XBURST)
-#define MIPS_CCA_UA 0x01
-#define MIPS_CCA_WC MIPS_CCA_UA
-#endif
-
-#ifndef MIPS_CCA_UNCACHED
-#define MIPS_CCA_UNCACHED MIPS_CCA_UC
-#endif
-
-/*
- * If we don't know which cached mode to use and there is a cache coherent
- * mode, use it. If there is not a cache coherent mode, use the required
- * cacheable mode.
- */
-#ifndef MIPS_CCA_CACHED
-#ifdef MIPS_CCA_CC
-#define MIPS_CCA_CACHED MIPS_CCA_CC
-#else
-#define MIPS_CCA_CACHED MIPS_CCA_C
-#endif
-#endif
-
#define MIPS_PHYS_TO_XKPHYS(cca,x) \
((0x2ULL << 62) | ((unsigned long long)(cca) << 59) | (x))
#define MIPS_PHYS_TO_XKPHYS_CACHED(x) \
diff --git a/freebsd/sys/net/altq/altq.h b/freebsd/sys/net/altq/altq.h
index 9cb97bc2..35024461 100644
--- a/freebsd/sys/net/altq/altq.h
+++ b/freebsd/sys/net/altq/altq.h
@@ -76,8 +76,8 @@ struct altqreq {
/* simple token backet meter profile */
struct tb_profile {
- u_int rate; /* rate in bit-per-sec */
- u_int depth; /* depth in bytes */
+ u_int64_t rate; /* rate in bit-per-sec */
+ u_int32_t depth; /* depth in bytes */
};
#ifdef ALTQ3_COMPAT
@@ -203,4 +203,29 @@ struct pktcntr {
#include <net/altq/altq_var.h>
#endif
+/*
+ * Can't put these versions in the scheduler-specific headers and include
+ * them all here as that will cause build failure due to cross-including
+ * each other scheduler's private bits into each scheduler's
+ * implementation.
+ */
+#define CBQ_STATS_VERSION 0 /* Latest version of class_stats_t */
+#define CODEL_STATS_VERSION 0 /* Latest version of codel_ifstats */
+#define FAIRQ_STATS_VERSION 0 /* Latest version of fairq_classstats */
+#define HFSC_STATS_VERSION 1 /* Latest version of hfsc_classstats */
+#define PRIQ_STATS_VERSION 0 /* Latest version of priq_classstats */
+
+/* Return the latest stats version for the given scheduler. */
+static inline int altq_stats_version(int scheduler)
+{
+ switch (scheduler) {
+ case ALTQT_CBQ: return (CBQ_STATS_VERSION);
+ case ALTQT_CODEL: return (CODEL_STATS_VERSION);
+ case ALTQT_FAIRQ: return (FAIRQ_STATS_VERSION);
+ case ALTQT_HFSC: return (HFSC_STATS_VERSION);
+ case ALTQT_PRIQ: return (PRIQ_STATS_VERSION);
+ default: return (0);
+ }
+}
+
#endif /* _ALTQ_ALTQ_H_ */
diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c
index 1631d145..ac108bd1 100644
--- a/freebsd/sys/net/altq/altq_cbq.c
+++ b/freebsd/sys/net/altq/altq_cbq.c
@@ -454,7 +454,7 @@ cbq_remove_queue(struct pf_altq *a)
}
int
-cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
cbq_state_t *cbqp;
struct rm_class *cl;
diff --git a/freebsd/sys/net/altq/altq_cbq.h b/freebsd/sys/net/altq/altq_cbq.h
index 51e7cf9a..04bcab1a 100644
--- a/freebsd/sys/net/altq/altq_cbq.h
+++ b/freebsd/sys/net/altq/altq_cbq.h
@@ -99,6 +99,12 @@ typedef struct _cbq_class_stats_ {
struct codel_stats codel;
} class_stats_t;
+/*
+ * CBQ_STATS_VERSION is defined in altq.h to work around issues stemming
+ * from mixing of public-API and internal bits in each scheduler-specific
+ * header.
+ */
+
#ifdef ALTQ3_COMPAT
/*
* Define structures associated with IOCTLS for cbq.
diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c
index 37a44216..4a55cdbe 100644
--- a/freebsd/sys/net/altq/altq_codel.c
+++ b/freebsd/sys/net/altq/altq_codel.c
@@ -158,7 +158,7 @@ codel_remove_altq(struct pf_altq *a)
}
int
-codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
struct codel_if *cif;
struct codel_ifstats stats;
diff --git a/freebsd/sys/net/altq/altq_codel.h b/freebsd/sys/net/altq/altq_codel.h
index 8d7178b4..d7341a87 100644
--- a/freebsd/sys/net/altq/altq_codel.h
+++ b/freebsd/sys/net/altq/altq_codel.h
@@ -57,6 +57,12 @@ struct codel_ifstats {
struct pktcntr cl_dropcnt; /* dropped packet counter */
};
+/*
+ * CODEL_STATS_VERSION is defined in altq.h to work around issues stemming
+ * from mixing of public-API and internal bits in each scheduler-specific
+ * header.
+ */
+
#ifdef _KERNEL
#include <net/altq/altq_classq.h>
diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c
index 9979a1fa..a1bc3fdb 100644
--- a/freebsd/sys/net/altq/altq_fairq.c
+++ b/freebsd/sys/net/altq/altq_fairq.c
@@ -231,7 +231,7 @@ fairq_remove_queue(struct pf_altq *a)
}
int
-fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
struct fairq_if *pif;
struct fairq_class *cl;
diff --git a/freebsd/sys/net/altq/altq_fairq.h b/freebsd/sys/net/altq/altq_fairq.h
index 1a4b97dd..f1e3217c 100644
--- a/freebsd/sys/net/altq/altq_fairq.h
+++ b/freebsd/sys/net/altq/altq_fairq.h
@@ -82,6 +82,12 @@ struct fairq_classstats {
struct codel_stats codel;
};
+/*
+ * FAIRQ_STATS_VERSION is defined in altq.h to work around issues stemming
+ * from mixing of public-API and internal bits in each scheduler-specific
+ * header.
+ */
+
#ifdef _KERNEL
typedef struct fairq_bucket {
diff --git a/freebsd/sys/net/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c
index d31d55c3..8d8fdfdc 100644
--- a/freebsd/sys/net/altq/altq_hfsc.c
+++ b/freebsd/sys/net/altq/altq_hfsc.c
@@ -118,10 +118,10 @@ static struct hfsc_class *actlist_firstfit(struct hfsc_class *,
static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
-static __inline u_int64_t m2sm(u_int);
-static __inline u_int64_t m2ism(u_int);
+static __inline u_int64_t m2sm(u_int64_t);
+static __inline u_int64_t m2ism(u_int64_t);
static __inline u_int64_t d2dx(u_int);
-static u_int sm2m(u_int64_t);
+static u_int64_t sm2m(u_int64_t);
static u_int dx2d(u_int64_t);
static void sc2isc(struct service_curve *, struct internal_sc *);
@@ -132,7 +132,9 @@ static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
static void rtsc_min(struct runtime_sc *, struct internal_sc *,
u_int64_t, u_int64_t);
-static void get_class_stats(struct hfsc_classstats *,
+static void get_class_stats_v0(struct hfsc_classstats_v0 *,
+ struct hfsc_class *);
+static void get_class_stats_v1(struct hfsc_classstats_v1 *,
struct hfsc_class *);
static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t);
@@ -160,7 +162,7 @@ altqdev_decl(hfsc);
*/
#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
-#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
+#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
#ifdef ALTQ3_COMPAT
/* hif_list keeps all hfsc_if's allocated. */
@@ -228,7 +230,7 @@ hfsc_add_queue(struct pf_altq *a)
{
struct hfsc_if *hif;
struct hfsc_class *cl, *parent;
- struct hfsc_opts *opts;
+ struct hfsc_opts_v1 *opts;
struct service_curve rtsc, lssc, ulsc;
if ((hif = a->altq_disc) == NULL)
@@ -282,11 +284,15 @@ hfsc_remove_queue(struct pf_altq *a)
}
int
-hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
struct hfsc_if *hif;
struct hfsc_class *cl;
- struct hfsc_classstats stats;
+ union {
+ struct hfsc_classstats_v0 v0;
+ struct hfsc_classstats_v1 v1;
+ } stats;
+ size_t stats_size;
int error = 0;
if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
@@ -295,14 +301,27 @@ hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
if ((cl = clh_to_clp(hif, a->qid)) == NULL)
return (EINVAL);
- if (*nbytes < sizeof(stats))
+ if (version > HFSC_STATS_VERSION)
return (EINVAL);
- get_class_stats(&stats, cl);
+ memset(&stats, 0, sizeof(stats));
+ switch (version) {
+ case 0:
+ get_class_stats_v0(&stats.v0, cl);
+ stats_size = sizeof(struct hfsc_classstats_v0);
+ break;
+ case 1:
+ get_class_stats_v1(&stats.v1, cl);
+ stats_size = sizeof(struct hfsc_classstats_v1);
+ break;
+ }
+
+ if (*nbytes < stats_size)
+ return (EINVAL);
- if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0)
return (error);
- *nbytes = sizeof(stats);
+ *nbytes = stats_size;
return (0);
}
@@ -1359,27 +1378,17 @@ actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
* m: bits/sec
* d: msec
* internal service curve parameters
- * sm: (bytes/tsc_interval) << SM_SHIFT
- * ism: (tsc_count/byte) << ISM_SHIFT
- * dx: tsc_count
- *
- * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
- * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
- * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
- * digits in decimal using the following table.
+ * sm: (bytes/machclk tick) << SM_SHIFT
+ * ism: (machclk ticks/byte) << ISM_SHIFT
+ * dx: machclk ticks
*
- * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
- * ----------+-------------------------------------------------------
- * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
- * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
- * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. We
+ * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk
+ * frequency and at least 3 effective digits in decimal.
*
- * nsec/byte 80000 8000 800 80 8
- * ism(500MHz) 40000 4000 400 40 4
- * ism(200MHz) 16000 1600 160 16 1.6
*/
#define SM_SHIFT 24
-#define ISM_SHIFT 10
+#define ISM_SHIFT 14
#define SM_MASK ((1LL << SM_SHIFT) - 1)
#define ISM_MASK ((1LL << ISM_SHIFT) - 1)
@@ -1415,16 +1424,16 @@ seg_y2x(u_int64_t y, u_int64_t ism)
}
static __inline u_int64_t
-m2sm(u_int m)
+m2sm(u_int64_t m)
{
u_int64_t sm;
- sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq;
+ sm = (m << SM_SHIFT) / 8 / machclk_freq;
return (sm);
}
static __inline u_int64_t
-m2ism(u_int m)
+m2ism(u_int64_t m)
{
u_int64_t ism;
@@ -1444,13 +1453,13 @@ d2dx(u_int d)
return (dx);
}
-static u_int
+static u_int64_t
sm2m(u_int64_t sm)
{
u_int64_t m;
m = (sm * 8 * machclk_freq) >> SM_SHIFT;
- return ((u_int)m);
+ return (m);
}
static u_int
@@ -1599,7 +1608,89 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
}
static void
-get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
+get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl)
+{ /* Export cl's statistics into the v0 layout, whose service_curve_v0 rates are u_int. */
+ sp->class_id = cl->cl_id;
+ sp->class_handle = cl->cl_handle;
+
+#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) /* narrow the 64-bit sm2m() result; presumably uqmin() clamps at UINT_MAX -- confirm */
+
+ if (cl->cl_rsc != NULL) {
+ sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1));
+ sp->rsc.d = dx2d(cl->cl_rsc->dx);
+ sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2));
+ } else { /* no such curve on this class: report zeros */
+ sp->rsc.m1 = 0;
+ sp->rsc.d = 0;
+ sp->rsc.m2 = 0;
+ }
+ if (cl->cl_fsc != NULL) {
+ sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1));
+ sp->fsc.d = dx2d(cl->cl_fsc->dx);
+ sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2));
+ } else { /* no such curve on this class: report zeros */
+ sp->fsc.m1 = 0;
+ sp->fsc.d = 0;
+ sp->fsc.m2 = 0;
+ }
+ if (cl->cl_usc != NULL) {
+ sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1));
+ sp->usc.d = dx2d(cl->cl_usc->dx);
+ sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2));
+ } else { /* no such curve on this class: report zeros */
+ sp->usc.m1 = 0;
+ sp->usc.d = 0;
+ sp->usc.m2 = 0;
+ }
+
+#undef SATU32 /* helper is local to this function */
+
+ sp->total = cl->cl_total;
+ sp->cumul = cl->cl_cumul;
+
+ sp->d = cl->cl_d;
+ sp->e = cl->cl_e;
+ sp->vt = cl->cl_vt;
+ sp->f = cl->cl_f;
+
+ sp->initvt = cl->cl_initvt;
+ sp->vtperiod = cl->cl_vtperiod;
+ sp->parentperiod = cl->cl_parentperiod;
+ sp->nactive = cl->cl_nactive;
+ sp->vtoff = cl->cl_vtoff;
+ sp->cvtmax = cl->cl_cvtmax;
+ sp->myf = cl->cl_myf;
+ sp->cfmin = cl->cl_cfmin;
+ sp->cvtmin = cl->cl_cvtmin;
+ sp->myfadj = cl->cl_myfadj;
+ sp->vtadj = cl->cl_vtadj;
+
+ sp->cur_time = read_machclk(); /* clock value sampled while gathering the stats */
+ sp->machclk_freq = machclk_freq; /* stored into the v0 struct's u_int32_t field */
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+ sp->drop_cnt = cl->cl_stats.drop_cnt;
+ sp->period = cl->cl_stats.period;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED /* the sections below fill queue-type specific stats only when the matching q_is_*() test holds */
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); /* RIO reuses the cl_red pointer and the red[] array */
+#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
+}
+
+static void
+get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl)
{
sp->class_id = cl->cl_id;
sp->class_handle = cl->cl_handle;
diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h
index 0a9fcf95..67ec0036 100644
--- a/freebsd/sys/net/altq/altq_hfsc.h
+++ b/freebsd/sys/net/altq/altq_hfsc.h
@@ -43,12 +43,21 @@
extern "C" {
#endif
-struct service_curve {
+struct service_curve_v0 {
u_int m1; /* slope of the first segment in bits/sec */
u_int d; /* the x-projection of the first segment in msec */
u_int m2; /* slope of the second segment in bits/sec */
};
+struct service_curve_v1 {
+ u_int64_t m1; /* slope of the first segment in bits/sec */
+ u_int d; /* the x-projection of the first segment in msec */
+ u_int64_t m2; /* slope of the second segment in bits/sec */
+};
+
+/* Latest version of struct service_curve_vX */
+#define HFSC_SERVICE_CURVE_VERSION 1
+
/* special class handles */
#define HFSC_NULLCLASS_HANDLE 0
#define HFSC_MAX_CLASSES 64
@@ -67,12 +76,55 @@ struct service_curve {
#define HFSC_UPPERLIMITSC 4
#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
-struct hfsc_classstats {
+struct hfsc_classstats_v0 {
+ u_int class_id;
+ u_int32_t class_handle;
+ struct service_curve_v0 rsc;
+ struct service_curve_v0 fsc;
+ struct service_curve_v0 usc; /* upper limit service curve */
+
+ u_int64_t total; /* total work in bytes */
+ u_int64_t cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t d; /* deadline */
+ u_int64_t e; /* eligible time */
+ u_int64_t vt; /* virtual time */
+ u_int64_t f; /* fit time for upper-limit */
+
+ /* info helpful for debugging */
+ u_int64_t initvt; /* init virtual time */
+ u_int64_t vtoff; /* cl_vt_ipoff */
+ u_int64_t cvtmax; /* cl_maxvt */
+ u_int64_t myf; /* cl_myf */
+ u_int64_t cfmin; /* cl_mincf */
+ u_int64_t cvtmin; /* cl_mincvt */
+ u_int64_t myfadj; /* cl_myfadj */
+ u_int64_t vtadj; /* cl_vtadj */
+ u_int64_t cur_time;
+ u_int32_t machclk_freq;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+
+ u_int vtperiod; /* vt period sequence no */
+ u_int parentperiod; /* parent's vt period seqno */
+ int nactive; /* number of active children */
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3];
+ struct codel_stats codel;
+};
+
+struct hfsc_classstats_v1 {
u_int class_id;
u_int32_t class_handle;
- struct service_curve rsc;
- struct service_curve fsc;
- struct service_curve usc; /* upper limit service curve */
+ struct service_curve_v1 rsc;
+ struct service_curve_v1 fsc;
+ struct service_curve_v1 usc; /* upper limit service curve */
u_int64_t total; /* total work in bytes */
u_int64_t cumul; /* cumulative work in bytes
@@ -110,6 +162,12 @@ struct hfsc_classstats {
struct codel_stats codel;
};
+/*
+ * HFSC_STATS_VERSION is defined in altq.h to work around issues stemming
+ * from mixing of public-API and internal bits in each scheduler-specific
+ * header.
+ */
+
#ifdef ALTQ3_COMPAT
struct hfsc_interface {
char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
@@ -310,6 +368,35 @@ struct hfsc_if {
#endif
};
+/*
+ * Kernel code always wants the latest version - avoid a bunch of renames in
+ * the code to the current latest versioned name.
+ */
+#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION)
+
+#else /* _KERNEL */
+
+#ifdef PFIOC_USE_LATEST
+/*
+ * Maintaining in-tree consumers of the ioctl interface is easier when that
+ * code can be written in terms of old names that refer to the latest interface
+ * version as that reduces the required changes in the consumers to those
+ * that are functionally necessary to accommodate a new interface version.
+ */
+#define hfsc_classstats __CONCAT(hfsc_classstats_v, HFSC_STATS_VERSION)
+#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION)
+
+#else
+/*
+ * When building out-of-tree code that is written for the old interface,
+ * such as may exist in ports for example, resolve the old struct tags to
+ * the v0 versions.
+ */
+#define hfsc_classstats __CONCAT(hfsc_classstats_v, 0)
+#define service_curve __CONCAT(service_curve_v, 0)
+
+#endif /* PFIOC_USE_LATEST */
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c
index 46a014ad..ce0830eb 100644
--- a/freebsd/sys/net/altq/altq_priq.c
+++ b/freebsd/sys/net/altq/altq_priq.c
@@ -201,7 +201,7 @@ priq_remove_queue(struct pf_altq *a)
}
int
-priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
struct priq_if *pif;
struct priq_class *cl;
diff --git a/freebsd/sys/net/altq/altq_priq.h b/freebsd/sys/net/altq/altq_priq.h
index fcbfee98..1a824d60 100644
--- a/freebsd/sys/net/altq/altq_priq.h
+++ b/freebsd/sys/net/altq/altq_priq.h
@@ -112,6 +112,12 @@ struct priq_classstats {
struct codel_stats codel;
};
+/*
+ * PRIQ_STATS_VERSION is defined in altq.h to work around issues stemming
+ * from mixing of public-API and internal bits in each scheduler-specific
+ * header.
+ */
+
#ifdef ALTQ3_COMPAT
struct priq_class_stats {
struct priq_interface iface;
diff --git a/freebsd/sys/net/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c
index 47a353fc..6da36129 100644
--- a/freebsd/sys/net/altq/altq_subr.c
+++ b/freebsd/sys/net/altq/altq_subr.c
@@ -294,12 +294,12 @@ altq_assert(file, line, failedexpr)
/*
* internal representation of token bucket parameters
- * rate: byte_per_unittime << 32
- * (((bits_per_sec) / 8) << 32) / machclk_freq
- * depth: byte << 32
+ * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq
+ * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
+ * depth: byte << TBR_SHIFT
*
*/
-#define TBR_SHIFT 32
+#define TBR_SHIFT 29
#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
@@ -396,7 +396,20 @@ tbr_set(ifq, profile)
if (tbr->tbr_rate > 0)
tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
else
- tbr->tbr_filluptime = 0xffffffffffffffffLL;
+ tbr->tbr_filluptime = LLONG_MAX;
+ /*
+ * The longest time between tbr_dequeue() calls will be about 1
+ * system tick, as the callout that drives it is scheduled once per
+ * tick. The refill-time detection logic in tbr_dequeue() can only
+ * properly detect the passage of up to LLONG_MAX machclk ticks.
+ * Therefore, in order for this logic to function properly in the
+ * extreme case, the maximum value of tbr_filluptime should be
+ * LLONG_MAX less one system tick's worth of machclk ticks less
+ * some additional slop factor (here one more system tick's worth
+ * of machclk ticks).
+ */
+ if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
+ tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
tbr->tbr_token = tbr->tbr_depth;
tbr->tbr_last = read_machclk();
tbr->tbr_lastop = ALTDQ_REMOVE;
@@ -458,29 +471,6 @@ tbr_timeout(arg)
}
/*
- * get token bucket regulator profile
- */
-int
-tbr_get(ifq, profile)
- struct ifaltq *ifq;
- struct tb_profile *profile;
-{
- struct tb_regulator *tbr;
-
- IFQ_LOCK(ifq);
- if ((tbr = ifq->altq_tbr) == NULL) {
- profile->rate = 0;
- profile->depth = 0;
- } else {
- profile->rate =
- (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
- profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
- }
- IFQ_UNLOCK(ifq);
- return (0);
-}
-
-/*
* attach a discipline to the interface. if one already exists, it is
* overridden.
* Locking is done in the discipline specific attach functions. Basically
@@ -735,34 +725,34 @@ altq_remove_queue(struct pf_altq *a)
* copyout operations, also it is not yet clear which lock to use.
*/
int
-altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
int error = 0;
switch (a->scheduler) {
#ifdef ALTQ_CBQ
case ALTQT_CBQ:
- error = cbq_getqstats(a, ubuf, nbytes);
+ error = cbq_getqstats(a, ubuf, nbytes, version);
break;
#endif
#ifdef ALTQ_PRIQ
case ALTQT_PRIQ:
- error = priq_getqstats(a, ubuf, nbytes);
+ error = priq_getqstats(a, ubuf, nbytes, version);
break;
#endif
#ifdef ALTQ_HFSC
case ALTQT_HFSC:
- error = hfsc_getqstats(a, ubuf, nbytes);
+ error = hfsc_getqstats(a, ubuf, nbytes, version);
break;
#endif
#ifdef ALTQ_FAIRQ
case ALTQT_FAIRQ:
- error = fairq_getqstats(a, ubuf, nbytes);
+ error = fairq_getqstats(a, ubuf, nbytes, version);
break;
#endif
#ifdef ALTQ_CODEL
case ALTQT_CODEL:
- error = codel_getqstats(a, ubuf, nbytes);
+ error = codel_getqstats(a, ubuf, nbytes, version);
break;
#endif
default:
diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h
index 1909599d..47326a03 100644
--- a/freebsd/sys/net/altq/altq_var.h
+++ b/freebsd/sys/net/altq/altq_var.h
@@ -196,7 +196,6 @@ u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
void altq_assert(const char *, int, const char *);
int tbr_set(struct ifaltq *, struct tb_profile *);
-int tbr_get(struct ifaltq *, struct tb_profile *);
int altq_pfattach(struct pf_altq *);
int altq_pfdetach(struct pf_altq *);
@@ -204,40 +203,40 @@ int altq_add(struct pf_altq *);
int altq_remove(struct pf_altq *);
int altq_add_queue(struct pf_altq *);
int altq_remove_queue(struct pf_altq *);
-int altq_getqstats(struct pf_altq *, void *, int *);
+int altq_getqstats(struct pf_altq *, void *, int *, int);
int cbq_pfattach(struct pf_altq *);
int cbq_add_altq(struct pf_altq *);
int cbq_remove_altq(struct pf_altq *);
int cbq_add_queue(struct pf_altq *);
int cbq_remove_queue(struct pf_altq *);
-int cbq_getqstats(struct pf_altq *, void *, int *);
+int cbq_getqstats(struct pf_altq *, void *, int *, int);
int codel_pfattach(struct pf_altq *);
int codel_add_altq(struct pf_altq *);
int codel_remove_altq(struct pf_altq *);
-int codel_getqstats(struct pf_altq *, void *, int *);
+int codel_getqstats(struct pf_altq *, void *, int *, int);
int priq_pfattach(struct pf_altq *);
int priq_add_altq(struct pf_altq *);
int priq_remove_altq(struct pf_altq *);
int priq_add_queue(struct pf_altq *);
int priq_remove_queue(struct pf_altq *);
-int priq_getqstats(struct pf_altq *, void *, int *);
+int priq_getqstats(struct pf_altq *, void *, int *, int);
int hfsc_pfattach(struct pf_altq *);
int hfsc_add_altq(struct pf_altq *);
int hfsc_remove_altq(struct pf_altq *);
int hfsc_add_queue(struct pf_altq *);
int hfsc_remove_queue(struct pf_altq *);
-int hfsc_getqstats(struct pf_altq *, void *, int *);
+int hfsc_getqstats(struct pf_altq *, void *, int *, int);
int fairq_pfattach(struct pf_altq *);
int fairq_add_altq(struct pf_altq *);
int fairq_remove_altq(struct pf_altq *);
int fairq_add_queue(struct pf_altq *);
int fairq_remove_queue(struct pf_altq *);
-int fairq_getqstats(struct pf_altq *, void *, int *);
+int fairq_getqstats(struct pf_altq *, void *, int *, int);
#endif /* _KERNEL */
#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/freebsd/sys/net/altq/if_altq.h b/freebsd/sys/net/altq/if_altq.h
index 3dcc96c2..7a093500 100644
--- a/freebsd/sys/net/altq/if_altq.h
+++ b/freebsd/sys/net/altq/if_altq.h
@@ -143,7 +143,11 @@ struct tb_regulator {
#define ALTRQ_PURGE 1 /* purge all packets */
#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#ifdef ALTQ
#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#else
+#define ALTQ_IS_ENABLED(ifq) 0
+#endif
#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index 57aff5b8..357fd1b1 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -124,6 +124,11 @@ struct bpf_if {
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
+#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
+
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
#define PRINET 26 /* interruptible */
@@ -217,7 +222,7 @@ SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
bpf_stats_sysctl, "bpf statistics portal");
-static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(bpf_optimize_writers), 0,
@@ -1974,8 +1979,13 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
return (EINVAL);
}
#ifdef BPF_JITTER
- /* Filter is copied inside fcode and is perfectly valid. */
- jfunc = bpf_jitter(fcode, flen);
+ if (cmd != BIOCSETWF) {
+ /*
+ * Filter is copied inside fcode and is
+ * perfectly valid.
+ */
+ jfunc = bpf_jitter(fcode, flen);
+ }
#endif
}
diff --git a/freebsd/sys/net/bpf_jitter.c b/freebsd/sys/net/bpf_jitter.c
index ac3a6ddd..85782597 100644
--- a/freebsd/sys/net/bpf_jitter.c
+++ b/freebsd/sys/net/bpf_jitter.c
@@ -103,11 +103,13 @@ void
bpf_destroy_jit_filter(bpf_jit_filter *filter)
{
- if (filter->func != bpf_jit_accept_all)
- bpf_jit_free(filter->func, filter->size);
#ifdef _KERNEL
+ if (filter->func != bpf_jit_accept_all)
+ free(filter->func, M_BPFJIT);
free(filter, M_BPFJIT);
#else
+ if (filter->func != bpf_jit_accept_all)
+ munmap(filter->func, filter->size);
free(filter);
#endif
}
diff --git a/freebsd/sys/net/bpf_jitter.h b/freebsd/sys/net/bpf_jitter.h
index a7c7cd9f..23049d14 100644
--- a/freebsd/sys/net/bpf_jitter.h
+++ b/freebsd/sys/net/bpf_jitter.h
@@ -88,6 +88,5 @@ void bpf_destroy_jit_filter(bpf_jit_filter *filter);
struct bpf_insn;
bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
-void bpf_jit_free(void *, size_t);
#endif /* _NET_BPF_JITTER_H_ */
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index 95093cff..2ce9204b 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -161,11 +161,6 @@ struct xbpf_d {
u_int64_t bd_spare[4];
};
-#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
-#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
-#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
-#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
-
#define BPFIF_FLAG_DYING 1 /* Reject new bpf consumers */
#endif
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 2a30f4a3..9a70d6a1 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -196,13 +196,13 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *,
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-static VNET_DEFINE(int, lacp_debug);
+VNET_DEFINE_STATIC(int, lacp_debug);
#define V_lacp_debug VNET(lacp_debug)
SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
&VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
-static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1;
SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0,
"LACP strict protocol compliance default");
@@ -713,6 +713,8 @@ lacp_disable_distributing(struct lacp_port *lp)
}
lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
+ if_link_state_change(sc->sc_ifp,
+ sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
@@ -747,6 +749,9 @@ lacp_enable_distributing(struct lacp_port *lp)
} else
/* try to become the active aggregator */
lacp_select_active_aggregator(lsc);
+
+ if_link_state_change(sc->sc_ifp,
+ sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
@@ -1106,6 +1111,7 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_100_VG:
case IFM_100_T2:
case IFM_100_T:
+ case IFM_100_SGMII:
key = IFM_100_TX;
break;
case IFM_1000_SX:
@@ -1137,14 +1143,31 @@ lacp_compose_key(struct lacp_port *lp)
break;
case IFM_2500_KX:
case IFM_2500_T:
+ case IFM_2500_X:
key = IFM_2500_KX;
break;
case IFM_5000_T:
+ case IFM_5000_KR:
+ case IFM_5000_KR_S:
+ case IFM_5000_KR1:
key = IFM_5000_T;
break;
case IFM_50G_PCIE:
case IFM_50G_CR2:
case IFM_50G_KR2:
+ case IFM_50G_SR2:
+ case IFM_50G_LR2:
+ case IFM_50G_LAUI2_AC:
+ case IFM_50G_LAUI2:
+ case IFM_50G_AUI2_AC:
+ case IFM_50G_AUI2:
+ case IFM_50G_CP:
+ case IFM_50G_SR:
+ case IFM_50G_LR:
+ case IFM_50G_FR:
+ case IFM_50G_KR_PAM4:
+ case IFM_50G_AUI1_AC:
+ case IFM_50G_AUI1:
key = IFM_50G_PCIE;
break;
case IFM_56G_R4:
@@ -1157,6 +1180,12 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_25G_LR:
case IFM_25G_ACC:
case IFM_25G_AOC:
+ case IFM_25G_T:
+ case IFM_25G_CR_S:
+ case IFM_25G_CR1:
+ case IFM_25G_KR_S:
+ case IFM_25G_AUI:
+ case IFM_25G_KR1:
key = IFM_25G_PCIE;
break;
case IFM_40G_CR4:
@@ -1164,14 +1193,50 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_40G_LR4:
case IFM_40G_XLPPI:
case IFM_40G_KR4:
+ case IFM_40G_XLAUI:
+ case IFM_40G_XLAUI_AC:
+ case IFM_40G_ER4:
key = IFM_40G_CR4;
break;
case IFM_100G_CR4:
case IFM_100G_SR4:
case IFM_100G_KR4:
case IFM_100G_LR4:
+ case IFM_100G_CAUI4_AC:
+ case IFM_100G_CAUI4:
+ case IFM_100G_AUI4_AC:
+ case IFM_100G_AUI4:
+ case IFM_100G_CR_PAM4:
+ case IFM_100G_KR_PAM4:
+ case IFM_100G_CP2:
+ case IFM_100G_SR2:
+ case IFM_100G_DR:
+ case IFM_100G_KR2_PAM4:
+ case IFM_100G_CAUI2_AC:
+ case IFM_100G_CAUI2:
+ case IFM_100G_AUI2_AC:
+ case IFM_100G_AUI2:
key = IFM_100G_CR4;
break;
+ case IFM_200G_CR4_PAM4:
+ case IFM_200G_SR4:
+ case IFM_200G_FR4:
+ case IFM_200G_LR4:
+ case IFM_200G_DR4:
+ case IFM_200G_KR4_PAM4:
+ case IFM_200G_AUI4_AC:
+ case IFM_200G_AUI4:
+ case IFM_200G_AUI8_AC:
+ case IFM_200G_AUI8:
+ key = IFM_200G_CR4_PAM4;
+ break;
+ case IFM_400G_FR8:
+ case IFM_400G_LR8:
+ case IFM_400G_DR4:
+ case IFM_400G_AUI8_AC:
+ case IFM_400G_AUI8:
+ key = IFM_400G_FR8;
+ break;
default:
key = subtype;
break;
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index d4c18b46..4d3c303c 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -300,7 +300,7 @@ int ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);
-static VNET_DEFINE(int, if_indexlim) = 8;
+VNET_DEFINE_STATIC(int, if_indexlim) = 8;
/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);
@@ -1769,29 +1769,35 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd)
void
if_addr_rlock(struct ifnet *ifp)
{
-
- IF_ADDR_RLOCK(ifp);
+ MPASS(*(uint64_t *)&ifp->if_addr_et == 0);
+ epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et);
}
void
if_addr_runlock(struct ifnet *ifp)
{
-
- IF_ADDR_RUNLOCK(ifp);
+ epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et);
+#ifdef INVARIANTS
+ bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker));
+#endif
}
void
if_maddr_rlock(if_t ifp)
{
- IF_ADDR_RLOCK((struct ifnet *)ifp);
+ MPASS(*(uint64_t *)&ifp->if_maddr_et == 0);
+ epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et);
}
void
if_maddr_runlock(if_t ifp)
{
- IF_ADDR_RUNLOCK((struct ifnet *)ifp);
+ epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et);
+#ifdef INVARIANTS
+ bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker));
+#endif
}
/*
@@ -1935,7 +1941,7 @@ ifa_ifwithaddr(const struct sockaddr *addr)
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1978,7 +1984,7 @@ ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
continue;
@@ -2008,7 +2014,7 @@ ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue;
@@ -2041,7 +2047,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
u_int af = addr->sa_family;
const char *addr_data = addr->sa_data, *cplim;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
/*
* AF_LINK addresses can be looked up directly by their index number,
* so do that if we can.
@@ -2078,7 +2084,6 @@ next: continue;
*/
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) {
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
} else {
@@ -2137,7 +2142,8 @@ ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
if (af >= AF_MAX)
return (NULL);
- MPASS(in_epoch());
+
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != af)
continue;
@@ -2301,6 +2307,7 @@ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int (*vlan_tag_p)(struct ifnet *, uint16_t *);
+int (*vlan_pcp_p)(struct ifnet *, uint16_t *);
int (*vlan_setcookie_p)(struct ifnet *, void *);
void *(*vlan_cookie_p)(struct ifnet *);
@@ -2988,8 +2995,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
#ifdef COMPAT_FREEBSD32
caddr_t saved_data = NULL;
struct ifmediareq ifmr;
-#endif
struct ifmediareq *ifmrp;
+#endif
struct ifnet *ifp;
struct ifreq *ifr;
int error;
@@ -3035,8 +3042,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
#endif
}
- ifmrp = NULL;
#ifdef COMPAT_FREEBSD32
+ ifmrp = NULL;
switch (cmd) {
case SIOCGIFMEDIA32:
case SIOCGIFXMEDIA32:
@@ -3564,6 +3571,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
error = ENOMEM;
goto free_llsa_out;
}
+ ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
ifma_link);
} else
@@ -3576,6 +3584,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
* referenced link layer address. Add the primary address to the
* ifnet address list.
*/
+ ifma->ifma_flags |= IFMA_F_ENQUEUED;
CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
if (retifma != NULL)
@@ -3776,9 +3785,10 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
if (--ifma->ifma_refcount > 0)
return 0;
- if (ifp != NULL && detaching == 0)
+ if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
-
+ ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
/*
* If this ifma is a network-layer ifma, a link-layer ifma may
* have been associated with it. Release it first if so.
@@ -3791,8 +3801,11 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
ll_ifma->ifma_ifp = NULL; /* XXX */
if (--ll_ifma->ifma_refcount == 0) {
if (ifp != NULL) {
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
- ifma_link);
+ if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
+ ifma_link);
+ ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
}
if_freemulti(ll_ifma);
}
@@ -3922,6 +3935,44 @@ if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
}
/*
+ * Tunnel interfaces can nest, and a misconfigured stack of tunnels can
+ * recurse without end; prevent this by detecting loops.  A high nesting
+ * level may also exhaust the stack; prevent this with an upper limit.
+ *
+ * Return 0 if the tunnel nesting count is less than or equal to the limit,
+ * EIO on a loop or excess nesting, ENOMEM if tag allocation fails.
+ */
+int
+if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
+ int limit)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1; /* starts at 1 and grows by one per matching tag found on m */
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { /* iterate m_tag_locate() results for this cookie until NULL */
+ if (*(struct ifnet **)(mtag + 1) == ifp) { /* payload after the tag header is the ifnet pointer stored at the end of this function */
+ log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > limit) { /* checked only after the whole tag chain has been scanned for loops */
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ if_name(ifp), count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); /* payload sized for one ifnet pointer */
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp; /* record this interface for later loop checks */
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+/*
* Get the link layer address that was read from the hardware at attach.
*
* This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 3e774934..aa56be48 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -231,7 +231,7 @@ struct bridge_softc {
u_char sc_defaddr[6]; /* Default MAC address */
};
-static VNET_DEFINE(struct mtx, bridge_list_mtx);
+VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx);
#define V_bridge_list_mtx VNET(bridge_list_mtx)
static eventhandler_tag bridge_detach_cookie;
@@ -356,59 +356,59 @@ SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
/* only pass IP[46] packets when pfil is enabled */
-static VNET_DEFINE(int, pfil_onlyip) = 1;
+VNET_DEFINE_STATIC(int, pfil_onlyip) = 1;
#define V_pfil_onlyip VNET(pfil_onlyip)
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
"Only pass IP packets when pfil is enabled");
/* run pfil hooks on the bridge interface */
-static VNET_DEFINE(int, pfil_bridge) = 1;
+VNET_DEFINE_STATIC(int, pfil_bridge) = 1;
#define V_pfil_bridge VNET(pfil_bridge)
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
"Packet filter on the bridge interface");
/* layer2 filter with ipfw */
-static VNET_DEFINE(int, pfil_ipfw);
+VNET_DEFINE_STATIC(int, pfil_ipfw);
#define V_pfil_ipfw VNET(pfil_ipfw)
/* layer2 ARP filter with ipfw */
-static VNET_DEFINE(int, pfil_ipfw_arp);
+VNET_DEFINE_STATIC(int, pfil_ipfw_arp);
#define V_pfil_ipfw_arp VNET(pfil_ipfw_arp)
SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
"Filter ARP packets through IPFW layer2");
/* run pfil hooks on the member interface */
-static VNET_DEFINE(int, pfil_member) = 1;
+VNET_DEFINE_STATIC(int, pfil_member) = 1;
#define V_pfil_member VNET(pfil_member)
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
"Packet filter on the member interface");
/* run pfil hooks on the physical interface for locally destined packets */
-static VNET_DEFINE(int, pfil_local_phys);
+VNET_DEFINE_STATIC(int, pfil_local_phys);
#define V_pfil_local_phys VNET(pfil_local_phys)
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
"Packet filter on the physical interface for locally destined packets");
/* log STP state changes */
-static VNET_DEFINE(int, log_stp);
+VNET_DEFINE_STATIC(int, log_stp);
#define V_log_stp VNET(log_stp)
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
"Log STP state changes");
/* share MAC with first bridge member */
-static VNET_DEFINE(int, bridge_inherit_mac);
+VNET_DEFINE_STATIC(int, bridge_inherit_mac);
#define V_bridge_inherit_mac VNET(bridge_inherit_mac)
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
"Inherit MAC address from the first bridge member");
-static VNET_DEFINE(int, allow_llz_overlap) = 0;
+VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0;
#define V_allow_llz_overlap VNET(allow_llz_overlap)
SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
@@ -512,7 +512,7 @@ const struct bridge_control bridge_control_table[] = {
};
const int bridge_control_table_size = nitems(bridge_control_table);
-static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list);
+VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list);
#define V_bridge_list VNET(bridge_list)
#define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \
"if_bridge list", NULL, MTX_DEF)
@@ -520,7 +520,7 @@ static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list);
#define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx)
#define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx)
-static VNET_DEFINE(struct if_clone *, bridge_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner);
#define V_bridge_cloner VNET(bridge_cloner)
static const char bridge_name[] = "bridge";
diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c
index 5a9c20c2..1fa79766 100644
--- a/freebsd/sys/net/if_clone.c
+++ b/freebsd/sys/net/if_clone.c
@@ -110,7 +110,7 @@ static int ifc_simple_destroy(struct if_clone *, struct ifnet *);
static struct mtx if_cloners_mtx;
MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF);
-static VNET_DEFINE(int, if_cloners_count);
+VNET_DEFINE_STATIC(int, if_cloners_count);
VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
#define V_if_cloners_count VNET(if_cloners_count)
diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c
index b3ff7ff8..1c0bc166 100644
--- a/freebsd/sys/net/if_disc.c
+++ b/freebsd/sys/net/if_disc.c
@@ -78,7 +78,7 @@ static void disc_clone_destroy(struct ifnet *);
static const char discname[] = "disc";
static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
-static VNET_DEFINE(struct if_clone *, disc_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, disc_cloner);
#define V_disc_cloner VNET(disc_cloner)
static int
diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c
index 6b5671c1..b12e0bb8 100644
--- a/freebsd/sys/net/if_edsc.c
+++ b/freebsd/sys/net/if_edsc.c
@@ -74,7 +74,7 @@ struct edsc_softc {
/*
* Attach to the interface cloning framework.
*/
-static VNET_DEFINE(struct if_clone *, edsc_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, edsc_cloner);
#define V_edsc_cloner VNET(edsc_cloner)
static int edsc_clone_create(struct if_clone *, int, caddr_t);
static void edsc_clone_destroy(struct ifnet *);
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index 8ca8aa4d..ebfbf5cb 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -88,9 +88,9 @@ struct enchdr {
struct enc_softc {
struct ifnet *sc_ifp;
};
-static VNET_DEFINE(struct enc_softc *, enc_sc);
+VNET_DEFINE_STATIC(struct enc_softc *, enc_sc);
#define V_enc_sc VNET(enc_sc)
-static VNET_DEFINE(struct if_clone *, enc_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, enc_cloner);
#define V_enc_cloner VNET(enc_cloner)
static int enc_ioctl(struct ifnet *, u_long, caddr_t);
@@ -113,10 +113,10 @@ static const char encname[] = "enc";
* some changes to the packet, e.g. address translation. If PFIL hook
* consumes mbuf, nothing will be captured.
*/
-static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE;
-static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE;
-static VNET_DEFINE(int, filter_mask_out) = IPSEC_ENC_BEFORE;
-static VNET_DEFINE(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER;
+VNET_DEFINE_STATIC(int, filter_mask_in) = IPSEC_ENC_BEFORE;
+VNET_DEFINE_STATIC(int, bpf_mask_in) = IPSEC_ENC_BEFORE;
+VNET_DEFINE_STATIC(int, filter_mask_out) = IPSEC_ENC_BEFORE;
+VNET_DEFINE_STATIC(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER;
#define V_filter_mask_in VNET(filter_mask_in)
#define V_bpf_mask_in VNET(bpf_mask_in)
#define V_filter_mask_out VNET(filter_mask_out)
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index d727bc2c..69ff3efc 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -109,6 +109,7 @@ static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);
static const char epairname[] = "epair";
+static unsigned int next_index = 0;
/* Netisr related definitions and sysctl. */
static struct netisr_handler epair_nh = {
@@ -181,7 +182,7 @@ STAILQ_HEAD(eid_list, epair_ifp_drain);
static MALLOC_DEFINE(M_EPAIR, epairname,
"Pair of virtual cross-over connected Ethernet-like interfaces");
-static VNET_DEFINE(struct if_clone *, epair_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
#define V_epair_cloner VNET(epair_cloner)
/*
@@ -845,12 +846,22 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
/*
* Calculate the etheraddr hashing the hostid and the
- * interface index. The result would be hopefully unique
+ * interface index. The result will hopefully be unique.
+ * Note that the "a" component of an epair instance may get moved
+ * to a different VNET after creation. In that case its index
+ * will be freed and the index can get reused by a new epair instance.
+ * Make sure we do not create the same etheraddr again.
*/
getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
if (hostid == 0)
arc4rand(&hostid, sizeof(hostid), 0);
- key[0] = (uint32_t)ifp->if_index;
+
+ if (ifp->if_index > next_index)
+ next_index = ifp->if_index;
+ else
+ next_index++;
+
+ key[0] = (uint32_t)next_index;
key[1] = (uint32_t)(hostid & 0xffffffff);
key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff);
hash = jenkins_hash32(key, 3, 0);
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 3893d331..01e757e5 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -463,7 +463,8 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
uint8_t pcp;
pcp = ifp->if_pcp;
- if (pcp != IFNET_PCP_NONE && !ether_set_pcp(&m, ifp, pcp))
+ if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN &&
+ !ether_set_pcp(&m, ifp, pcp))
return (0);
if (PFIL_HOOKED(&V_link_pfil_hook)) {
@@ -515,7 +516,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
}
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
- random_harvest_queue_ether(m, sizeof(*m), 2);
+ random_harvest_queue_ether(m, sizeof(*m));
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -1293,7 +1294,7 @@ static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
"for consistency");
-static VNET_DEFINE(int, soft_pad);
+VNET_DEFINE_STATIC(int, soft_pad);
#define V_soft_pad VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(soft_pad), 0,
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index 6a90538a..5a67e7ff 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/priv.h>
#include <sys/proc.h>
-#include <sys/protosw.h>
#include <sys/conf.h>
#include <machine/cpu.h>
@@ -87,8 +86,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_ecn.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/scope6_var.h>
-#include <netinet6/ip6protosw.h>
#endif /* INET6 */
#include <netinet/ip_encap.h>
@@ -100,42 +97,24 @@ __FBSDID("$FreeBSD$");
static const char gifname[] = "gif";
-/*
- * gif_mtx protects a per-vnet gif_softc_list.
- */
-static VNET_DEFINE(struct mtx, gif_mtx);
-#define V_gif_mtx VNET(gif_mtx)
-static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
-static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
-#define V_gif_softc_list VNET(gif_softc_list)
+MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static struct sx gif_ioctl_sx;
SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
-#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \
- NULL, MTX_DEF)
-#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx)
-#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx)
-#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx)
-
void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void (*ng_gif_attach_p)(struct ifnet *ifp);
void (*ng_gif_detach_p)(struct ifnet *ifp);
-static int gif_check_nesting(struct ifnet *, struct mbuf *);
-static int gif_set_tunnel(struct ifnet *, struct sockaddr *,
- struct sockaddr *);
-static void gif_delete_tunnel(struct ifnet *);
+static void gif_delete_tunnel(struct gif_softc *);
static int gif_ioctl(struct ifnet *, u_long, caddr_t);
static int gif_transmit(struct ifnet *, struct mbuf *);
static void gif_qflush(struct ifnet *);
static int gif_clone_create(struct if_clone *, int, caddr_t);
static void gif_clone_destroy(struct ifnet *);
-static VNET_DEFINE(struct if_clone *, gif_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, gif_cloner);
#define V_gif_cloner VNET(gif_cloner)
-static int gifmodevent(module_t, int, void *);
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
"Generic Tunnel Interface");
@@ -150,26 +129,11 @@ static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
*/
#define MAX_GIF_NEST 1
#endif
-static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
+VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST;
#define V_max_gif_nesting VNET(max_gif_nesting)
SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
-/*
- * By default, we disallow creation of multiple tunnels between the same
- * pair of addresses. Some applications require this functionality so
- * we allow control over this check here.
- */
-#ifdef XBONEHACK
-static VNET_DEFINE(int, parallel_tunnels) = 1;
-#else
-static VNET_DEFINE(int, parallel_tunnels) = 0;
-#endif
-#define V_parallel_tunnels VNET(parallel_tunnels)
-SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
- "Allow parallel tunnels?");
-
static int
gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
@@ -182,20 +146,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc->gif_fibnum = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
GIF2IFP(sc) = if_alloc(IFT_GIF);
- GIF_LOCK_INIT(sc);
GIF2IFP(sc)->if_softc = sc;
if_initname(GIF2IFP(sc), gifname, unit);
GIF2IFP(sc)->if_addrlen = 0;
GIF2IFP(sc)->if_mtu = GIF_MTU;
GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
-#if 0
- /* turn off ingress filter */
- GIF2IFP(sc)->if_flags |= IFF_LINK2;
-#endif
GIF2IFP(sc)->if_ioctl = gif_ioctl;
- GIF2IFP(sc)->if_transmit = gif_transmit;
- GIF2IFP(sc)->if_qflush = gif_qflush;
+ GIF2IFP(sc)->if_transmit = gif_transmit;
+ GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_output = gif_output;
GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
@@ -204,9 +163,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (ng_gif_attach_p != NULL)
(*ng_gif_attach_p)(GIF2IFP(sc));
- GIF_LIST_LOCK();
- LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
- GIF_LIST_UNLOCK();
return (0);
}
@@ -217,10 +173,7 @@ gif_clone_destroy(struct ifnet *ifp)
sx_xlock(&gif_ioctl_sx);
sc = ifp->if_softc;
- gif_delete_tunnel(ifp);
- GIF_LIST_LOCK();
- LIST_REMOVE(sc, gif_list);
- GIF_LIST_UNLOCK();
+ gif_delete_tunnel(sc);
if (ng_gif_detach_p != NULL)
(*ng_gif_detach_p)(ifp);
bpfdetach(ifp);
@@ -228,8 +181,8 @@ gif_clone_destroy(struct ifnet *ifp)
ifp->if_softc = NULL;
sx_xunlock(&gif_ioctl_sx);
+ GIF_WAIT();
if_free(ifp);
- GIF_LOCK_DESTROY(sc);
free(sc, M_GIF);
}
@@ -237,10 +190,14 @@ static void
vnet_gif_init(const void *unused __unused)
{
- LIST_INIT(&V_gif_softc_list);
- GIF_LIST_LOCK_INIT();
V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
gif_clone_destroy, 0);
+#ifdef INET
+ in_gif_init();
+#endif
+#ifdef INET6
+ in6_gif_init();
+#endif
}
VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gif_init, NULL);
@@ -250,7 +207,12 @@ vnet_gif_uninit(const void *unused __unused)
{
if_clone_detach(V_gif_cloner);
- GIF_LIST_LOCK_DESTROY();
+#ifdef INET
+ in_gif_uninit();
+#endif
+#ifdef INET6
+ in6_gif_uninit();
+#endif
}
VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gif_uninit, NULL);
@@ -278,67 +240,28 @@ static moduledata_t gif_mod = {
DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_gif, 1);
-int
-gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+struct gif_list *
+gif_hashinit(void)
{
- GIF_RLOCK_TRACKER;
- const struct ip *ip;
- struct gif_softc *sc;
- int ret;
-
- sc = (struct gif_softc *)arg;
- if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
- return (0);
+ struct gif_list *hash;
+ int i;
- ret = 0;
- GIF_RLOCK(sc);
+ hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
+ M_GIF, M_WAITOK);
+ for (i = 0; i < GIF_HASH_SIZE; i++)
+ CK_LIST_INIT(&hash[i]);
- /* no physical address */
- if (sc->gif_family == 0)
- goto done;
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
-#endif
-#ifdef INET6
- case IPPROTO_IPV6:
-#endif
- case IPPROTO_ETHERIP:
- break;
- default:
- goto done;
- }
+ return (hash);
+}
- /* Bail on short packets */
- M_ASSERTPKTHDR(m);
- if (m->m_pkthdr.len < sizeof(struct ip))
- goto done;
+void
+gif_hashdestroy(struct gif_list *hash)
+{
- ip = mtod(m, const struct ip *);
- switch (ip->ip_v) {
-#ifdef INET
- case 4:
- if (sc->gif_family != AF_INET)
- goto done;
- ret = in_gif_encapcheck(m, off, proto, arg);
- break;
-#endif
-#ifdef INET6
- case 6:
- if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
- goto done;
- if (sc->gif_family != AF_INET6)
- goto done;
- ret = in6_gif_encapcheck(m, off, proto, arg);
- break;
-#endif
- }
-done:
- GIF_RUNLOCK(sc);
- return (ret);
+ free(hash, M_GIF);
}
+#define MTAG_GIF 1080679712
static int
gif_transmit(struct ifnet *ifp, struct mbuf *m)
{
@@ -363,11 +286,13 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
}
#endif
error = ENETDOWN;
+ GIF_RLOCK();
sc = ifp->if_softc;
if ((ifp->if_flags & IFF_MONITOR) != 0 ||
(ifp->if_flags & IFF_UP) == 0 ||
sc->gif_family == 0 ||
- (error = gif_check_nesting(ifp, m)) != 0) {
+ (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF,
+ V_max_gif_nesting)) != 0) {
m_freem(m);
goto err;
}
@@ -450,6 +375,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
err:
if (error)
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ GIF_RUNLOCK();
return (error);
}
@@ -459,42 +385,6 @@ gif_qflush(struct ifnet *ifp __unused)
}
-#define MTAG_GIF 1080679712
-static int
-gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
-{
- struct m_tag *mtag;
- int count;
-
- /*
- * gif may cause infinite recursion calls when misconfigured.
- * We'll prevent this by detecting loops.
- *
- * High nesting level may cause stack exhaustion.
- * We'll prevent this by introducing upper limit.
- */
- count = 1;
- mtag = NULL;
- while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
- if (*(struct ifnet **)(mtag + 1) == ifp) {
- log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
- return (EIO);
- }
- count++;
- }
- if (count > V_max_gif_nesting) {
- log(LOG_NOTICE,
- "%s: if_output recursively called too many times(%d)\n",
- if_name(ifp), count);
- return (EIO);
- }
- mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL)
- return (ENOMEM);
- *(struct ifnet **)(mtag + 1) = ifp;
- m_tag_prepend(m, mtag);
- return (0);
-}
int
gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
@@ -622,7 +512,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
break;
#endif
case AF_LINK:
- n = sizeof(struct etherip_header) + sizeof(struct ether_header);
+ n = sizeof(struct etherip_header) +
+ sizeof(struct ether_header);
if (n > m->m_len)
m = m_pullup(m, n);
if (m == NULL)
@@ -680,20 +571,11 @@ drop:
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
-/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
-int
+static int
gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- GIF_RLOCK_TRACKER;
struct ifreq *ifr = (struct ifreq*)data;
- struct sockaddr *dst, *src;
struct gif_softc *sc;
-#ifdef INET
- struct sockaddr_in *sin = NULL;
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6 = NULL;
-#endif
u_int options;
int error;
@@ -721,176 +603,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
error = 0;
switch (cmd) {
- case SIOCSIFPHYADDR:
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
-#endif
- error = EINVAL;
- switch (cmd) {
-#ifdef INET
- case SIOCSIFPHYADDR:
- src = (struct sockaddr *)
- &(((struct in_aliasreq *)data)->ifra_addr);
- dst = (struct sockaddr *)
- &(((struct in_aliasreq *)data)->ifra_dstaddr);
- break;
-#endif
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- src = (struct sockaddr *)
- &(((struct in6_aliasreq *)data)->ifra_addr);
- dst = (struct sockaddr *)
- &(((struct in6_aliasreq *)data)->ifra_dstaddr);
- break;
-#endif
- default:
- goto bad;
- }
- /* sa_family must be equal */
- if (src->sa_family != dst->sa_family ||
- src->sa_len != dst->sa_len)
- goto bad;
-
- /* validate sa_len */
- /* check sa_family looks sane for the cmd */
- switch (src->sa_family) {
-#ifdef INET
- case AF_INET:
- if (src->sa_len != sizeof(struct sockaddr_in))
- goto bad;
- if (cmd != SIOCSIFPHYADDR) {
- error = EAFNOSUPPORT;
- goto bad;
- }
- if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
- satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- goto bad;
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (src->sa_len != sizeof(struct sockaddr_in6))
- goto bad;
- if (cmd != SIOCSIFPHYADDR_IN6) {
- error = EAFNOSUPPORT;
- goto bad;
- }
- error = EADDRNOTAVAIL;
- if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
- ||
- IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
- goto bad;
- /*
- * Check validity of the scope zone ID of the
- * addresses, and convert it into the kernel
- * internal form if necessary.
- */
- error = sa6_embedscope(satosin6(src), 0);
- if (error != 0)
- goto bad;
- error = sa6_embedscope(satosin6(dst), 0);
- if (error != 0)
- goto bad;
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- goto bad;
- }
- error = gif_set_tunnel(ifp, src, dst);
- break;
case SIOCDIFPHYADDR:
- gif_delete_tunnel(ifp);
+ if (sc->gif_family == 0)
+ break;
+ gif_delete_tunnel(sc);
break;
+#ifdef INET
+ case SIOCSIFPHYADDR:
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
+ error = in_gif_ioctl(sc, cmd, data);
+ break;
+#endif
#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
-#endif
- if (sc->gif_family == 0) {
- error = EADDRNOTAVAIL;
- break;
- }
- GIF_RLOCK(sc);
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- case SIOCGIFPDSTADDR:
- if (sc->gif_family != AF_INET) {
- error = EADDRNOTAVAIL;
- break;
- }
- sin = (struct sockaddr_in *)&ifr->ifr_addr;
- memset(sin, 0, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- case SIOCGIFPDSTADDR_IN6:
- if (sc->gif_family != AF_INET6) {
- error = EADDRNOTAVAIL;
- break;
- }
- sin6 = (struct sockaddr_in6 *)
- &(((struct in6_ifreq *)data)->ifr_addr);
- memset(sin6, 0, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(*sin6);
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- }
- if (error == 0) {
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- sin->sin_addr = sc->gif_iphdr->ip_src;
- break;
- case SIOCGIFPDSTADDR:
- sin->sin_addr = sc->gif_iphdr->ip_dst;
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
- break;
- case SIOCGIFPDSTADDR_IN6:
- sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
- break;
-#endif
- }
- }
- GIF_RUNLOCK(sc);
- if (error != 0)
- break;
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- case SIOCGIFPDSTADDR:
- error = prison_if(curthread->td_ucred,
- (struct sockaddr *)sin);
- if (error != 0)
- memset(sin, 0, sizeof(*sin));
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- case SIOCGIFPDSTADDR_IN6:
- error = prison_if(curthread->td_ucred,
- (struct sockaddr *)sin6);
- if (error == 0)
- error = sa6_recoverscope(sin6);
- if (error != 0)
- memset(sin6, 0, sizeof(*sin6));
-#endif
- }
+ error = in6_gif_ioctl(sc, cmd, data);
break;
+#endif
case SIOCGTUNFIB:
ifr->ifr_fib = sc->gif_fibnum;
break;
@@ -914,159 +645,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sizeof(options));
if (error)
break;
- if (options & ~GIF_OPTMASK)
+ if (options & ~GIF_OPTMASK) {
error = EINVAL;
- else
- sc->gif_options = options;
- break;
- default:
- error = EINVAL;
- break;
- }
-bad:
- sx_xunlock(&gif_ioctl_sx);
- return (error);
-}
-
-static void
-gif_detach(struct gif_softc *sc)
-{
-
- sx_assert(&gif_ioctl_sx, SA_XLOCKED);
- if (sc->gif_ecookie != NULL)
- encap_detach(sc->gif_ecookie);
- sc->gif_ecookie = NULL;
-}
-
-static int
-gif_attach(struct gif_softc *sc, int af)
-{
-
- sx_assert(&gif_ioctl_sx, SA_XLOCKED);
- switch (af) {
-#ifdef INET
- case AF_INET:
- return (in_gif_attach(sc));
-#endif
-#ifdef INET6
- case AF_INET6:
- return (in6_gif_attach(sc));
-#endif
- }
- return (EAFNOSUPPORT);
-}
-
-static int
-gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
-{
- struct gif_softc *sc = ifp->if_softc;
- struct gif_softc *tsc;
+ break;
+ }
+ if (sc->gif_options != options) {
+ switch (sc->gif_family) {
#ifdef INET
- struct ip *ip;
+ case AF_INET:
+ error = in_gif_setopts(sc, options);
+ break;
#endif
#ifdef INET6
- struct ip6_hdr *ip6;
-#endif
- void *hdr;
- int error = 0;
-
- if (sc == NULL)
- return (ENXIO);
- /* Disallow parallel tunnels unless instructed otherwise. */
- if (V_parallel_tunnels == 0) {
- GIF_LIST_LOCK();
- LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
- if (tsc == sc || tsc->gif_family != src->sa_family)
- continue;
-#ifdef INET
- if (tsc->gif_family == AF_INET &&
- tsc->gif_iphdr->ip_src.s_addr ==
- satosin(src)->sin_addr.s_addr &&
- tsc->gif_iphdr->ip_dst.s_addr ==
- satosin(dst)->sin_addr.s_addr) {
- error = EADDRNOTAVAIL;
- GIF_LIST_UNLOCK();
- goto bad;
- }
+ case AF_INET6:
+ error = in6_gif_setopts(sc, options);
+ break;
#endif
-#ifdef INET6
- if (tsc->gif_family == AF_INET6 &&
- IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
- &satosin6(src)->sin6_addr) &&
- IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
- &satosin6(dst)->sin6_addr)) {
- error = EADDRNOTAVAIL;
- GIF_LIST_UNLOCK();
- goto bad;
+ default:
+ /* No need to invoke AF-handler */
+ sc->gif_options = options;
}
-#endif
}
- GIF_LIST_UNLOCK();
+ break;
+ default:
+ error = EINVAL;
+ break;
}
- switch (src->sa_family) {
+ if (error == 0 && sc->gif_family != 0) {
+ if (
#ifdef INET
- case AF_INET:
- hdr = ip = malloc(sizeof(struct ip), M_GIF,
- M_WAITOK | M_ZERO);
- ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
- ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
- break;
+ cmd == SIOCSIFPHYADDR ||
#endif
#ifdef INET6
- case AF_INET6:
- hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
- M_WAITOK | M_ZERO);
- ip6->ip6_src = satosin6(src)->sin6_addr;
- ip6->ip6_dst = satosin6(dst)->sin6_addr;
- ip6->ip6_vfc = IPV6_VERSION;
- break;
+ cmd == SIOCSIFPHYADDR_IN6 ||
#endif
- default:
- return (EAFNOSUPPORT);
+ 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
}
-
- if (sc->gif_family != src->sa_family)
- gif_detach(sc);
- if (sc->gif_family == 0 ||
- sc->gif_family != src->sa_family)
- error = gif_attach(sc, src->sa_family);
-
- GIF_WLOCK(sc);
- if (sc->gif_family != 0)
- free(sc->gif_hdr, M_GIF);
- sc->gif_family = src->sa_family;
- sc->gif_hdr = hdr;
- GIF_WUNLOCK(sc);
-#if defined(INET) || defined(INET6)
bad:
-#endif
- if (error == 0 && sc->gif_family != 0) {
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_UP);
- } else {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_DOWN);
- }
+ sx_xunlock(&gif_ioctl_sx);
return (error);
}
static void
-gif_delete_tunnel(struct ifnet *ifp)
+gif_delete_tunnel(struct gif_softc *sc)
{
- struct gif_softc *sc = ifp->if_softc;
- int family;
- if (sc == NULL)
- return;
-
- GIF_WLOCK(sc);
- family = sc->gif_family;
- sc->gif_family = 0;
- GIF_WUNLOCK(sc);
- if (family != 0) {
- gif_detach(sc);
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ if (sc->gif_family != 0) {
+ CK_LIST_REMOVE(sc, chain);
+ /* Wait until it becomes safe to free gif_hdr */
+ GIF_WAIT();
free(sc->gif_hdr, M_GIF);
}
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_DOWN);
+ sc->gif_family = 0;
+ GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);
}
+
diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h
index 556c2acc..501a4e5d 100644
--- a/freebsd/sys/net/if_gif.h
+++ b/freebsd/sys/net/if_gif.h
@@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,14 +37,9 @@
#define _NET_IF_GIF_H_
#ifdef _KERNEL
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <netinet/in.h>
struct ip;
struct ip6_hdr;
-struct encaptab;
extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
int af);
@@ -55,8 +51,6 @@ extern void (*ng_gif_detach_p)(struct ifnet *ifp);
struct gif_softc {
struct ifnet *gif_ifp;
- struct rmlock gif_lock;
- const struct encaptab *gif_ecookie;
int gif_family;
int gif_flags;
u_int gif_fibnum;
@@ -65,28 +59,22 @@ struct gif_softc {
union {
void *hdr;
struct ip *iphdr;
-#ifdef INET6
struct ip6_hdr *ip6hdr;
-#endif
} gif_uhdr;
- LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
+
+ CK_LIST_ENTRY(gif_softc) chain;
};
-#define GIF2IFP(sc) ((sc)->gif_ifp)
-#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc")
-#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock)
-#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker
-#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker)
-#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker)
-#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED)
-#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock)
-#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock)
-#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED)
+CK_LIST_HEAD(gif_list, gif_softc);
+MALLOC_DECLARE(M_GIF);
+#ifndef GIF_HASH_SIZE
+#define GIF_HASH_SIZE (1 << 4)
+#endif
+
+#define GIF2IFP(sc) ((sc)->gif_ifp)
#define gif_iphdr gif_uhdr.iphdr
#define gif_hdr gif_uhdr.hdr
-#ifdef INET6
#define gif_ip6hdr gif_uhdr.ip6hdr
-#endif
#define GIF_MTU (1280) /* Default MTU */
#define GIF_MTU_MIN (1280) /* Minimum MTU */
@@ -108,21 +96,29 @@ struct etherip_header {
/* mbuf adjust factor to force 32-bit alignment of IP header */
#define ETHERIP_ALIGN 2
+#define GIF_RLOCK() struct epoch_tracker gif_et; epoch_enter_preempt(net_epoch_preempt, &gif_et)
+#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gif_et)
+#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt)
+
/* Prototypes */
+struct gif_list *gif_hashinit(void);
+void gif_hashdestroy(struct gif_list *);
+
void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
-int gif_encapcheck(const struct mbuf *, int, int, void *);
-#ifdef INET
+
+void in_gif_init(void);
+void in_gif_uninit(void);
int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in_gif_attach(struct gif_softc *);
-#endif
-#ifdef INET6
+int in_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in_gif_setopts(struct gif_softc *, u_int);
+
+void in6_gif_init(void);
+void in6_gif_uninit(void);
int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
-int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
-int in6_gif_attach(struct gif_softc *);
-#endif
+int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t);
+int in6_gif_setopts(struct gif_softc *, u_int);
#endif /* _KERNEL */
#define GIFGOPTS _IOWR('i', 150, struct ifreq)
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index 0bff9bc9..5ff41259 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 1998 The NetBSD Foundation, Inc.
- * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -43,17 +43,13 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
-#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
-#include <sys/protosw.h>
-#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
@@ -72,7 +68,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#ifdef INET
-#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
@@ -82,7 +77,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/scope6_var.h>
#endif
#include <netinet/ip_encap.h>
@@ -93,24 +87,16 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
#define GREMTU 1476
+
static const char grename[] = "gre";
-static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
-static VNET_DEFINE(struct mtx, gre_mtx);
-#define V_gre_mtx VNET(gre_mtx)
-#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
- MTX_DEF)
-#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
-#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
-#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
-
-static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
-#define V_gre_softc_list VNET(gre_softc_list)
+MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+
static struct sx gre_ioctl_sx;
SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
-static VNET_DEFINE(struct if_clone *, gre_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, gre_cloner);
#define V_gre_cloner VNET(gre_cloner)
static void gre_qflush(struct ifnet *);
@@ -118,11 +104,7 @@ static int gre_transmit(struct ifnet *, struct mbuf *);
static int gre_ioctl(struct ifnet *, u_long, caddr_t);
static int gre_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
-
-static void gre_updatehdr(struct gre_softc *);
-static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
- struct sockaddr *);
-static void gre_delete_tunnel(struct ifnet *);
+static void gre_delete_tunnel(struct gre_softc *);
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
@@ -139,7 +121,7 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
#define MAX_GRE_NEST 1
#endif
-static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
+VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST;
#define V_max_gre_nesting VNET(max_gre_nesting)
SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
@@ -147,10 +129,15 @@ SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
static void
vnet_gre_init(const void *unused __unused)
{
- LIST_INIT(&V_gre_softc_list);
- GRE_LIST_LOCK_INIT();
+
V_gre_cloner = if_clone_simple(grename, gre_clone_create,
gre_clone_destroy, 0);
+#ifdef INET
+ in_gre_init();
+#endif
+#ifdef INET6
+ in6_gre_init();
+#endif
}
VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gre_init, NULL);
@@ -160,7 +147,12 @@ vnet_gre_uninit(const void *unused __unused)
{
if_clone_detach(V_gre_cloner);
- GRE_LIST_LOCK_DESTROY();
+#ifdef INET
+ in_gre_uninit();
+#endif
+#ifdef INET6
+ in6_gre_uninit();
+#endif
}
VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gre_uninit, NULL);
@@ -177,7 +169,6 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc->gre_fibnum = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
if_initname(GRE2IFP(sc), grename, unit);
@@ -191,9 +182,6 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- GRE_LIST_LOCK();
- LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
- GRE_LIST_UNLOCK();
return (0);
}
@@ -204,33 +192,22 @@ gre_clone_destroy(struct ifnet *ifp)
sx_xlock(&gre_ioctl_sx);
sc = ifp->if_softc;
- gre_delete_tunnel(ifp);
- GRE_LIST_LOCK();
- LIST_REMOVE(sc, gre_list);
- GRE_LIST_UNLOCK();
+ gre_delete_tunnel(sc);
bpfdetach(ifp);
if_detach(ifp);
ifp->if_softc = NULL;
sx_xunlock(&gre_ioctl_sx);
+ GRE_WAIT();
if_free(ifp);
- GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- GRE_RLOCK_TRACKER;
struct ifreq *ifr = (struct ifreq *)data;
- struct sockaddr *src, *dst;
struct gre_softc *sc;
-#ifdef INET
- struct sockaddr_in *sin = NULL;
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6 = NULL;
-#endif
uint32_t opt;
int error;
@@ -255,7 +232,6 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case GREGPROTO:
return (EOPNOTSUPP);
}
- src = dst = NULL;
sx_xlock(&gre_ioctl_sx);
sc = ifp->if_softc;
if (sc == NULL) {
@@ -264,189 +240,25 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
error = 0;
switch (cmd) {
- case SIOCSIFPHYADDR:
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
-#endif
- error = EINVAL;
- switch (cmd) {
-#ifdef INET
- case SIOCSIFPHYADDR:
- src = (struct sockaddr *)
- &(((struct in_aliasreq *)data)->ifra_addr);
- dst = (struct sockaddr *)
- &(((struct in_aliasreq *)data)->ifra_dstaddr);
- break;
-#endif
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- src = (struct sockaddr *)
- &(((struct in6_aliasreq *)data)->ifra_addr);
- dst = (struct sockaddr *)
- &(((struct in6_aliasreq *)data)->ifra_dstaddr);
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- goto end;
- }
- /* sa_family must be equal */
- if (src->sa_family != dst->sa_family ||
- src->sa_len != dst->sa_len)
- goto end;
-
- /* validate sa_len */
- switch (src->sa_family) {
-#ifdef INET
- case AF_INET:
- if (src->sa_len != sizeof(struct sockaddr_in))
- goto end;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (src->sa_len != sizeof(struct sockaddr_in6))
- goto end;
- break;
-#endif
- default:
- error = EAFNOSUPPORT;
- goto end;
- }
- /* check sa_family looks sane for the cmd */
- error = EAFNOSUPPORT;
- switch (cmd) {
-#ifdef INET
- case SIOCSIFPHYADDR:
- if (src->sa_family == AF_INET)
- break;
- goto end;
-#endif
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- if (src->sa_family == AF_INET6)
- break;
- goto end;
-#endif
- }
- error = EADDRNOTAVAIL;
- switch (src->sa_family) {
-#ifdef INET
- case AF_INET:
- if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
- satosin(dst)->sin_addr.s_addr == INADDR_ANY)
- goto end;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
- ||
- IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
- goto end;
- /*
- * Check validity of the scope zone ID of the
- * addresses, and convert it into the kernel
- * internal form if necessary.
- */
- error = sa6_embedscope(satosin6(src), 0);
- if (error != 0)
- goto end;
- error = sa6_embedscope(satosin6(dst), 0);
- if (error != 0)
- goto end;
-#endif
- }
- error = gre_set_tunnel(ifp, src, dst);
- break;
case SIOCDIFPHYADDR:
- gre_delete_tunnel(ifp);
+ if (sc->gre_family == 0)
+ break;
+ gre_delete_tunnel(sc);
break;
+#ifdef INET
+ case SIOCSIFPHYADDR:
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
+ error = in_gre_ioctl(sc, cmd, data);
+ break;
+#endif
#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
-#endif
- if (sc->gre_family == 0) {
- error = EADDRNOTAVAIL;
- break;
- }
- GRE_RLOCK(sc);
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- case SIOCGIFPDSTADDR:
- if (sc->gre_family != AF_INET) {
- error = EADDRNOTAVAIL;
- break;
- }
- sin = (struct sockaddr_in *)&ifr->ifr_addr;
- memset(sin, 0, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- case SIOCGIFPDSTADDR_IN6:
- if (sc->gre_family != AF_INET6) {
- error = EADDRNOTAVAIL;
- break;
- }
- sin6 = (struct sockaddr_in6 *)
- &(((struct in6_ifreq *)data)->ifr_addr);
- memset(sin6, 0, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(*sin6);
- break;
-#endif
- }
- if (error == 0) {
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- sin->sin_addr = sc->gre_oip.ip_src;
- break;
- case SIOCGIFPDSTADDR:
- sin->sin_addr = sc->gre_oip.ip_dst;
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- sin6->sin6_addr = sc->gre_oip6.ip6_src;
- break;
- case SIOCGIFPDSTADDR_IN6:
- sin6->sin6_addr = sc->gre_oip6.ip6_dst;
- break;
-#endif
- }
- }
- GRE_RUNLOCK(sc);
- if (error != 0)
- break;
- switch (cmd) {
-#ifdef INET
- case SIOCGIFPSRCADDR:
- case SIOCGIFPDSTADDR:
- error = prison_if(curthread->td_ucred,
- (struct sockaddr *)sin);
- if (error != 0)
- memset(sin, 0, sizeof(*sin));
- break;
-#endif
-#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
- case SIOCGIFPDSTADDR_IN6:
- error = prison_if(curthread->td_ucred,
- (struct sockaddr *)sin6);
- if (error == 0)
- error = sa6_recoverscope(sin6);
- if (error != 0)
- memset(sin6, 0, sizeof(*sin6));
-#endif
- }
+ error = in6_gre_ioctl(sc, cmd, data);
break;
+#endif
case SIOCGTUNFIB:
ifr->ifr_fib = sc->gre_fibnum;
break;
@@ -459,40 +271,50 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sc->gre_fibnum = ifr->ifr_fib;
break;
case GRESKEY:
+ case GRESOPTS:
if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
sizeof(opt))) != 0)
break;
- if (sc->gre_key != opt) {
- GRE_WLOCK(sc);
- sc->gre_key = opt;
- gre_updatehdr(sc);
- GRE_WUNLOCK(sc);
+ if (cmd == GRESKEY) {
+ if (sc->gre_key == opt)
+ break;
+ } else if (cmd == GRESOPTS) {
+ if (opt & ~GRE_OPTMASK) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->gre_options == opt)
+ break;
}
- break;
- case GREGKEY:
- error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
- sizeof(sc->gre_key));
- break;
- case GRESOPTS:
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ in_gre_setopts(sc, cmd, opt);
break;
- if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
- sizeof(opt))) != 0)
+#endif
+#ifdef INET6
+ case AF_INET6:
+ in6_gre_setopts(sc, cmd, opt);
break;
- if (opt & ~GRE_OPTMASK)
- error = EINVAL;
- else {
- if (sc->gre_options != opt) {
- GRE_WLOCK(sc);
+#endif
+ default:
+ if (cmd == GRESKEY)
+ sc->gre_key = opt;
+ else
sc->gre_options = opt;
- gre_updatehdr(sc);
- GRE_WUNLOCK(sc);
- }
+ break;
}
+ /*
+ * XXX: Do we need to initiate change of interface
+ * state here?
+ */
+ break;
+ case GREGKEY:
+ error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
+ sizeof(sc->gre_key));
break;
-
case GREGOPTS:
error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
sizeof(sc->gre_options));
@@ -501,40 +323,68 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EINVAL;
break;
}
+ if (error == 0 && sc->gre_family != 0) {
+ if (
+#ifdef INET
+ cmd == SIOCSIFPHYADDR ||
+#endif
+#ifdef INET6
+ cmd == SIOCSIFPHYADDR_IN6 ||
+#endif
+ 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
+ }
end:
sx_xunlock(&gre_ioctl_sx);
return (error);
}
static void
-gre_updatehdr(struct gre_softc *sc)
+gre_delete_tunnel(struct gre_softc *sc)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_family != 0) {
+ CK_LIST_REMOVE(sc, chain);
+ GRE_WAIT();
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = 0;
+ }
+ GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
+}
+
+struct gre_list *
+gre_hashinit(void)
+{
+ struct gre_list *hash;
+ int i;
+
+ hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE,
+ M_GRE, M_WAITOK);
+ for (i = 0; i < GRE_HASH_SIZE; i++)
+ CK_LIST_INIT(&hash[i]);
+
+ return (hash);
+}
+
+void
+gre_hashdestroy(struct gre_list *hash)
+{
+
+ free(hash, M_GRE);
+}
+
+void
+gre_updatehdr(struct gre_softc *sc, struct grehdr *gh)
{
- struct grehdr *gh = NULL;
uint32_t *opts;
uint16_t flags;
- GRE_WLOCK_ASSERT(sc);
- switch (sc->gre_family) {
-#ifdef INET
- case AF_INET:
- sc->gre_hlen = sizeof(struct greip);
- sc->gre_oip.ip_v = IPPROTO_IPV4;
- sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
- sc->gre_oip.ip_p = IPPROTO_GRE;
- gh = &sc->gre_gihdr->gi_gre;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- sc->gre_hlen = sizeof(struct greip6);
- sc->gre_oip6.ip6_vfc = IPV6_VERSION;
- sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
- gh = &sc->gre_gi6hdr->gi6_gre;
- break;
-#endif
- default:
- return;
- }
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+
flags = 0;
opts = gh->gre_opts;
if (sc->gre_options & GRE_ENABLE_CSUM) {
@@ -556,136 +406,12 @@ gre_updatehdr(struct gre_softc *sc)
gh->gre_flags = htons(flags);
}
-static void
-gre_detach(struct gre_softc *sc)
-{
-
- sx_assert(&gre_ioctl_sx, SA_XLOCKED);
- if (sc->gre_ecookie != NULL)
- encap_detach(sc->gre_ecookie);
- sc->gre_ecookie = NULL;
-}
-
-static int
-gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
- struct sockaddr *dst)
-{
- struct gre_softc *sc, *tsc;
-#ifdef INET6
- struct ip6_hdr *ip6;
-#endif
-#ifdef INET
- struct ip *ip;
-#endif
- void *hdr;
- int error;
-
- sx_assert(&gre_ioctl_sx, SA_XLOCKED);
- GRE_LIST_LOCK();
- sc = ifp->if_softc;
- LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
- if (tsc == sc || tsc->gre_family != src->sa_family)
- continue;
-#ifdef INET
- if (tsc->gre_family == AF_INET &&
- tsc->gre_oip.ip_src.s_addr ==
- satosin(src)->sin_addr.s_addr &&
- tsc->gre_oip.ip_dst.s_addr ==
- satosin(dst)->sin_addr.s_addr) {
- GRE_LIST_UNLOCK();
- return (EADDRNOTAVAIL);
- }
-#endif
-#ifdef INET6
- if (tsc->gre_family == AF_INET6 &&
- IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
- &satosin6(src)->sin6_addr) &&
- IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
- &satosin6(dst)->sin6_addr)) {
- GRE_LIST_UNLOCK();
- return (EADDRNOTAVAIL);
- }
-#endif
- }
- GRE_LIST_UNLOCK();
-
- switch (src->sa_family) {
-#ifdef INET
- case AF_INET:
- hdr = ip = malloc(sizeof(struct greip) +
- 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
- ip->ip_src = satosin(src)->sin_addr;
- ip->ip_dst = satosin(dst)->sin_addr;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- hdr = ip6 = malloc(sizeof(struct greip6) +
- 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
- ip6->ip6_src = satosin6(src)->sin6_addr;
- ip6->ip6_dst = satosin6(dst)->sin6_addr;
- break;
-#endif
- default:
- return (EAFNOSUPPORT);
- }
- if (sc->gre_family != 0)
- gre_detach(sc);
- GRE_WLOCK(sc);
- if (sc->gre_family != 0)
- free(sc->gre_hdr, M_GRE);
- sc->gre_family = src->sa_family;
- sc->gre_hdr = hdr;
- sc->gre_oseq = 0;
- sc->gre_iseq = UINT32_MAX;
- gre_updatehdr(sc);
- GRE_WUNLOCK(sc);
-
- error = 0;
- switch (src->sa_family) {
-#ifdef INET
- case AF_INET:
- error = in_gre_attach(sc);
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- error = in6_gre_attach(sc);
- break;
-#endif
- }
- if (error == 0) {
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_UP);
- }
- return (error);
-}
-
-static void
-gre_delete_tunnel(struct ifnet *ifp)
-{
- struct gre_softc *sc = ifp->if_softc;
- int family;
-
- GRE_WLOCK(sc);
- family = sc->gre_family;
- sc->gre_family = 0;
- GRE_WUNLOCK(sc);
- if (family != 0) {
- gre_detach(sc);
- free(sc->gre_hdr, M_GRE);
- }
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if_link_state_change(ifp, LINK_STATE_DOWN);
-}
-
int
-gre_input(struct mbuf **mp, int *offp, int proto)
+gre_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct gre_softc *sc;
+ struct gre_softc *sc = arg;
struct grehdr *gh;
struct ifnet *ifp;
- struct mbuf *m;
uint32_t *opts;
#ifdef notyet
uint32_t key;
@@ -693,12 +419,8 @@ gre_input(struct mbuf **mp, int *offp, int proto)
uint16_t flags;
int hlen, isr, af;
- m = *mp;
- sc = encap_getarg(m);
- KASSERT(sc != NULL, ("encap_getarg returned NULL"));
-
ifp = GRE2IFP(sc);
- hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+ hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
if (m->m_pkthdr.len < hlen)
goto drop;
if (m->m_len < hlen) {
@@ -706,7 +428,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
if (m == NULL)
goto drop;
}
- gh = (struct grehdr *)mtodo(m, *offp);
+ gh = (struct grehdr *)mtodo(m, off);
flags = ntohs(gh->gre_flags);
if (flags & ~GRE_FLAGS_MASK)
goto drop;
@@ -716,7 +438,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
/* reserved1 field must be zero */
if (((uint16_t *)opts)[1] != 0)
goto drop;
- if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
goto drop;
hlen += 2 * sizeof(uint16_t);
opts++;
@@ -766,7 +488,7 @@ gre_input(struct mbuf **mp, int *offp, int proto)
default:
goto drop;
}
- m_adj(m, *offp + hlen);
+ m_adj(m, off + hlen);
m_clrprotoflags(m);
m->m_pkthdr.rcvif = ifp;
M_SETFIB(m, ifp->if_fib);
@@ -787,70 +509,23 @@ drop:
return (IPPROTO_DONE);
}
-#define MTAG_GRE 1307983903
-static int
-gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
-{
- struct m_tag *mtag;
- int count;
-
- count = 1;
- mtag = NULL;
- while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
- if (*(struct ifnet **)(mtag + 1) == ifp) {
- log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
- return (EIO);
- }
- count++;
- }
- if (count > V_max_gre_nesting) {
- log(LOG_NOTICE,
- "%s: if_output recursively called too many times(%d)\n",
- ifp->if_xname, count);
- return (EIO);
- }
- mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL)
- return (ENOMEM);
- *(struct ifnet **)(mtag + 1) = ifp;
- m_tag_prepend(m, mtag);
- return (0);
-}
-
static int
gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
uint32_t af;
- int error;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error != 0)
- goto drop;
-#endif
- if ((ifp->if_flags & IFF_MONITOR) != 0 ||
- (ifp->if_flags & IFF_UP) == 0) {
- error = ENETDOWN;
- goto drop;
- }
-
- error = gre_check_nesting(ifp, m);
- if (error != 0)
- goto drop;
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
else
af = dst->sa_family;
- BPF_MTAP2(ifp, &af, sizeof(af), m);
- m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ /*
+ * Now save the af in the inbound pkt csum data. This is a cheat: we
+ * are reusing the inbound csum_data field to carry the af over to
+ * the gre_transmit() routine, which avoids using yet another mtag.
+ */
+ m->m_pkthdr.csum_data = af;
return (ifp->if_transmit(ifp, m));
-drop:
- m_freem(m);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (error);
}
static void
@@ -870,95 +545,95 @@ gre_setseqn(struct grehdr *gh, uint32_t seq)
*opts = htonl(seq);
}
+#define MTAG_GRE 1307983903
static int
gre_transmit(struct ifnet *ifp, struct mbuf *m)
{
- GRE_RLOCK_TRACKER;
struct gre_softc *sc;
struct grehdr *gh;
- uint32_t iaf, oaf, oseq;
- int error, hlen, olen, plen;
- int want_seq, want_csum;
+ uint32_t af;
+ int error, len;
+ uint16_t proto;
- plen = 0;
- sc = ifp->if_softc;
- if (sc == NULL) {
- error = ENETDOWN;
+ len = 0;
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error) {
m_freem(m);
goto drop;
}
- GRE_RLOCK(sc);
- if (sc->gre_family == 0) {
- GRE_RUNLOCK(sc);
- error = ENETDOWN;
+#endif
+ error = ENETDOWN;
+ GRE_RLOCK();
+ sc = ifp->if_softc;
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0 ||
+ sc->gre_family == 0 ||
+ (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE,
+ V_max_gre_nesting)) != 0) {
m_freem(m);
goto drop;
}
- iaf = m->m_pkthdr.csum_data;
- oaf = sc->gre_family;
- hlen = sc->gre_hlen;
- want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
- if (want_seq)
- oseq = sc->gre_oseq++; /* XXX */
- else
- oseq = 0; /* Make compiler happy. */
- want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+ af = m->m_pkthdr.csum_data;
M_SETFIB(m, sc->gre_fibnum);
- M_PREPEND(m, hlen, M_NOWAIT);
+ M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
if (m == NULL) {
- GRE_RUNLOCK(sc);
error = ENOBUFS;
goto drop;
}
- bcopy(sc->gre_hdr, mtod(m, void *), hlen);
- GRE_RUNLOCK(sc);
- switch (oaf) {
+ bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen);
+ /* Determine GRE proto */
+ switch (af) {
#ifdef INET
case AF_INET:
- olen = sizeof(struct ip);
+ proto = htons(ETHERTYPE_IP);
break;
#endif
#ifdef INET6
case AF_INET6:
- olen = sizeof(struct ip6_hdr);
+ proto = htons(ETHERTYPE_IPV6);
break;
#endif
default:
+ m_freem(m);
error = ENETDOWN;
goto drop;
}
- gh = (struct grehdr *)mtodo(m, olen);
- switch (iaf) {
+ /* Determine offset of GRE header */
+ switch (sc->gre_family) {
#ifdef INET
case AF_INET:
- gh->gre_proto = htons(ETHERTYPE_IP);
+ len = sizeof(struct ip);
break;
#endif
#ifdef INET6
case AF_INET6:
- gh->gre_proto = htons(ETHERTYPE_IPV6);
+ len = sizeof(struct ip6_hdr);
break;
#endif
default:
+ m_freem(m);
error = ENETDOWN;
goto drop;
}
- if (want_seq)
- gre_setseqn(gh, oseq);
- if (want_csum) {
+ gh = (struct grehdr *)mtodo(m, len);
+ gh->gre_proto = proto;
+ if (sc->gre_options & GRE_ENABLE_SEQ)
+ gre_setseqn(gh, sc->gre_oseq++);
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
- m->m_pkthdr.len, olen);
+ m->m_pkthdr.len, len);
}
- plen = m->m_pkthdr.len - hlen;
- switch (oaf) {
+ len = m->m_pkthdr.len - len;
+ switch (sc->gre_family) {
#ifdef INET
case AF_INET:
- error = in_gre_output(m, iaf, hlen);
+ error = in_gre_output(m, af, sc->gre_hlen);
break;
#endif
#ifdef INET6
case AF_INET6:
- error = in6_gre_output(m, iaf, hlen);
+ error = in6_gre_output(m, af, sc->gre_hlen);
break;
#endif
default:
@@ -970,8 +645,9 @@ drop:
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
else {
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
}
+ GRE_RUNLOCK();
return (error);
}
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 0eac9e9f..cc8b08f9 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -64,8 +64,6 @@ struct greip6 {
struct gre_softc {
struct ifnet *gre_ifp;
- LIST_ENTRY(gre_softc) gre_list;
- struct rmlock gre_lock;
int gre_family; /* AF of delivery header */
uint32_t gre_iseq;
uint32_t gre_oseq;
@@ -82,18 +80,20 @@ struct gre_softc {
struct greip6 *gi6hdr;
#endif
} gre_uhdr;
- const struct encaptab *gre_ecookie;
+
+ CK_LIST_ENTRY(gre_softc) chain;
};
+CK_LIST_HEAD(gre_list, gre_softc);
+MALLOC_DECLARE(M_GRE);
+
+#ifndef GRE_HASH_SIZE
+#define GRE_HASH_SIZE (1 << 4)
+#endif
+
#define GRE2IFP(sc) ((sc)->gre_ifp)
-#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
-#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
-#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
-#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
-#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
-#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
-#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
-#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
-#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+#define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et)
+#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et)
+#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define gre_hdr gre_uhdr.hdr
#define gre_gihdr gre_uhdr.gihdr
@@ -101,15 +101,23 @@ struct gre_softc {
#define gre_oip gre_gihdr->gi_ip
#define gre_oip6 gre_gi6hdr->gi6_ip6
-int gre_input(struct mbuf **, int *, int);
-#ifdef INET
-int in_gre_attach(struct gre_softc *);
+struct gre_list *gre_hashinit(void);
+void gre_hashdestroy(struct gre_list *);
+
+int gre_input(struct mbuf *, int, int, void *);
+void gre_updatehdr(struct gre_softc *, struct grehdr *);
+
+void in_gre_init(void);
+void in_gre_uninit(void);
+void in_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in_gre_ioctl(struct gre_softc *, u_long, caddr_t);
int in_gre_output(struct mbuf *, int, int);
-#endif
-#ifdef INET6
-int in6_gre_attach(struct gre_softc *);
+
+void in6_gre_init(void);
+void in6_gre_uninit(void);
+void in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in6_gre_ioctl(struct gre_softc *, u_long, caddr_t);
int in6_gre_output(struct mbuf *, int, int);
-#endif
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
diff --git a/freebsd/sys/net/if_ipsec.c b/freebsd/sys/net/if_ipsec.c
index eaeecd5a..5b1d5e82 100644
--- a/freebsd/sys/net/if_ipsec.c
+++ b/freebsd/sys/net/if_ipsec.c
@@ -122,9 +122,9 @@ RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list");
#define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED)
LIST_HEAD(ipsec_iflist, ipsec_softc);
-static VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list);
-static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl);
-static VNET_DEFINE(u_long, ipsec_sc_hmask);
+VNET_DEFINE_STATIC(struct ipsec_iflist, ipsec_sc_list);
+VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_sc_htbl);
+VNET_DEFINE_STATIC(u_long, ipsec_sc_hmask);
#define V_ipsec_sc_list VNET(ipsec_sc_list)
#define V_ipsec_sc_htbl VNET(ipsec_sc_htbl)
#define V_ipsec_sc_hmask VNET(ipsec_sc_hmask)
@@ -164,7 +164,7 @@ static void ipsec_qflush(struct ifnet *);
static int ipsec_clone_create(struct if_clone *, int, caddr_t);
static void ipsec_clone_destroy(struct ifnet *);
-static VNET_DEFINE(struct if_clone *, ipsec_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, ipsec_cloner);
#define V_ipsec_cloner VNET(ipsec_cloner)
static int
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 578078c2..4d5aaa29 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -75,10 +75,10 @@ __FBSDID("$FreeBSD$");
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>
-#define LAGG_RLOCK() epoch_enter_preempt(net_epoch_preempt)
-#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
-#define LAGG_RLOCK_ASSERT() MPASS(in_epoch())
-#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch())
+#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
+#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et)
+#define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
+#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt))
#define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx")
#define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx)
@@ -99,7 +99,7 @@ static struct {
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
-static VNET_DEFINE(struct mtx, lagg_list_mtx);
+VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
#define V_lagg_list_mtx VNET(lagg_list_mtx)
#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \
"if_lagg list", NULL, MTX_DEF)
@@ -110,7 +110,7 @@ eventhandler_tag lagg_detach_cookie = NULL;
static int lagg_clone_create(struct if_clone *, int, caddr_t);
static void lagg_clone_destroy(struct ifnet *);
-static VNET_DEFINE(struct if_clone *, lagg_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
#define V_lagg_cloner VNET(lagg_cloner)
static const char laggname[] = "lagg";
@@ -251,21 +251,21 @@ SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
"Link Aggregation");
/* Allow input on any failover links */
-static VNET_DEFINE(int, lagg_failover_rx_all);
+VNET_DEFINE_STATIC(int, lagg_failover_rx_all);
#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all)
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(lagg_failover_rx_all), 0,
"Accept input from any interface in a failover lagg");
/* Default value for using flowid */
-static VNET_DEFINE(int, def_use_flowid) = 0;
+VNET_DEFINE_STATIC(int, def_use_flowid) = 0;
#define V_def_use_flowid VNET(def_use_flowid)
SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
&VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
/* Default value for flowid shift */
-static VNET_DEFINE(int, def_flowid_shift) = 16;
+VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
#define V_def_flowid_shift VNET(def_flowid_shift)
SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
&VNET_NAME(def_flowid_shift), 0,
@@ -1739,6 +1739,10 @@ lagg_linkstate(struct lagg_softc *sc)
LAGG_XLOCK_ASSERT(sc);
+ /* LACP handles link state itself */
+ if (sc->sc_proto == LAGG_PROTO_LACP)
+ return;
+
/* Our link is considered up if at least one of our ports is active */
LAGG_RLOCK();
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
@@ -1793,6 +1797,7 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
struct lagg_port *lp_next, *rval = NULL;
+ struct epoch_tracker net_et;
/*
* Search a port which reports an active link state.
@@ -1811,15 +1816,14 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
}
search:
- LAGG_RLOCK();
+ epoch_enter_preempt(net_epoch_preempt, &net_et);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) {
- LAGG_RUNLOCK();
- rval = lp_next;
- goto found;
+ epoch_exit_preempt(net_epoch_preempt, &net_et);
+ return (lp_next);
}
}
- LAGG_RUNLOCK();
+ epoch_exit_preempt(net_epoch_preempt, &net_et);
found:
return (rval);
}
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index d6e9dbaf..b220d7aa 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
-static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
+VNET_DEFINE_STATIC(SLIST_HEAD(, lltable), lltables) =
SLIST_HEAD_INITIALIZER(lltables);
#define V_lltables VNET(lltables)
@@ -438,6 +438,9 @@ llentry_free(struct llentry *lle)
pkts_dropped = lltable_drop_entry_queue(lle);
+ /* cancel timer */
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
LLE_FREE_LOCKED(lle);
return (pkts_dropped);
@@ -524,8 +527,6 @@ lltable_free(struct lltable *llt)
IF_AFDATA_WUNLOCK(llt->llt_ifp);
CK_LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
- if (callout_stop(&lle->lle_timer) > 0)
- LLE_REMREF(lle);
llentry_free(lle);
}
diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c
index 988b9f9d..a96144b8 100644
--- a/freebsd/sys/net/if_loop.c
+++ b/freebsd/sys/net/if_loop.c
@@ -101,7 +101,7 @@ static void lo_clone_destroy(struct ifnet *);
VNET_DEFINE(struct ifnet *, loif); /* Used externally */
#ifdef VIMAGE
-static VNET_DEFINE(struct if_clone *, lo_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, lo_cloner);
#define V_lo_cloner VNET(lo_cloner)
#endif
@@ -382,6 +382,7 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
/*
* Everything else is done at a higher level.
*/
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 97cd6552..8192a679 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -202,6 +202,62 @@ uint64_t ifmedia_baudrate(int);
#define IFM_10G_AOC IFM_X(59) /* 10G active optical cable */
#define IFM_25G_ACC IFM_X(60) /* 25G active copper cable */
#define IFM_25G_AOC IFM_X(61) /* 25G active optical cable */
+#define IFM_100_SGMII IFM_X(62) /* 100M media interface */
+#define IFM_2500_X IFM_X(63) /* 2500BaseX */
+#define IFM_5000_KR IFM_X(64) /* 5GBase-KR backplane */
+#define IFM_25G_T IFM_X(65) /* 25GBase-T - RJ45 */
+#define IFM_25G_CR_S IFM_X(66) /* 25GBase-CR (short) */
+#define IFM_25G_CR1 IFM_X(67) /* 25GBase-CR1 DA cable */
+#define IFM_25G_KR_S IFM_X(68) /* 25GBase-KR (short) */
+#define IFM_5000_KR_S IFM_X(69) /* 5GBase-KR backplane (short) */
+#define IFM_5000_KR1 IFM_X(70) /* 5GBase-KR backplane */
+#define IFM_25G_AUI IFM_X(71) /* 25G-AUI-C2C (chip to chip) */
+#define IFM_40G_XLAUI IFM_X(72) /* 40G-XLAUI */
+#define IFM_40G_XLAUI_AC IFM_X(73) /* 40G active copper/optical */
+#define IFM_40G_ER4 IFM_X(74) /* 40GBase-ER4 */
+#define IFM_50G_SR2 IFM_X(75) /* 50GBase-SR2 */
+#define IFM_50G_LR2 IFM_X(76) /* 50GBase-LR2 */
+#define IFM_50G_LAUI2_AC IFM_X(77) /* 50G active copper/optical */
+#define IFM_50G_LAUI2 IFM_X(78) /* 50G-LAUI2 */
+#define IFM_50G_AUI2_AC IFM_X(79) /* 50G active copper/optical */
+#define IFM_50G_AUI2 IFM_X(80) /* 50G-AUI2 */
+#define IFM_50G_CP IFM_X(81) /* 50GBase-CP */
+#define IFM_50G_SR IFM_X(82) /* 50GBase-SR */
+#define IFM_50G_LR IFM_X(83) /* 50GBase-LR */
+#define IFM_50G_FR IFM_X(84) /* 50GBase-FR */
+#define IFM_50G_KR_PAM4 IFM_X(85) /* 50GBase-KR PAM4 */
+#define IFM_25G_KR1 IFM_X(86) /* 25GBase-KR1 */
+#define IFM_50G_AUI1_AC IFM_X(87) /* 50G active copper/optical */
+#define IFM_50G_AUI1 IFM_X(88) /* 50G-AUI1 */
+#define IFM_100G_CAUI4_AC IFM_X(89) /* 100G-CAUI4 active copper/optical */
+#define IFM_100G_CAUI4 IFM_X(90) /* 100G-CAUI4 */
+#define IFM_100G_AUI4_AC IFM_X(91) /* 100G-AUI4 active copper/optical */
+#define IFM_100G_AUI4 IFM_X(92) /* 100G-AUI4 */
+#define IFM_100G_CR_PAM4 IFM_X(93) /* 100GBase-CR PAM4 */
+#define IFM_100G_KR_PAM4 IFM_X(94) /* 100GBase-KR PAM4 */
+#define IFM_100G_CP2 IFM_X(95) /* 100GBase-CP2 */
+#define IFM_100G_SR2 IFM_X(96) /* 100GBase-SR2 */
+#define IFM_100G_DR IFM_X(97) /* 100GBase-DR */
+#define IFM_100G_KR2_PAM4 IFM_X(98) /* 100GBase-KR2 PAM4 */
+#define IFM_100G_CAUI2_AC IFM_X(99) /* 100G-CAUI2 active copper/optical */
+#define IFM_100G_CAUI2 IFM_X(100) /* 100G-CAUI2 */
+#define IFM_100G_AUI2_AC IFM_X(101) /* 100G-AUI2 active copper/optical */
+#define IFM_100G_AUI2 IFM_X(102) /* 100G-AUI2 */
+#define IFM_200G_CR4_PAM4 IFM_X(103) /* 200GBase-CR4 PAM4 */
+#define IFM_200G_SR4 IFM_X(104) /* 200GBase-SR4 */
+#define IFM_200G_FR4 IFM_X(105) /* 200GBase-FR4 */
+#define IFM_200G_LR4 IFM_X(106) /* 200GBase-LR4 */
+#define IFM_200G_DR4 IFM_X(107) /* 200GBase-DR4 */
+#define IFM_200G_KR4_PAM4 IFM_X(108) /* 200GBase-KR4 PAM4 */
+#define IFM_200G_AUI4_AC IFM_X(109) /* 200G-AUI4 active copper/optical */
+#define IFM_200G_AUI4 IFM_X(110) /* 200G-AUI4 */
+#define IFM_200G_AUI8_AC IFM_X(111) /* 200G-AUI8 active copper/optical */
+#define IFM_200G_AUI8 IFM_X(112) /* 200G-AUI8 */
+#define IFM_400G_FR8 IFM_X(113) /* 400GBase-FR8 */
+#define IFM_400G_LR8 IFM_X(114) /* 400GBase-LR8 */
+#define IFM_400G_DR4 IFM_X(115) /* 400GBase-DR4 */
+#define IFM_400G_AUI8_AC IFM_X(116) /* 400G-AUI8 active copper/optical */
+#define IFM_400G_AUI8 IFM_X(117) /* 400G-AUI8 */
/*
* Please update ieee8023ad_lacp.c:lacp_compose_key()
@@ -432,6 +488,62 @@ struct ifmedia_description {
{ IFM_10G_AOC, "10GBase-AOC" }, \
{ IFM_25G_ACC, "25GBase-ACC" }, \
{ IFM_25G_AOC, "25GBase-AOC" }, \
+ { IFM_100_SGMII, "100M-SGMII" }, \
+ { IFM_2500_X, "2500Base-X" }, \
+ { IFM_5000_KR, "5000Base-KR" }, \
+ { IFM_25G_T, "25GBase-T" }, \
+ { IFM_25G_CR_S, "25GBase-CR-S" }, \
+ { IFM_25G_CR1, "25GBase-CR1" }, \
+ { IFM_25G_KR_S, "25GBase-KR-S" }, \
+ { IFM_5000_KR_S, "5000Base-KR-S" }, \
+ { IFM_5000_KR1, "5000Base-KR1" }, \
+ { IFM_25G_AUI, "25G-AUI" }, \
+ { IFM_40G_XLAUI, "40G-XLAUI" }, \
+ { IFM_40G_XLAUI_AC, "40G-XLAUI-AC" }, \
+ { IFM_40G_ER4, "40GBase-ER4" }, \
+ { IFM_50G_SR2, "50GBase-SR2" }, \
+ { IFM_50G_LR2, "50GBase-LR2" }, \
+ { IFM_50G_LAUI2_AC, "50G-LAUI2-AC" }, \
+ { IFM_50G_LAUI2, "50G-LAUI2" }, \
+ { IFM_50G_AUI2_AC, "50G-AUI2-AC" }, \
+ { IFM_50G_AUI2, "50G-AUI2" }, \
+ { IFM_50G_CP, "50GBase-CP" }, \
+ { IFM_50G_SR, "50GBase-SR" }, \
+ { IFM_50G_LR, "50GBase-LR" }, \
+ { IFM_50G_FR, "50GBase-FR" }, \
+ { IFM_50G_KR_PAM4, "50GBase-KR-PAM4" }, \
+ { IFM_25G_KR1, "25GBase-KR1" }, \
+ { IFM_50G_AUI1_AC, "50G-AUI1-AC" }, \
+ { IFM_50G_AUI1, "50G-AUI1" }, \
+ { IFM_100G_CAUI4_AC, "100G-CAUI4-AC" }, \
+ { IFM_100G_CAUI4, "100G-CAUI4" }, \
+ { IFM_100G_AUI4_AC, "100G-AUI4-AC" }, \
+ { IFM_100G_AUI4, "100G-AUI4" }, \
+ { IFM_100G_CR_PAM4, "100GBase-CR-PAM4" }, \
+ { IFM_100G_KR_PAM4, "100GBase-KR-PAM4" }, \
+ { IFM_100G_CP2, "100GBase-CP2" }, \
+ { IFM_100G_SR2, "100GBase-SR2" }, \
+ { IFM_100G_DR, "100GBase-DR" }, \
+ { IFM_100G_KR2_PAM4, "100GBase-KR2-PAM4" }, \
+ { IFM_100G_CAUI2_AC, "100G-CAUI2-AC" }, \
+ { IFM_100G_CAUI2, "100G-CAUI2" }, \
+ { IFM_100G_AUI2_AC, "100G-AUI2-AC" }, \
+ { IFM_100G_AUI2, "100G-AUI2" }, \
+ { IFM_200G_CR4_PAM4, "200GBase-CR4-PAM4" }, \
+ { IFM_200G_SR4, "200GBase-SR4" }, \
+ { IFM_200G_FR4, "200GBase-FR4" }, \
+ { IFM_200G_LR4, "200GBase-LR4" }, \
+ { IFM_200G_DR4, "200GBase-DR4" }, \
+ { IFM_200G_KR4_PAM4, "200GBase-KR4-PAM4" }, \
+ { IFM_200G_AUI4_AC, "200G-AUI4-AC" }, \
+ { IFM_200G_AUI4, "200G-AUI4" }, \
+ { IFM_200G_AUI8_AC, "200G-AUI8-AC" }, \
+ { IFM_200G_AUI8, "200G-AUI8" }, \
+ { IFM_400G_FR8, "400GBase-FR8" }, \
+ { IFM_400G_LR8, "400GBase-LR8" }, \
+ { IFM_400G_DR4, "400GBase-DR4" }, \
+ { IFM_400G_AUI8_AC, "400G-AUI8-AC" }, \
+ { IFM_400G_AUI8, "400G-AUI8" }, \
{ 0, NULL }, \
}
@@ -719,6 +831,62 @@ struct ifmedia_baudrate {
{ IFM_ETHER | IFM_10G_AOC, IF_Gbps(10ULL) }, \
{ IFM_ETHER | IFM_25G_ACC, IF_Gbps(25ULL) }, \
{ IFM_ETHER | IFM_25G_AOC, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_100_SGMII, IF_Mbps(100) }, \
+ { IFM_ETHER | IFM_2500_X, IF_Mbps(2500ULL) }, \
+ { IFM_ETHER | IFM_5000_KR, IF_Mbps(5000ULL) }, \
+ { IFM_ETHER | IFM_25G_T, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_CR_S, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_CR1, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_KR_S, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_5000_KR_S, IF_Mbps(5000ULL) }, \
+ { IFM_ETHER | IFM_5000_KR1, IF_Mbps(5000ULL) }, \
+ { IFM_ETHER | IFM_25G_AUI, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_40G_XLAUI, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_40G_XLAUI_AC, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_40G_ER4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_50G_SR2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_LR2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_LAUI2_AC, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_LAUI2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_AUI2_AC, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_AUI2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_CP, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_SR, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_LR, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_FR, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_KR_PAM4, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_25G_KR1, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_50G_AUI1_AC, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_AUI1, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_100G_CAUI4_AC, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_CAUI4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_AUI4_AC, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_AUI4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_CR_PAM4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_KR_PAM4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_CP2, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_SR2, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_DR, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_KR2_PAM4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_CAUI2_AC, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_CAUI2, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_AUI2_AC, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_AUI2, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_200G_CR4_PAM4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_SR4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_FR4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_LR4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_DR4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_KR4_PAM4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_AUI4_AC, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_AUI4, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_AUI8_AC, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_200G_AUI8, IF_Gbps(200ULL) }, \
+ { IFM_ETHER | IFM_400G_FR8, IF_Gbps(400ULL) }, \
+ { IFM_ETHER | IFM_400G_LR8, IF_Gbps(400ULL) }, \
+ { IFM_ETHER | IFM_400G_DR4, IF_Gbps(400ULL) }, \
+ { IFM_ETHER | IFM_400G_AUI8_AC, IF_Gbps(400ULL) }, \
+ { IFM_ETHER | IFM_400G_AUI8, IF_Gbps(400ULL) }, \
\
{ IFM_IEEE80211 | IFM_IEEE80211_FH1, IF_Mbps(1) }, \
{ IFM_IEEE80211 | IFM_IEEE80211_FH2, IF_Mbps(2) }, \
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index 1d16b2d7..1102a62d 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -87,7 +87,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
-#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rmlock.h>
@@ -153,19 +152,7 @@ static const char stfname[] = "stf";
static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
static const int ip_stf_ttl = 40;
-extern struct domain inetdomain;
-static int in_stf_input(struct mbuf **, int *, int);
-static struct protosw in_stf_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPV6,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in_stf_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
+static int in_stf_input(struct mbuf *, int, int, void *);
static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
static int stfmodevent(module_t, int, void *);
@@ -185,6 +172,14 @@ static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int stf_clone_destroy(struct if_clone *, struct ifnet *);
static struct if_clone *stf_cloner;
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_IPV6,
+ .min_length = sizeof(struct ip),
+ .exact_match = (sizeof(in_addr_t) << 3) + 8,
+ .check = stf_encapcheck,
+ .input = in_stf_input
+};
+
static int
stf_clone_match(struct if_clone *ifc, const char *name)
{
@@ -256,8 +251,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_dname = stfname;
ifp->if_dunit = IF_DUNIT_NONE;
- sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
- stf_encapcheck, &in_stf_protosw, sc);
+ sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
if (sc->encap_cookie == NULL) {
if_printf(ifp, "attach failed\n");
free(sc, M_STF);
@@ -280,7 +274,7 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
struct stf_softc *sc = ifp->if_softc;
int err __unused;
- err = encap_detach(sc->encap_cookie);
+ err = ip_encap_detach(sc->encap_cookie);
KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
bpfdetach(ifp);
if_detach(ifp);
@@ -614,18 +608,13 @@ stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
}
static int
-in_stf_input(struct mbuf **mp, int *offp, int proto)
+in_stf_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct stf_softc *sc;
+ struct stf_softc *sc = arg;
struct ip *ip;
struct ip6_hdr *ip6;
- struct mbuf *m;
u_int8_t otos, itos;
struct ifnet *ifp;
- int off;
-
- m = *mp;
- off = *offp;
if (proto != IPPROTO_IPV6) {
m_freem(m);
@@ -633,9 +622,6 @@ in_stf_input(struct mbuf **mp, int *offp, int proto)
}
ip = mtod(m, struct ip *);
-
- sc = (struct stf_softc *)encap_getarg(m);
-
if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
m_freem(m);
return (IPPROTO_DONE);
@@ -686,7 +672,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto)
ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
m->m_pkthdr.rcvif = ifp;
-
+
if (bpf_peers_present(ifp->if_bpf)) {
/*
* We need to prepend the address family as
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index e4a0b02f..14a75645 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -924,7 +924,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
m_freem(m);
return (EAFNOSUPPORT);
}
- random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN);
+ random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
CURVNET_SET(ifp->if_vnet);
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index f8e18f7e..00fcbebd 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -390,6 +390,8 @@ struct ifnet {
struct netdump_methods *if_netdump_methods;
#endif /* __rtems__ */
struct epoch_context if_epoch_ctx;
+ struct epoch_tracker if_addr_et;
+ struct epoch_tracker if_maddr_et;
#ifndef __rtems__
/*
@@ -421,15 +423,17 @@ struct rtems_ifinputreq {
*/
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock)
-#define IF_ADDR_RLOCK(if) epoch_enter_preempt(net_epoch_preempt);
-#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt);
+#define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et);
+#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et);
#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock)
#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock)
-#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch() || mtx_owned(&(if)->if_addr_lock))
+#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED)
-#define NET_EPOCH_ENTER() epoch_enter_preempt(net_epoch_preempt)
-#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt)
+#define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et)
+#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et)
+#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et))
/*
@@ -505,16 +509,16 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF)
#define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RLOCK(ifp) epoch_enter_preempt(net_epoch_preempt)
+#define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt)
+#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch() || mtx_owned(&(ifp)->if_afdata_lock))
-#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch());
+#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock))
+#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt));
#define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED)
@@ -567,12 +571,14 @@ void ifa_ref(struct ifaddr *ifa);
* Multicast address structure. This is analogous to the ifaddr
* structure except that it keeps track of multicast addresses.
*/
+#define IFMA_F_ENQUEUED 0x1
struct ifmultiaddr {
CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
struct sockaddr *ifma_addr; /* address this membership is for */
struct sockaddr *ifma_lladdr; /* link-layer translation, if any */
struct ifnet *ifma_ifp; /* back-pointer to interface */
u_int ifma_refcount; /* reference count */
+ int ifma_flags;
void *ifma_protospec; /* protocol-specific state, if any */
struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
struct epoch_context ifma_epoch_ctx;
@@ -596,16 +602,16 @@ extern struct sx ifnet_sxlock;
* write, but also whether it was acquired with sleep support or not.
*/
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
-#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch())
+#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define IFNET_WLOCK_ASSERT() do { \
sx_assert(&ifnet_sxlock, SA_XLOCKED); \
rw_assert(&ifnet_rwlock, RA_WLOCKED); \
} while (0)
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
-#define IFNET_RLOCK_NOSLEEP() epoch_enter_preempt(net_epoch_preempt)
+#define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
-#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt)
+#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et)
/*
* Look up an ifnet given its index; the _ref variant also acquires a
@@ -663,6 +669,7 @@ int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
void if_ref(struct ifnet *);
void if_rele(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
+int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, uint32_t, int);
void if_up(struct ifnet *);
int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 26f6bbde..22061dc4 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -337,7 +337,7 @@ static void vlan_lladdr_fn(void *arg, int pending);
static struct if_clone *vlan_cloner;
#ifdef VIMAGE
-static VNET_DEFINE(struct if_clone *, vlan_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner);
#define V_vlan_cloner VNET(vlan_cloner)
#endif
@@ -760,6 +760,18 @@ vlan_tag(struct ifnet *ifp, uint16_t *vidp)
return (0);
}
+static int
+vlan_pcp(struct ifnet *ifp, uint16_t *pcpp)
+{
+ struct ifvlan *ifv;
+
+ if (ifp->if_type != IFT_L2VLAN)
+ return (EINVAL);
+ ifv = ifp->if_softc;
+ *pcpp = ifv->ifv_pcp;
+ return (0);
+}
+
/*
* Return a driver specific cookie for this interface. Synchronization
* with setcookie must be provided by the driver.
@@ -863,6 +875,7 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_cookie_p = vlan_cookie;
vlan_setcookie_p = vlan_setcookie;
vlan_tag_p = vlan_tag;
+ vlan_pcp_p = vlan_pcp;
vlan_devat_p = vlan_devat;
#ifndef VIMAGE
vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
@@ -1424,6 +1437,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_resolvemulti = p->if_resolvemulti;
ifp->if_addrlen = p->if_addrlen;
ifp->if_broadcastaddr = p->if_broadcastaddr;
+ ifp->if_pcp = ifv->ifv_pcp;
/*
* Copy only a selected subset of flags from the parent.
@@ -1948,6 +1962,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
ifv->ifv_pcp = ifr->ifr_vlan_pcp;
+ ifp->if_pcp = ifv->ifv_pcp;
vlan_tag_recalculate(ifv);
/* broadcast event about PCP change */
EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index b926df80..0b66ec0a 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -132,6 +132,8 @@ struct vlanreq {
((_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL)
#define VLAN_TAG(_ifp, _vid) \
((_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL)
+#define VLAN_PCP(_ifp, _pcp) \
+ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_pcp_p)((_ifp), (_pcp)) : EINVAL)
#define VLAN_COOKIE(_ifp) \
((_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL)
#define VLAN_SETCOOKIE(_ifp, _cookie) \
@@ -144,6 +146,7 @@ extern void (*vlan_trunk_cap_p)(struct ifnet *);
extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
extern struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
extern int (*vlan_tag_p)(struct ifnet *, uint16_t *);
+extern int (*vlan_pcp_p)(struct ifnet *, uint16_t *);
extern int (*vlan_setcookie_p)(struct ifnet *, void *);
extern void *(*vlan_cookie_p)(struct ifnet *);
diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h
index 140c488b..6e1eee63 100644
--- a/freebsd/sys/net/iflib.h
+++ b/freebsd/sys/net/iflib.h
@@ -45,13 +45,6 @@ struct if_clone;
*/
typedef uint16_t qidx_t;
#define QIDX_INVALID 0xFFFF
-/*
- * Most cards can handle much larger TSO requests
- * but the FreeBSD TCP stack will break on larger
- * values
- */
-#define FREEBSD_TSO_SIZE_MAX 65518
-
struct iflib_ctx;
typedef struct iflib_ctx *if_ctx_t;
@@ -216,6 +209,7 @@ typedef struct if_softc_ctx {
int isc_tx_tso_size_max;
int isc_tx_tso_segsize_max;
int isc_tx_csum_flags;
+ int isc_capabilities;
int isc_capenable;
int isc_rss_table_size;
int isc_rss_table_mask;
@@ -242,6 +236,8 @@ struct if_shared_ctx {
bus_size_t isc_q_align;
bus_size_t isc_tx_maxsize;
bus_size_t isc_tx_maxsegsize;
+ bus_size_t isc_tso_maxsize;
+ bus_size_t isc_tso_maxsegsize;
bus_size_t isc_rx_maxsize;
bus_size_t isc_rx_maxsegsize;
int isc_rx_nsegments;
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index 8f2430eb..a3da964b 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -227,7 +227,7 @@ static struct netisr_proto netisr_proto[NETISR_MAXPROT];
* mechanism to stop netisr processing for vnet teardown.
* Apart from that we expect a VNET to always be enabled.
*/
-static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
+VNET_DEFINE_STATIC(u_int, netisr_enable[NETISR_MAXPROT]);
#define V_netisr_enable VNET(netisr_enable)
#endif
diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h
index 824b8ec3..5e80b665 100644
--- a/freebsd/sys/net/pfvar.h
+++ b/freebsd/sys/net/pfvar.h
@@ -621,9 +621,9 @@ struct pf_rule {
#define PFRULE_IFBOUND 0x00010000 /* if-bound */
#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
-#define PFSTATE_HIWAT 10000 /* default state table size */
-#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
-#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */
+#define PFSTATE_HIWAT 100000 /* default state table size */
+#define PFSTATE_ADAPT_START 60000 /* default adaptive timeout start */
+#define PFSTATE_ADAPT_END 120000 /* default adaptive timeout end */
struct pf_threshold {
@@ -1300,21 +1300,56 @@ struct pfioc_limit {
unsigned limit;
};
-struct pfioc_altq {
+struct pfioc_altq_v0 {
u_int32_t action;
u_int32_t ticket;
u_int32_t nr;
- struct pf_altq altq;
+ struct pf_altq_v0 altq;
};
-struct pfioc_qstats {
+struct pfioc_altq_v1 {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ /*
+ * Placed here so code that only uses the above parameters can be
+ * written entirely in terms of the v0 or v1 type.
+ */
+ u_int32_t version;
+ struct pf_altq_v1 altq;
+};
+
+/*
+ * Latest version of struct pfioc_altq_vX. This must move in lock-step with
+ * the latest version of struct pf_altq_vX as it has that struct as a
+ * member.
+ */
+#define PFIOC_ALTQ_VERSION PF_ALTQ_VERSION
+
+struct pfioc_qstats_v0 {
+ u_int32_t ticket;
+ u_int32_t nr;
+ void *buf;
+ int nbytes;
+ u_int8_t scheduler;
+};
+
+struct pfioc_qstats_v1 {
u_int32_t ticket;
u_int32_t nr;
void *buf;
int nbytes;
u_int8_t scheduler;
+ /*
+ * Placed here so code that only uses the above parameters can be
+ * written entirely in terms of the v0 or v1 type.
+ */
+ u_int32_t version; /* Requested version of stats struct */
};
+/* Latest version of struct pfioc_qstats_vX */
+#define PFIOC_QSTATS_VERSION 1
+
struct pfioc_ruleset {
u_int32_t nr;
char path[MAXPATHLEN];
@@ -1403,11 +1438,16 @@ struct pfioc_iface {
#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill)
#define DIOCSTARTALTQ _IO ('D', 42)
#define DIOCSTOPALTQ _IO ('D', 43)
-#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq)
-#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq)
-#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq)
-#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq)
-#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats)
+#define DIOCADDALTQV0 _IOWR('D', 45, struct pfioc_altq_v0)
+#define DIOCADDALTQV1 _IOWR('D', 45, struct pfioc_altq_v1)
+#define DIOCGETALTQSV0 _IOWR('D', 47, struct pfioc_altq_v0)
+#define DIOCGETALTQSV1 _IOWR('D', 47, struct pfioc_altq_v1)
+#define DIOCGETALTQV0 _IOWR('D', 48, struct pfioc_altq_v0)
+#define DIOCGETALTQV1 _IOWR('D', 48, struct pfioc_altq_v1)
+#define DIOCCHANGEALTQV0 _IOWR('D', 49, struct pfioc_altq_v0)
+#define DIOCCHANGEALTQV1 _IOWR('D', 49, struct pfioc_altq_v1)
+#define DIOCGETQSTATSV0 _IOWR('D', 50, struct pfioc_qstats_v0)
+#define DIOCGETQSTATSV1 _IOWR('D', 50, struct pfioc_qstats_v1)
#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr)
#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr)
#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr)
@@ -1445,11 +1485,63 @@ struct pfioc_iface {
#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
-struct pf_ifspeed {
+struct pf_ifspeed_v0 {
char ifname[IFNAMSIZ];
u_int32_t baudrate;
};
-#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
+
+struct pf_ifspeed_v1 {
+ char ifname[IFNAMSIZ];
+ u_int32_t baudrate32;
+ /* layout identical to struct pf_ifspeed_v0 up to this point */
+ u_int64_t baudrate;
+};
+
+/* Latest version of struct pf_ifspeed_vX */
+#define PF_IFSPEED_VERSION 1
+
+#define DIOCGIFSPEEDV0 _IOWR('D', 92, struct pf_ifspeed_v0)
+#define DIOCGIFSPEEDV1 _IOWR('D', 92, struct pf_ifspeed_v1)
+
+/*
+ * Compatibility and convenience macros
+ */
+#ifndef _KERNEL
+#ifdef PFIOC_USE_LATEST
+/*
+ * Maintaining in-tree consumers of the ioctl interface is easier when that
+ * code can be written in terms of old names that refer to the latest interface
+ * version as that reduces the required changes in the consumers to those
+ * that are functionally necessary to accommodate a new interface version.
+ */
+#define pfioc_altq __CONCAT(pfioc_altq_v, PFIOC_ALTQ_VERSION)
+#define pfioc_qstats __CONCAT(pfioc_qstats_v, PFIOC_QSTATS_VERSION)
+#define pf_ifspeed __CONCAT(pf_ifspeed_v, PF_IFSPEED_VERSION)
+
+#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, PFIOC_ALTQ_VERSION)
+#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, PFIOC_ALTQ_VERSION)
+#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, PFIOC_ALTQ_VERSION)
+#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, PFIOC_ALTQ_VERSION)
+#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, PFIOC_QSTATS_VERSION)
+#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, PF_IFSPEED_VERSION)
+#else
+/*
+ * When building out-of-tree code that is written for the old interface,
+ * such as may exist in ports for example, resolve the old struct tags and
+ * ioctl command names to the v0 versions.
+ */
+#define pfioc_altq __CONCAT(pfioc_altq_v, 0)
+#define pfioc_qstats __CONCAT(pfioc_qstats_v, 0)
+#define pf_ifspeed __CONCAT(pf_ifspeed_v, 0)
+
+#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, 0)
+#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, 0)
+#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, 0)
+#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, 0)
+#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, 0)
+#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, 0)
+#endif /* PFIOC_USE_LATEST */
+#endif /* !_KERNEL */
#ifdef _KERNEL
LIST_HEAD(pf_src_node_list, pf_src_node);
@@ -1470,7 +1562,7 @@ struct pf_idhash {
extern u_long pf_hashmask;
extern u_long pf_srchashmask;
-#define PF_HASHSIZ (32768)
+#define PF_HASHSIZ (131072)
#define PF_SRCHASHSIZ (PF_HASHSIZ/4)
VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
VNET_DECLARE(struct pf_idhash *, pf_idhash);
diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c
index bbfd5f65..9fbfb298 100644
--- a/freebsd/sys/net/radix.c
+++ b/freebsd/sys/net/radix.c
@@ -41,7 +41,7 @@
#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/mutex.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h
index 05f0f490..a0e5e5c5 100644
--- a/freebsd/sys/net/radix.h
+++ b/freebsd/sys/net/radix.h
@@ -38,7 +38,7 @@
#ifdef _KERNEL
#include <sys/_lock.h>
#include <sys/_mutex.h>
-#include <sys/_rwlock.h>
+#include <sys/_rmlock.h>
#endif
#ifdef MALLOC_DECLARE
@@ -138,7 +138,7 @@ struct radix_node_head {
rn_close_t *rnh_close; /*do something when the last ref drops*/
struct radix_node rnh_nodes[3]; /* empty tree for common case */
#ifdef _KERNEL
- struct rwlock rnh_lock; /* locks entire radix tree */
+ struct rmlock rnh_lock; /* locks entire radix tree */
#endif
};
@@ -159,18 +159,18 @@ void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes,
#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
#define R_Free(p) free((caddr_t)p, M_RTABLE);
+#define RADIX_NODE_HEAD_RLOCK_TRACKER struct rm_priotracker _rnh_tracker
#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \
- rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
-#define RADIX_NODE_HEAD_LOCK(rnh) rw_wlock(&(rnh)->rnh_lock)
-#define RADIX_NODE_HEAD_UNLOCK(rnh) rw_wunlock(&(rnh)->rnh_lock)
-#define RADIX_NODE_HEAD_RLOCK(rnh) rw_rlock(&(rnh)->rnh_lock)
-#define RADIX_NODE_HEAD_RUNLOCK(rnh) rw_runlock(&(rnh)->rnh_lock)
-#define RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh) rw_try_upgrade(&(rnh)->rnh_lock)
-
-
-#define RADIX_NODE_HEAD_DESTROY(rnh) rw_destroy(&(rnh)->rnh_lock)
-#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED)
-#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
+ rm_init(&(rnh)->rnh_lock, "radix node head")
+#define RADIX_NODE_HEAD_LOCK(rnh) rm_wlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_UNLOCK(rnh) rm_wunlock(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_RLOCK(rnh) rm_rlock(&(rnh)->rnh_lock,\
+ &_rnh_tracker)
+#define RADIX_NODE_HEAD_RUNLOCK(rnh) rm_runlock(&(rnh)->rnh_lock,\
+ &_rnh_tracker)
+#define RADIX_NODE_HEAD_DESTROY(rnh) rm_destroy(&(rnh)->rnh_lock)
+#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_LOCKED)
+#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_WLOCKED)
#endif /* _KERNEL */
int rn_inithead(void **, int);
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
index 7eccd8f1..3f90a9da 100644
--- a/freebsd/sys/net/radix_mpath.c
+++ b/freebsd/sys/net/radix_mpath.c
@@ -45,12 +45,15 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/syslog.h>
#include <net/radix.h>
#include <net/radix_mpath.h>
+#include <sys/rmlock.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/if.h>
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index c2348e31..3cd909c1 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -56,6 +56,8 @@
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -141,7 +143,7 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
*/
#define RNTORT(p) ((struct rtentry *)(p))
-static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
+VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */
#define V_rtzone VNET(rtzone)
static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *,
@@ -472,6 +474,7 @@ struct rtentry *
rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
u_int fibnum)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *newrt;
@@ -762,7 +765,7 @@ ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
struct ifaddr *ifa;
int not_found = 0;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
if ((flags & RTF_GATEWAY) == 0) {
/*
* If we are adding a route to an interface,
@@ -955,6 +958,7 @@ int
rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
uint32_t flowid, struct rt_addrinfo *info)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
@@ -1976,6 +1980,7 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma
static inline int
rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
{
+ RIB_RLOCK_TRACKER;
struct sockaddr *dst;
struct sockaddr *netmask;
struct rtentry *rt = NULL;
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index 28f1db01..15ec1b3e 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -410,10 +410,8 @@ struct rt_addrinfo {
} while (0)
#define RO_RTFREE(_ro) do { \
- if ((_ro)->ro_rt) { \
- RT_LOCK((_ro)->ro_rt); \
- RTFREE_LOCKED((_ro)->ro_rt); \
- } \
+ if ((_ro)->ro_rt) \
+ RTFREE((_ro)->ro_rt); \
} while (0)
#define RO_INVALIDATE_CACHE(ro) do { \
diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h
index f32dbc21..9d0d1931 100644
--- a/freebsd/sys/net/route_var.h
+++ b/freebsd/sys/net/route_var.h
@@ -44,18 +44,19 @@ struct rib_head {
rt_gen_t rnh_gen; /* generation counter */
int rnh_multipath; /* multipath capable ? */
struct radix_node rnh_nodes[3]; /* empty tree for common case */
- struct rwlock rib_lock; /* config/data path lock */
+ struct rmlock rib_lock; /* config/data path lock */
struct radix_mask_head rmhead; /* masks radix head */
};
-#define RIB_LOCK_INIT(rh) rw_init(&(rh)->rib_lock, "rib head lock")
-#define RIB_LOCK_DESTROY(rh) rw_destroy(&(rh)->rib_lock)
-#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock)
-#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock)
-#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock)
-#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock)
-#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED)
-#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED)
+#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker
+#define RIB_LOCK_INIT(rh) rm_init(&(rh)->rib_lock, "rib head lock")
+#define RIB_LOCK_DESTROY(rh) rm_destroy(&(rh)->rib_lock)
+#define RIB_RLOCK(rh) rm_rlock(&(rh)->rib_lock, &_rib_tracker)
+#define RIB_RUNLOCK(rh) rm_runlock(&(rh)->rib_lock, &_rib_tracker)
+#define RIB_WLOCK(rh) rm_wlock(&(rh)->rib_lock)
+#define RIB_WUNLOCK(rh) rm_wunlock(&(rh)->rib_lock)
+#define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED)
+#define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED)
struct rib_head *rt_tables_get_rnh(int fib, int family);
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index c0c5c5c2..84afd627 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -47,6 +47,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -141,7 +142,7 @@ typedef struct {
int ip6_count; /* attached w/ AF_INET6 */
int any_count; /* total attached */
} route_cb_t;
-static VNET_DEFINE(route_cb_t, route_cb);
+VNET_DEFINE_STATIC(route_cb_t, route_cb);
#define V_route_cb VNET(route_cb)
struct mtx rtsock_mtx;
@@ -550,6 +551,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
static int
route_output(struct mbuf *m, struct socket *so, ...)
{
+ RIB_RLOCK_TRACKER;
struct rt_msghdr *rtm = NULL;
struct rtentry *rt = NULL;
struct rib_head *rnh;
@@ -1746,15 +1748,15 @@ sysctl_iflist(int af, struct walkarg *w)
struct rt_addrinfo info;
int len, error = 0;
struct sockaddr_storage ss;
+ struct epoch_tracker et;
bzero((caddr_t)&info, sizeof(info));
bzero(&ifd, sizeof(ifd));
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER_ET(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
if_data_copy(ifp, &ifd);
- IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa->ifa_addr;
error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
@@ -1795,15 +1797,12 @@ sysctl_iflist(int af, struct walkarg *w)
goto done;
}
}
- IF_ADDR_RUNLOCK(ifp);
info.rti_info[RTAX_IFA] = NULL;
info.rti_info[RTAX_NETMASK] = NULL;
info.rti_info[RTAX_BRD] = NULL;
}
done:
- if (ifp != NULL)
- IF_ADDR_RUNLOCK(ifp);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT_ET(et);
return (error);
}
@@ -1862,6 +1861,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
static int
sysctl_rtsock(SYSCTL_HANDLER_ARGS)
{
+ RIB_RLOCK_TRACKER;
int *name = (int *)arg1;
u_int namelen = arg2;
struct rib_head *rnh = NULL; /* silence compiler. */
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index 0ec00aad..b4168750 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -93,6 +93,8 @@ struct vnet {
#define VNET_PCPUSTAT_DEFINE(type, name) \
VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+#define VNET_PCPUSTAT_DEFINE_STATIC(type, name) \
+ VNET_DEFINE_STATIC(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
#define VNET_PCPUSTAT_ALLOC(name, wait) \
COUNTER_ARRAY_ALLOC(VNET(name), \
@@ -268,7 +270,20 @@ extern struct sx vnet_sxlock;
*/
#define VNET_NAME(n) vnet_entry_##n
#define VNET_DECLARE(t, n) extern t VNET_NAME(n)
-#define VNET_DEFINE(t, n) t VNET_NAME(n) __section(VNET_SETNAME) __used
+/* struct _hack is to stop this from being used with static data */
+#define VNET_DEFINE(t, n) \
+ struct _hack; t VNET_NAME(n) __section(VNET_SETNAME) __used
+#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv))
+/*
+ * As with DPCPU_DEFINE_STATIC we are unable to mark this data as static
+ * in modules on some architectures.
+ */
+#define VNET_DEFINE_STATIC(t, n) \
+ t VNET_NAME(n) __section(VNET_SETNAME) __used
+#else
+#define VNET_DEFINE_STATIC(t, n) \
+ static t VNET_NAME(n) __section(VNET_SETNAME) __used
+#endif
#define _VNET_PTR(b, n) (__typeof(VNET_NAME(n))*) \
((b) + (uintptr_t)&VNET_NAME(n))
@@ -400,7 +415,8 @@ do { \
*/
#define VNET_NAME(n) n
#define VNET_DECLARE(t, n) extern t n
-#define VNET_DEFINE(t, n) t n
+#define VNET_DEFINE(t, n) struct _hack; t n
+#define VNET_DEFINE_STATIC(t, n) static t n
#define _VNET_PTR(b, n) &VNET_NAME(n)
/*
diff --git a/freebsd/sys/net80211/ieee80211.c b/freebsd/sys/net80211/ieee80211.c
index d914c2e4..a81b5343 100644
--- a/freebsd/sys/net80211/ieee80211.c
+++ b/freebsd/sys/net80211/ieee80211.c
@@ -278,14 +278,14 @@ null_update_chw(struct ieee80211com *ic)
int
ic_printf(struct ieee80211com *ic, const char * fmt, ...)
-{
+{
va_list ap;
int retval;
retval = printf("%s: ", ic->ic_name);
va_start(ap, fmt);
retval += vprintf(fmt, ap);
- va_end(ap);
+ va_end(ap);
return (retval);
}
@@ -388,6 +388,15 @@ ieee80211_ifdetach(struct ieee80211com *ic)
{
struct ieee80211vap *vap;
+ /*
+ * We use this as an indicator that ifattach never had a chance to be
+ * called, e.g. early driver attach failed and ifdetach was called
+ * during subsequent detach. Never fear, for we have nothing to do
+ * here.
+ */
+ if (ic->ic_tq == NULL)
+ return;
+
mtx_lock(&ic_list_mtx);
LIST_REMOVE(ic, ic_next);
mtx_unlock(&ic_list_mtx);
@@ -704,7 +713,7 @@ ieee80211_vap_attach(struct ieee80211vap *vap, ifm_change_cb_t media_change,
return 1;
}
-/*
+/*
* Tear down vap state and reclaim the ifnet.
* The driver is assumed to have prepared for
* this; e.g. by turning off interrupts for the
@@ -1762,7 +1771,7 @@ addmedia(struct ifmedia *media, int caps, int addsta, int mode, int mword)
#define ADD(_ic, _s, _o) \
ifmedia_add(media, \
IFM_MAKEWORD(IFM_IEEE80211, (_s), (_o), 0), 0, NULL)
- static const u_int mopts[IEEE80211_MODE_MAX] = {
+ static const u_int mopts[IEEE80211_MODE_MAX] = {
[IEEE80211_MODE_AUTO] = IFM_AUTO,
[IEEE80211_MODE_11A] = IFM_IEEE80211_11A,
[IEEE80211_MODE_11B] = IFM_IEEE80211_11B,
@@ -2388,13 +2397,13 @@ ieee80211_rate2media(struct ieee80211com *ic, int rate, enum ieee80211_phymode m
case IEEE80211_MODE_11NA:
case IEEE80211_MODE_TURBO_A:
case IEEE80211_MODE_STURBO_A:
- return findmedia(rates, nitems(rates),
+ return findmedia(rates, nitems(rates),
rate | IFM_IEEE80211_11A);
case IEEE80211_MODE_11B:
- return findmedia(rates, nitems(rates),
+ return findmedia(rates, nitems(rates),
rate | IFM_IEEE80211_11B);
case IEEE80211_MODE_FH:
- return findmedia(rates, nitems(rates),
+ return findmedia(rates, nitems(rates),
rate | IFM_IEEE80211_FH);
case IEEE80211_MODE_AUTO:
/* NB: ic may be NULL for some drivers */
diff --git a/freebsd/sys/net80211/ieee80211_hwmp.c b/freebsd/sys/net80211/ieee80211_hwmp.c
index 37beae83..b8950c5e 100644
--- a/freebsd/sys/net80211/ieee80211_hwmp.c
+++ b/freebsd/sys/net80211/ieee80211_hwmp.c
@@ -922,7 +922,7 @@ hwmp_update_transmitter(struct ieee80211vap *vap, struct ieee80211_node *ni,
rttran->rt_metric > metric)
{
IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni,
- "%s path to transmiter %6D of %s, metric %d:%d",
+ "%s path to transmitter %6D of %s, metric %d:%d",
rttran->rt_flags & IEEE80211_MESHRT_FLAGS_VALID ?
"prefer" : "update", ni->ni_macaddr, ":", hwmp_frame,
rttran->rt_metric, metric);
diff --git a/freebsd/sys/net80211/ieee80211_node.c b/freebsd/sys/net80211/ieee80211_node.c
index 45d6fa73..55e51299 100644
--- a/freebsd/sys/net80211/ieee80211_node.c
+++ b/freebsd/sys/net80211/ieee80211_node.c
@@ -34,13 +34,13 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_wlan.h>
#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
-
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_media.h>
@@ -139,6 +139,7 @@ ieee80211_node_detach(struct ieee80211com *ic)
callout_drain(&ic->ic_inact);
ieee80211_node_table_cleanup(&ic->ic_sta);
+ ieee80211_ageq_drain(&ic->ic_stageq);
ieee80211_ageq_cleanup(&ic->ic_stageq);
}
@@ -204,7 +205,7 @@ ieee80211_node_vdetach(struct ieee80211vap *vap)
}
}
-/*
+/*
* Port authorize/unauthorize interfaces for use by an authenticator.
*/
@@ -380,7 +381,7 @@ ieee80211_create_ibss(struct ieee80211vap* vap, struct ieee80211_channel *chan)
memcpy(ni->ni_meshid, vap->iv_mesh->ms_id, ni->ni_meshidlen);
#endif
}
- /*
+ /*
* Fix the channel and related attributes.
*/
/* clear DFS CAC state on previous channel */
@@ -572,7 +573,7 @@ check_bss_debug(struct ieee80211vap *vap, struct ieee80211_node *ni)
printf("%s\n", fail & 0x10 ? "!" : "");
}
#endif /* IEEE80211_DEBUG */
-
+
int
ieee80211_ibss_merge_check(struct ieee80211_node *ni)
@@ -881,7 +882,7 @@ ieee80211_sta_join1(struct ieee80211_node *selbs)
* Set the erp state (mostly the slot time) to deal with
* the auto-select case; this should be redundant if the
* mode is locked.
- */
+ */
ieee80211_reset_erp(ic);
ieee80211_wme_initparams(vap);
@@ -1679,7 +1680,7 @@ ieee80211_fakeup_adhoc_node(struct ieee80211vap *vap,
/*
* In adhoc demo mode there are no management
* frames to use to discover neighbor capabilities,
- * so blindly propagate the local configuration
+ * so blindly propagate the local configuration
* so we can do interesting things (e.g. use
* WME to disable ACK's).
*/
@@ -2353,7 +2354,7 @@ timeout_stations(void *arg __unused, struct ieee80211_node *ni)
/* XXX before inact decrement? */
if (ni == vap->iv_bss)
return;
- if (ni->ni_associd != 0 ||
+ if (ni->ni_associd != 0 ||
(vap->iv_opmode == IEEE80211_M_IBSS ||
vap->iv_opmode == IEEE80211_M_AHDEMO)) {
/*
diff --git a/freebsd/sys/netinet/cc/cc_newreno.c b/freebsd/sys/netinet/cc/cc_newreno.c
index 4d5f8644..2450f08e 100644
--- a/freebsd/sys/netinet/cc/cc_newreno.c
+++ b/freebsd/sys/netinet/cc/cc_newreno.c
@@ -90,8 +90,8 @@ static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void newreno_post_recovery(struct cc_var *ccv);
static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
-static VNET_DEFINE(uint32_t, newreno_beta) = 50;
-static VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
+VNET_DEFINE_STATIC(uint32_t, newreno_beta) = 50;
+VNET_DEFINE_STATIC(uint32_t, newreno_beta_ecn) = 80;
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
@@ -129,9 +129,7 @@ newreno_malloc(struct cc_var *ccv)
static void
newreno_cb_destroy(struct cc_var *ccv)
{
-
- if (ccv->cc_data != NULL)
- free(ccv->cc_data, M_NEWRENO);
+ free(ccv->cc_data, M_NEWRENO);
}
static void
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
index 0d608180..6ee6b71c 100644
--- a/freebsd/sys/netinet/if_ether.c
+++ b/freebsd/sys/netinet/if_ether.c
@@ -96,13 +96,13 @@ static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
/* timer values */
-static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20
+VNET_DEFINE_STATIC(int, arpt_keep) = (20*60); /* once resolved, good for 20
* minutes */
-static VNET_DEFINE(int, arp_maxtries) = 5;
-static VNET_DEFINE(int, arp_proxyall) = 0;
-static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
+VNET_DEFINE_STATIC(int, arp_maxtries) = 5;
+VNET_DEFINE_STATIC(int, arp_proxyall) = 0;
+VNET_DEFINE_STATIC(int, arpt_down) = 20; /* keep incomplete entries for
* 20 seconds */
-static VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
+VNET_DEFINE_STATIC(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
VNET_PCPUSTAT_SYSINIT(arpstat);
@@ -110,7 +110,7 @@ VNET_PCPUSTAT_SYSINIT(arpstat);
VNET_PCPUSTAT_SYSUNINIT(arpstat);
#endif /* VIMAGE */
-static VNET_DEFINE(int, arp_maxhold) = 1;
+VNET_DEFINE_STATIC(int, arp_maxhold) = 1;
#define V_arpt_keep VNET(arpt_keep)
#define V_arpt_down VNET(arpt_down)
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
index a4b99f62..970a01a0 100644
--- a/freebsd/sys/netinet/igmp.c
+++ b/freebsd/sys/netinet/igmp.c
@@ -219,11 +219,11 @@ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
* FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
* policy to control the address used by IGMP on the link.
*/
-static VNET_DEFINE(int, interface_timers_running); /* IGMPv3 general
+VNET_DEFINE_STATIC(int, interface_timers_running); /* IGMPv3 general
* query response */
-static VNET_DEFINE(int, state_change_timers_running); /* IGMPv3 state-change
+VNET_DEFINE_STATIC(int, state_change_timers_running); /* IGMPv3 state-change
* retransmit */
-static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
+VNET_DEFINE_STATIC(int, current_state_timers_running); /* IGMPv1/v2 host
* report; IGMPv3 g/sg
* query response */
@@ -231,25 +231,25 @@ static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
#define V_state_change_timers_running VNET(state_change_timers_running)
#define V_current_state_timers_running VNET(current_state_timers_running)
-static VNET_DEFINE(LIST_HEAD(, igmp_ifsoftc), igi_head) =
+VNET_DEFINE_STATIC(LIST_HEAD(, igmp_ifsoftc), igi_head) =
LIST_HEAD_INITIALIZER(igi_head);
-static VNET_DEFINE(struct igmpstat, igmpstat) = {
+VNET_DEFINE_STATIC(struct igmpstat, igmpstat) = {
.igps_version = IGPS_VERSION_3,
.igps_len = sizeof(struct igmpstat),
};
-static VNET_DEFINE(struct timeval, igmp_gsrdelay) = {10, 0};
+VNET_DEFINE_STATIC(struct timeval, igmp_gsrdelay) = {10, 0};
#define V_igi_head VNET(igi_head)
#define V_igmpstat VNET(igmpstat)
#define V_igmp_gsrdelay VNET(igmp_gsrdelay)
-static VNET_DEFINE(int, igmp_recvifkludge) = 1;
-static VNET_DEFINE(int, igmp_sendra) = 1;
-static VNET_DEFINE(int, igmp_sendlocal) = 1;
-static VNET_DEFINE(int, igmp_v1enable) = 1;
-static VNET_DEFINE(int, igmp_v2enable) = 1;
-static VNET_DEFINE(int, igmp_legacysupp);
-static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
+VNET_DEFINE_STATIC(int, igmp_recvifkludge) = 1;
+VNET_DEFINE_STATIC(int, igmp_sendra) = 1;
+VNET_DEFINE_STATIC(int, igmp_sendlocal) = 1;
+VNET_DEFINE_STATIC(int, igmp_v1enable) = 1;
+VNET_DEFINE_STATIC(int, igmp_v2enable) = 1;
+VNET_DEFINE_STATIC(int, igmp_legacysupp);
+VNET_DEFINE_STATIC(int, igmp_default_version) = IGMP_VERSION_3;
#define V_igmp_recvifkludge VNET(igmp_recvifkludge)
#define V_igmp_sendra VNET(igmp_sendra)
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index 7233f9a2..78fd00c0 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -80,7 +80,7 @@ static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
static void in_socktrim(struct sockaddr_in *);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, nosameprefix);
+VNET_DEFINE_STATIC(int, nosameprefix);
#define V_nosameprefix VNET(nosameprefix)
SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nosameprefix), 0,
@@ -624,8 +624,7 @@ in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
in_ifadown(&ia->ia_ifa, 1);
if (ia->ia_ifa.ifa_carp)
- (*carp_detach_p)(&ia->ia_ifa,
- (cmd == SIOCDIFADDR) ? false : true);
+ (*carp_detach_p)(&ia->ia_ifa, cmd == SIOCAIFADDR);
/*
* If this is the last IPv4 address configured on this
@@ -1169,10 +1168,6 @@ in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
lltable_unlink_entry(llt, lle);
}
- /* cancel timer */
- if (callout_stop(&lle->lle_timer) > 0)
- LLE_REMREF(lle);
-
/* Drop hold queue */
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
diff --git a/freebsd/sys/netinet/in_fib.c b/freebsd/sys/netinet/in_fib.c
index f62bc4a1..f61909ea 100644
--- a/freebsd/sys/netinet/in_fib.c
+++ b/freebsd/sys/netinet/in_fib.c
@@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -136,6 +136,7 @@ int
fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
uint32_t flowid, struct nhop4_basic *pnh4)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
@@ -184,6 +185,7 @@ int
fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
uint32_t flowid, struct nhop4_extended *pnh4)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c
index d072161f..03aaaf08 100644
--- a/freebsd/sys/netinet/in_gif.c
+++ b/freebsd/sys/netinet/in_gif.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,18 +41,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
-#include <sys/protosw.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@@ -72,35 +73,161 @@ __FBSDID("$FreeBSD$");
#include <net/if_gif.h>
-static int in_gif_input(struct mbuf **, int *, int);
-
-extern struct domain inetdomain;
-static struct protosw in_gif_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = 0/* IPPROTO_IPV[46] */,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in_gif_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
#define GIF_TTL 30
-static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+VNET_DEFINE_STATIC(int, ip_gif_ttl) = GIF_TTL;
#define V_ip_gif_ttl VNET(ip_gif_ttl)
SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip_gif_ttl), 0, "");
+ &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
+
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
+ */
+VNET_DEFINE_STATIC(struct gif_list *, ipv4_hashtbl) = NULL;
+VNET_DEFINE_STATIC(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER();
+#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
+#define V_ipv4_list VNET(ipv4_list)
+
+#define GIF_HASH(src, dst) (V_ipv4_hashtbl[\
+ in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\
+ (sc)->gif_iphdr->ip_dst.s_addr)
+static uint32_t /* hash an outer (src, dst) IPv4 address pair; GIF_HASH() masks the result to pick a bucket */
+in_gif_hashval(in_addr_t src, in_addr_t dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); /* hash src starting from the FNV1_32_INIT seed */
+ return (fnv_32_buf(&dst, sizeof(dst), ret)); /* continue over dst, seeded with the src hash */
+}
+
+static int /* classify a requested (src, dst) pair: EEXIST, EADDRNOTAVAIL or 0 */
+in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst)
+{
+ struct gif_softc *tmp;
+
+ if (sc->gif_family == AF_INET &&
+ sc->gif_iphdr->ip_src.s_addr == src &&
+ sc->gif_iphdr->ip_dst.s_addr == dst)
+ return (EEXIST); /* sc itself is already configured with exactly this pair */
+
+ CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { /* only the bucket this pair hashes to is scanned */
+ if (tmp == sc)
+ continue;
+ if (tmp->gif_iphdr->ip_src.s_addr == src &&
+ tmp->gif_iphdr->ip_dst.s_addr == dst)
+ return (EADDRNOTAVAIL); /* a different softc in the bucket already uses this pair */
+ }
+ return (0); /* no softc in the bucket uses this pair */
+}
+
+static void /* link sc into the lookup structure selected by its GIF_IGNORE_SOURCE option */
+in_gif_attach(struct gif_softc *sc)
+{
+
+ if (sc->gif_options & GIF_IGNORE_SOURCE)
+ CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain); /* plain per-vnet list */
+ else
+ CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); /* hash bucket keyed on sc's own ip_src/ip_dst */
+}
+
+int /* re-link sc when the GIF_IGNORE_SOURCE bit of its options changes; always returns 0 */
+in_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ MPASS(sc->gif_family == AF_INET);
+ MPASS(sc->gif_options != options);
+
+ if ((options & GIF_IGNORE_SOURCE) !=
+ (sc->gif_options & GIF_IGNORE_SOURCE)) {
+ CK_LIST_REMOVE(sc, chain); /* unlink from whichever list currently holds sc */
+ sc->gif_options = options; /* must be stored first: in_gif_attach() picks the list from gif_options */
+ in_gif_attach(sc);
+ }
+ return (0); /* NOTE(review): options are stored here only when the flag flips - presumably the caller records them otherwise; confirm */
+}
+
+int /* handle the IPv4 tunnel-endpoint ioctls (set / get outer addresses) for sc; returns 0 or an errno value */
+in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *dst, *src;
+ struct ip *ip;
+ int error;
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ error = EINVAL; /* result for failed sanity checks and for any cmd not handled below */
+ switch (cmd) {
+ case SIOCSIFPHYADDR: /* set the outer source/destination addresses; data is read as struct in_aliasreq */
+ src = &((struct in_aliasreq *)data)->ifra_addr;
+ dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(*src))
+ break; /* error is still EINVAL */
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if (V_ipv4_hashtbl == NULL)
+ V_ipv4_hashtbl = gif_hashinit(); /* the hash table is created on first use */
+ error = in_gif_checkdup(sc, src->sin_addr.s_addr,
+ dst->sin_addr.s_addr);
+ if (error == EADDRNOTAVAIL)
+ break; /* another tunnel already uses this address pair */
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO); /* zeroed header; only ip_src/ip_dst are filled in here */
+ ip->ip_src.s_addr = src->sin_addr.s_addr;
+ ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+ if (sc->gif_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GIF_WAIT(); /* presumably waits out concurrent readers before the old header is freed - confirm */
+ free(sc->gif_hdr, M_GIF); /* NOTE(review): frees gif_hdr while the new header is stored via gif_iphdr - presumably aliases of one pointer; confirm */
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gif_family = AF_INET;
+ sc->gif_iphdr = ip;
+ in_gif_attach(sc);
+ break; /* error is 0 here, as returned by in_gif_checkdup() */
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR: /* report the configured outer source or destination address in ifr->ifr_addr */
+ if (sc->gif_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr; /* src is reused as the output sockaddr */
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
+ sc->gif_iphdr->ip_src: sc->gif_iphdr->ip_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ if (error != 0)
+ memset(src, 0, sizeof(*src)); /* wipe the address again when prison_if() reports an error */
+ break;
+ }
+ return (error);
+}
int
in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
- GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip *ip;
int len;
/* prepend new IP header */
+ MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@@ -119,15 +246,9 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
#endif
ip = mtod(m, struct ip *);
- GIF_RLOCK(sc);
- if (sc->gif_family != AF_INET) {
- m_freem(m);
- GIF_RUNLOCK(sc);
- return (ENETDOWN);
- }
- bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
- GIF_RUNLOCK(sc);
+ MPASS(sc->gif_family == AF_INET);
+ bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
ip->ip_p = proto;
/* version will be set in ip_output() */
ip->ip_ttl = V_ip_gif_ttl;
@@ -138,15 +259,14 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
static int
-in_gif_input(struct mbuf **mp, int *offp, int proto)
+in_gif_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct mbuf *m = *mp;
- struct gif_softc *sc;
+ struct gif_softc *sc = arg;
struct ifnet *gifp;
struct ip *ip;
uint8_t ecn;
- sc = encap_getarg(m);
+ MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
@@ -156,7 +276,7 @@ in_gif_input(struct mbuf **mp, int *offp, int proto)
if ((gifp->if_flags & IFF_UP) != 0) {
ip = mtod(m, struct ip *);
ecn = ip->ip_tos;
- m_adj(m, *offp);
+ m_adj(m, off);
gif_input(m, gifp, proto, ecn);
} else {
m_freem(m);
@@ -165,56 +285,125 @@ in_gif_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- */
-int
-in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+static int /* encap lookup callback: find the gif softc for an inbound packet; returns a match score or 0 for no match */
+in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip *ip;
struct gif_softc *sc;
int ret;
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
- GIF_RLOCK_ASSERT(sc);
+ if (V_ipv4_hashtbl == NULL) /* no hash table has been allocated yet (see SIOCSIFPHYADDR in in_gif_ioctl()) */
+ return (0);
- /* check for address match */
+ MPASS(in_epoch(net_epoch_preempt));
ip = mtod(m, const struct ip *);
- if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
+ /*
+ * NOTE: it is safe to iterate without any locking here, because softc
+ * can be reclaimed only when we are not within net_epoch_preempt
+ * section, but ip_encap lookup+input are executed in epoch section.
+ */
+ ret = 0;
+ CK_LIST_FOREACH(sc, &GIF_HASH(ip->ip_dst.s_addr,
+ ip->ip_src.s_addr), chain) {
+ /*
+ * This is an inbound packet, its ip_dst is source address
+ * in softc.
+ */
+ if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr &&
+ sc->gif_iphdr->ip_dst.s_addr == ip->ip_src.s_addr) {
+ ret = ENCAP_DRV_LOOKUP; /* both outer addresses match */
+ goto done;
+ }
+ }
+ /*
+ * No exact match.
+ * Check the list of interfaces with GIF_IGNORE_SOURCE flag.
+ */
+ CK_LIST_FOREACH(sc, &V_ipv4_list, chain) {
+ if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr) {
+ ret = 32 + 8; /* src + proto */
+ goto done;
+ }
+ }
+ return (0);
+done:
+ if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) /* a matched tunnel whose interface is not up counts as no match */
return (0);
- ret = 32;
- if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
- if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
- return (0);
- } else
- ret += 32;
-
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop4_basic nh4;
struct in_addr dst;
dst = ip->ip_src;
-
if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0)
return (0);
-
if (nh4.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
+ *arg = sc; /* hand the matched softc back; presumably it becomes the arg of in_gif_input() - confirm */
return (ret);
}
-int
-in_gif_attach(struct gif_softc *sc)
+static struct { /* encap registrations made by in_gif_init(), one entry per inner protocol */
+ const struct encap_config encap;
+ const struct encaptab *cookie; /* set from ip_encap_attach(), later passed to ip_encap_detach() */
+} ipv4_encap_cfg[] = {
+ {
+ .encap = {
+ .proto = IPPROTO_IPV4,
+ .min_length = 2 * sizeof(struct ip), /* presumably outer + inner IPv4 header */
+ .exact_match = ENCAP_DRV_LOOKUP, /* same value in_gif_lookup() returns for a full src+dst match */
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ },
+#ifdef INET6
+ {
+ .encap = {
+ .proto = IPPROTO_IPV6,
+ .min_length = sizeof(struct ip) +
+ sizeof(struct ip6_hdr),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ },
+#endif
+ {
+ .encap = {
+ .proto = IPPROTO_ETHERIP,
+ .min_length = sizeof(struct ip) +
+ sizeof(struct etherip_header) +
+ sizeof(struct ether_header),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gif_lookup,
+ .input = in_gif_input
+ },
+ }
+};
+
+void /* register every ipv4_encap_cfg[] entry with ip_encap; does nothing outside the default vnet */
+in_gif_init(void)
{
+ int i;
- KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
- sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
- &in_gif_protosw, sc);
- if (sc->gif_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet)) /* the config table is a single file-scope array, attached once */
+ return;
+ for (i = 0; i < nitems(ipv4_encap_cfg); i++)
+ ipv4_encap_cfg[i].cookie = ip_encap_attach( /* keep the cookie so in_gif_uninit() can detach it */
+ &ipv4_encap_cfg[i].encap, NULL, M_WAITOK);
+}
+
+void /* undo in_gif_init() and destroy the current vnet's hash table, if one was allocated */
+in_gif_uninit(void)
+{
+ int i;
+
+ if (IS_DEFAULT_VNET(curvnet)) { /* mirrors in_gif_init(): registrations exist only for the default vnet */
+ for (i = 0; i < nitems(ipv4_encap_cfg); i++)
+ ip_encap_detach(ipv4_encap_cfg[i].cookie);
+ }
+ if (V_ipv4_hashtbl != NULL) /* the table exists only if in_gif_ioctl() allocated it */
+ gif_hashdestroy(V_ipv4_hashtbl);
+}
+
diff --git a/freebsd/sys/netinet/in_gif.h b/freebsd/sys/netinet/in_gif.h
deleted file mode 100644
index e1f4ae48..00000000
--- a/freebsd/sys/netinet/in_gif.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* $FreeBSD$ */
-/* $KAME: in_gif.h,v 1.5 2000/04/14 08:36:02 itojun Exp $ */
-
-/*-
- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the project nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _NETINET_IN_GIF_H_
-#define _NETINET_IN_GIF_H_
-
-#define GIF_TTL 30
-
-struct gif_softc;
-void in_gif_input(struct mbuf *, int);
-int in_gif_output(struct ifnet *, int, struct mbuf *);
-int gif_encapcheck4(const struct mbuf *, int, int, void *);
-int in_gif_attach(struct gif_softc *);
-int in_gif_detach(struct gif_softc *);
-
-#endif /*_NETINET_IN_GIF_H_*/
diff --git a/freebsd/sys/netinet/in_kdtrace.h b/freebsd/sys/netinet/in_kdtrace.h
index ba63a9a9..ccf53833 100644
--- a/freebsd/sys/netinet/in_kdtrace.h
+++ b/freebsd/sys/netinet/in_kdtrace.h
@@ -34,6 +34,8 @@
SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
#define UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define UDPLITE_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(udplite, , , probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE1(probe, arg0) \
SDT_PROBE1(tcp, , , probe, arg0)
#define TCP_PROBE2(probe, arg0, arg1) \
@@ -46,14 +48,32 @@
SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+#define SCTP_PROBE1(probe, arg0) \
+ SDT_PROBE1(sctp, , , probe, arg0)
+#define SCTP_PROBE2(probe, arg0, arg1) \
+ SDT_PROBE2(sctp, , , probe, arg0, arg1)
+#define SCTP_PROBE3(probe, arg0, arg1, arg2) \
+ SDT_PROBE3(sctp, , , probe, arg0, arg1, arg2)
+#define SCTP_PROBE4(probe, arg0, arg1, arg2, arg3) \
+ SDT_PROBE4(sctp, , , probe, arg0, arg1, arg2, arg3)
+#define SCTP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(sctp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define SCTP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(sctp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
SDT_PROVIDER_DECLARE(ip);
+SDT_PROVIDER_DECLARE(sctp);
SDT_PROVIDER_DECLARE(tcp);
SDT_PROVIDER_DECLARE(udp);
+SDT_PROVIDER_DECLARE(udplite);
SDT_PROBE_DECLARE(ip, , , receive);
SDT_PROBE_DECLARE(ip, , , send);
+SDT_PROBE_DECLARE(sctp, , , receive);
+SDT_PROBE_DECLARE(sctp, , , send);
+SDT_PROBE_DECLARE(sctp, , , state__change);
+
SDT_PROBE_DECLARE(tcp, , , accept__established);
SDT_PROBE_DECLARE(tcp, , , accept__refused);
SDT_PROBE_DECLARE(tcp, , , connect__established);
@@ -72,4 +92,7 @@ SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
SDT_PROBE_DECLARE(udp, , , receive);
SDT_PROBE_DECLARE(udp, , , send);
+SDT_PROBE_DECLARE(udplite, , , receive);
+SDT_PROBE_DECLARE(udplite, , , send);
+
#endif
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index ea4779fc..e0fd4c37 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -233,8 +233,13 @@ static void inm_init(void)
taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task");
}
+#ifdef EARLY_AP_STARTUP
SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
inm_init, NULL);
+#else
+SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST,
+ inm_init, NULL);
+#endif
void
@@ -260,7 +265,10 @@ inm_disconnect(struct in_multi *inm)
ifma = inm->inm_ifma;
if_ref(ifp);
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
if ((ll_ifma = ifma->ifma_llifma) != NULL) {
MPASS(ifma != ll_ifma);
@@ -268,7 +276,10 @@ inm_disconnect(struct in_multi *inm)
MPASS(ll_ifma->ifma_llifma == NULL);
MPASS(ll_ifma->ifma_ifp == ifp);
if (--ll_ifma->ifma_refcount == 0) {
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
if_freemulti(ll_ifma);
ifma_restart = true;
@@ -1581,23 +1592,24 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
* Begin state merge transaction at IGMP layer.
*/
IN_MULTI_LOCK();
- IN_MULTI_LIST_LOCK();
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
+ IN_MULTI_LIST_UNLOCK();
goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
error = igmp_change_state(inm);
+ IN_MULTI_LIST_UNLOCK();
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
out_in_multi_locked:
IN_MULTI_UNLOCK();
- IN_MULTI_UNLOCK();
out_imf_rollback:
if (error)
imf_rollback(imf);
@@ -1664,16 +1676,13 @@ inp_findmoptions(struct inpcb *inp)
}
static void
-inp_gcmoptions(epoch_context_t ctx)
+inp_gcmoptions(struct ip_moptions *imo)
{
- struct ip_moptions *imo;
struct in_mfilter *imf;
struct in_multi *inm;
struct ifnet *ifp;
size_t idx, nmships;
- imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx);
-
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1709,7 +1718,7 @@ inp_freemoptions(struct ip_moptions *imo)
{
if (imo == NULL)
return;
- epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions);
+ inp_gcmoptions(imo);
}
/*
@@ -2261,7 +2270,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
__func__);
IN_MULTI_LIST_UNLOCK();
goto out_imo_free;
- }
+ }
+ inm_acquire(inm);
imo->imo_membership[idx] = inm;
} else {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2301,6 +2311,12 @@ out_in_multi_locked:
out_imo_free:
if (error && is_new) {
+ inm = imo->imo_membership[idx];
+ if (inm != NULL) {
+ IN_MULTI_LIST_LOCK();
+ inm_release_deferred(inm);
+ IN_MULTI_LIST_UNLOCK();
+ }
imo->imo_membership[idx] = NULL;
--imo->imo_num_memberships;
}
@@ -2494,6 +2510,7 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
+ IN_MULTI_LIST_UNLOCK();
goto out_in_multi_locked;
}
@@ -2738,12 +2755,12 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
INP_WLOCK_ASSERT(inp);
IN_MULTI_LOCK();
- IN_MULTI_LIST_LOCK();
/*
* Begin state merge transaction at IGMP layer.
*/
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
+ IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index f89487b6..5ba918fa 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -114,6 +114,9 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
+#define INPCBLBGROUP_SIZMIN 8
+#define INPCBLBGROUP_SIZMAX 256
+
static struct callout ipport_tick_callout;
/*
@@ -141,7 +144,7 @@ VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
VNET_DEFINE(int, ipport_tcpallocs);
-static VNET_DEFINE(int, ipport_tcplastcount);
+VNET_DEFINE_STATIC(int, ipport_tcplastcount);
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
@@ -223,6 +226,222 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
* functions often modify hash chains or addresses in pcbs.
*/
+/*
+ * Allocate a zero-filled load balance group able to hold 'size' PCB
+ * pointers, fill in its lookup key (vflag, local port, local address) and
+ * link it at the head of the hash chain 'hdr'.
+ *
+ * The allocation does not sleep (M_NOWAIT); NULL is returned when it fails.
+ * The returned group has no members yet: il_inpcnt is left at zero.
+ */
+static struct inpcblbgroup *
+in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag,
+    uint16_t port, const union in_dependaddr *addr, int size)
+{
+	struct inpcblbgroup *new_grp;
+
+	/* Size the allocation so that it extends through il_inp[size - 1]. */
+	new_grp = malloc(__offsetof(struct inpcblbgroup, il_inp[size]), M_PCB,
+	    M_ZERO | M_NOWAIT);
+	if (new_grp != NULL) {
+		new_grp->il_vflag = vflag;
+		new_grp->il_lport = port;
+		new_grp->il_dependladdr = *addr;
+		new_grp->il_inpsiz = size;
+		CK_LIST_INSERT_HEAD(hdr, new_grp, il_list);
+	}
+	return (new_grp);
+}
+
+/*
+ * Callback handed to epoch_call() by in_pcblbgroup_free(): map the embedded
+ * epoch context back to its load balance group and release the memory.
+ */
+static void
+in_pcblbgroup_free_deferred(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcblbgroup, il_epoch_ctx), M_PCB);
+}
+
+/*
+ * Unlink a load balance group from its hash chain and schedule its memory
+ * to be released through epoch_call() rather than freeing it immediately.
+ */
+static void
+in_pcblbgroup_free(struct inpcblbgroup *grp)
+{
+	epoch_context_t ctx = &grp->il_epoch_ctx;
+
+	CK_LIST_REMOVE(grp, il_list);
+	epoch_call(net_epoch_preempt, ctx, in_pcblbgroup_free_deferred);
+}
+
+/*
+ * Replace 'old_grp' with a freshly allocated group of capacity 'size' that
+ * carries the same key and the same members.  On success the old group is
+ * unlinked and queued for deferred free, and the new group is returned.
+ * On allocation failure NULL is returned and 'old_grp' is left untouched.
+ *
+ * NOTE(review): in_pcblbgroup_alloc() links the new group into 'hdr' before
+ * the members are copied below, so both groups are briefly on the chain --
+ * confirm this is acceptable for any reader that does not hold the hash lock.
+ */
+static struct inpcblbgroup *
+in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
+    struct inpcblbgroup *old_grp, int size)
+{
+	struct inpcblbgroup *new_grp;
+	int slot;
+
+	new_grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag,
+	    old_grp->il_lport, &old_grp->il_dependladdr, size);
+	if (new_grp == NULL)
+		return (NULL);
+
+	KASSERT(old_grp->il_inpcnt < new_grp->il_inpsiz,
+	    ("invalid new local group size %d and old local group count %d",
+	    new_grp->il_inpsiz, old_grp->il_inpcnt));
+
+	/* Carry every member over, preserving its slot. */
+	slot = 0;
+	while (slot < old_grp->il_inpcnt) {
+		new_grp->il_inp[slot] = old_grp->il_inp[slot];
+		slot++;
+	}
+	new_grp->il_inpcnt = old_grp->il_inpcnt;
+
+	in_pcblbgroup_free(old_grp);
+	return (new_grp);
+}
+
+/*
+ * Drop the PCB stored at slot 'i' of the group '*grpp': every later entry
+ * moves up by one and the member count is decremented.
+ *
+ * When the group is larger than INPCBLBGROUP_SIZMIN and at most a quarter
+ * full afterwards, an attempt is made to halve its capacity; on success
+ * '*grpp' is updated to point at the replacement group.  A failed shrink is
+ * not an error and leaves '*grpp' as it was.
+ */
+static void
+in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp,
+    int i)
+{
+	struct inpcblbgroup *cur, *shrunk;
+
+	cur = *grpp;
+	while (i + 1 < cur->il_inpcnt) {
+		cur->il_inp[i] = cur->il_inp[i + 1];
+		++i;
+	}
+	cur->il_inpcnt--;
+
+	/* Nothing more to do unless the group qualifies for shrinking. */
+	if (cur->il_inpsiz <= INPCBLBGROUP_SIZMIN ||
+	    cur->il_inpcnt > (cur->il_inpsiz / 4))
+		return;
+
+	shrunk = in_pcblbgroup_resize(hdr, cur, cur->il_inpsiz / 2);
+	if (shrunk != NULL)
+		*grpp = shrunk;
+}
+
+/*
+ * Add PCB to load balance group for SO_REUSEPORT_LB option.
+ *
+ * The caller must hold the inpcb write lock and the pcbinfo hash write lock
+ * (both are asserted).  The PCB joins the group whose vflag, local port and
+ * local address match its own; the group is created on first use and doubled
+ * in capacity when full, up to INPCBLBGROUP_SIZMAX slots.
+ *
+ * Returns 0 on success and also when the PCB is deliberately not added (no
+ * lb group hash table, jailed socket, IPv4-mapped INET6 wildcard socket, or
+ * group size limit reached).  Returns ENOBUFS when a group cannot be
+ * allocated or grown.
+ */
+static int
+in_pcbinslbgrouphash(struct inpcb *inp)
+{
+	static const struct timeval interval = { 60, 0 };
+	static struct timeval lastprint;
+	struct inpcbinfo *pcbinfo;
+	struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	struct ucred *cred;
+
+	pcbinfo = inp->inp_pcbinfo;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	if (pcbinfo->ipi_lbgrouphashbase == NULL)
+		return (0);
+
+	/*
+	 * Don't allow jailed socket to join local group.
+	 */
+	if (inp->inp_socket != NULL)
+		cred = inp->inp_socket->so_cred;
+	else
+		cred = NULL;
+	if (cred != NULL && jailed(cred))
+		return (0);
+
+#ifdef INET6
+	/*
+	 * Don't allow IPv4 mapped INET6 wild socket.
+	 */
+	if ((inp->inp_vflag & INP_IPV4) &&
+	    inp->inp_laddr.s_addr == INADDR_ANY &&
+	    INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) {
+		return (0);
+	}
+#endif
+
+	/* Locate the hash chain for this local port and search it. */
+	hdr = &pcbinfo->ipi_lbgrouphashbase[
+	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+		if (grp->il_vflag == inp->inp_vflag &&
+		    grp->il_lport == inp->inp_lport &&
+		    memcmp(&grp->il_dependladdr,
+		    &inp->inp_inc.inc_ie.ie_dependladdr,
+		    sizeof(grp->il_dependladdr)) == 0) {
+			break;
+		}
+	}
+	if (grp == NULL) {
+		/* Create new load balance group. */
+		grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
+		    inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
+		    INPCBLBGROUP_SIZMIN);
+		if (!grp)
+			return (ENOBUFS);
+	} else if (grp->il_inpcnt == grp->il_inpsiz) {
+		if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
+			/* Rate limit the complaint to one per 'interval'. */
+			if (ratecheck(&lastprint, &interval))
+				printf("lb group port %d, limit reached\n",
+				    ntohs(grp->il_lport));
+			return (0);
+		}
+
+		/* Expand this local group. */
+		grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
+		if (!grp)
+			return (ENOBUFS);
+	}
+
+	KASSERT(grp->il_inpcnt < grp->il_inpsiz,
+	    ("invalid local group size %d and count %d",
+	    grp->il_inpsiz, grp->il_inpcnt));
+
+	grp->il_inp[grp->il_inpcnt] = inp;
+	grp->il_inpcnt++;
+	return (0);
+}
+
+/*
+ * Remove PCB from load balance group.
+ *
+ * The caller must hold the inpcb write lock and the pcbinfo hash write lock
+ * (both are asserted).  The hash chain for the PCB's local port is searched
+ * for a group containing it; only the first occurrence is handled.  A PCB
+ * that is in no group is silently ignored.
+ */
+static void
+in_pcbremlbgrouphash(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	int slot;
+
+	INP_WLOCK_ASSERT(inp);
+	INP_HASH_WLOCK_ASSERT(pcbinfo);
+
+	if (pcbinfo->ipi_lbgrouphashbase == NULL)
+		return;
+
+	hdr = &pcbinfo->ipi_lbgrouphashbase[
+	    INP_PCBLBGROUP_PORTHASH(inp->inp_lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+		for (slot = 0; slot < grp->il_inpcnt; ++slot) {
+			if (grp->il_inp[slot] == inp) {
+				if (grp->il_inpcnt != 1) {
+					/* Close the gap, shrink if possible. */
+					in_pcblbgroup_reorder(hdr, &grp, slot);
+				} else {
+					/* Last member: drop the whole group. */
+					in_pcblbgroup_free(grp);
+				}
+				return;
+			}
+		}
+	}
+}
+
/*
* Different protocols initialize their inpcbs differently - giving
* different name to the lock. But they all are disposed the same.
@@ -252,12 +471,14 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
pcbinfo->ipi_vnet = curvnet;
#endif
pcbinfo->ipi_listhead = listhead;
- LIST_INIT(pcbinfo->ipi_listhead);
+ CK_LIST_INIT(pcbinfo->ipi_listhead);
pcbinfo->ipi_count = 0;
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+ pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
#endif
@@ -281,6 +502,8 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+ hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
+ pcbinfo->ipi_lbgrouphashmask);
#ifdef PCBGROUP
in_pcbgroup_destroy(pcbinfo);
#endif
@@ -341,7 +564,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
#endif
INP_WLOCK(inp);
INP_LIST_WLOCK(pcbinfo);
- LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
+ CK_LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
#ifdef INET6
@@ -519,18 +742,20 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
/*
* Return cached socket options.
*/
-short
+int
inp_so_options(const struct inpcb *inp)
{
- short so_options;
+ int so_options;
- so_options = 0;
+ so_options = 0;
- if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
- so_options |= SO_REUSEPORT;
- if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
- so_options |= SO_REUSEADDR;
- return (so_options);
+ if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0)
+ so_options |= SO_REUSEPORT_LB;
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
}
#endif /* INET || INET6 */
@@ -589,6 +814,12 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
int error;
/*
+ * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
+ * so that we don't have to add to the (already messy) code below.
+ */
+ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
+
+ /*
* No state changes, so read locks are sufficient here.
*/
INP_LOCK_ASSERT(inp);
@@ -599,7 +830,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
laddr.s_addr = *laddrp;
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip4(cred, &laddr)) != 0)
@@ -636,16 +867,23 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ /*
+ * XXX: How to deal with SO_REUSEPORT_LB here?
+ * Treat same as SO_REUSEPORT for now.
+ */
+ if ((so->so_options &
+ (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
+ reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (sin->sin_addr.s_addr != INADDR_ANY) {
sin->sin_port = 0; /* yech... */
bzero(&sin->sin_zero, sizeof(sin->sin_zero));
/*
- * Is the address a local IP address?
+ * Is the address a local IP address?
* If INP_BINDANY is set, then the socket may be bound
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
return (EADDRNOTAVAIL);
}
laddr = sin->sin_addr;
@@ -675,7 +913,8 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) ||
+ (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
#ifndef __rtems__
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
@@ -704,11 +943,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
*/
tw = intotw(t);
if (tw == NULL ||
- (reuseport & tw->tw_so_options) == 0)
+ ((reuseport & tw->tw_so_options) == 0 &&
+ (reuseport_lb &
+ tw->tw_so_options) == 0)) {
return (EADDRINUSE);
+ }
} else if (t &&
- ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
- (reuseport & inp_so_options(t)) == 0) {
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+ (reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -717,7 +960,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
(inp->inp_vflag & INP_IPV6PROTO) == 0 ||
(t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
- return (EADDRINUSE);
+ return (EADDRINUSE);
if (t && (! in_pcbbind_check_bindmulti(inp, t)))
return (EADDRINUSE);
}
@@ -862,7 +1105,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp;
ia = NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
@@ -876,10 +1118,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr);
@@ -921,7 +1161,6 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
*/
ia = NULL;
ifp = sro.ro_rt->rt_ifp;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
@@ -934,10 +1173,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
/* 3. As a last resort return the 'default' jail address. */
error = prison_get_ip4(cred, laddr);
@@ -985,9 +1222,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
ifp = ia->ia_ifp;
ia = NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-
sa = ifa->ifa_addr;
if (sa->sa_family != AF_INET)
continue;
@@ -1000,10 +1235,8 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
if (ia != NULL) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
- IF_ADDR_RUNLOCK(ifp);
}
/* 3. As a last resort return the 'default' jail address. */
@@ -1347,6 +1580,58 @@ in_pcblist_rele_rlocked(epoch_context_t ctx)
free(il, M_TEMP);
}
+/*
+ * Callback for epoch_call(): map the embedded epoch context back to its
+ * enclosing struct inpcbport and release the memory.
+ */
+static void
+inpcbport_free(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcbport, phd_epoch_ctx), M_PCB);
+}
+
+/*
+ * Second half of inpcb teardown, run as the callback that in_pcbfree()
+ * passes to epoch_call().  Takes the inpcb write lock, detaches and
+ * releases the per-pcb resources (multicast options, IPsec policy, IPv6
+ * output options, IP options mbuf, credential, MAC label) and then drops
+ * a reference with in_pcbrele_wlocked(), which is asserted to report the
+ * pcb as released.
+ */
+static void
+in_pcbfree_deferred(epoch_context_t ctx)
+{
+ struct inpcb *inp;
+ int released __unused;
+
+ /* Recover the inpcb that embeds this epoch context. */
+ inp = __containerof(ctx, struct inpcb, inp_epoch_ctx);
+
+ INP_WLOCK(inp);
+#ifdef INET
+ /* Detach the IPv4 multicast options now; they are freed at the end. */
+ struct ip_moptions *imo = inp->inp_moptions;
+ inp->inp_moptions = NULL;
+#endif
+ /* XXXRW: Do as much as possible here. */
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+ if (inp->inp_sp != NULL)
+ ipsec_delete_pcbpolicy(inp);
+#endif
+#ifdef INET6
+ /* Likewise detach the IPv6 multicast options for freeing at the end. */
+ struct ip6_moptions *im6o = NULL;
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ ip6_freepcbopts(inp->in6p_outputopts);
+ im6o = inp->in6p_moptions;
+ inp->in6p_moptions = NULL;
+ }
+#endif
+ if (inp->inp_options)
+ (void)m_free(inp->inp_options);
+ inp->inp_vflag = 0;
+ crfree(inp->inp_cred);
+#ifdef MAC
+ mac_inpcb_destroy(inp);
+#endif
+ /*
+ * NOTE(review): presumably a non-zero return means the pcb was freed
+ * and its lock released, so 'inp' must not be touched past this point
+ * -- confirm against in_pcbrele_wlocked().
+ */
+ released = in_pcbrele_wlocked(inp);
+ MPASS(released);
+ /* Free the multicast options detached above; 'inp' is not used here. */
+#ifdef INET6
+ ip6_freemoptions(im6o);
+#endif
+#ifdef INET
+ inp_freemoptions(imo);
+#endif
+}
+
/*
* Unconditionally schedule an inpcb to be freed by decrementing its
* reference count, which should occur only after the inpcb has been detached
@@ -1361,14 +1646,7 @@ in_pcbfree(struct inpcb *inp)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
-#ifdef INET6
- struct ip6_moptions *im6o = NULL;
-#endif
-#ifdef INET
- struct ip_moptions *imo = NULL;
-#endif
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
-
KASSERT((inp->inp_flags2 & INP_FREED) == 0,
("%s: called twice for pcb %p", __func__, inp));
if (inp->inp_flags2 & INP_FREED) {
@@ -1384,45 +1662,14 @@ in_pcbfree(struct inpcb *inp)
}
#endif
INP_WLOCK_ASSERT(inp);
-
-#ifdef INET
- imo = inp->inp_moptions;
- inp->inp_moptions = NULL;
-#endif
- /* XXXRW: Do as much as possible here. */
-#if defined(IPSEC) || defined(IPSEC_SUPPORT)
- if (inp->inp_sp != NULL)
- ipsec_delete_pcbpolicy(inp);
-#endif
INP_LIST_WLOCK(pcbinfo);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
in_pcbremlists(inp);
INP_LIST_WUNLOCK(pcbinfo);
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO) {
- ip6_freepcbopts(inp->in6p_outputopts);
- im6o = inp->in6p_moptions;
- inp->in6p_moptions = NULL;
- }
-#endif
- if (inp->inp_options)
- (void)m_free(inp->inp_options);
RO_INVALIDATE_CACHE(&inp->inp_route);
-
- inp->inp_vflag = 0;
+ /* mark as destruction in progress */
inp->inp_flags2 |= INP_FREED;
- crfree(inp->inp_cred);
-#ifdef MAC
- mac_inpcb_destroy(inp);
-#endif
-#ifdef INET6
- ip6_freemoptions(im6o);
-#endif
-#ifdef INET
- inp_freemoptions(imo);
-#endif
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
+ INP_WUNLOCK(inp);
+ epoch_call(net_epoch_preempt, &inp->inp_epoch_ctx, in_pcbfree_deferred);
}
/*
@@ -1444,6 +1691,10 @@ in_pcbdrop(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
+#ifdef INVARIANTS
+ if (inp->inp_socket != NULL && inp->inp_ppcb != NULL)
+ MPASS(inp->inp_refcount > 1);
+#endif
/*
* XXXRW: Possibly we should protect the setting of INP_DROPPED with
@@ -1454,11 +1705,12 @@ in_pcbdrop(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(inp->inp_pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd, M_PCB);
+ in_pcbremlbgrouphash(inp);
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_portlist);
+ if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ CK_LIST_REMOVE(phd, phd_hash);
+ epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free);
}
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
@@ -1532,7 +1784,7 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
struct inpcb *inp, *inp_temp;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
+ CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
INP_WLOCK(inp);
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0) {
@@ -1559,7 +1811,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
int i, gap;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(inp);
imo = inp->inp_moptions;
if ((inp->inp_vflag & INP_IPV4) &&
@@ -1624,7 +1876,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
*/
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1658,7 +1910,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
*/
porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->ipi_porthashmask)];
- LIST_FOREACH(phd, porthash, phd_hash) {
+ CK_LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
}
@@ -1667,7 +1919,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
* Port is in use by one or more PCBs. Look for best
* fit.
*/
- LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+ CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
!prison_equal_ip4(inp->inp_cred->cr_prison,
@@ -1717,6 +1969,50 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+/*
+ * Look up a PCB in the SO_REUSEPORT_LB load balance groups.
+ *
+ * The hash chain for 'lport' is walked and, within each candidate group,
+ * one member is picked by hashing (faddr, lport, fport) modulo the number
+ * of members.  A group bound to exactly 'laddr' wins immediately; a group
+ * bound to INADDR_ANY is remembered as a fallback when 'lookupflags'
+ * contains INPLOOKUP_WILDCARD.
+ *
+ * Returns the selected PCB, or NULL when no group matches.
+ */
+static struct inpcb *
+in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcb *local_wild;
+	const struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	uint32_t count, idx;
+
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	hdr = &pcbinfo->ipi_lbgrouphashbase[INP_PCBLBGROUP_PORTHASH(lport,
+	    pcbinfo->ipi_lbgrouphashmask)];
+
+	/*
+	 * Order of socket selection:
+	 * 1. non-wild.
+	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
+	 *
+	 * NOTE:
+	 * - Load balanced group does not contain jailed sockets
+	 * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
+	 */
+	local_wild = NULL;
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+#ifdef INET6
+		if (!(grp->il_vflag & INP_IPV4))
+			continue;
+#endif
+		if (grp->il_lport != lport)
+			continue;
+
+		/*
+		 * A group is linked into the chain before its first member
+		 * is stored, so it can be seen with no members.  Skip it:
+		 * the modulo below would otherwise divide by zero.
+		 */
+		count = grp->il_inpcnt;
+		if (count == 0)
+			continue;
+
+		idx = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport) %
+		    count;
+		if (grp->il_laddr.s_addr == laddr->s_addr)
+			return (grp->il_inp[idx]);
+		if (grp->il_laddr.s_addr == INADDR_ANY &&
+		    (lookupflags & INPLOOKUP_WILDCARD) != 0)
+			local_wild = grp->il_inp[idx];
+	}
+	return (local_wild);
+}
+
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -1738,7 +2034,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
INP_GROUP_LOCK(pcbgroup);
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1788,7 +2084,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY,
lport, 0, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1862,7 +2158,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
*/
head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1922,7 +2218,13 @@ found:
locked = INP_TRY_RLOCK(inp);
else
panic("%s: locking bug", __func__);
- if (!locked)
+ if (__predict_false(locked && (inp->inp_flags2 & INP_FREED))) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+ return (NULL);
+ } else if (!locked)
in_pcbref(inp);
INP_GROUP_UNLOCK(pcbgroup);
if (!locked) {
@@ -1960,18 +2262,19 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
+#ifdef INVARIANTS
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
-
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
+ if (!mtx_owned(&pcbinfo->ipi_hash_lock))
+ MPASS(in_epoch_verbose(net_epoch_preempt, 1));
+#endif
/*
* First look for an exact match.
*/
tmpinp = NULL;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1996,6 +2299,18 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
return (tmpinp);
/*
+ * Then look in lb group (for wildcard match).
+ */
+ if (pcbinfo->ipi_lbgrouphashbase != NULL &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr,
+ fport, lookupflags);
+ if (inp != NULL) {
+ return (inp);
+ }
+ }
+
+ /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -2016,7 +2331,7 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -2080,40 +2395,35 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
struct ifnet *ifp)
{
struct inpcb *inp;
- bool locked;
INP_HASH_RLOCK(pcbinfo);
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- locked = INP_TRY_WLOCK(inp);
- else if (lookupflags & INPLOOKUP_RLOCKPCB)
- locked = INP_TRY_RLOCK(inp);
- else
- panic("%s: locking bug", __func__);
- if (!locked)
- in_pcbref(inp);
- INP_HASH_RUNLOCK(pcbinfo);
- if (!locked) {
- if (lookupflags & INPLOOKUP_WLOCKPCB) {
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (NULL);
- } else {
- INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (NULL);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
}
- }
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ inp = NULL;
+ }
+ } else
+ panic("%s: locking bug", __func__);
#ifdef INVARIANTS
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- INP_WLOCK_ASSERT(inp);
- else
- INP_RLOCK_ASSERT(inp);
+ if (inp != NULL) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WLOCK_ASSERT(inp);
+ else
+ INP_RLOCK_ASSERT(inp);
+ }
#endif
- } else
- INP_HASH_RUNLOCK(pcbinfo);
+ }
+ INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
@@ -2212,6 +2522,7 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbport *phd;
u_int32_t hashkey_faddr;
+ int so_options;
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -2233,9 +2544,22 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
+ * Add entry to load balance group.
+ * Only do this if SO_REUSEPORT_LB is set.
+ */
+ so_options = inp_so_options(inp);
+ if (so_options & SO_REUSEPORT_LB) {
+ int ret = in_pcbinslbgrouphash(inp);
+ if (ret) {
+ /* pcb lb group malloc fail (ret=ENOBUFS). */
+ return (ret);
+ }
+ }
+
+ /*
* Go through port list and look for a head for this lport.
*/
- LIST_FOREACH(phd, pcbporthash, phd_hash) {
+ CK_LIST_FOREACH(phd, pcbporthash, phd_hash) {
if (phd->phd_port == inp->inp_lport)
break;
}
@@ -2247,13 +2571,14 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
if (phd == NULL) {
return (ENOBUFS); /* XXX */
}
+ bzero(&phd->phd_epoch_ctx, sizeof(struct epoch_context));
phd->phd_port = inp->inp_lport;
- LIST_INIT(&phd->phd_pcblist);
- LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
+ CK_LIST_INIT(&phd->phd_pcblist);
+ CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
}
inp->inp_phd = phd;
- LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
- LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+ CK_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
+ CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
#ifdef PCBGROUP
if (do_pcbgroup_update)
@@ -2316,8 +2641,8 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- LIST_REMOVE(inp, inp_hash);
- LIST_INSERT_HEAD(head, inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_INSERT_HEAD(head, inp, inp_hash);
#ifdef PCBGROUP
if (m != NULL)
@@ -2358,16 +2683,20 @@ in_pcbremlists(struct inpcb *inp)
struct inpcbport *phd = inp->inp_phd;
INP_HASH_WLOCK(pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd, M_PCB);
+
+ /* XXX: Only do if SO_REUSEPORT_LB set? */
+ in_pcbremlbgrouphash(inp);
+
+ CK_LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_portlist);
+ if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ CK_LIST_REMOVE(phd, phd_hash);
+ epoch_call(net_epoch_preempt, &phd->phd_epoch_ctx, inpcbport_free);
}
INP_HASH_WUNLOCK(pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
- LIST_REMOVE(inp, inp_list);
+ CK_LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
#ifdef PCBGROUP
in_pcbgroup_remove(inp);
@@ -2511,7 +2840,7 @@ inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
struct inpcb *inp;
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
@@ -2594,7 +2923,7 @@ in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
bzero(&xi->xi_socket, sizeof(struct xsocket));
bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo));
xi->inp_gencnt = inp->inp_gencnt;
- xi->inp_ppcb = inp->inp_ppcb;
+ xi->inp_ppcb = (uintptr_t)inp->inp_ppcb;
xi->inp_flow = inp->inp_flow;
xi->inp_flowid = inp->inp_flowid;
xi->inp_flowtype = inp->inp_flowtype;
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index d00dd456..86c9705c 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -51,8 +51,11 @@
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
+#include <net/if.h>
+#include <net/if_var.h>
#include <vm/uma.h>
#endif
+#include <sys/ck.h>
#define in6pcb inpcb /* for KAME src sync over BSD*'s */
#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
@@ -65,8 +68,9 @@
* numbers, and pointers up (to a socket structure) and down (to a
* protocol-specific control block) are stored here.
*/
-LIST_HEAD(inpcbhead, inpcb);
-LIST_HEAD(inpcbporthead, inpcbport);
+CK_LIST_HEAD(inpcbhead, inpcb);
+CK_LIST_HEAD(inpcbporthead, inpcbport);
+CK_LIST_HEAD(inpcblbgrouphead, inpcblbgroup);
typedef uint64_t inp_gen_t;
/*
@@ -79,6 +83,11 @@ struct in_addr_4in6 {
struct in_addr ia46_addr4;
};
+union in_dependaddr {
+ struct in_addr_4in6 id46_addr;
+ struct in6_addr id6_addr;
+};
+
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
@@ -89,22 +98,14 @@ struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
u_int16_t ie_lport; /* local port */
/* protocol dependent part, local and foreign addr */
- union {
- /* foreign host table entry */
- struct in_addr_4in6 ie46_foreign;
- struct in6_addr ie6_foreign;
- } ie_dependfaddr;
- union {
- /* local host table entry */
- struct in_addr_4in6 ie46_local;
- struct in6_addr ie6_local;
- } ie_dependladdr;
+ union in_dependaddr ie_dependfaddr; /* foreign host table entry */
+ union in_dependaddr ie_dependladdr; /* local host table entry */
+#define ie_faddr ie_dependfaddr.id46_addr.ia46_addr4
+#define ie_laddr ie_dependladdr.id46_addr.ia46_addr4
+#define ie6_faddr ie_dependfaddr.id6_addr
+#define ie6_laddr ie_dependladdr.id6_addr
u_int32_t ie6_zoneid; /* scope zone id */
};
-#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
-#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
-#define ie6_faddr ie_dependfaddr.ie6_foreign
-#define ie6_laddr ie_dependladdr.ie6_local
/*
* XXX The defines for inc_* are hacks and should be changed to direct
@@ -122,8 +123,8 @@ struct in_conninfo {
* Flags for inc_flags.
*/
#define INC_ISIPV6 0x01
+#define INC_IPV6MINMTU 0x02
-#define inc_isipv6 inc_flags /* temp compatibility */
#define inc_fport inc_ie.ie_fport
#define inc_lport inc_ie.ie_lport
#define inc_faddr inc_ie.ie_faddr
@@ -159,6 +160,7 @@ struct in_conninfo {
* Key:
* (b) - Protected by the hpts lock.
* (c) - Constant after initialization
+ * (e) - Protected by the net_epoch_preempt epoch
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
@@ -233,8 +235,8 @@ struct inpcbpolicy;
struct m_snd_tag;
struct inpcb {
/* Cache line #1 (amd64) */
- LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
- LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */
+ CK_LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
#define inp_start_zero inp_hpts
@@ -278,7 +280,7 @@ struct inpcb {
TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
- LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
+ CK_LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
u_char inp_vflag; /* (i) IP version flag (v4/v6) */
@@ -316,18 +318,19 @@ struct inpcb {
int in6p_cksum;
short in6p_hops;
};
- LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+ CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
struct inpcbport *inp_phd; /* (i/h) head of this list */
inp_gen_t inp_gencnt; /* (c) generation count */
- struct llentry *inp_lle; /* cached L2 information */
+ void *spare_ptr; /* Spare pointer. */
rt_gen_t inp_rt_cookie; /* generation for route entry */
union { /* cached L3 information */
struct route inp_route;
struct route_in6 inp_route6;
};
- LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
- /* (p[w]) for list iteration */
- /* (p[r]/l) for addition/removal */
+ CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (e[r]) for list iteration */
+ /* (p[w]/l) for addition/removal */
+ struct epoch_context inp_epoch_ctx;
};
#endif /* _KERNEL */
@@ -364,14 +367,11 @@ struct inpcb {
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
- size_t xi_len; /* length of this structure */
+ ksize_t xi_len; /* length of this structure */
struct xsocket xi_socket; /* (s,p) */
struct in_conninfo inp_inc; /* (s,p) */
uint64_t inp_gencnt; /* (s,p) */
- union {
- void *inp_ppcb; /* (s) netstat(1) */
- int64_t ph_ppcb;
- };
+ kvaddr_t inp_ppcb; /* (s) netstat(1) */
int64_t inp_spare64[4];
uint32_t inp_flow; /* (s) */
uint32_t inp_flowid; /* (s) */
@@ -392,10 +392,12 @@ struct xinpcb {
} __aligned(8);
struct xinpgen {
- size_t xig_len; /* length of this structure */
+ ksize_t xig_len; /* length of this structure */
u_int xig_count; /* number of PCBs at this time */
+ uint32_t _xig_spare32;
inp_gen_t xig_gen; /* generation count at this time */
so_gen_t xig_sogen; /* socket generation count this time */
+ uint64_t _xig_spare64[4];
} __aligned(8);
#ifdef _KERNEL
void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
@@ -403,7 +405,8 @@ void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
#endif /* _SYS_SOCKETVAR_H_ */
struct inpcbport {
- LIST_ENTRY(inpcbport) phd_hash;
+ struct epoch_context phd_epoch_ctx;
+ CK_LIST_ENTRY(inpcbport) phd_hash;
struct inpcbhead phd_pcblist;
u_short phd_port;
};
@@ -436,22 +439,23 @@ struct in_pcblist {
* Locking key:
*
* (c) Constant or nearly constant after initialisation
+ * (e) - Protected by the net_epoch_preempt epoch
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting full inpcb list traversal
+ * Global lock protecting inpcb list modification
*/
- struct rwlock ipi_lock;
+ struct mtx ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g/l) */
+ struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */
u_int ipi_count; /* (l) */
/*
@@ -482,9 +486,9 @@ struct inpcbinfo {
u_int ipi_hashfields; /* (c) */
/*
- * Global lock protecting non-pcbgroup hash lookup tables.
+ * Global lock protecting modification of non-pcbgroup hash lookup tables.
*/
- struct rwlock ipi_hash_lock;
+ struct mtx ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
@@ -508,6 +512,13 @@ struct inpcbinfo {
u_long ipi_wildmask; /* (p) */
/*
+ * Load balance groups used for the SO_REUSEPORT_LB option,
+ * hashed by local port.
+ */
+ struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */
+ u_long ipi_lbgrouphashmask; /* (h) */
+
+ /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -549,6 +560,27 @@ struct inpcbgroup {
struct mtx ipg_lock;
} __aligned(CACHE_LINE_SIZE);
+/*
+ * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
+ * (or unique address:port combination) can be re-used at most
+ * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which
+ * is dynamically resized as processes bind/unbind to that specific group.
+ */
+struct inpcblbgroup {
+ CK_LIST_ENTRY(inpcblbgroup) il_list;
+ struct epoch_context il_epoch_ctx;
+ uint16_t il_lport; /* (c) */
+ u_char il_vflag; /* (c) */
+ u_char il_pad;
+ uint32_t il_pad2;
+ union in_dependaddr il_dependladdr; /* (c) */
+#define il_laddr il_dependladdr.id46_addr.ia46_addr4
+#define il6_laddr il_dependladdr.id6_addr
+ uint32_t il_inpsiz; /* max count in il_inp[] (h) */
+ uint32_t il_inpcnt; /* cur count in il_inp[] (h) */
+ struct inpcb *il_inp[]; /* (h) */
+};
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -593,25 +625,24 @@ struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
-short inp_so_options(const struct inpcb *inp);
+int inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
-#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
-#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
-#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
-#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
-#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
-#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
-#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+ mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
+#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
+#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et))
+#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
+#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
+#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
+#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et)
+#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
+#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
+#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -632,17 +663,16 @@ short inp_so_options(const struct inpcb *inp);
#define INP_LIST_UNLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
-#define INP_HASH_LOCK_INIT(ipi, d) \
- rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
-#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_LOCKED)
-#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
- RA_WLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
+#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
+#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et)
+#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
+#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
+#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
MTX_DEF | MTX_DUPOK)
@@ -656,6 +686,10 @@ short inp_so_options(const struct inpcb *inp);
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PORTHASH(lport, mask) \
+ (ntohs((lport)) & (mask))
+#define INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) \
+ ((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport)))
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
@@ -711,8 +745,8 @@ short inp_so_options(const struct inpcb *inp);
/*
* Flags for inp_flags2.
*/
-#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
-#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_2UNUSED1 0x00000001
+#define INP_2UNUSED2 0x00000002
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */
@@ -724,6 +758,7 @@ short inp_so_options(const struct inpcb *inp);
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
+#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */
/*
* Flags passed to in_pcblookup*() functions.
diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h
index 6d902fe4..934bd812 100644
--- a/freebsd/sys/netinet/ip.h
+++ b/freebsd/sys/netinet/ip.h
@@ -94,6 +94,11 @@ struct ip {
#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0
/*
+ * Number of bits to left-shift a Diffserv (DSCP) value to convert it to a tos value.
+ */
+#define IPTOS_DSCP_OFFSET 2
+
+/*
* Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
*/
#define IPTOS_DSCP_CS0 0x00
diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h
index a0dfcb0f..1f4be3fd 100644
--- a/freebsd/sys/netinet/ip6.h
+++ b/freebsd/sys/netinet/ip6.h
@@ -104,6 +104,7 @@ struct ip6_hdr {
#define IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */
#endif /* LITTLE_ENDIAN */
#endif
+#define IPV6_FLOWLABEL_LEN 20
#if 1
/* ECN bits proposed by Sally Floyd */
#define IP6TOS_CE 0x01 /* congestion experienced */
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index 6f5160e0..8f7f6edf 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -189,36 +189,44 @@ static int proto_reg[] = {-1, -1};
*/
/* Accept incoming CARP packets. */
-static VNET_DEFINE(int, carp_allow) = 1;
+VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define V_carp_allow VNET(carp_allow)
+/* Set DSCP in outgoing CARP packets. */
+VNET_DEFINE_STATIC(int, carp_dscp) = 56;
+#define V_carp_dscp VNET(carp_dscp)
+
/* Preempt slower nodes. */
-static VNET_DEFINE(int, carp_preempt) = 0;
+VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define V_carp_preempt VNET(carp_preempt)
/* Log level. */
-static VNET_DEFINE(int, carp_log) = 1;
+VNET_DEFINE_STATIC(int, carp_log) = 1;
#define V_carp_log VNET(carp_log)
/* Global advskew demotion. */
-static VNET_DEFINE(int, carp_demotion) = 0;
+VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define V_carp_demotion VNET(carp_demotion)
/* Send error demotion factor. */
-static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define V_carp_senderr_adj VNET(carp_senderr_adj)
/* Iface down demotion factor. */
-static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
#define V_carp_ifdown_adj VNET(carp_ifdown_adj)
static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
+static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I",
"Accept incoming CARP packets");
+SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I",
+ "DSCP value for carp packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
@@ -935,7 +943,7 @@ carp_send_ad_locked(struct carp_softc *sc)
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
ip->ip_len = htons(len);
ip->ip_off = htons(IP_DF);
ip->ip_ttl = CARP_DFLTTL;
@@ -985,6 +993,10 @@ carp_send_ad_locked(struct carp_softc *sc)
ip6 = mtod(m, struct ip6_hdr *);
bzero(ip6, sizeof(*ip6));
ip6->ip6_vfc |= IPV6_VERSION;
+ /* Traffic class has no field of its own in the ip6 struct; instead
+ * it is shifted into place within the ip6_flow field. */
+ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
+ IPTOS_DSCP_OFFSET));
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
@@ -1413,6 +1425,7 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
free(im6o->im6o_membership, M_CARP);
break;
}
+ in6m_acquire(in6m);
im6o->im6o_membership[0] = in6m;
im6o->im6o_num_memberships++;
@@ -1434,6 +1447,7 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
free(im6o->im6o_membership, M_CARP);
break;
}
+ in6m_acquire(in6m);
im6o->im6o_membership[1] = in6m;
im6o->im6o_num_memberships++;
break;
@@ -2104,6 +2118,24 @@ carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
}
static int
+carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ int new, error;
+
+ new = V_carp_dscp;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ if (new < 0 || new > 63)
+ return (EINVAL);
+
+ V_carp_dscp = new;
+
+ return (0);
+}
+
+static int
carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
{
int new, error;
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 84f39023..fbf74ca1 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -113,8 +113,8 @@ __FBSDID("$FreeBSD$");
*/
/* Internal variables. */
-static VNET_DEFINE(struct inpcbhead, divcb);
-static VNET_DEFINE(struct inpcbinfo, divcbinfo);
+VNET_DEFINE_STATIC(struct inpcbhead, divcb);
+VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo);
#define V_divcb VNET(divcb)
#define V_divcbinfo VNET(divcbinfo)
@@ -194,6 +194,7 @@ divert_packet(struct mbuf *m, int incoming)
u_int16_t nport;
struct sockaddr_in divsrc;
struct m_tag *mtag;
+ struct epoch_tracker et;
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag == NULL) {
@@ -274,8 +275,8 @@ divert_packet(struct mbuf *m, int incoming)
/* Put packet on socket queue, if any */
sa = NULL;
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
- INP_INFO_RLOCK(&V_divcbinfo);
- LIST_FOREACH(inp, &V_divcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
+ CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
/* XXX why does only one socket match? */
if (inp->inp_lport == nport) {
INP_RLOCK(inp);
@@ -292,7 +293,7 @@ divert_packet(struct mbuf *m, int incoming)
break;
}
}
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
if (sa == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_noproto);
@@ -554,7 +555,6 @@ div_detach(struct socket *so)
KASSERT(inp != NULL, ("div_detach: inp == NULL"));
INP_INFO_WLOCK(&V_divcbinfo);
INP_WLOCK(inp);
- /* XXX defer destruction to epoch_call */
in_pcbdetach(inp);
in_pcbfree(inp);
INP_INFO_WUNLOCK(&V_divcbinfo);
@@ -634,10 +634,10 @@ static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -656,10 +656,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_divcbinfo);
+ INP_INFO_WLOCK(&V_divcbinfo);
gencnt = V_divcbinfo.ipi_gencnt;
n = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_WUNLOCK(&V_divcbinfo);
error = sysctl_wire_old_buffer(req,
2 * sizeof(xig) + n*sizeof(struct xinpcb));
@@ -674,12 +674,13 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return error;
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == NULL)
+ return ENOMEM;
- INP_INFO_RLOCK(&V_divcbinfo);
- for (inp = LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
+ for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -688,7 +689,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
n = i;
error = 0;
@@ -704,11 +705,17 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_divcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_divcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_divcbinfo);
if (!error) {
+ struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@@ -716,13 +723,14 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_divcbinfo);
+ INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_divcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return error;
}
@@ -802,7 +810,6 @@ div_modevent(module_t mod, int type, void *unused)
break;
}
ip_divert_ptr = NULL;
- /* XXX defer to epoch_call ? */
err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
INP_INFO_WUNLOCK(&V_divcbinfo);
#ifndef VIMAGE
diff --git a/freebsd/sys/netinet/ip_encap.c b/freebsd/sys/netinet/ip_encap.c
index 52cd0b40..1e794f73 100644
--- a/freebsd/sys/netinet/ip_encap.c
+++ b/freebsd/sys/netinet/ip_encap.c
@@ -6,6 +6,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,417 +59,214 @@
* So, clearly good old protosw does not work for protocol #4 and #41.
* The code will let you match protocol via src/dst address pair.
*/
-/* XXX is M_NETADDR correct? */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
-#include <sys/protosw.h>
-#include <sys/queue.h>
+#include <sys/socket.h>
#include <net/if.h>
-#include <net/route.h>
+#include <net/if_var.h>
#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_encap.h>
#ifdef INET6
-#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
-#include <machine/stdarg.h>
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host",
+ "Export host address structure");
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
+struct encaptab {
+ CK_LIST_ENTRY(encaptab) chain;
+ int proto;
+ int min_length;
+ int exact_match;
+ void *arg;
-static void encap_add(struct encaptab *);
-static int mask_match(const struct encaptab *, const struct sockaddr *,
- const struct sockaddr *);
-static void encap_fillarg(struct mbuf *, void *);
+ encap_lookup_t lookup;
+ encap_check_t check;
+ encap_input_t input;
+};
+
+CK_LIST_HEAD(encaptab_head, encaptab);
+#ifdef INET
+static struct encaptab_head ipv4_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
+#ifdef INET6
+static struct encaptab_head ipv6_encaptab = CK_LIST_HEAD_INITIALIZER();
+#endif
-/*
- * All global variables in ip_encap.c are locked using encapmtx.
- */
static struct mtx encapmtx;
MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
-static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
-
-#ifdef INET
-int
-encap4_input(struct mbuf **mp, int *offp, int proto)
+#define ENCAP_WLOCK() mtx_lock(&encapmtx)
+#define ENCAP_WUNLOCK() mtx_unlock(&encapmtx)
+#define ENCAP_RLOCK() struct epoch_tracker encap_et; epoch_enter_preempt(net_epoch_preempt, &encap_et)
+#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &encap_et)
+#define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt)
+
+static struct encaptab *
+encap_attach(struct encaptab_head *head, const struct encap_config *cfg,
+ void *arg, int mflags)
{
- struct ip *ip;
- struct mbuf *m;
- struct sockaddr_in s, d;
- const struct protosw *psw;
- struct encaptab *ep, *match;
- void *arg;
- int matchprio, off, prio;
-
- m = *mp;
- off = *offp;
- ip = mtod(m, struct ip *);
-
- bzero(&s, sizeof(s));
- s.sin_family = AF_INET;
- s.sin_len = sizeof(struct sockaddr_in);
- s.sin_addr = ip->ip_src;
- bzero(&d, sizeof(d));
- d.sin_family = AF_INET;
- d.sin_len = sizeof(struct sockaddr_in);
- d.sin_addr = ip->ip_dst;
-
- arg = NULL;
- psw = NULL;
- match = NULL;
- matchprio = 0;
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != AF_INET)
- continue;
- if (ep->proto >= 0 && ep->proto != proto)
- continue;
- if (ep->func)
- prio = (*ep->func)(m, off, proto, ep->arg);
- else {
- /*
- * it's inbound traffic, we need to match in reverse
- * order
- */
- prio = mask_match(ep, (struct sockaddr *)&d,
- (struct sockaddr *)&s);
- }
+ struct encaptab *ep, *tmp;
- /*
- * We prioritize the matches by using bit length of the
- * matches. mask_match() and user-supplied matching function
- * should return the bit length of the matches (for example,
- * if both src/dst are matched for IPv4, 64 should be returned).
- * 0 or negative return value means "it did not match".
- *
- * The question is, since we have two "mask" portion, we
- * cannot really define total order between entries.
- * For example, which of these should be preferred?
- * mask_match() returns 48 (32 + 16) for both of them.
- * src=3ffe::/16, dst=3ffe:501::/32
- * src=3ffe:501::/32, dst=3ffe::/16
- *
- * We need to loop through all the possible candidates
- * to get the best match - the search takes O(n) for
- * n attachments (i.e. interfaces).
- */
- if (prio <= 0)
- continue;
- if (prio > matchprio) {
- matchprio = prio;
- match = ep;
- }
- }
- if (match != NULL) {
- psw = match->psw;
- arg = match->arg;
- }
- mtx_unlock(&encapmtx);
+ if (cfg == NULL || cfg->input == NULL ||
+ (cfg->check == NULL && cfg->lookup == NULL) ||
+ (cfg->lookup != NULL && cfg->exact_match != ENCAP_DRV_LOOKUP) ||
+ (cfg->exact_match == ENCAP_DRV_LOOKUP && cfg->lookup == NULL))
+ return (NULL);
- if (match != NULL) {
- /* found a match, "match" has the best one */
- if (psw != NULL && psw->pr_input != NULL) {
- encap_fillarg(m, arg);
- (*psw->pr_input)(mp, offp, proto);
- } else
- m_freem(m);
- return (IPPROTO_DONE);
+ ep = malloc(sizeof(*ep), M_NETADDR, mflags);
+ if (ep == NULL)
+ return (NULL);
+
+ ep->proto = cfg->proto;
+ ep->min_length = cfg->min_length;
+ ep->exact_match = cfg->exact_match;
+ ep->arg = arg;
+ ep->lookup = cfg->exact_match == ENCAP_DRV_LOOKUP ? cfg->lookup: NULL;
+ ep->check = cfg->exact_match != ENCAP_DRV_LOOKUP ? cfg->check: NULL;
+ ep->input = cfg->input;
+
+ ENCAP_WLOCK();
+ CK_LIST_FOREACH(tmp, head, chain) {
+ if (tmp->exact_match <= ep->exact_match)
+ break;
}
+ if (tmp == NULL)
+ CK_LIST_INSERT_HEAD(head, ep, chain);
+ else
+ CK_LIST_INSERT_BEFORE(tmp, ep, chain);
+ ENCAP_WUNLOCK();
+ return (ep);
+}
+
+static int
+encap_detach(struct encaptab_head *head, const struct encaptab *cookie)
+{
+ struct encaptab *ep;
- /* last resort: inject to raw socket */
- return (rip_input(mp, offp, proto));
+ ENCAP_WLOCK();
+ CK_LIST_FOREACH(ep, head, chain) {
+ if (ep == cookie) {
+ CK_LIST_REMOVE(ep, chain);
+ ENCAP_WUNLOCK();
+ ENCAP_WAIT();
+ free(ep, M_NETADDR);
+ return (0);
+ }
+ }
+ ENCAP_WUNLOCK();
+ return (EINVAL);
}
-#endif
-#ifdef INET6
-int
-encap6_input(struct mbuf **mp, int *offp, int proto)
+static int
+encap_input(struct encaptab_head *head, struct mbuf *m, int off, int proto)
{
- struct mbuf *m = *mp;
- struct ip6_hdr *ip6;
- struct sockaddr_in6 s, d;
- const struct protosw *psw;
struct encaptab *ep, *match;
void *arg;
- int prio, matchprio;
-
- ip6 = mtod(m, struct ip6_hdr *);
+ int matchprio, ret;
- bzero(&s, sizeof(s));
- s.sin6_family = AF_INET6;
- s.sin6_len = sizeof(struct sockaddr_in6);
- s.sin6_addr = ip6->ip6_src;
- bzero(&d, sizeof(d));
- d.sin6_family = AF_INET6;
- d.sin6_len = sizeof(struct sockaddr_in6);
- d.sin6_addr = ip6->ip6_dst;
-
- arg = NULL;
- psw = NULL;
match = NULL;
matchprio = 0;
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != AF_INET6)
- continue;
+
+ ENCAP_RLOCK();
+ CK_LIST_FOREACH(ep, head, chain) {
if (ep->proto >= 0 && ep->proto != proto)
continue;
- if (ep->func)
- prio = (*ep->func)(m, *offp, proto, ep->arg);
- else {
- /*
- * it's inbound traffic, we need to match in reverse
- * order
- */
- prio = mask_match(ep, (struct sockaddr *)&d,
- (struct sockaddr *)&s);
- }
-
- /* see encap4_input() for issues here */
- if (prio <= 0)
+ if (ep->min_length > m->m_pkthdr.len)
continue;
- if (prio > matchprio) {
- matchprio = prio;
+ if (ep->exact_match == ENCAP_DRV_LOOKUP)
+ ret = (*ep->lookup)(m, off, proto, &arg);
+ else
+ ret = (*ep->check)(m, off, proto, ep->arg);
+ if (ret <= 0)
+ continue;
+ if (ret > matchprio) {
match = ep;
+ if (ep->exact_match != ENCAP_DRV_LOOKUP)
+ arg = ep->arg;
+ /*
+ * No need to continue the search, we got the
+ * exact match.
+ */
+ if (ret >= ep->exact_match)
+ break;
+ matchprio = ret;
}
}
- if (match != NULL) {
- psw = match->psw;
- arg = match->arg;
- }
- mtx_unlock(&encapmtx);
if (match != NULL) {
- /* found a match */
- if (psw != NULL && psw->pr_input != NULL) {
- encap_fillarg(m, arg);
- return (*psw->pr_input)(mp, offp, proto);
- } else {
- m_freem(m);
- return (IPPROTO_DONE);
- }
+ /* found a match, "match" has the best one */
+ ret = (*match->input)(m, off, proto, arg);
+ ENCAP_RUNLOCK();
+ MPASS(ret == IPPROTO_DONE);
+ return (IPPROTO_DONE);
}
-
- /* last resort: inject to raw socket */
- return rip6_input(mp, offp, proto);
-}
-#endif
-
-/*lint -sem(encap_add, custodial(1)) */
-static void
-encap_add(struct encaptab *ep)
-{
-
- mtx_assert(&encapmtx, MA_OWNED);
- LIST_INSERT_HEAD(&encaptab, ep, chain);
+ ENCAP_RUNLOCK();
+ return (0);
}
-/*
- * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
- * length of mask (sm and dm) is assumed to be same as sp/dp.
- * Return value will be necessary as input (cookie) for encap_detach().
- */
+#ifdef INET
const struct encaptab *
-encap_attach(int af, int proto, const struct sockaddr *sp,
- const struct sockaddr *sm, const struct sockaddr *dp,
- const struct sockaddr *dm, const struct protosw *psw, void *arg)
+ip_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
{
- struct encaptab *ep;
-
- /* sanity check on args */
- if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst))
- return (NULL);
- if (sp->sa_len != dp->sa_len)
- return (NULL);
- if (af != sp->sa_family || af != dp->sa_family)
- return (NULL);
- /* check if anyone have already attached with exactly same config */
- mtx_lock(&encapmtx);
- LIST_FOREACH(ep, &encaptab, chain) {
- if (ep->af != af)
- continue;
- if (ep->proto != proto)
- continue;
- if (ep->src.ss_len != sp->sa_len ||
- bcmp(&ep->src, sp, sp->sa_len) != 0 ||
- bcmp(&ep->srcmask, sm, sp->sa_len) != 0)
- continue;
- if (ep->dst.ss_len != dp->sa_len ||
- bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
- bcmp(&ep->dstmask, dm, dp->sa_len) != 0)
- continue;
-
- mtx_unlock(&encapmtx);
- return (NULL);
- }
-
- ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
- if (ep == NULL) {
- mtx_unlock(&encapmtx);
- return (NULL);
- }
- bzero(ep, sizeof(*ep));
-
- ep->af = af;
- ep->proto = proto;
- bcopy(sp, &ep->src, sp->sa_len);
- bcopy(sm, &ep->srcmask, sp->sa_len);
- bcopy(dp, &ep->dst, dp->sa_len);
- bcopy(dm, &ep->dstmask, dp->sa_len);
- ep->psw = psw;
- ep->arg = arg;
-
- encap_add(ep);
- mtx_unlock(&encapmtx);
- return (ep);
+ return (encap_attach(&ipv4_encaptab, cfg, arg, mflags));
}
-const struct encaptab *
-encap_attach_func(int af, int proto,
- int (*func)(const struct mbuf *, int, int, void *),
- const struct protosw *psw, void *arg)
+int
+ip_encap_detach(const struct encaptab *cookie)
{
- struct encaptab *ep;
- /* sanity check on args */
- if (!func)
- return (NULL);
-
- ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/
- if (ep == NULL)
- return (NULL);
- bzero(ep, sizeof(*ep));
-
- ep->af = af;
- ep->proto = proto;
- ep->func = func;
- ep->psw = psw;
- ep->arg = arg;
-
- mtx_lock(&encapmtx);
- encap_add(ep);
- mtx_unlock(&encapmtx);
- return (ep);
+ return (encap_detach(&ipv4_encaptab, cookie));
}
int
-encap_detach(const struct encaptab *cookie)
+encap4_input(struct mbuf **mp, int *offp, int proto)
{
- const struct encaptab *ep = cookie;
- struct encaptab *p;
-
- mtx_lock(&encapmtx);
- LIST_FOREACH(p, &encaptab, chain) {
- if (p == ep) {
- LIST_REMOVE(p, chain);
- mtx_unlock(&encapmtx);
- free(p, M_NETADDR); /*XXX*/
- return 0;
- }
- }
- mtx_unlock(&encapmtx);
- return EINVAL;
+ if (encap_input(&ipv4_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+ return (rip_input(mp, offp, proto));
+ return (IPPROTO_DONE);
}
+#endif /* INET */
-static int
-mask_match(const struct encaptab *ep, const struct sockaddr *sp,
- const struct sockaddr *dp)
+#ifdef INET6
+const struct encaptab *
+ip6_encap_attach(const struct encap_config *cfg, void *arg, int mflags)
{
- struct sockaddr_storage s;
- struct sockaddr_storage d;
- int i;
- const u_int8_t *p, *q;
- u_int8_t *r;
- int matchlen;
-
- if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
- return 0;
- if (sp->sa_family != ep->af || dp->sa_family != ep->af)
- return 0;
- if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
- return 0;
-
- matchlen = 0;
-
- p = (const u_int8_t *)sp;
- q = (const u_int8_t *)&ep->srcmask;
- r = (u_int8_t *)&s;
- for (i = 0 ; i < sp->sa_len; i++) {
- r[i] = p[i] & q[i];
- /* XXX estimate */
- matchlen += (q[i] ? 8 : 0);
- }
- p = (const u_int8_t *)dp;
- q = (const u_int8_t *)&ep->dstmask;
- r = (u_int8_t *)&d;
- for (i = 0 ; i < dp->sa_len; i++) {
- r[i] = p[i] & q[i];
- /* XXX rough estimate */
- matchlen += (q[i] ? 8 : 0);
- }
-
- /* need to overwrite len/family portion as we don't compare them */
- s.ss_len = sp->sa_len;
- s.ss_family = sp->sa_family;
- d.ss_len = dp->sa_len;
- d.ss_family = dp->sa_family;
-
- if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
- bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
- return matchlen;
- } else
- return 0;
+ return (encap_attach(&ipv6_encaptab, cfg, arg, mflags));
}
-static void
-encap_fillarg(struct mbuf *m, void *arg)
+int
+ip6_encap_detach(const struct encaptab *cookie)
{
- struct m_tag *tag;
- if (arg != NULL) {
- tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
- if (tag != NULL) {
- *(void**)(tag+1) = arg;
- m_tag_prepend(m, tag);
- }
- }
+ return (encap_detach(&ipv6_encaptab, cookie));
}
-void *
-encap_getarg(struct mbuf *m)
+int
+encap6_input(struct mbuf **mp, int *offp, int proto)
{
- void *p = NULL;
- struct m_tag *tag;
- tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
- if (tag) {
- p = *(void**)(tag+1);
- m_tag_delete(m, tag);
- }
- return p;
+ if (encap_input(&ipv6_encaptab, *mp, *offp, proto) != IPPROTO_DONE)
+ return (rip6_input(mp, offp, proto));
+ return (IPPROTO_DONE);
}
+#endif /* INET6 */
diff --git a/freebsd/sys/netinet/ip_encap.h b/freebsd/sys/netinet/ip_encap.h
index ef232189..f3d1d3af 100644
--- a/freebsd/sys/netinet/ip_encap.h
+++ b/freebsd/sys/netinet/ip_encap.h
@@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,29 +38,33 @@
#ifdef _KERNEL
-struct encaptab {
- LIST_ENTRY(encaptab) chain;
- int af;
- int proto; /* -1: don't care, I'll check myself */
- struct sockaddr_storage src; /* my addr */
- struct sockaddr_storage srcmask;
- struct sockaddr_storage dst; /* remote addr */
- struct sockaddr_storage dstmask;
- int (*func)(const struct mbuf *, int, int, void *);
- const struct protosw *psw; /* only pr_input will be used */
- void *arg; /* passed via m->m_pkthdr.aux */
-};
-
int encap4_input(struct mbuf **, int *, int);
int encap6_input(struct mbuf **, int *, int);
-const struct encaptab *encap_attach(int, int, const struct sockaddr *,
- const struct sockaddr *, const struct sockaddr *,
- const struct sockaddr *, const struct protosw *, void *);
-const struct encaptab *encap_attach_func(int, int,
- int (*)(const struct mbuf *, int, int, void *),
- const struct protosw *, void *);
-int encap_detach(const struct encaptab *);
-void *encap_getarg(struct mbuf *);
+
+typedef int (*encap_lookup_t)(const struct mbuf *, int, int, void **);
+typedef int (*encap_check_t)(const struct mbuf *, int, int, void *);
+typedef int (*encap_input_t)(struct mbuf *, int , int, void *);
+
+struct encap_config {
+ int proto; /* protocol */
+ int min_length; /* minimum packet length */
+ int exact_match; /* a packet is exactly matched */
+#define ENCAP_DRV_LOOKUP 0x7fffffff
+
+ encap_lookup_t lookup;
+ encap_check_t check;
+ encap_input_t input;
+};
+
+struct encaptab;
+
+const struct encaptab *ip_encap_attach(const struct encap_config *,
+ void *arg, int mflags);
+const struct encaptab *ip6_encap_attach(const struct encap_config *,
+ void *arg, int mflags);
+
+int ip_encap_detach(const struct encaptab *);
+int ip6_encap_detach(const struct encaptab *);
#endif
#endif /*_NETINET_IP_ENCAP_H_*/
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
index b084fdc6..05deb4d8 100644
--- a/freebsd/sys/netinet/ip_fastfwd.c
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -155,7 +155,7 @@ ip_tryforward(struct mbuf *m)
struct mbuf *m0 = NULL;
struct nhop4_basic nh;
struct sockaddr_in dst;
- struct in_addr odest, dest;
+ struct in_addr dest, odest, rtdest;
uint16_t ip_len, ip_off;
int error = 0;
struct m_tag *fwd_tag = NULL;
@@ -296,12 +296,31 @@ passin:
#endif
/*
+ * Next hop forced by pfil(9) hook?
+ */
+ if ((m->m_flags & M_IP_NEXTHOP) &&
+ ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
+ /*
+ * Now we will find route to forced destination.
+ */
+ dest.s_addr = ((struct sockaddr_in *)
+ (fwd_tag + 1))->sin_addr.s_addr;
+ m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP_NEXTHOP;
+ }
+
+ /*
* Find route to destination.
*/
if (ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */
/*
+ * Avoid second route lookup by caching destination.
+ */
+ rtdest.s_addr = dest.s_addr;
+
+ /*
* Step 5: outgoing firewall packet processing
*/
if (!PFIL_HOOKED(&V_inet_pfil_hook))
@@ -323,6 +342,8 @@ passin:
*/
if (m->m_flags & M_IP_NEXTHOP)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ else
+ fwd_tag = NULL;
if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
/*
* Is it now for a local address on this host?
@@ -344,7 +365,8 @@ forwardlocal:
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
}
- if (ip_findroute(&nh, dest, m) != 0)
+ if (dest.s_addr != rtdest.s_addr &&
+ ip_findroute(&nh, dest, m) != 0)
return (NULL); /* icmp unreach already sent */
}
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
index 286eb03f..a7bf5b4d 100644
--- a/freebsd/sys/netinet/ip_fw.h
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -285,6 +285,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */
O_EXTERNAL_DATA, /* variable length data */
+ O_SKIP_ACTION, /* none */
+
O_LAST_OPCODE /* not an opcode! */
};
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
index 673e23d5..65ab0ab9 100644
--- a/freebsd/sys/netinet/ip_gre.c
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
*
* Copyright (c) 1998 The NetBSD Foundation, Inc.
- * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -43,18 +43,17 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
+#include <sys/jail.h>
#include <sys/systm.h>
-#include <sys/mbuf.h>
#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/protosw.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
#include <sys/errno.h>
-#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/sysctl.h>
-#include <net/ethernet.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>
@@ -71,61 +70,177 @@ __FBSDID("$FreeBSD$");
#include <net/if_gre.h>
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
#define GRE_TTL 30
VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
#define V_ip_gre_ttl VNET(ip_gre_ttl)
SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip_gre_ttl), 0, "");
+ &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
+
+VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
+#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
+#define GRE_HASH(src, dst) (V_ipv4_hashtbl[\
+ in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
+#define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
+ (sc)->gre_oip.ip_dst.s_addr)
+
+static uint32_t
+in_gre_hashval(in_addr_t src, in_addr_t dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+ return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static int
+in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst)
+{
+ struct gre_softc *tmp;
+
+ if (sc->gre_family == AF_INET &&
+ sc->gre_oip.ip_src.s_addr == src &&
+ sc->gre_oip.ip_dst.s_addr == dst)
+ return (EEXIST);
+
+ CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) {
+ if (tmp == sc)
+ continue;
+ if (tmp->gre_oip.ip_src.s_addr == src &&
+ tmp->gre_oip.ip_dst.s_addr == dst)
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
static int
-in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
- GRE_RLOCK_TRACKER;
+ const struct ip *ip;
struct gre_softc *sc;
- struct ip *ip;
- sc = (struct gre_softc *)arg;
- if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ if (V_ipv4_hashtbl == NULL)
return (0);
- M_ASSERTPKTHDR(m);
- /*
- * We expect that payload contains at least IPv4
- * or IPv6 packet.
- */
- if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
- return (0);
+ MPASS(in_epoch(net_epoch_preempt));
+ ip = mtod(m, const struct ip *);
+ CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr,
+ ip->ip_src.s_addr), chain) {
+ /*
+ * This is an inbound packet, its ip_dst is source address
+ * in softc.
+ */
+ if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr &&
+ sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr) {
+ if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+ *arg = sc;
+ return (ENCAP_DRV_LOOKUP);
+ }
+ }
+ return (0);
+}
- GRE_RLOCK(sc);
- if (sc->gre_family == 0)
- goto bad;
+static void
+in_gre_attach(struct gre_softc *sc)
+{
- KASSERT(sc->gre_family == AF_INET,
- ("wrong gre_family: %d", sc->gre_family));
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPVERSION;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gre_updatehdr(sc, &sc->gre_gihdr->gi_gre);
+ CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
+}
- ip = mtod(m, struct ip *);
- if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
- sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
- goto bad;
+void
+in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
+{
- GRE_RUNLOCK(sc);
- return (32 * 2);
-bad:
- GRE_RUNLOCK(sc);
- return (0);
+ MPASS(cmd == GRESKEY || cmd == GRESOPTS);
+
+ /* NOTE: we are protected with gre_ioctl_sx lock */
+ MPASS(sc->gre_family == AF_INET);
+ CK_LIST_REMOVE(sc, chain);
+ GRE_WAIT();
+ if (cmd == GRESKEY)
+ sc->gre_key = value;
+ else
+ sc->gre_options = value;
+ in_gre_attach(sc);
+}
+
+int
+in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *dst, *src;
+ struct ip *ip;
+ int error;
+
+ /* NOTE: we are protected with gre_ioctl_sx lock */
+ error = EINVAL;
+ switch (cmd) {
+ case SIOCSIFPHYADDR:
+ src = &((struct in_aliasreq *)data)->ifra_addr;
+ dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin_family != dst->sin_family ||
+ src->sin_family != AF_INET ||
+ src->sin_len != dst->sin_len ||
+ src->sin_len != sizeof(*src))
+ break;
+ if (src->sin_addr.s_addr == INADDR_ANY ||
+ dst->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ if (V_ipv4_hashtbl == NULL)
+ V_ipv4_hashtbl = gre_hashinit();
+ error = in_gre_checkdup(sc, src->sin_addr.s_addr,
+ dst->sin_addr.s_addr);
+ if (error == EADDRNOTAVAIL)
+ break;
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t),
+ M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = src->sin_addr.s_addr;
+ ip->ip_dst.s_addr = dst->sin_addr.s_addr;
+ if (sc->gre_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GRE_WAIT();
+ free(sc->gre_hdr, M_GRE);
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gre_family = AF_INET;
+ sc->gre_hdr = ip;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ in_gre_attach(sc);
+ break;
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin_family = AF_INET;
+ src->sin_len = sizeof(*src);
+ src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
+ sc->gre_oip.ip_src: sc->gre_oip.ip_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ }
+ return (error);
}
int
@@ -158,14 +273,30 @@ in_gre_output(struct mbuf *m, int af, int hlen)
return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
}
-int
-in_gre_attach(struct gre_softc *sc)
+static const struct encaptab *ecookie = NULL;
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_GRE,
+ .min_length = sizeof(struct greip) + sizeof(struct ip),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in_gre_lookup,
+ .input = gre_input
+};
+
+void
+in_gre_init(void)
{
- KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
- sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
- in_gre_encapcheck, &in_gre_protosw, sc);
- if (sc->gre_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+ ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
+}
+
+void
+in_gre_uninit(void)
+{
+
+ if (IS_DEFAULT_VNET(curvnet))
+ ip_encap_detach(ecookie);
+ if (V_ipv4_hashtbl != NULL)
+ gre_hashdestroy(V_ipv4_hashtbl);
}
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index 3fc59a14..414e3812 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -84,13 +84,13 @@ __FBSDID("$FreeBSD$");
* routines to turnaround packets back to the originator, and
* host table maintenance routines.
*/
-static VNET_DEFINE(int, icmplim) = 200;
+VNET_DEFINE_STATIC(int, icmplim) = 200;
#define V_icmplim VNET(icmplim)
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim), 0,
"Maximum number of ICMP responses per second");
-static VNET_DEFINE(int, icmplim_output) = 1;
+VNET_DEFINE_STATIC(int, icmplim_output) = 1;
#define V_icmplim_output VNET(icmplim_output)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim_output), 0,
@@ -106,13 +106,13 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat,
VNET_PCPUSTAT_SYSUNINIT(icmpstat);
#endif /* VIMAGE */
-static VNET_DEFINE(int, icmpmaskrepl) = 0;
+VNET_DEFINE_STATIC(int, icmpmaskrepl) = 0;
#define V_icmpmaskrepl VNET(icmpmaskrepl)
SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskrepl), 0,
"Reply to ICMP Address Mask Request packets");
-static VNET_DEFINE(u_int, icmpmaskfake) = 0;
+VNET_DEFINE_STATIC(u_int, icmpmaskfake) = 0;
#define V_icmpmaskfake VNET(icmpmaskfake)
SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskfake), 0,
@@ -124,37 +124,37 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(drop_redirect), 0,
"Ignore ICMP redirects");
-static VNET_DEFINE(int, log_redirect) = 0;
+VNET_DEFINE_STATIC(int, log_redirect) = 0;
#define V_log_redirect VNET(log_redirect)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(log_redirect), 0,
"Log ICMP redirects to the console");
-static VNET_DEFINE(char, reply_src[IFNAMSIZ]);
+VNET_DEFINE_STATIC(char, reply_src[IFNAMSIZ]);
#define V_reply_src VNET(reply_src)
SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(reply_src), IFNAMSIZ,
"ICMP reply source for non-local packets");
-static VNET_DEFINE(int, icmp_rfi) = 0;
+VNET_DEFINE_STATIC(int, icmp_rfi) = 0;
#define V_icmp_rfi VNET(icmp_rfi)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_rfi), 0,
"ICMP reply from incoming interface for non-local packets");
-
-static VNET_DEFINE(int, icmp_quotelen) = 8;
+/* Router requirements RFC 1812 section 4.3.2.3 requires 576 - 28. */
+VNET_DEFINE_STATIC(int, icmp_quotelen) = 548;
#define V_icmp_quotelen VNET(icmp_quotelen)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_quotelen), 0,
"Number of bytes from original packet to quote in ICMP reply");
-static VNET_DEFINE(int, icmpbmcastecho) = 0;
+VNET_DEFINE_STATIC(int, icmpbmcastecho) = 0;
#define V_icmpbmcastecho VNET(icmpbmcastecho)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpbmcastecho), 0,
"Reply to multicast ICMP Echo Request and Timestamp packets");
-static VNET_DEFINE(int, icmptstamprepl) = 1;
+VNET_DEFINE_STATIC(int, icmptstamprepl) = 1;
#define V_icmptstamprepl VNET(icmptstamprepl)
SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
&VNET_NAME(icmptstamprepl), 0,
@@ -1003,7 +1003,7 @@ struct icmp_rate {
const char *descr;
struct counter_rate cr;
};
-static VNET_DEFINE(struct icmp_rate, icmp_rates[BANDLIM_MAX]) = {
+VNET_DEFINE_STATIC(struct icmp_rate, icmp_rates[BANDLIM_MAX]) = {
{ "icmp unreach response" },
{ "icmp ping response" },
{ "icmp tstamp response" },
diff --git a/freebsd/sys/netinet/ip_id.c b/freebsd/sys/netinet/ip_id.c
index 02bf2c5b..85a67612 100644
--- a/freebsd/sys/netinet/ip_id.c
+++ b/freebsd/sys/netinet/ip_id.c
@@ -100,8 +100,8 @@ __FBSDID("$FreeBSD$");
* suggested by RFC6864. We use per-CPU counter for that, or if
* user wants to, we can turn on random ID generation.
*/
-static VNET_DEFINE(int, ip_rfc6864) = 1;
-static VNET_DEFINE(int, ip_do_randomid) = 0;
+VNET_DEFINE_STATIC(int, ip_rfc6864) = 1;
+VNET_DEFINE_STATIC(int, ip_do_randomid) = 0;
#define V_ip_rfc6864 VNET(ip_rfc6864)
#define V_ip_do_randomid VNET(ip_do_randomid)
@@ -109,13 +109,13 @@ static VNET_DEFINE(int, ip_do_randomid) = 0;
* Random ID state engine.
*/
static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
-static VNET_DEFINE(uint16_t *, id_array);
-static VNET_DEFINE(bitstr_t *, id_bits);
-static VNET_DEFINE(int, array_ptr);
-static VNET_DEFINE(int, array_size);
-static VNET_DEFINE(int, random_id_collisions);
-static VNET_DEFINE(int, random_id_total);
-static VNET_DEFINE(struct mtx, ip_id_mtx);
+VNET_DEFINE_STATIC(uint16_t *, id_array);
+VNET_DEFINE_STATIC(bitstr_t *, id_bits);
+VNET_DEFINE_STATIC(int, array_ptr);
+VNET_DEFINE_STATIC(int, array_size);
+VNET_DEFINE_STATIC(int, random_id_collisions);
+VNET_DEFINE_STATIC(int, random_id_total);
+VNET_DEFINE_STATIC(struct mtx, ip_id_mtx);
#define V_id_array VNET(id_array)
#define V_id_bits VNET(id_bits)
#define V_array_ptr VNET(array_ptr)
@@ -127,7 +127,7 @@ static VNET_DEFINE(struct mtx, ip_id_mtx);
/*
* Non-random ID state engine is simply a per-cpu counter.
*/
-static VNET_DEFINE(counter_u64_t, ip_id);
+VNET_DEFINE_STATIC(counter_u64_t, ip_id);
#define V_ip_id VNET(ip_id)
static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index 343eec5e..2852b52e 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -111,7 +111,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW
&VNET_NAME(ipforwarding), 0,
"Enable IP forwarding between interfaces");
-static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
+VNET_DEFINE_STATIC(int, ipsendredirects) = 1; /* XXX */
#define V_ipsendredirects VNET(ipsendredirects)
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipsendredirects), 0,
@@ -130,7 +130,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R
* to the loopback interface instead of the interface where the
* packets for those addresses are received.
*/
-static VNET_DEFINE(int, ip_checkinterface);
+VNET_DEFINE_STATIC(int, ip_checkinterface);
#define V_ip_checkinterface VNET(ip_checkinterface)
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_checkinterface), 0,
@@ -559,13 +559,15 @@ tooshort:
/*
* Try to forward the packet, but if we fail continue.
+ * ip_tryforward() does not generate redirects, so fall
+ * through to normal processing if redirects are required.
* ip_tryforward() does inbound and outbound packet firewall
* processing. If firewall has decided that destination becomes
* our local address, it sets M_FASTFWD_OURS flag. In this
* case skip another inbound firewall processing and update
* ip pointer.
*/
- if (V_ipforwarding != 0
+ if (V_ipforwarding != 0 && V_ipsendredirects == 0
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
&& (!IPSEC_ENABLED(ipv4) ||
IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
@@ -1349,7 +1351,7 @@ makedummy:
* locking. This code remains in ip_input.c as ip_mroute.c is optionally
* compiled.
*/
-static VNET_DEFINE(int, ip_rsvp_on);
+VNET_DEFINE_STATIC(int, ip_rsvp_on);
VNET_DEFINE(struct socket *, ip_rsvpd);
#define V_ip_rsvp_on VNET(ip_rsvp_on)
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index ac901601..987549c6 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -127,7 +127,7 @@ __FBSDID("$FreeBSD$");
#define VIFI_INVALID ((vifi_t) -1)
-static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
+VNET_DEFINE_STATIC(uint32_t, last_tv_sec); /* last time we processed this */
#define V_last_tv_sec VNET(last_tv_sec)
static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
@@ -151,14 +151,14 @@ static struct mtx mrouter_mtx;
static int ip_mrouter_cnt; /* # of vnets with active mrouters */
static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
-static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
+VNET_PCPUSTAT_DEFINE_STATIC(struct mrtstat, mrtstat);
VNET_PCPUSTAT_SYSINIT(mrtstat);
VNET_PCPUSTAT_SYSUNINIT(mrtstat);
SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
"netinet/ip_mroute.h)");
-static VNET_DEFINE(u_long, mfchash);
+VNET_DEFINE_STATIC(u_long, mfchash);
#define V_mfchash VNET(mfchash)
#define MFCHASH(a, g) \
((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
@@ -166,9 +166,9 @@ static VNET_DEFINE(u_long, mfchash);
#define MFCHASHSIZE 256
static u_long mfchashsize; /* Hash size */
-static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */
+VNET_DEFINE_STATIC(u_char *, nexpire); /* 0..mfchashsize-1 */
#define V_nexpire VNET(nexpire)
-static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
+VNET_DEFINE_STATIC(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
#define V_mfchashtbl VNET(mfchashtbl)
static struct mtx mfc_mtx;
@@ -179,9 +179,9 @@ static struct mtx mfc_mtx;
mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
-static VNET_DEFINE(vifi_t, numvifs);
+VNET_DEFINE_STATIC(vifi_t, numvifs);
#define V_numvifs VNET(numvifs)
-static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
+VNET_DEFINE_STATIC(struct vif, viftable[MAXVIFS]);
#define V_viftable VNET(viftable)
SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
@@ -197,7 +197,7 @@ static struct mtx vif_mtx;
static eventhandler_tag if_detach_event_tag = NULL;
-static VNET_DEFINE(struct callout, expire_upcalls_ch);
+VNET_DEFINE_STATIC(struct callout, expire_upcalls_ch);
#define V_expire_upcalls_ch VNET(expire_upcalls_ch)
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
@@ -212,9 +212,9 @@ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
* expiration time. Periodically, the entries are analysed and processed.
*/
#define BW_METER_BUCKETS 1024
-static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
+VNET_DEFINE_STATIC(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
#define V_bw_meter_timers VNET(bw_meter_timers)
-static VNET_DEFINE(struct callout, bw_meter_ch);
+VNET_DEFINE_STATIC(struct callout, bw_meter_ch);
#define V_bw_meter_ch VNET(bw_meter_ch)
#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */
@@ -222,16 +222,16 @@ static VNET_DEFINE(struct callout, bw_meter_ch);
* Pending upcalls are stored in a vector which is flushed when
* full, or periodically
*/
-static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
+VNET_DEFINE_STATIC(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
#define V_bw_upcalls VNET(bw_upcalls)
-static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
+VNET_DEFINE_STATIC(u_int, bw_upcalls_n); /* # of pending upcalls */
#define V_bw_upcalls_n VNET(bw_upcalls_n)
-static VNET_DEFINE(struct callout, bw_upcalls_ch);
+VNET_DEFINE_STATIC(struct callout, bw_upcalls_ch);
#define V_bw_upcalls_ch VNET(bw_upcalls_ch)
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
+VNET_PCPUSTAT_DEFINE_STATIC(struct pimstat, pimstat);
VNET_PCPUSTAT_SYSINIT(pimstat);
VNET_PCPUSTAT_SYSUNINIT(pimstat);
@@ -244,20 +244,17 @@ SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
&pim_squelch_wholepkt, 0,
"Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
-extern struct domain inetdomain;
-static const struct protosw in_pim_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_PIM,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
static const struct encaptab *pim_encap_cookie;
-
static int pim_encapcheck(const struct mbuf *, int, int, void *);
+static int pim_input(struct mbuf *, int, int, void *);
+
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = IPPROTO_PIM,
+ .min_length = sizeof(struct ip) + PIM_MINLEN,
+ .exact_match = 8,
+ .check = pim_encapcheck,
+ .input = pim_input
+};
/*
* Note: the PIM Register encapsulation adds the following in front of a
@@ -302,9 +299,9 @@ static struct pim_encap_pimhdr pim_encap_pimhdr = {
0 /* flags */
};
-static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
+VNET_DEFINE_STATIC(vifi_t, reg_vif_num) = VIFI_INVALID;
#define V_reg_vif_num VNET(reg_vif_num)
-static VNET_DEFINE(struct ifnet, multicast_register_if);
+VNET_DEFINE_STATIC(struct ifnet, multicast_register_if);
#define V_multicast_register_if VNET(multicast_register_if)
/*
@@ -373,9 +370,9 @@ static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
MRT_MFC_FLAGS_BORDER_VIF |
MRT_MFC_RP |
MRT_MFC_BW_UPCALL);
-static VNET_DEFINE(uint32_t, mrt_api_config);
+VNET_DEFINE_STATIC(uint32_t, mrt_api_config);
#define V_mrt_api_config VNET(mrt_api_config)
-static VNET_DEFINE(int, pim_assert_enabled);
+VNET_DEFINE_STATIC(int, pim_assert_enabled);
#define V_pim_assert_enabled VNET(pim_assert_enabled)
static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */
@@ -2546,16 +2543,12 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
* into the kernel.
*/
static int
-pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim_encapcheck(const struct mbuf *m __unused, int off __unused,
+ int proto __unused, void *arg __unused)
{
-#ifdef DIAGNOSTIC
KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
- if (proto != IPPROTO_PIM)
- return 0; /* not for us; reject the datagram. */
-
- return 64; /* claim the datagram. */
+ return (8); /* claim the datagram. */
}
/*
@@ -2566,18 +2559,15 @@ pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
* (used by PIM-SM): the PIM header is stripped off, and the inner packet
* is passed to if_simloop().
*/
-int
-pim_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim_input(struct mbuf *m, int off, int proto, void *arg __unused)
{
- struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
struct pim *pim;
- int iphlen = *offp;
+ int iphlen = off;
int minlen;
int datalen = ntohs(ip->ip_len) - iphlen;
int ip_tos;
-
- *mp = NULL;
/* Keep statistics */
PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2781,10 +2771,7 @@ pim_input_to_daemon:
* XXX: the outer IP header pkt size of a Register is not adjust to
* reflect the fact that the inner multicast data is truncated.
*/
- *mp = m;
- rip_input(mp, offp, proto);
-
- return (IPPROTO_DONE);
+ return (rip_input(&m, &off, proto));
}
static int
@@ -2877,8 +2864,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
&pim_squelch_wholepkt);
- pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
- pim_encapcheck, &in_pim_protosw, NULL);
+ pim_encap_cookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
if (pim_encap_cookie == NULL) {
printf("ip_mroute: unable to attach pim encap\n");
VIF_LOCK_DESTROY();
@@ -2921,7 +2907,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused)
EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
if (pim_encap_cookie) {
- encap_detach(pim_encap_cookie);
+ ip_encap_detach(pim_encap_cookie);
pim_encap_cookie = NULL;
}
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
index cc2f3eed..7c189bdb 100644
--- a/freebsd/sys/netinet/ip_options.c
+++ b/freebsd/sys/netinet/ip_options.c
@@ -70,13 +70,13 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
-static VNET_DEFINE(int, ip_dosourceroute);
+VNET_DEFINE_STATIC(int, ip_dosourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
"Enable forwarding source routed IP packets");
#define V_ip_dosourceroute VNET(ip_dosourceroute)
-static VNET_DEFINE(int, ip_acceptsourceroute);
+VNET_DEFINE_STATIC(int, ip_acceptsourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
"Enable accepting source routed IP packets");
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index 792f2311..5f643746 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -82,6 +82,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
+
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
#ifdef SCTP
#include <netinet/sctp.h>
#include <netinet/sctp_crc32.h>
@@ -922,24 +926,34 @@ void
in_delayed_cksum(struct mbuf *m)
{
struct ip *ip;
- uint16_t csum, offset, ip_len;
+ struct udphdr *uh;
+ uint16_t cklen, csum, offset;
ip = mtod(m, struct ip *);
offset = ip->ip_hl << 2 ;
- ip_len = ntohs(ip->ip_len);
- csum = in_cksum_skip(m, ip_len, offset);
- if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
- csum = 0xffff;
- offset += m->m_pkthdr.csum_data; /* checksum offset */
- /* find the mbuf in the chain where the checksum starts*/
- while ((m != NULL) && (offset >= m->m_len)) {
- offset -= m->m_len;
- m = m->m_next;
+ if (m->m_pkthdr.csum_flags & CSUM_UDP) {
+ /* if udp header is not in the first mbuf copy udplen */
+ if (offset + sizeof(struct udphdr) > m->m_len)
+ m_copydata(m, offset + offsetof(struct udphdr,
+ uh_ulen), sizeof(cklen), (caddr_t)&cklen);
+ else {
+ uh = (struct udphdr *)mtodo(m, offset);
+ cklen = ntohs(uh->uh_ulen);
+ }
+ csum = in_cksum_skip(m, cklen + offset, offset);
+ if (csum == 0)
+ csum = 0xffff;
+ } else {
+ cklen = ntohs(ip->ip_len);
+ csum = in_cksum_skip(m, cklen, offset);
}
- KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
- KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
- *(u_short *)(m->m_data + offset) = csum;
+ offset += m->m_pkthdr.csum_data; /* checksum offset */
+
+ if (offset + sizeof(csum) > m->m_len)
+ m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+ else
+ *(u_short *)mtodo(m, offset) = csum;
}
/*
@@ -980,6 +994,15 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(inp);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ inp->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ inp->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(inp);
inp->inp_inc.inc_fibnum = so->so_fibnum;
@@ -1235,13 +1258,23 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
switch (sopt->sopt_name) {
case IP_OPTIONS:
case IP_RETOPTS:
- if (inp->inp_options)
- error = sooptcopyout(sopt,
- mtod(inp->inp_options,
- char *),
- inp->inp_options->m_len);
- else
+ INP_RLOCK(inp);
+ if (inp->inp_options) {
+ struct mbuf *options;
+
+ options = m_dup(inp->inp_options, M_NOWAIT);
+ INP_RUNLOCK(inp);
+ if (options != NULL) {
+ error = sooptcopyout(sopt,
+ mtod(options, char *),
+ options->m_len);
+ m_freem(options);
+ } else
+ error = ENOMEM;
+ } else {
+ INP_RUNLOCK(inp);
sopt->sopt_valsize = 0;
+ }
break;
case IP_TOS:
diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c
index 64660228..95603390 100644
--- a/freebsd/sys/netinet/ip_reass.c
+++ b/freebsd/sys/netinet/ip_reass.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/hash.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
@@ -65,18 +66,19 @@ SYSCTL_DECL(_net_inet_ip);
/*
* Reassembly headers are stored in hash buckets.
*/
-#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH_LOG2 10
#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
#define IPREASS_HMASK (IPREASS_NHASH - 1)
struct ipqbucket {
TAILQ_HEAD(ipqhead, ipq) head;
struct mtx lock;
+ int count;
};
-static VNET_DEFINE(struct ipqbucket, ipq[IPREASS_NHASH]);
+VNET_DEFINE_STATIC(struct ipqbucket, ipq[IPREASS_NHASH]);
#define V_ipq VNET(ipq)
-static VNET_DEFINE(uint32_t, ipq_hashseed);
+VNET_DEFINE_STATIC(uint32_t, ipq_hashseed);
#define V_ipq_hashseed VNET(ipq_hashseed)
#define IPQ_LOCK(i) mtx_lock(&V_ipq[i].lock)
@@ -84,6 +86,9 @@ static VNET_DEFINE(uint32_t, ipq_hashseed);
#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
+VNET_DEFINE_STATIC(int, ipreass_maxbucketsize);
+#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
+
void ipreass_init(void);
void ipreass_drain(void);
void ipreass_slowtimo(void);
@@ -91,28 +96,54 @@ void ipreass_slowtimo(void);
void ipreass_destroy(void);
#endif
static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
+static int sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS);
static void ipreass_zone_change(void *);
static void ipreass_drain_tomax(void);
-static void ipq_free(struct ipqhead *, struct ipq *);
+static void ipq_free(struct ipqbucket *, struct ipq *);
static struct ipq * ipq_reuse(int);
static inline void
-ipq_timeout(struct ipqhead *head, struct ipq *fp)
+ipq_timeout(struct ipqbucket *bucket, struct ipq *fp)
{
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
- ipq_free(head, fp);
+ ipq_free(bucket, fp);
}
static inline void
-ipq_drop(struct ipqhead *head, struct ipq *fp)
+ipq_drop(struct ipqbucket *bucket, struct ipq *fp)
{
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
- ipq_free(head, fp);
+ ipq_free(bucket, fp);
}
-static VNET_DEFINE(uma_zone_t, ipq_zone);
+/*
+ * By default, limit the number of IP fragments across all reassembly
+ * queues to 1/32 of the total number of mbuf clusters.
+ *
+ * Limit the total number of reassembly queues per VNET to the
+ * IP fragment limit, but ensure the limit will not allow any bucket
+ * to grow above 100 items. (The bucket limit is
+ * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct
+ * multiplier to reach a 100-item limit.)
+ * The 100-item limit was chosen as brief testing seems to show that
+ * this produces "reasonable" performance on some subset of systems
+ * under DoS attack.
+ */
+#define IP_MAXFRAGS (nmbclusters / 32)
+#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, IPREASS_NHASH * 50))
+
+static int maxfrags;
+static volatile u_int nfrags;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW,
+ &maxfrags, 0,
+ "Maximum number of IPv4 fragments allowed across all reassembly queues");
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, curfrags, CTLFLAG_RD,
+ __DEVOLATILE(u_int *, &nfrags), 0,
+ "Current number of IPv4 fragments across all reassembly queues");
+
+VNET_DEFINE_STATIC(uma_zone_t, ipq_zone);
#define V_ipq_zone VNET(ipq_zone)
SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET |
CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_maxfragpackets, "I",
@@ -121,14 +152,18 @@ SYSCTL_UMA_CUR(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET,
&VNET_NAME(ipq_zone),
"Current number of IPv4 fragment reassembly queue entries");
-static VNET_DEFINE(int, noreass);
+VNET_DEFINE_STATIC(int, noreass);
#define V_noreass VNET(noreass)
-static VNET_DEFINE(int, maxfragsperpacket);
+VNET_DEFINE_STATIC(int, maxfragsperpacket);
#define V_maxfragsperpacket VNET(maxfragsperpacket)
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(maxfragsperpacket), 0,
"Maximum number of IPv4 fragments allowed per packet");
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragbucketsize,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
+ sysctl_maxfragbucketsize, "I",
+ "Maximum number of IPv4 fragment reassembly queue entries per bucket");
/*
* Take incoming datagram fragment and try to reassemble it into
@@ -148,9 +183,9 @@ ip_reass(struct mbuf *m)
struct mbuf *p, *q, *nq, *t;
struct ipq *fp;
struct ipqhead *head;
- int i, hlen, next;
+ int i, hlen, next, tmpmax;
u_int8_t ecn, ecn0;
- uint32_t hash;
+ uint32_t hash, hashkey[3];
#ifdef RSS
uint32_t rss_hash, rss_type;
#endif
@@ -158,8 +193,12 @@ ip_reass(struct mbuf *m)
/*
* If no reassembling or maxfragsperpacket are 0,
* never accept fragments.
+ * Also, drop packet if it would exceed the maximum
+ * number of fragments.
*/
- if (V_noreass == 1 || V_maxfragsperpacket == 0) {
+ tmpmax = maxfrags;
+ if (V_noreass == 1 || V_maxfragsperpacket == 0 ||
+ (tmpmax >= 0 && atomic_load_int(&nfrags) >= (u_int)tmpmax)) {
IPSTAT_INC(ips_fragments);
IPSTAT_INC(ips_fragdropped);
m_freem(m);
@@ -204,8 +243,12 @@ ip_reass(struct mbuf *m)
m->m_data += hlen;
m->m_len -= hlen;
- hash = ip->ip_src.s_addr ^ ip->ip_id;
- hash = jenkins_hash32(&hash, 1, V_ipq_hashseed) & IPREASS_HMASK;
+ hashkey[0] = ip->ip_src.s_addr;
+ hashkey[1] = ip->ip_dst.s_addr;
+ hashkey[2] = (uint32_t)ip->ip_p << 16;
+ hashkey[2] += ip->ip_id;
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ipq_hashseed);
+ hash &= IPREASS_HMASK;
head = &V_ipq[hash].head;
IPQ_LOCK(hash);
@@ -226,9 +269,12 @@ ip_reass(struct mbuf *m)
* If first fragment to arrive, create a reassembly queue.
*/
if (fp == NULL) {
- fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
+ if (V_ipq[hash].count < V_ipreass_maxbucketsize)
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
if (fp == NULL)
fp = ipq_reuse(hash);
+ if (fp == NULL)
+ goto dropfrag;
#ifdef MAC
if (mac_ipq_init(fp, M_NOWAIT) != 0) {
uma_zfree(V_ipq_zone, fp);
@@ -238,7 +284,9 @@ ip_reass(struct mbuf *m)
mac_ipq_create(m, fp);
#endif
TAILQ_INSERT_HEAD(head, fp, ipq_list);
+ V_ipq[hash].count++;
fp->ipq_nfrags = 1;
+ atomic_add_int(&nfrags, 1);
fp->ipq_ttl = IPFRAGTTL;
fp->ipq_p = ip->ip_p;
fp->ipq_id = ip->ip_id;
@@ -249,6 +297,7 @@ ip_reass(struct mbuf *m)
goto done;
} else {
fp->ipq_nfrags++;
+ atomic_add_int(&nfrags, 1);
#ifdef MAC
mac_ipq_update(m, fp);
#endif
@@ -325,6 +374,7 @@ ip_reass(struct mbuf *m)
m->m_nextpkt = nq;
IPSTAT_INC(ips_fragdropped);
fp->ipq_nfrags--;
+ atomic_subtract_int(&nfrags, 1);
m_freem(q);
}
@@ -342,7 +392,7 @@ ip_reass(struct mbuf *m)
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
if (ntohs(GETIP(q)->ip_off) != next) {
if (fp->ipq_nfrags > V_maxfragsperpacket)
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
next += ntohs(GETIP(q)->ip_len);
@@ -350,7 +400,7 @@ ip_reass(struct mbuf *m)
/* Make sure the last packet didn't have the IP_MF flag */
if (p->m_flags & M_IP_FRAG) {
if (fp->ipq_nfrags > V_maxfragsperpacket)
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
@@ -361,7 +411,7 @@ ip_reass(struct mbuf *m)
ip = GETIP(q);
if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
IPSTAT_INC(ips_toolong);
- ipq_drop(head, fp);
+ ipq_drop(&V_ipq[hash], fp);
goto done;
}
@@ -390,6 +440,7 @@ ip_reass(struct mbuf *m)
while (m->m_pkthdr.csum_data & 0xffff0000)
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
(m->m_pkthdr.csum_data >> 16);
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
#ifdef MAC
mac_ipq_reassemble(fp, m);
mac_ipq_destroy(fp);
@@ -404,6 +455,7 @@ ip_reass(struct mbuf *m)
ip->ip_src = fp->ipq_src;
ip->ip_dst = fp->ipq_dst;
TAILQ_REMOVE(head, fp, ipq_list);
+ V_ipq[hash].count--;
uma_zfree(V_ipq_zone, fp);
m->m_len += (ip->ip_hl << 2);
m->m_data -= (ip->ip_hl << 2);
@@ -449,8 +501,10 @@ ip_reass(struct mbuf *m)
dropfrag:
IPSTAT_INC(ips_fragdropped);
- if (fp != NULL)
+ if (fp != NULL) {
fp->ipq_nfrags--;
+ atomic_subtract_int(&nfrags, 1);
+ }
m_freem(m);
done:
IPQ_UNLOCK(hash);
@@ -465,21 +519,27 @@ done:
void
ipreass_init(void)
{
+ int max;
for (int i = 0; i < IPREASS_NHASH; i++) {
TAILQ_INIT(&V_ipq[i].head);
mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
MTX_DEF | MTX_DUPOK);
+ V_ipq[i].count = 0;
}
V_ipq_hashseed = arc4random();
V_maxfragsperpacket = 16;
V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
+ max = IP_MAXFRAGPACKETS;
+ max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
+ maxfrags = IP_MAXFRAGS;
EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
+ }
}
/*
@@ -494,7 +554,7 @@ ipreass_slowtimo(void)
IPQ_LOCK(i);
TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
if (--fp->ipq_ttl == 0)
- ipq_timeout(&V_ipq[i].head, fp);
+ ipq_timeout(&V_ipq[i], fp);
IPQ_UNLOCK(i);
}
}
@@ -509,7 +569,10 @@ ipreass_drain(void)
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
while(!TAILQ_EMPTY(&V_ipq[i].head))
- ipq_drop(&V_ipq[i].head, TAILQ_FIRST(&V_ipq[i].head));
+ ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head));
+ KASSERT(V_ipq[i].count == 0,
+ ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
+ V_ipq[i].count, V_ipq));
IPQ_UNLOCK(i);
}
}
@@ -537,9 +600,23 @@ ipreass_destroy(void)
static void
ipreass_drain_tomax(void)
{
+ struct ipq *fp;
int target;
/*
+ * Make sure each bucket is under the new limit. If
+ * necessary, drop enough of the oldest elements from
+ * each bucket to get under the new limit.
+ */
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ while (V_ipq[i].count > V_ipreass_maxbucketsize &&
+ (fp = TAILQ_LAST(&V_ipq[i].head, ipqhead)) != NULL)
+ ipq_timeout(&V_ipq[i], fp);
+ IPQ_UNLOCK(i);
+ }
+
+ /*
* If we are over the maximum number of fragments,
* drain off enough to get down to the new limit,
* stripping off last elements on queues. Every
@@ -547,13 +624,11 @@ ipreass_drain_tomax(void)
*/
target = uma_zone_get_max(V_ipq_zone);
while (uma_zone_get_cur(V_ipq_zone) > target) {
- struct ipq *fp;
-
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
if (fp != NULL)
- ipq_timeout(&V_ipq[i].head, fp);
+ ipq_timeout(&V_ipq[i], fp);
IPQ_UNLOCK(i);
}
}
@@ -562,9 +637,20 @@ ipreass_drain_tomax(void)
static void
ipreass_zone_change(void *tag)
{
-
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
- ipreass_drain_tomax();
+ VNET_ITERATOR_DECL(vnet_iter);
+ int max;
+
+ maxfrags = IP_MAXFRAGS;
+ max = IP_MAXFRAGPACKETS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
+ ipreass_drain_tomax();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -592,6 +678,7 @@ sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
* and place an extreme upper bound.
*/
max = uma_zone_set_max(V_ipq_zone, max);
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
ipreass_drain_tomax();
V_noreass = 0;
} else if (max == 0) {
@@ -600,6 +687,7 @@ sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
} else if (max == -1) {
V_noreass = 0;
uma_zone_set_max(V_ipq_zone, 0);
+ V_ipreass_maxbucketsize = INT_MAX;
} else
return (EINVAL);
return (0);
@@ -613,49 +701,72 @@ static struct ipq *
ipq_reuse(int start)
{
struct ipq *fp;
- int i;
+ int bucket, i;
IPQ_LOCK_ASSERT(start);
- for (i = start;; i++) {
- if (i == IPREASS_NHASH)
- i = 0;
- if (i != start && IPQ_TRYLOCK(i) == 0)
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ bucket = (start + i) % IPREASS_NHASH;
+ if (bucket != start && IPQ_TRYLOCK(bucket) == 0)
continue;
- fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
+ fp = TAILQ_LAST(&V_ipq[bucket].head, ipqhead);
if (fp) {
struct mbuf *m;
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
while (fp->ipq_frags) {
m = fp->ipq_frags;
fp->ipq_frags = m->m_nextpkt;
m_freem(m);
}
- TAILQ_REMOVE(&V_ipq[i].head, fp, ipq_list);
- if (i != start)
- IPQ_UNLOCK(i);
- IPQ_LOCK_ASSERT(start);
- return (fp);
+ TAILQ_REMOVE(&V_ipq[bucket].head, fp, ipq_list);
+ V_ipq[bucket].count--;
+ if (bucket != start)
+ IPQ_UNLOCK(bucket);
+ break;
}
- if (i != start)
- IPQ_UNLOCK(i);
+ if (bucket != start)
+ IPQ_UNLOCK(bucket);
}
+ IPQ_LOCK_ASSERT(start);
+ return (fp);
}
/*
* Free a fragment reassembly header and all associated datagrams.
*/
static void
-ipq_free(struct ipqhead *fhp, struct ipq *fp)
+ipq_free(struct ipqbucket *bucket, struct ipq *fp)
{
struct mbuf *q;
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
while (fp->ipq_frags) {
q = fp->ipq_frags;
fp->ipq_frags = q->m_nextpkt;
m_freem(q);
}
- TAILQ_REMOVE(fhp, fp, ipq_list);
+ TAILQ_REMOVE(&bucket->head, fp, ipq_list);
+ bucket->count--;
uma_zfree(V_ipq_zone, fp);
}
+
+/*
+ * Get or set the maximum number of reassembly queues per bucket.
+ */
+static int
+sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS)
+{
+ int error, max;
+
+ max = V_ipreass_maxbucketsize;
+ error = sysctl_handle_int(oidp, &max, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (max <= 0)
+ return (EINVAL);
+ V_ipreass_maxbucketsize = max;
+ ipreass_drain_tomax();
+ return (0);
+}
diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c
index 2dd5b999..d4eeb040 100644
--- a/freebsd/sys/netinet/libalias/alias.c
+++ b/freebsd/sys/netinet/libalias/alias.c
@@ -1753,7 +1753,8 @@ LibAliasUnLoadAllModule(void)
* the input packet, on failure NULL. The input packet is always consumed.
*/
struct mbuf *
-m_megapullup(struct mbuf *m, int len) {
+m_megapullup(struct mbuf *m, int len)
+{
struct mbuf *mcl;
if (len > m->m_pkthdr.len)
@@ -1762,7 +1763,14 @@ m_megapullup(struct mbuf *m, int len) {
if (m->m_next == NULL && M_WRITABLE(m))
return (m);
- mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (len <= MJUMPAGESIZE)
+ mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ else if (len <= MJUM9BYTES)
+ mcl = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
+ else if (len <= MJUM16BYTES)
+ mcl = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
+ else
+ goto bad;
if (mcl == NULL)
goto bad;
m_align(mcl, len);
diff --git a/freebsd/sys/netinet/libalias/alias_irc.c b/freebsd/sys/netinet/libalias/alias_irc.c
index 1dbb9ddf..19337121 100644
--- a/freebsd/sys/netinet/libalias/alias_irc.c
+++ b/freebsd/sys/netinet/libalias/alias_irc.c
@@ -100,8 +100,7 @@ static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
- if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
- ah->maxpktsize == 0)
+ if (ah->dport == NULL || ah->lnk == NULL || ah->maxpktsize == 0)
return (-1);
if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
|| ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2)
diff --git a/freebsd/sys/netinet/libalias/alias_mod.h b/freebsd/sys/netinet/libalias/alias_mod.h
index c646f794..a894b6de 100644
--- a/freebsd/sys/netinet/libalias/alias_mod.h
+++ b/freebsd/sys/netinet/libalias/alias_mod.h
@@ -41,17 +41,17 @@ MALLOC_DECLARE(M_ALIAS);
/* Use kernel allocator. */
#if defined(_SYS_MALLOC_H_)
+#undef malloc
#ifndef __rtems__
#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
#define calloc(n, x) mallocarray((n), (x), M_ALIAS, M_NOWAIT|M_ZERO)
#define free(x) free(x, M_ALIAS)
#else /* __rtems__ */
-#undef malloc
#undef calloc
#undef free
-#define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
-#define calloc(x, n) malloc(x*n)
-#define free(x) _bsd_free(x, M_ALIAS)
+#define malloc(x) _bsd_malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(n, x) mallocarray((n), (x), M_ALIAS, M_NOWAIT|M_ZERO)
+#define free(x) _bsd_free(x, M_ALIAS)
#endif /* __rtems__ */
#endif
#endif
diff --git a/freebsd/sys/netinet/pim_var.h b/freebsd/sys/netinet/pim_var.h
index e6398a4d..dfb06928 100644
--- a/freebsd/sys/netinet/pim_var.h
+++ b/freebsd/sys/netinet/pim_var.h
@@ -73,8 +73,6 @@ struct pimstat {
#define PIMCTL_STATS 1 /* statistics (read-only) */
#ifdef _KERNEL
-
-int pim_input(struct mbuf **, int *, int);
SYSCTL_DECL(_net_inet_pim);
#endif
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index 7dea3ec1..a97eadae 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -172,7 +172,7 @@ rip_inshash(struct inpcb *inp)
} else
hash = 0;
pcbhash = &pcbinfo->ipi_hashbase[hash];
- LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
+ CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
}
static void
@@ -182,7 +182,7 @@ rip_delhash(struct inpcb *inp)
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
- LIST_REMOVE(inp, inp_hash);
+ CK_LIST_REMOVE(inp, inp_hash);
}
#endif /* INET */
@@ -287,6 +287,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
struct ip *ip = mtod(m, struct ip *);
struct inpcb *inp, *last;
struct sockaddr_in ripsrc;
+ struct epoch_tracker et;
int hash;
*mp = NULL;
@@ -301,8 +302,8 @@ rip_input(struct mbuf **mp, int *offp, int proto)
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
if (inp->inp_ip_p != proto)
continue;
#ifdef INET6
@@ -314,27 +315,33 @@ rip_input(struct mbuf **mp, int *offp, int proto)
continue;
if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
continue;
- if (jailed_without_vnet(inp->inp_cred)) {
- /*
- * XXX: If faddr was bound to multicast group,
- * jailed raw socket will drop datagram.
- */
- if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
- continue;
- }
if (last != NULL) {
struct mbuf *n;
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL)
- (void) rip_append(last, ip, n, &ripsrc);
+ (void) rip_append(last, ip, n, &ripsrc);
/* XXX count dropped packet */
INP_RUNLOCK(last);
+ last = NULL;
}
INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
+ goto skip_1;
+ if (jailed_without_vnet(inp->inp_cred)) {
+ /*
+ * XXX: If faddr was bound to multicast group,
+ * jailed raw socket will drop datagram.
+ */
+ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
+ goto skip_1;
+ }
last = inp;
+ continue;
+ skip_1:
+ INP_RUNLOCK(inp);
}
- LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
+ CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
if (inp->inp_ip_p && inp->inp_ip_p != proto)
continue;
#ifdef INET6
@@ -348,6 +355,19 @@ rip_input(struct mbuf **mp, int *offp, int proto)
if (!in_nullhost(inp->inp_faddr) &&
!in_hosteq(inp->inp_faddr, ip->ip_src))
continue;
+ if (last != NULL) {
+ struct mbuf *n;
+
+ n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (n != NULL)
+ (void) rip_append(last, ip, n, &ripsrc);
+ /* XXX count dropped packet */
+ INP_RUNLOCK(last);
+ last = NULL;
+ }
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
+ goto skip_2;
if (jailed_without_vnet(inp->inp_cred)) {
/*
* Allow raw socket in jail to receive multicast;
@@ -356,7 +376,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
*/
if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
- continue;
+ goto skip_2;
}
/*
* If this raw socket has multicast state, and we
@@ -397,22 +417,15 @@ rip_input(struct mbuf **mp, int *offp, int proto)
if (blocked != MCAST_PASS) {
IPSTAT_INC(ips_notmember);
- continue;
+ goto skip_2;
}
}
- if (last != NULL) {
- struct mbuf *n;
-
- n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
- if (n != NULL)
- (void) rip_append(last, ip, n, &ripsrc);
- /* XXX count dropped packet */
- INP_RUNLOCK(last);
- }
- INP_RLOCK(inp);
last = inp;
+ continue;
+ skip_2:
+ INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (rip_append(last, ip, m, &ripsrc) != 0)
IPSTAT_INC(ips_delivered);
@@ -853,7 +866,6 @@ rip_detach(struct socket *so)
ip_rsvp_force_done(so);
if (so == V_ip_rsvpd)
ip_rsvp_done();
- /* XXX defer to epoch_call */
in_pcbdetach(inp);
in_pcbfree(inp);
INP_INFO_WUNLOCK(&V_ripcbinfo);
@@ -1023,10 +1035,10 @@ static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -1045,10 +1057,10 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_ripcbinfo);
+ INP_INFO_WLOCK(&V_ripcbinfo);
gencnt = V_ripcbinfo.ipi_gencnt;
n = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
xig.xig_len = sizeof xig;
xig.xig_count = n;
@@ -1058,12 +1070,11 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- INP_INFO_RLOCK(&V_ripcbinfo);
- for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -1072,7 +1083,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
n = i;
error = 0;
@@ -1088,24 +1099,31 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_ripcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_ripcbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_ripcbinfo);
if (!error) {
+ struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_ripcbinfo);
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h
index 5a86f108..64fd5442 100644
--- a/freebsd/sys/netinet/sctp.h
+++ b/freebsd/sys/netinet/sctp.h
@@ -419,7 +419,7 @@ struct sctp_error_unresolv_addr {
struct sctp_error_unrecognized_chunk {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRECOG_CHUNK */
- struct sctp_chunkhdr ch;/* header from chunk in error */
+ struct sctp_chunkhdr ch; /* header from chunk in error */
} SCTP_PACKED;
struct sctp_error_no_user_data {
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
index d2d990e1..c21e3251 100644
--- a/freebsd/sys/netinet/sctp_asconf.c
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -279,6 +279,7 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
/* not found */
return (-1);
}
+
/* delete all destination addresses except the source */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (net != src_net) {
@@ -385,6 +386,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
aparam_length);
return (m_reply);
}
+
/* if deleting 0.0.0.0/::0, delete all addresses except src addr */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
result = sctp_asconf_del_remote_addrs_except(stcb, src);
@@ -403,6 +405,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
}
return (m_reply);
}
+
/* delete the address */
result = sctp_del_remote_addr(stcb, sa);
/*
@@ -618,6 +621,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
serial_num, asoc->asconf_seq_in + 1);
return;
}
+
/* it's the expected "next" sequence number, so process it */
asoc->asconf_seq_in = serial_num; /* update sequence */
/* get length of all the param's in the ASCONF */
@@ -642,6 +646,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), ack);
}
}
+
m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
M_NOWAIT, 1, MT_DATA);
if (m_ack == NULL) {
@@ -976,6 +981,7 @@ sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
if (stcb->asoc.deleted_primary == NULL) {
return;
}
+
if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "assoc_immediate_retrans: Deleted primary is ");
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
@@ -1079,6 +1085,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
}
return;
}
+
/* Multiple local addresses exsist in the association. */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* clear any cached route and source address */
@@ -1325,6 +1332,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
+
/*
* if this is deleting the last address from the assoc, mark it as
* pending.
@@ -1345,6 +1353,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
return (-1);
}
}
+
/* queue an asconf parameter */
status = sctp_asconf_queue_mgmt(stcb, ifa, type);
@@ -1366,6 +1375,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
stcb->asoc.asconf_addr_del_pending = NULL;
}
}
+
if (pending_delete_queued) {
struct sctp_nets *net;
@@ -1390,6 +1400,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
SCTP_FROM_SCTP_ASCONF,
__LINE__);
}
+
/* queue in an advisory set primary too */
(void)sctp_asconf_queue_mgmt(stcb, ifa, SCTP_SET_PRIM_ADDR);
/* let caller know we should send this out immediately */
@@ -1687,11 +1698,13 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
serial_num, asoc->asconf_seq_out_acked + 1);
return;
}
+
if (serial_num == asoc->asconf_seq_out - 1) {
/* stop our timer */
sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
SCTP_FROM_SCTP_ASCONF + SCTP_LOC_5);
}
+
/* process the ASCONF-ACK contents */
ack_length = ntohs(cp->ch.chunk_length) -
sizeof(struct sctp_asconf_ack_chunk);
@@ -1780,7 +1793,7 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
* at any given time
*/
if (last_error_id == 0)
- last_error_id--;/* set to "max" value */
+ last_error_id--; /* set to "max" value */
TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) {
if (aa->sent == 1) {
/*
@@ -1980,8 +1993,8 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* sent when the state goes open.
*/
if (status == 0 &&
- ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
+ ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
stcb, stcb->asoc.primary_destination);
@@ -2060,6 +2073,7 @@ sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP
laddr->action = 0;
break;
}
+
}
} else if (l->action == SCTP_DEL_IP_ADDRESS) {
LIST_FOREACH_SAFE(laddr, &inp->sctp_addr_list, sctp_nxt_addr, nladdr) {
@@ -2093,6 +2107,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (ifa->vrf_id != stcb->asoc.vrf_id) {
continue;
}
+
/* Same checks again for assoc */
switch (ifa->address.sa.sa_family) {
#ifdef INET6
@@ -2229,8 +2244,8 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* count of queued params. If in the non-open
* state, these get sent when the assoc goes open.
*/
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
if (status >= 0) {
num_queued++;
}
@@ -2283,6 +2298,7 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
/* Invalid address */
return (-1);
}
+
/* queue an ASCONF:SET_PRIM_ADDR to be sent */
if (!sctp_asconf_queue_add(stcb, ifa, SCTP_SET_PRIM_ADDR)) {
/* set primary queuing succeeded */
@@ -2290,8 +2306,8 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
"set_primary_ip_address_sa: queued on tcb=%p, ",
(void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2361,11 +2377,13 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa)
SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: param length(%u) too short\n", param_length);
break;
}
+
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(chk->data, offset, param_length, aparam_buf);
if (aph == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "is_addr_pending: couldn't get entire param\n");
break;
}
+
ph = (struct sctp_paramhdr *)(aph + 1);
if (sctp_addr_match(ph, &sctp_ifa->address.sa) != 0) {
switch (param_type) {
@@ -2380,6 +2398,7 @@ sctp_is_addr_pending(struct sctp_tcb *stcb, struct sctp_ifa *sctp_ifa)
}
last_param_type = param_type;
}
+
offset += SCTP_SIZE32(param_length);
if (offset >= asconf_limit) {
/* no more data in the mbuf chain */
@@ -2463,6 +2482,7 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
continue;
}
+
sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
@@ -2826,8 +2846,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
* out the ASCONF.
*/
if (status == 0 &&
- SCTP_GET_STATE(&stcb->asoc) ==
- SCTP_STATE_OPEN) {
+ SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2838,6 +2857,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
}
}
}
+
next_addr:
/*
* Sanity check: Make sure the length isn't 0, otherwise
@@ -3372,6 +3392,7 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
if (vrf == NULL) {
goto skip_rest;
}
+
SCTP_IPI_ADDR_RLOCK();
LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
diff --git a/freebsd/sys/netinet/sctp_asconf.h b/freebsd/sys/netinet/sctp_asconf.h
index 2a372205..581d504c 100644
--- a/freebsd/sys/netinet/sctp_asconf.h
+++ b/freebsd/sys/netinet/sctp_asconf.h
@@ -60,10 +60,10 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *,
uint32_t, uint32_t, struct sctp_ifa *);
-extern int
+extern int
sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr,
uint32_t val);
-extern void
+extern void
sctp_asconf_iterator_stcb(struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
void *ptr, uint32_t type);
diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c
index d8fbcf6e..0fc076e1 100644
--- a/freebsd/sys/netinet/sctp_auth.c
+++ b/freebsd/sys/netinet/sctp_auth.c
@@ -1311,6 +1311,7 @@ sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid)
/* can't reactivate a deactivated key with other refcounts */
return (-1);
}
+
/* set the (new) active key */
stcb->asoc.authinfo.active_keyid = keyid;
/* reset the deactivated flag */
@@ -1365,6 +1366,7 @@ sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb, keyid, 0,
SCTP_SO_LOCKED);
}
+
/* mark the key as deactivated */
skey->deactivated = 1;
@@ -1506,6 +1508,8 @@ sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
if (p_random != NULL) {
keylen = sizeof(*p_random) + random_len;
memcpy(new_key->key, p_random, keylen);
+ } else {
+ keylen = 0;
}
/* append in the AUTH chunks */
if (chunks != NULL) {
@@ -1582,6 +1586,7 @@ sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
"Assoc Key");
#endif
}
+
/* set in the active key id */
auth->shared_key_id = htons(keyid);
@@ -1769,6 +1774,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
/* If the socket is gone we are out of here */
return;
}
+
if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT))
/* event not enabled */
return;
@@ -1929,6 +1935,7 @@ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
if (num_chunks)
got_chklist = 1;
}
+
offset += SCTP_SIZE32(plen);
if (offset >= limit) {
break;
@@ -2023,6 +2030,7 @@ sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
new_key->key[keylen++] = i;
}
}
+
/* append in the HMACs */
ph = (struct sctp_paramhdr *)(new_key->key + keylen);
ph->param_type = htons(SCTP_HMAC_LIST);
diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h
index 66990c30..44126e3e 100644
--- a/freebsd/sys/netinet/sctp_auth.h
+++ b/freebsd/sys/netinet/sctp_auth.h
@@ -87,7 +87,7 @@ typedef struct sctp_hmaclist {
typedef struct sctp_authinformation {
sctp_key_t *random; /* local random key (concatenated) */
uint32_t random_len; /* local random number length for param */
- sctp_key_t *peer_random;/* peer's random key (concatenated) */
+ sctp_key_t *peer_random; /* peer's random key (concatenated) */
sctp_key_t *assoc_key; /* cached concatenated send key */
sctp_key_t *recv_key; /* cached concatenated recv key */
uint16_t active_keyid; /* active send keyid */
@@ -114,13 +114,13 @@ extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t *chklist);
extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t *list);
extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t *list);
extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t *list);
-extern int
+extern int
sctp_serialize_auth_chunks(const sctp_auth_chklist_t *list,
uint8_t *ptr);
-extern int
+extern int
sctp_pack_auth_chunks(const sctp_auth_chklist_t *list,
uint8_t *ptr);
-extern int
+extern int
sctp_unpack_auth_chunks(const uint8_t *ptr, uint8_t num_chunks,
sctp_auth_chklist_t *list);
@@ -141,16 +141,16 @@ extern void sctp_free_sharedkey(sctp_sharedkey_t *skey);
extern sctp_sharedkey_t *
sctp_find_sharedkey(struct sctp_keyhead *shared_keys,
uint16_t key_id);
-extern int
+extern int
sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
sctp_sharedkey_t *new_skey);
-extern int
+extern int
sctp_copy_skeylist(const struct sctp_keyhead *src,
struct sctp_keyhead *dest);
/* ref counts on shared keys, by key id */
extern void sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t keyid);
-extern void
+extern void
sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid,
int so_locked);
@@ -161,11 +161,11 @@ extern void sctp_free_hmaclist(sctp_hmaclist_t *list);
extern int sctp_auth_add_hmacid(sctp_hmaclist_t *list, uint16_t hmac_id);
extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t *list);
extern sctp_hmaclist_t *sctp_default_supported_hmaclist(void);
-extern uint16_t
+extern uint16_t
sctp_negotiate_hmacid(sctp_hmaclist_t *peer,
sctp_hmaclist_t *local);
extern int sctp_serialize_hmaclist(sctp_hmaclist_t *list, uint8_t *ptr);
-extern int
+extern int
sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs,
uint32_t num_hmacs);
@@ -175,22 +175,22 @@ extern void sctp_free_authinfo(sctp_authinfo_t *authinfo);
/* keyed-HMAC functions */
extern uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo);
extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
-extern uint32_t
+extern uint32_t
sctp_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
uint8_t *text, uint32_t textlen, uint8_t *digest);
-extern int
+extern int
sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
uint8_t *text, uint32_t textlen, uint8_t *digest, uint32_t digestlen);
-extern uint32_t
+extern uint32_t
sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t *key,
uint8_t *text, uint32_t textlen, uint8_t *digest);
extern int sctp_auth_is_supported_hmac(sctp_hmaclist_t *list, uint16_t id);
/* mbuf versions */
-extern uint32_t
+extern uint32_t
sctp_hmac_m(uint16_t hmac_algo, uint8_t *key, uint32_t keylen,
struct mbuf *m, uint32_t m_offset, uint8_t *digest, uint32_t trailer);
-extern uint32_t
+extern uint32_t
sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t *key,
struct mbuf *m, uint32_t m_offset, uint8_t *digest);
@@ -206,26 +206,26 @@ extern int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid);
extern int sctp_deact_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
extern int sctp_deact_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
-extern void
+extern void
sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
uint32_t offset, uint32_t length);
-extern void
+extern void
sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
struct sctp_auth_chunk *auth, struct sctp_tcb *stcb, uint16_t key_id);
extern struct mbuf *
sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
struct sctp_auth_chunk **auth_ret, uint32_t *offset,
struct sctp_tcb *stcb, uint8_t chunk);
-extern int
+extern int
sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *ch,
struct mbuf *m, uint32_t offset);
-extern void
+extern void
sctp_notify_authentication(struct sctp_tcb *stcb,
uint32_t indication, uint16_t keyid, uint16_t alt_keyid, int so_locked);
-extern int
+extern int
sctp_validate_init_auth_params(struct mbuf *m, int offset,
int limit);
-extern void
+extern void
sctp_initialize_auth_params(struct sctp_inpcb *inp,
struct sctp_tcb *stcb);
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c
index 94c23bff..0f0ddd89 100644
--- a/freebsd/sys/netinet/sctp_bsd_addr.c
+++ b/freebsd/sys/netinet/sctp_bsd_addr.c
@@ -307,10 +307,12 @@ sctp_addr_change(struct ifaddr *ifa, int cmd)
SCTP_BASE_VAR(first_time) = 1;
sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID);
}
+
if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) {
/* don't know what to do with this */
return;
}
+
if (ifa->ifa_addr == NULL) {
return;
}
diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c
index e8d6a354..1163cb91 100644
--- a/freebsd/sys/netinet/sctp_cc_functions.c
+++ b/freebsd/sys/netinet/sctp_cc_functions.c
@@ -133,6 +133,7 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
t_ucwnd_sbw = 1;
}
}
+
/*-
* CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
* (net->fast_retran_loss_recovery == 0)))
@@ -1121,6 +1122,7 @@ sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
}
+
}
SCTP_STAT_INCR(sctps_ecnereducedcwnd);
} else {
@@ -1320,7 +1322,7 @@ sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *ne
static
-void
+void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
struct sctp_tmit_chunk *tp1)
{
@@ -1937,6 +1939,7 @@ measure_achieved_throughput(struct sctp_nets *net)
net->cc_mod.htcp_ca.lasttime = now;
return;
}
+
net->cc_mod.htcp_ca.bytecount += net->net_ack;
if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
(now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
@@ -1973,6 +1976,7 @@ htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
return;
}
}
+
if (ca->modeswitch && minRTT > (uint32_t)MSEC_TO_TICKS(10) && maxRTT) {
ca->beta = (minRTT << 7) / maxRTT;
if (ca->beta < BETA_MIN)
@@ -1996,6 +2000,7 @@ htcp_alpha_update(struct htcp *ca)
diff -= hz;
factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
}
+
if (use_rtt_scaling && minRTT) {
uint32_t scale = (hz << 3) / (10 * minRTT);
@@ -2005,6 +2010,7 @@ htcp_alpha_update(struct htcp *ca)
if (!factor)
factor = 1;
}
+
ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
if (!ca->alpha)
ca->alpha = ALPHA_BASE;
@@ -2059,12 +2065,14 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
sctp_log_cwnd(stcb, net, net->mtu,
SCTP_CWND_LOG_FROM_SS);
}
+
} else {
net->cwnd += net->net_ack;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
SCTP_CWND_LOG_FROM_SS);
}
+
}
sctp_enforce_cwnd_limit(&stcb->asoc, net);
} else {
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
index 018cd282..d07381d5 100644
--- a/freebsd/sys/netinet/sctp_constants.h
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -470,10 +470,14 @@ __FBSDID("$FreeBSD$");
#define SCTP_STATE_IN_ACCEPT_QUEUE 0x1000
#define SCTP_STATE_MASK 0x007f
-#define SCTP_GET_STATE(asoc) ((asoc)->state & SCTP_STATE_MASK)
-#define SCTP_SET_STATE(asoc, newstate) ((asoc)->state = ((asoc)->state & ~SCTP_STATE_MASK) | newstate)
-#define SCTP_CLEAR_SUBSTATE(asoc, substate) ((asoc)->state &= ~substate)
-#define SCTP_ADD_SUBSTATE(asoc, substate) ((asoc)->state |= substate)
+#define SCTP_GET_STATE(_stcb) \
+ ((_stcb)->asoc.state & SCTP_STATE_MASK)
+#define SCTP_SET_STATE(_stcb, _state) \
+ sctp_set_state(_stcb, _state)
+#define SCTP_CLEAR_SUBSTATE(_stcb, _substate) \
+ (_stcb)->asoc.state &= ~(_substate)
+#define SCTP_ADD_SUBSTATE(_stcb, _substate) \
+ sctp_add_substate(_stcb, _substate)
/* SCTP reachability state for each address */
#define SCTP_ADDR_REACHABLE 0x001
diff --git a/freebsd/sys/netinet/sctp_dtrace_define.h b/freebsd/sys/netinet/sctp_dtrace_define.h
index 53451d20..ad7c8526 100644
--- a/freebsd/sys/netinet/sctp_dtrace_define.h
+++ b/freebsd/sys/netinet/sctp_dtrace_define.h
@@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/sdt.h>
-SDT_PROVIDER_DEFINE(sctp);
+SDT_PROVIDER_DECLARE(sctp);
/********************************************************/
/* Cwnd probe - tracks changes in the congestion window on a netp */
diff --git a/freebsd/sys/netinet/sctp_header.h b/freebsd/sys/netinet/sctp_header.h
index 685ed78a..8c4137a5 100644
--- a/freebsd/sys/netinet/sctp_header.h
+++ b/freebsd/sys/netinet/sctp_header.h
@@ -48,7 +48,7 @@ __FBSDID("$FreeBSD$");
* Parameter structures
*/
struct sctp_ipv4addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_IPV4_PARAM_TYPE, len=8 */
+ struct sctp_paramhdr ph; /* type=SCTP_IPV4_PARAM_TYPE, len=8 */
uint32_t addr; /* IPV4 address */
} SCTP_PACKED;
@@ -56,20 +56,20 @@ struct sctp_ipv4addr_param {
struct sctp_ipv6addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */
+ struct sctp_paramhdr ph; /* type=SCTP_IPV6_PARAM_TYPE, len=20 */
uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */
} SCTP_PACKED;
/* Cookie Preservative */
struct sctp_cookie_perserve_param {
- struct sctp_paramhdr ph;/* type=SCTP_COOKIE_PRESERVE, len=8 */
+ struct sctp_paramhdr ph; /* type=SCTP_COOKIE_PRESERVE, len=8 */
uint32_t time; /* time in ms to extend cookie */
} SCTP_PACKED;
#define SCTP_ARRAY_MIN_LEN 1
/* Host Name Address */
struct sctp_host_name_param {
- struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */
+ struct sctp_paramhdr ph; /* type=SCTP_HOSTNAME_ADDRESS */
char name[SCTP_ARRAY_MIN_LEN]; /* host name */
} SCTP_PACKED;
@@ -80,7 +80,7 @@ struct sctp_host_name_param {
#define SCTP_MAX_ADDR_PARAMS_SIZE 12
/* supported address type */
struct sctp_supported_addr_param {
- struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */
+ struct sctp_paramhdr ph; /* type=SCTP_SUPPORTED_ADDRTYPE */
uint16_t addr_type[2]; /* array of supported address types */
} SCTP_PACKED;
@@ -108,8 +108,8 @@ struct sctp_prsctp_supported_param {
/* draft-ietf-tsvwg-addip-sctp */
struct sctp_asconf_paramhdr { /* an ASCONF "parameter" */
- struct sctp_paramhdr ph;/* a SCTP parameter header */
- uint32_t correlation_id;/* correlation id for this param */
+ struct sctp_paramhdr ph; /* a SCTP parameter header */
+ uint32_t correlation_id; /* correlation id for this param */
} SCTP_PACKED;
struct sctp_asconf_addr_param { /* an ASCONF address parameter */
@@ -133,7 +133,7 @@ struct sctp_asconf_addrv4_param { /* an ASCONF address (v4) parameter */
#define SCTP_MAX_SUPPORTED_EXT 256
struct sctp_supported_chunk_types_param {
- struct sctp_paramhdr ph;/* type = 0x8008 len = x */
+ struct sctp_paramhdr ph; /* type = 0x8008 len = x */
uint8_t chunk_types[];
} SCTP_PACKED;
@@ -206,8 +206,8 @@ struct sctp_state_cookie { /* this is our definition... */
uint16_t peerport; /* port address of the peer in the INIT */
uint16_t myport; /* my port address used in the INIT */
- uint8_t ipv4_addr_legal;/* Are V4 addr legal? */
- uint8_t ipv6_addr_legal;/* Are V6 addr legal? */
+ uint8_t ipv4_addr_legal; /* Are V4 addr legal? */
+ uint8_t ipv6_addr_legal; /* Are V6 addr legal? */
uint8_t local_scope; /* IPv6 local scope flag */
uint8_t site_scope; /* IPv6 site scope flag */
@@ -512,17 +512,17 @@ struct sctp_stream_reset_resp_tsn {
/* Should we make the max be 32? */
#define SCTP_RANDOM_MAX_SIZE 256
struct sctp_auth_random {
- struct sctp_paramhdr ph;/* type = 0x8002 */
+ struct sctp_paramhdr ph; /* type = 0x8002 */
uint8_t random_data[];
} SCTP_PACKED;
struct sctp_auth_chunk_list {
- struct sctp_paramhdr ph;/* type = 0x8003 */
+ struct sctp_paramhdr ph; /* type = 0x8003 */
uint8_t chunk_types[];
} SCTP_PACKED;
struct sctp_auth_hmac_algo {
- struct sctp_paramhdr ph;/* type = 0x8004 */
+ struct sctp_paramhdr ph; /* type = 0x8004 */
uint16_t hmac_ids[];
} SCTP_PACKED;
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index 98b397a2..28e3f5b2 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -92,6 +92,7 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
if (stcb->sctp_socket == NULL) {
return (calc);
}
+
KASSERT(asoc->cnt_on_reasm_queue > 0 || asoc->size_on_reasm_queue == 0,
("size_on_reasm_queue is %u", asoc->size_on_reasm_queue));
KASSERT(asoc->cnt_on_all_streams > 0 || asoc->size_on_all_streams == 0,
@@ -117,6 +118,7 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
/* out of space */
return (calc);
}
+
/* what is the overhead of all these rwnd's */
calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
/*
@@ -187,6 +189,7 @@ sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
/* user does not want any ancillary data */
return (NULL);
}
+
len = 0;
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
@@ -1046,6 +1049,7 @@ place_chunk:
SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
return;
}
+
}
if (inserted == 0) {
/* Its at the end */
@@ -2140,6 +2144,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
control = NULL;
goto finish_express_del;
}
+
/* Now will we need a chunk too? */
if ((chk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
sctp_alloc_a_chunk(stcb, chk);
@@ -2570,7 +2575,7 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap)
* Now we need to see if we need to queue a sack or just start the
* timer (if allowed).
*/
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/*
* Ok special case, in SHUTDOWN-SENT case. here we maker
* sure SACK timer is off and instead send a SHUTDOWN and a
@@ -2927,7 +2932,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
}
/* now service all of the reassm queue if needed */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/* Assure that we ack right away */
stcb->asoc.send_sack = 1;
}
@@ -3075,7 +3080,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
tp1->whoTo->net_ack += tp1->send_size;
if (tp1->snd_count < 2) {
/*-
- * True non-retransmited chunk
+ * True non-retransmitted chunk
*/
tp1->whoTo->net_ack2 += tp1->send_size;
@@ -3098,6 +3103,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
tp1->do_rtt = 0;
}
}
+
}
if (tp1->sent <= SCTP_DATAGRAM_RESEND) {
if (SCTP_TSN_GT(tp1->rec.data.tsn,
@@ -3363,6 +3369,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
continue;
}
}
+
}
if (SCTP_TSN_GT(tp1->rec.data.tsn, asoc->this_sack_highest_gap) &&
!(accum_moved && asoc->fast_retran_loss_recovery)) {
@@ -3598,6 +3605,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
tp1);
}
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) {
sctp_log_rwnd(SCTP_INCREASE_PEER_RWND,
asoc->peers_rwnd, tp1->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh));
@@ -3679,6 +3687,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
tp1->whoTo->find_pseudo_cumack = 1;
tp1->whoTo->find_rtx_pseudo_cumack = 1;
}
+
} else { /* CMT is OFF */
#ifdef SCTP_FR_TO_ALTERNATE
@@ -3967,6 +3976,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
}
return;
}
+
/* First setup for CC stuff */
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
if (SCTP_TSN_GT(cumack, net->cwr_window_tsn)) {
@@ -4048,7 +4058,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
tp1->whoTo->net_ack += tp1->send_size;
if (tp1->snd_count < 2) {
/*
- * True non-retransmited
+ * True non-retransmitted
* chunk
*/
tp1->whoTo->net_ack2 +=
@@ -4232,6 +4242,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
asoc->total_flight = 0;
asoc->total_flight_count = 0;
}
+
/* RWND update */
asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
(uint32_t)(asoc->total_flight + (asoc->total_flight_count * SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))));
@@ -4320,12 +4331,12 @@ again:
/* clean up */
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(asoc->stream_queue_cnt == 1) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
struct mbuf *op_err;
@@ -4341,12 +4352,11 @@ again:
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -4358,13 +4368,12 @@ again:
stcb->sctp_ep, stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, netp);
- } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ } else if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -4484,6 +4493,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack,
rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
}
+
old_rwnd = stcb->asoc.peers_rwnd;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -4555,6 +4565,7 @@ hopeless_peer:
/* acking something behind */
return;
}
+
/* update the Rwnd of the peer */
if (TAILQ_EMPTY(&asoc->sent_queue) &&
TAILQ_EMPTY(&asoc->send_queue) &&
@@ -4608,6 +4619,7 @@ hopeless_peer:
if (stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) {
(*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net);
}
+
/*
* CMT: SFR algo (and HTNA) - this_sack_highest_newack has
* to be greater than the cumack. Also reset saw_newack to 0
@@ -4664,7 +4676,7 @@ hopeless_peer:
if (tp1->snd_count < 2) {
/*
- * True non-retransmited
+ * True non-retransmitted
* chunk
*/
tp1->whoTo->net_ack2 +=
@@ -4843,6 +4855,7 @@ hopeless_peer:
#endif
asoc->total_flight = 0;
}
+
/* sa_ignore NO_NULL_CHK */
if ((wake_him) && (stcb->sctp_socket)) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -4947,6 +4960,7 @@ hopeless_peer:
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
0, (void *)net, SCTP_SO_NOT_LOCKED);
}
+
if (net == stcb->asoc.primary_destination) {
if (stcb->asoc.alternate) {
/*
@@ -4957,6 +4971,7 @@ hopeless_peer:
stcb->asoc.alternate = NULL;
}
}
+
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
@@ -4979,6 +4994,7 @@ hopeless_peer:
}
asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery);
}
+
if (TAILQ_EMPTY(&asoc->sent_queue)) {
/* nothing left in-flight */
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
@@ -4992,6 +5008,7 @@ hopeless_peer:
asoc->total_flight = 0;
asoc->total_flight_count = 0;
}
+
/**********************************/
/* Now what about shutdown issues */
/**********************************/
@@ -5009,12 +5026,12 @@ hopeless_peer:
/* clean up */
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(asoc->stream_queue_cnt == 1) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
struct mbuf *op_err;
@@ -5030,12 +5047,11 @@ hopeless_peer:
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -5048,13 +5064,12 @@ hopeless_peer:
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, netp);
return;
- } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ } else if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (asoc->alternate) {
netp = asoc->alternate;
@@ -5126,6 +5141,7 @@ hopeless_peer:
if (asoc->peers_rwnd > old_rwnd) {
win_probe_recovery = 1;
}
+
/*
* Now we must setup so we have a timer up for anyone with
* outstanding data.
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
index 10b18d0b..59ceac3a 100644
--- a/freebsd/sys/netinet/sctp_indata.h
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -99,8 +99,7 @@ void
sctp_handle_forward_tsn(struct sctp_tcb *,
struct sctp_forward_tsn_chunk *, int *, struct mbuf *, int);
-struct sctp_tmit_chunk *
- sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
+struct sctp_tmit_chunk *sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
void sctp_service_queues(struct sctp_tcb *, struct sctp_association *);
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index ee206551..c7e86e78 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
#endif
+#include <netinet/in_kdtrace.h>
#include <sys/smp.h>
@@ -192,7 +193,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
goto outnow;
}
if ((stcb != NULL) &&
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending SHUTDOWN-ACK\n");
sctp_send_shutdown_ack(stcb, NULL);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
@@ -307,6 +308,7 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
}
+
}
}
SCTP_TCB_SEND_LOCK(stcb);
@@ -493,6 +495,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
SCTP_FREE(param, SCTP_M_ASC_ADDR);
}
}
+
stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
stcb->asoc.local_hmacs);
if (op_err) {
@@ -555,6 +558,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
}
return (retval);
}
+
return (0);
}
@@ -572,6 +576,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
/* Invalid length */
return;
}
+
memset(&store, 0, sizeof(store));
switch (cp->heartbeat.hb_info.addr_family) {
#ifdef INET
@@ -711,15 +716,15 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
*/
struct sctpasochead *head;
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_INP_INFO_WLOCK();
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
@@ -733,15 +738,14 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
SCTP_INP_INFO_WUNLOCK();
return (1);
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/*
* treat like a case where the cookie expired i.e.: - dump
* current cookie. - generate a new vtag. - resend init.
*/
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
- stcb->asoc.state &= ~SCTP_STATE_COOKIE_ECHOED;
- stcb->asoc.state |= SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_toss_old_cookies(stcb, &stcb->asoc);
stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
@@ -823,8 +827,8 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
/* free the tcb */
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
#ifdef SCTP_ASOCLOG_OF_TSNS
@@ -838,7 +842,7 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -893,15 +897,15 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
if (stcb == NULL)
return;
asoc = &stcb->asoc;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
return;
}
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
/* Shutdown NOT the expected size */
return;
}
- old_state = SCTP_GET_STATE(asoc);
+ old_state = SCTP_GET_STATE(stcb);
sctp_update_acked(stcb, cp, abort_flag);
if (*abort_flag) {
return;
@@ -957,11 +961,10 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
}
/* goto SHUTDOWN_RECEIVED state to block new requests */
if (stcb->sctp_socket) {
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_RECEIVED);
/*
* notify upper layer that peer has initiated a
* shutdown
@@ -972,7 +975,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
}
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) {
/*
* stop the shutdown timer, since we WILL move to
* SHUTDOWN-ACK-SENT.
@@ -992,13 +995,12 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
/* no outstanding data to send, so move on... */
/* send SHUTDOWN-ACK */
/* move to SHUTDOWN-ACK-SENT state */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_ACK_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown_ack(stcb, net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
@@ -1027,15 +1029,15 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
asoc = &stcb->asoc;
/* process according to association state */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* unexpected SHUTDOWN-ACK... do OOTB handling... */
sctp_send_shutdown_complete(stcb, net, 1);
SCTP_TCB_UNLOCK(stcb);
return;
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/* unexpected SHUTDOWN-ACK... so ignore... */
SCTP_TCB_UNLOCK(stcb);
return;
@@ -1231,7 +1233,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
* waiting.
*/
if ((cause_length >= sizeof(struct sctp_error_stale_cookie)) &&
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
struct sctp_error_stale_cookie *stale_cookie;
stale_cookie = (struct sctp_error_stale_cookie *)cause;
@@ -1264,8 +1266,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
}
/* blast back to INIT state */
sctp_toss_old_cookies(stcb, &stcb->asoc);
- asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
- asoc->state |= SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
}
@@ -1416,7 +1417,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
return (-1);
}
/* process according to association state... */
- switch (stcb->asoc.state & SCTP_STATE_MASK) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_WAIT:
/* this is the expected state for this chunk */
/* process the INIT-ACK parameters */
@@ -1442,7 +1443,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
}
/* update our state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_ECHOED);
/* reset the RTO calc */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
@@ -1536,7 +1537,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
if (how_indx < sizeof(asoc->cookie_how)) {
asoc->cookie_how[how_indx] = 1;
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
/* SHUTDOWN came in after sending INIT-ACK */
sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
@@ -1605,7 +1606,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
return (NULL);
}
- switch (SCTP_GET_STATE(asoc)) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_WAIT:
case SCTP_STATE_COOKIE_ECHOED:
/*
@@ -1629,12 +1630,12 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
stcb, net,
SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
/* update current state */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
else
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -1718,6 +1719,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
asoc->cookie_how[how_indx] = 5;
return (stcb);
}
+
if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
cookie->tie_tag_my_vtag == 0 &&
@@ -1733,7 +1735,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
* If nat support, and the below and stcb is established, send back
* a ABORT(colliding state) if we are established.
*/
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) &&
(asoc->peer_supports_nat) &&
((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
@@ -1838,8 +1840,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
asoc->cookie_how[how_indx] = 10;
return (NULL);
}
- if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
- (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
*notification = SCTP_NOTIFY_ASSOC_UP;
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
@@ -1867,17 +1869,17 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
else
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
- } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ } else if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
} else {
SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -1937,24 +1939,24 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
/* notify upper layer */
*notification = SCTP_NOTIFY_ASSOC_RESTART;
atomic_add_int(&stcb->asoc.refcnt, 1);
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT)) {
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
}
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
- } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ } else if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
}
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
- } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
+ } else if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
/* move to OPEN state, if not in SHUTDOWN_SENT */
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
}
asoc->pre_open_streams =
ntohs(initack_cp->init.num_outbound_streams);
@@ -2293,6 +2295,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
stcb->asoc.authenticated = 1;
}
}
+
/*
* if we're doing ASCONFs, check to see if we have any new local
* addresses that need to get added to the peer (eg. addresses
@@ -2342,7 +2345,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
/* update current state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, asoc->primary_destination);
@@ -2590,6 +2593,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
(uint32_t)offset, cookie_offset, sig_offset);
return (NULL);
}
+
/*
* check the cookie timestamps to be sure it's not stale
*/
@@ -2710,6 +2714,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
}
}
}
+
cookie_len -= SCTP_SIGNATURE_SIZE;
if (*stcb == NULL) {
/* this is the "normal" case... get a new TCB */
@@ -2877,7 +2882,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
* the accept state waiting for the accept!
*/
if (*stcb) {
- (*stcb)->asoc.state |= SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_ADD_SUBSTATE(*stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
}
sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
@@ -2912,6 +2917,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
(*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
}
+
/*
* Pull it from the incomplete queue and wake the
* guy
@@ -2952,6 +2958,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
if ((stcb == NULL) || (net == NULL)) {
return;
}
+
asoc = &stcb->asoc;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -2963,10 +2970,10 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
asoc->overall_error_count = 0;
sctp_stop_all_cookie_timers(stcb);
/* process according to association state */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/* state change only needed when I am in right state */
SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
- SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ SCTP_SET_STATE(stcb, SCTP_STATE_OPEN);
sctp_start_net_timers(stcb);
if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
@@ -3018,6 +3025,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
*/
goto closed_socket;
}
+
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
stcb, net);
@@ -3224,7 +3232,6 @@ static void
sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSED,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
- struct sctp_association *asoc;
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3234,9 +3241,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
if (stcb == NULL)
return;
- asoc = &stcb->asoc;
/* process according to association state */
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
/* unexpected SHUTDOWN-COMPLETE... so ignore... */
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
@@ -3248,8 +3254,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
}
#ifdef INVARIANTS
- if (!TAILQ_EMPTY(&asoc->send_queue) ||
- !TAILQ_EMPTY(&asoc->sent_queue) ||
+ if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
+ !TAILQ_EMPTY(&stcb->asoc.sent_queue) ||
sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
}
@@ -3796,6 +3802,7 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
sctp_log_map(0, 7, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
}
+
stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
@@ -4395,6 +4402,7 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
if (trunc_len > limit) {
trunc_len = limit;
}
+
/* now the chunks themselves */
while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
desc.chunk_type = ch->chunk_type;
@@ -4654,6 +4662,7 @@ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
*/
SCTP_INP_DECR_REF(inp);
}
+
/* now go back and verify any auth chunk to be sure */
if (auth_skipped && (stcb != NULL)) {
struct sctp_auth_chunk *auth;
@@ -4748,11 +4757,12 @@ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
(ch->chunk_type == SCTP_NR_SELECTIVE_ACK) ||
(ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* implied cookie-ack.. we must have lost the ack */
sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
*netp);
}
+
process_control_chunks:
while (IS_SCTP_CONTROL(ch)) {
/* validate chunk length */
@@ -4792,6 +4802,7 @@ process_control_chunks:
}
return (NULL);
}
+
num_chunks++;
/* Save off the last place we got a control from */
if (stcb != NULL) {
@@ -4811,7 +4822,6 @@ process_control_chunks:
/* check to see if this chunk required auth, but isn't */
if ((stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -4941,7 +4951,7 @@ process_control_chunks:
break;
}
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
/*-
* If we have sent a shutdown-ack, we will pay no
* attention to a sack sent in to us since
@@ -5159,6 +5169,7 @@ process_control_chunks:
goto abend;
}
}
+
if (netp != NULL) {
struct sctp_tcb *locked_stcb;
@@ -5331,6 +5342,7 @@ process_control_chunks:
*offset = length;
return (stcb);
}
+
if (stcb != NULL) {
int abort_flag = 0;
@@ -5393,6 +5405,7 @@ process_control_chunks:
*offset = length;
return (stcb);
}
+
if ((ch != NULL) && (stcb != NULL) && (netp != NULL) && (*netp != NULL)) {
if (stcb->asoc.pktdrop_supported == 0) {
goto unknown_chunk;
@@ -5559,6 +5572,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
net->flowtype = mflowtype;
net->flowid = mflowid;
}
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
if ((inp != NULL) && (stcb != NULL)) {
sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
@@ -5599,6 +5613,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
net->flowid = mflowid;
}
if (inp == NULL) {
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_noport);
if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
goto out;
@@ -5647,6 +5662,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
SCTP_TCB_UNLOCK(stcb);
stcb = NULL;
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
@@ -5655,6 +5671,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
vrf_id, port);
goto out;
}
+
}
if (IS_SCTP_CONTROL(ch)) {
/* process the control portion of the SCTP packet */
@@ -5700,14 +5717,15 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* chunks
*/
if ((stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
/* "silently" ignore */
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_recvauthmissing);
goto out;
}
if (stcb == NULL) {
/* out of the blue DATA chunk */
+ SCTP_PROBE5(receive, NULL, NULL, m, NULL, sh);
snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
@@ -5718,11 +5736,13 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
/* v_tag mismatch! */
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
SCTP_STAT_INCR(sctps_badvtag);
goto out;
}
}
+ SCTP_PROBE5(receive, NULL, stcb, m, stcb, sh);
if (stcb == NULL) {
/*
* no valid TCB for this packet, or we found it's a bad
@@ -5731,6 +5751,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
goto out;
}
+
/*
* DATA chunk processing
*/
@@ -5742,7 +5763,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
if ((length > offset) &&
(stcb != NULL) &&
- (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -5759,7 +5779,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* not get here unless we really did have a tag, so we don't
* abort if this happens, just dump the chunk silently.
*/
- switch (SCTP_GET_STATE(&stcb->asoc)) {
+ switch (SCTP_GET_STATE(stcb)) {
case SCTP_STATE_COOKIE_ECHOED:
/*
* we consider data with valid tags in this state
@@ -5810,6 +5830,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* process_data
*/
}
+
/* take care of ecn */
if ((data_processed == 1) &&
(stcb->asoc.ecn_supported == 1) &&
@@ -5817,6 +5838,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
/* Yep, we need to add a ECNE */
sctp_send_ecn_echo(stcb, net, high_tsn);
}
+
if ((data_processed == 0) && (fwd_tsn_seen)) {
int was_a_gap;
uint32_t highest_tsn;
diff --git a/freebsd/sys/netinet/sctp_input.h b/freebsd/sys/netinet/sctp_input.h
index f393ad89..72908e11 100644
--- a/freebsd/sys/netinet/sctp_input.h
+++ b/freebsd/sys/netinet/sctp_input.h
@@ -52,7 +52,7 @@ struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
struct sctp_tmit_chunk **bchk);
-void
+void
sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries,
uint16_t *list);
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index d8d9e6e8..abe8e2c9 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -445,7 +445,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed,
/*
* SCTP AUTH
*/
-#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
+#define SCTP_READ_RANDOM(buf, len) arc4rand(buf, len, 0)
/* map standard crypto API names */
#define SCTP_SHA1_CTX SHA1_CTX
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index bdef958c..8f0c8aa4 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
+#include <netinet/in_kdtrace.h>
@@ -2547,6 +2548,7 @@ once_again:
inp->next_addr_touse = NULL;
goto once_again;
}
+
inp->next_addr_touse = starting_point;
resettotop = 0;
once_again_too:
@@ -2554,6 +2556,7 @@ once_again_too:
inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
resettotop = 1;
}
+
/* ok, what about an acceptable address in the inp */
for (laddr = inp->next_addr_touse; laddr;
laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
@@ -2576,6 +2579,7 @@ once_again_too:
inp->next_addr_touse = NULL;
goto once_again_too;
}
+
/*
* no address bound can be a source for the destination we are in
* trouble
@@ -3990,8 +3994,8 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
int so_locked
#endif
)
-/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
{
+/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
/**
* Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
* WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
@@ -4038,6 +4042,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if ((auth != NULL) && (stcb != NULL)) {
sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
}
+
if (net) {
tos_value = net->dscp;
} else if (stcb) {
@@ -4249,6 +4254,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTP_SOCKET_UNLOCK(so, 0);
}
#endif
+ SCTP_PROBE5(send, NULL, stcb, ip, stcb, sctphdr);
SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
@@ -4394,7 +4400,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
} else {
ip6h->ip6_nxt = IPPROTO_SCTP;
}
- ip6h->ip6_plen = (uint16_t)(packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_plen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr)));
ip6h->ip6_dst = sin6->sin6_addr;
/*
@@ -4552,6 +4558,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
prev_scope = sin6->sin6_scope_id;
prev_port = sin6->sin6_port;
}
+
if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
/* failed to prepend data, give up */
sctp_m_freem(m);
@@ -4581,6 +4588,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
sctp_packet_log(o_pak);
#endif
+ SCTP_PROBE5(send, NULL, stcb, ip6h, stcb, sctphdr);
SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
@@ -4740,6 +4748,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
chunk_len += parameter_len;
}
+
/* ECN parameter */
if (stcb->asoc.ecn_supported == 1) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4748,6 +4757,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* PR-SCTP supported parameter */
if (stcb->asoc.prsctp_supported == 1) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4756,6 +4766,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* Add NAT friendly parameter. */
if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -4764,6 +4775,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* And now tell the peer which extensions we support */
num_ext = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
@@ -4854,6 +4866,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
chunk_len += parameter_len;
}
}
+
/* now any cookie time extensions */
if (stcb->asoc.cookie_preserve_req) {
struct sctp_cookie_perserve_param *cookie_preserve;
@@ -4871,6 +4884,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
stcb->asoc.cookie_preserve_req = 0;
chunk_len += parameter_len;
}
+
if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
uint8_t i;
@@ -4899,6 +4913,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
padding_len = 4 - 2 * i;
chunk_len += parameter_len;
}
+
SCTP_BUF_LEN(m) = chunk_len;
/* now the addresses */
/*
@@ -5519,7 +5534,7 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc = NULL;
}
if ((asoc != NULL) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT)) {
if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
/*
* new addresses, out of here in non-cookie-wait
@@ -5822,9 +5837,9 @@ do_a_abort:
initack->ch.chunk_length = 0;
/* place in my tag */
if ((asoc != NULL) &&
- ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
+ ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_INUSE) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED))) {
/* re-use the v-tags and init-seq here */
initack->init.initiate_tag = htonl(asoc->my_vtag);
initack->init.initial_tsn = htonl(asoc->init_seq_number);
@@ -5904,6 +5919,7 @@ do_a_abort:
ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
chunk_len += parameter_len;
}
+
/* ECN parameter */
if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
((asoc == NULL) && (inp->ecn_supported == 1))) {
@@ -5913,6 +5929,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* PR-SCTP supported parameter */
if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
((asoc == NULL) && (inp->prsctp_supported == 1))) {
@@ -5922,6 +5939,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* Add NAT friendly parameter */
if (nat_friendly) {
parameter_len = (uint16_t)sizeof(struct sctp_paramhdr);
@@ -5930,6 +5948,7 @@ do_a_abort:
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
+
/* And now tell the peer which extensions we support */
num_ext = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
@@ -5973,6 +5992,7 @@ do_a_abort:
padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
chunk_len += parameter_len;
}
+
/* add authentication parameters */
if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
((asoc == NULL) && (inp->auth_supported == 1))) {
@@ -6050,6 +6070,7 @@ do_a_abort:
SCTP_BUF_LEN(m) += padding_len;
padding_len = 0;
}
+
/* tack on the operational error if present */
if (op_err) {
parameter_len = 0;
@@ -6347,9 +6368,9 @@ sctp_msg_append(struct sctp_tcb *stcb,
}
strm = &stcb->asoc.strmout[srcv->sinfo_stream];
/* Now can we send this? */
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
/* got data while shutting down */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
@@ -6682,18 +6703,17 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
* there is nothing queued to send, so I'm
* done...
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/*
* only send SHUTDOWN the first time
* through
*/
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown(stcb, net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
@@ -6714,13 +6734,13 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
* we will allow user data to be sent first
* and move to SHUTDOWN-PENDING
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
@@ -7433,6 +7453,7 @@ dont_do_it:
chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
}
}
+
if (to_move > length) {
/*- This should not happen either
* since we always lower to_move to the size
@@ -7839,7 +7860,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
*reason_code = 0;
auth_keyid = stcb->asoc.authinfo.active_keyid;
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
- (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
eeor_mode = 1;
} else {
@@ -7970,6 +7991,7 @@ nothing_to_send:
*reason_code = 8;
return (0);
}
+
if (asoc->sctp_cmt_on_off > 0) {
/* get the last start point */
start_at = asoc->last_net_cmt_send_started;
@@ -8596,8 +8618,8 @@ again_one_more_time:
omtu = 0;
break;
}
- if ((((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ if ((((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(skip_data_for_this_net == 0)) ||
(cookie)) {
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
@@ -8616,6 +8638,7 @@ again_one_more_time:
/* Don't send the chunk on this net */
continue;
}
+
if (asoc->sctp_cmt_on_off == 0) {
if ((asoc->alternate) &&
(asoc->alternate != net) &&
@@ -8645,7 +8668,7 @@ again_one_more_time:
chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
}
if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
- ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) == SCTP_STATE_SHUTDOWN_PENDING)) {
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
struct sctp_data_chunk *dchkh;
dchkh = mtod(chk->data, struct sctp_data_chunk *);
@@ -8879,6 +8902,7 @@ no_data_fill:
if (old_start_at)
goto again_one_more_time;
}
+
/*
* At the end there should be no NON timed chunks hanging on this
* queue.
@@ -9272,17 +9296,20 @@ sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
/* can't send a new one if there is one in flight already */
return;
}
+
/* compose an ASCONF chunk, maximum length is PMTU */
m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
if (m_asconf == NULL) {
return;
}
+
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
/* no memory */
sctp_m_freem(m_asconf);
return;
}
+
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_ASCONF;
chk->rec.chunk_id.can_take_data = 0;
@@ -9353,6 +9380,7 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb)
if (ack->data == NULL) {
continue;
}
+
/* copy the asconf_ack */
m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
if (m_ack == NULL) {
@@ -9541,8 +9569,8 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp,
if (TAILQ_EMPTY(&asoc->sent_queue)) {
return (SCTP_RETRAN_DONE);
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT)) {
/* not yet open, resend the cookie and that is it */
return (1);
}
@@ -10241,6 +10269,7 @@ sctp_output(
SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
+
if (inp->sctp_socket == NULL) {
SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
@@ -11228,12 +11257,13 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
sctp_packet_log(o_pak);
}
#endif
+ SCTP_PROBE5(send, NULL, NULL, ip, NULL, shout);
SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
break;
#endif
#ifdef INET6
case AF_INET6:
- ip6->ip6_plen = (uint16_t)(len - sizeof(struct ip6_hdr));
+ ip6->ip6_plen = htons((uint16_t)(len - sizeof(struct ip6_hdr)));
if (port) {
shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
SCTP_STAT_INCR(sctps_sendswcrc);
@@ -11250,6 +11280,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
sctp_packet_log(o_pak);
}
#endif
+ SCTP_PROBE5(send, NULL, NULL, ip6, NULL, shout);
SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
break;
#endif
@@ -11314,6 +11345,7 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
return;
}
+
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
chk->rec.chunk_id.can_take_data = 1;
@@ -12332,6 +12364,7 @@ sctp_copy_one(struct sctp_stream_queue_pending *sp,
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
return (ENOBUFS);
}
+
sp->tail_mbuf = m_last(sp->data);
return (0);
}
@@ -12348,6 +12381,7 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
int user_marks_eor,
int *error)
{
+
/*-
* This routine must be very careful in its work. Protocol
* processing is up and running so care must be taken to spl...()
@@ -12360,9 +12394,9 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
*error = 0;
/* Now can we send this? */
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
/* got data while shutting down */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
@@ -12748,7 +12782,7 @@ sctp_lower_sosend(struct socket *so,
*/
queue_only = 1;
asoc = &stcb->asoc;
- SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
/* initialize authentication params for the assoc */
@@ -12870,8 +12904,8 @@ sctp_lower_sosend(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
goto out_unlocked;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
queue_only = 1;
}
/* we are now done with all control */
@@ -12879,9 +12913,9 @@ sctp_lower_sosend(struct socket *so,
sctp_m_freem(control);
control = NULL;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
(asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
if (srcv->sinfo_flags & SCTP_ABORT) {
;
@@ -12903,8 +12937,8 @@ sctp_lower_sosend(struct socket *so,
int tot_demand, tot_out = 0, max_out;
SCTP_STAT_INCR(sctps_sends_with_abort);
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
/* It has to be up before we abort */
/* how big is the user initiated abort? */
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
@@ -13014,6 +13048,7 @@ sctp_lower_sosend(struct socket *so,
error = EFAULT;
goto out_unlocked;
}
+
/* Unless E_EOR mode is on, we must make a send FIT in one call. */
if ((user_marks_eor == 0) &&
(sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
@@ -13031,6 +13066,7 @@ sctp_lower_sosend(struct socket *so,
error = EINVAL;
goto out_unlocked;
}
+
if (user_marks_eor) {
local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
} else {
@@ -13095,6 +13131,7 @@ sctp_lower_sosend(struct socket *so,
}
SOCKBUF_UNLOCK(&so->so_snd);
}
+
skip_preblock:
if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
goto out_unlocked;
@@ -13231,6 +13268,7 @@ skip_preblock:
if (srcv->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY;
}
+
/* Did we reach EOR? */
if ((uio->uio_resid == 0) &&
((user_marks_eor == 0) ||
@@ -13279,12 +13317,12 @@ skip_preblock:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
/* a collision took us forward? */
queue_only = 0;
} else {
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
- SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
queue_only = 1;
}
}
@@ -13396,6 +13434,7 @@ skip_preblock:
SOCKBUF_UNLOCK(&so->so_snd);
goto out_unlocked;
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
asoc, stcb->asoc.total_output_queue_size);
@@ -13466,17 +13505,16 @@ dataless_eof:
goto abort_anyway;
}
/* there is nothing queued to send, so I'm done... */
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
/* only send SHUTDOWN the first time through */
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
@@ -13500,17 +13538,17 @@ dataless_eof:
* data to be sent first and move to
* SHUTDOWN-PENDING
*/
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
if (hold_tcblock == 0) {
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
@@ -13551,12 +13589,12 @@ skip_out_eof:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
/* a collision took us forward? */
queue_only = 0;
} else {
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
queue_only = 1;
}
}
@@ -13761,6 +13799,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro)
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
return (0);
}
+
SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h
index e6222e3f..1b3d22d9 100644
--- a/freebsd/sys/netinet/sctp_output.h
+++ b/freebsd/sys/netinet/sctp_output.h
@@ -74,7 +74,7 @@ int
int
sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro);
-void
+void
sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -117,7 +117,7 @@ void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *);
void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
-void
+void
sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
struct sctphdr *,
uint8_t, uint32_t, uint16_t,
@@ -146,13 +146,13 @@ int
sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct thread *, int);
-void
+void
sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
#endif
);
-void
+void
sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -201,7 +201,7 @@ sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-void
+void
sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
struct sctphdr *, uint32_t, struct mbuf *,
uint8_t, uint32_t, uint16_t,
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index cf993d64..782e5f1d 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -187,6 +187,7 @@ sctp_allocate_vrf(int vrf_id)
SCTP_FREE(vrf, SCTP_M_VRF);
return (NULL);
}
+
/* Add it to the hash table */
bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))];
LIST_INSERT_HEAD(bucket, vrf, next_vrf);
@@ -738,6 +739,7 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
goto out_now;
}
+
#ifdef SCTP_DEBUG
SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
@@ -866,6 +868,7 @@ sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
SCTP_IPI_ADDR_RUNLOCK();
return (0);
}
+
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
if ((loopback_scope == 0) &&
@@ -1027,6 +1030,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
if ((to == NULL) || (from == NULL)) {
return (NULL);
}
+
switch (to->sa_family) {
#ifdef INET
case AF_INET:
@@ -1389,6 +1393,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
if (locked_tcb) {
atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
}
+
SCTP_INP_WUNLOCK(inp);
SCTP_INP_INFO_RUNLOCK();
return (stcb);
@@ -2254,6 +2259,7 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
return (stcb);
}
}
+
if (inp_p) {
stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
1, vrf_id);
@@ -2849,6 +2855,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
return (EINVAL);
}
+
sin = (struct sockaddr_in *)addr;
lport = sin->sin_port;
/*
@@ -3368,14 +3375,14 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
* was not closed. So go ahead and
* start it now.
*/
- asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
}
SCTP_TCB_UNLOCK(asoc);
continue;
}
- if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
+ if (((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) &&
(asoc->asoc.total_output_queue_size == 0)) {
/*
* If we have data in queue, we don't want
@@ -3392,7 +3399,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
}
/* Disconnect the socket please */
asoc->sctp_socket = NULL;
- asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_CLOSED_SOCKET);
if ((asoc->asoc.size_on_reasm_queue > 0) ||
(asoc->asoc.control_pdapi) ||
(asoc->asoc.size_on_all_streams > 0) ||
@@ -3404,8 +3411,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc,
@@ -3419,20 +3426,19 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
goto abort_anyway;
}
- if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
/*
* there is nothing queued to send,
* so I send shutdown
*/
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(asoc);
if (asoc->asoc.alternate) {
netp = asoc->asoc.alternate;
@@ -3448,11 +3454,11 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
}
} else {
/* mark into shutdown pending */
- asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
asoc->asoc.primary_destination);
if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
- asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
@@ -3464,8 +3470,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc,
@@ -3503,6 +3509,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
LIST_REMOVE(inp, sctp_hash);
inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
}
+
/*
* If there is a timer running to kill us, forget it, since it may
* have a contest on the INP lock.. which would cause us to die ...
@@ -3512,7 +3519,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
SCTP_TCB_LOCK(asoc);
if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) {
- asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL);
}
cnt++;
@@ -3520,7 +3527,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
continue;
}
/* Free associations that are NOT killing us */
- if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
struct mbuf *op_err;
@@ -3533,8 +3540,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
SCTP_TCB_UNLOCK(asoc);
continue;
}
- if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE,
@@ -3637,6 +3644,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
(void)sctp_m_free(ip_pcb->inp_options);
ip_pcb->inp_options = 0;
}
+
+
#ifdef INET6
if (ip_pcb->inp_vflag & INP_IPV6) {
struct in6pcb *in6p;
@@ -4797,7 +4806,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
}
/* Now the read queue needs to be cleaned up (only once) */
if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
- stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_ABOUT_TO_BE_FREED);
SCTP_INP_READ_LOCK(inp);
TAILQ_FOREACH(sq, &inp->read_queue, next) {
if (sq->stcb == stcb) {
@@ -4851,7 +4860,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
if ((stcb->asoc.refcnt) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
}
SCTP_TCB_UNLOCK(stcb);
@@ -4864,6 +4873,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
sctp_sorwakeup(inp, so);
sctp_sowwakeup(inp, so);
}
+
#ifdef SCTP_LOG_CLOSING
sctp_log_closing(inp, stcb, 9);
#endif
@@ -4922,6 +4932,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
}
}
}
+
/*
* Make it invalid too, that way if its about to run it will abort
* and return.
@@ -4931,7 +4942,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
atomic_add_int(&stcb->asoc.refcnt, -1);
}
if (stcb->asoc.refcnt) {
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
if (from_inpcbfree == SCTP_NORMAL_PROC) {
SCTP_INP_INFO_WUNLOCK();
@@ -5339,6 +5350,7 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp)
__func__);
continue;
}
+
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
continue;
}
@@ -5752,6 +5764,7 @@ sctp_startup_mcore_threads(void)
i++;
}
}
+
/* Now start them all */
CPU_FOREACH(cpu) {
(void)kproc_create(sctp_mcore_thread,
@@ -6267,7 +6280,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* assoc? straighten out locks.
*/
if (stcb_tmp) {
- if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb_tmp) == SCTP_STATE_COOKIE_WAIT) {
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
@@ -6286,6 +6299,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
}
SCTP_TCB_UNLOCK(stcb_tmp);
}
+
if (stcb->asoc.state == 0) {
/* the assoc was freed? */
return (-12);
@@ -6366,7 +6380,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* assoc? straighten out locks.
*/
if (stcb_tmp) {
- if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb_tmp) == SCTP_STATE_COOKIE_WAIT) {
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
@@ -6708,6 +6722,8 @@ next_param:
if (p_random != NULL) {
keylen = sizeof(*p_random) + random_len;
memcpy(new_key->key, p_random, keylen);
+ } else {
+ keylen = 0;
}
/* append in the AUTH chunks */
if (chunks != NULL) {
@@ -7003,6 +7019,7 @@ sctp_drain_mbufs(struct sctp_tcb *stcb)
if (!fnd) {
asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1;
}
+
/*
* Question, should we go through the delivery queue? The
* only reason things are on here is the app not reading OR
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
index 3fc03399..5b41ae8a 100644
--- a/freebsd/sys/netinet/sctp_pcb.h
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -363,7 +363,7 @@ struct sctp_inpcb {
union {
struct inpcb inp;
char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
- ~SCTP_ALIGNM1];
+ ~SCTP_ALIGNM1];
} ip_inp;
@@ -389,7 +389,7 @@ struct sctp_inpcb {
uint64_t sctp_features; /* Feature flags */
uint32_t sctp_flags; /* INP state flag set */
uint32_t sctp_mobility_features; /* Mobility Feature flags */
- struct sctp_pcb sctp_ep;/* SCTP ep data */
+ struct sctp_pcb sctp_ep; /* SCTP ep data */
/* head of the hash of all associations */
struct sctpasochead *sctp_tcbhash;
u_long sctp_hashmark;
@@ -492,8 +492,7 @@ int SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b);
void sctp_fill_pcbinfo(struct sctp_pcbinfo *);
-struct sctp_ifn *
- sctp_find_ifn(void *ifn, uint32_t ifn_index);
+struct sctp_ifn *sctp_find_ifn(void *ifn, uint32_t ifn_index);
struct sctp_vrf *sctp_allocate_vrf(int vrfid);
struct sctp_vrf *sctp_find_vrf(uint32_t vrfid);
@@ -524,7 +523,7 @@ void sctp_free_ifn(struct sctp_ifn *sctp_ifnp);
void sctp_free_ifa(struct sctp_ifa *sctp_ifap);
-void
+void
sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
uint32_t ifn_index, const char *if_name);
@@ -534,7 +533,7 @@ struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *);
struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t);
-int
+int
sctp_inpcb_bind(struct socket *, struct sockaddr *,
struct sctp_ifa *, struct thread *);
@@ -563,8 +562,7 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **,
struct sockaddr *, struct sctp_nets **, struct sockaddr *,
struct sctp_tcb *);
-struct sctp_tcb *
- sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock);
+struct sctp_tcb *sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock);
struct sctp_tcb *
sctp_findassociation_ep_asocid(struct sctp_inpcb *,
diff --git a/freebsd/sys/netinet/sctp_peeloff.c b/freebsd/sys/netinet/sctp_peeloff.c
index ad96b88c..14a7c381 100644
--- a/freebsd/sys/netinet/sctp_peeloff.c
+++ b/freebsd/sys/netinet/sctp_peeloff.c
@@ -76,7 +76,7 @@ sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id)
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT);
return (ENOENT);
}
- state = SCTP_GET_STATE((&stcb->asoc));
+ state = SCTP_GET_STATE(stcb);
if ((state == SCTP_STATE_EMPTY) ||
(state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
@@ -105,13 +105,15 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
}
- state = SCTP_GET_STATE((&stcb->asoc));
+
+ state = SCTP_GET_STATE(stcb);
if ((state == SCTP_STATE_EMPTY) ||
(state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
}
+
n_inp = (struct sctp_inpcb *)so->so_pcb;
n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
SCTP_PCB_FLAGS_CONNECTED |
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
index d60705b4..c4eafc26 100644
--- a/freebsd/sys/netinet/sctp_structs.h
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -105,7 +105,7 @@ TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
#define SCTP_ASOC_ANY_STATE 0x00000000
typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr,
- uint32_t val);
+ uint32_t val);
typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val);
typedef void (*end_func) (void *ptr, uint32_t val);
@@ -144,7 +144,7 @@ struct sctp_iterator {
asoc_func function_assoc; /* per assoc function */
inp_func function_inp; /* per endpoint function */
inp_func function_inp_end; /* end INP function */
- end_func function_atend;/* iterator completion function */
+ end_func function_atend; /* iterator completion function */
void *pointer; /* pointer for apply func to use */
uint32_t val; /* value for apply func to use */
uint32_t pcb_flags; /* endpoint flags being checked */
@@ -231,7 +231,7 @@ struct rtcc_cc {
uint64_t bw_tot_time; /* The total time since sending began */
uint64_t new_tot_time; /* temp holding the new value */
uint64_t bw_bytes_at_last_rttc; /* What bw_bytes was at last rtt calc */
- uint32_t cwnd_at_bw_set;/* Cwnd at last bw saved - lbw */
+ uint32_t cwnd_at_bw_set; /* Cwnd at last bw saved - lbw */
uint32_t vol_reduce; /* cnt of voluntary reductions */
uint16_t steady_step; /* The number required to be in steady state */
uint16_t step_cnt; /* The current number */
@@ -240,7 +240,8 @@ struct rtcc_cc {
uint8_t use_dccc_ecn; /* Flag to enable DCCC ECN */
uint8_t tls_needs_set; /* Flag to indicate we need to set tls 0 or 1
* means set at send 2 not */
- uint8_t last_step_state;/* Last state if steady state stepdown is on */
+ uint8_t last_step_state; /* Last state if steady state stepdown
+ * is on */
uint8_t rtt_set_this_sack; /* Flag saying this sack had RTT calc
* on it */
uint8_t last_inst_ind; /* Last saved inst indication */
@@ -331,8 +332,8 @@ struct sctp_nets {
uint8_t dscp;
struct timeval start_time; /* time when this net was created */
- uint32_t marked_retrans;/* number or DATA chunks marked for timer
- * based retransmissions */
+ uint32_t marked_retrans; /* number or DATA chunks marked for
+ * timer based retransmissions */
uint32_t marked_fastretrans;
uint32_t heart_beat_delay; /* Heart Beat delay in ms */
@@ -706,28 +707,28 @@ struct sctp_nonpad_sndrcvinfo {
struct sctp_cc_functions {
void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net);
void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb,
- struct sctp_association *asoc,
- int accum_moved, int reneged_all, int will_exit);
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
void (*sctp_cwnd_update_exit_pf) (struct sctp_tcb *stcb, struct sctp_nets *net);
void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb,
- struct sctp_association *asoc);
+ struct sctp_association *asoc);
void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb,
- struct sctp_nets *net, int in_window, int num_pkt_lost);
+ struct sctp_nets *net, int in_window, int num_pkt_lost);
void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb,
- struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
- uint32_t *bottle_bw, uint32_t *on_queue);
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t *bottle_bw, uint32_t *on_queue);
void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb,
- struct sctp_nets *net, int burst_limit);
+ struct sctp_nets *net, int burst_limit);
void (*sctp_cwnd_update_packet_transmitted) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_update_tsn_acknowledged) (struct sctp_nets *net,
- struct sctp_tmit_chunk *);
+ struct sctp_tmit_chunk *);
void (*sctp_cwnd_new_transmission_begins) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
void (*sctp_cwnd_prepare_net_for_sack) (struct sctp_tcb *stcb,
- struct sctp_nets *net);
+ struct sctp_nets *net);
int (*sctp_cwnd_socket_option) (struct sctp_tcb *stcb, int set, struct sctp_cc_option *);
void (*sctp_rtt_calculated) (struct sctp_tcb *, struct sctp_nets *, struct timeval *);
};
@@ -738,25 +739,25 @@ struct sctp_cc_functions {
*/
struct sctp_ss_functions {
void (*sctp_ss_init) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- int holds_lock);
+ int holds_lock);
void (*sctp_ss_clear) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- int clear_values, int holds_lock);
+ int clear_values, int holds_lock);
void (*sctp_ss_init_stream) (struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq);
void (*sctp_ss_add_to_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
+ struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
int (*sctp_ss_is_empty) (struct sctp_tcb *stcb, struct sctp_association *asoc);
void (*sctp_ss_remove_from_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
- struct sctp_stream_out *(*sctp_ss_select_stream) (struct sctp_tcb *stcb,
- struct sctp_nets *net, struct sctp_association *asoc);
+ struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
+struct sctp_stream_out *(*sctp_ss_select_stream) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_association *asoc);
void (*sctp_ss_scheduled) (struct sctp_tcb *stcb, struct sctp_nets *net,
- struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much);
+ struct sctp_association *asoc, struct sctp_stream_out *strq, int moved_how_much);
void (*sctp_ss_packet_done) (struct sctp_tcb *stcb, struct sctp_nets *net,
- struct sctp_association *asoc);
+ struct sctp_association *asoc);
int (*sctp_ss_get_value) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, uint16_t *value);
+ struct sctp_stream_out *strq, uint16_t *value);
int (*sctp_ss_set_value) (struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_stream_out *strq, uint16_t value);
+ struct sctp_stream_out *strq, uint16_t value);
int (*sctp_ss_is_user_msgs_incomplete) (struct sctp_tcb *stcb, struct sctp_association *asoc);
};
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
index f1a8d1d5..a4343cbe 100644
--- a/freebsd/sys/netinet/sctp_sysctl.c
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -411,7 +411,7 @@ sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.total_recvs = inp->total_recvs;
xinpcb.total_nospaces = inp->total_nospaces;
xinpcb.fragmentation_point = inp->sctp_frag_point;
- xinpcb.socket = inp->sctp_socket;
+ xinpcb.socket = (uintptr_t)inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
(!SCTP_IS_LISTENING(inp)) ||
diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c
index c0253840..86ed4d0d 100644
--- a/freebsd/sys/netinet/sctp_timer.c
+++ b/freebsd/sys/netinet/sctp_timer.c
@@ -651,6 +651,7 @@ start_again:
sctp_log_fr(chk->rec.data.tsn, chk->snd_count,
0, SCTP_FR_T3_MARKED);
}
+
if (chk->rec.data.chunk_was_revoked) {
/* deflate the cwnd */
chk->whoTo->cwnd -= chk->book_size;
@@ -717,6 +718,7 @@ start_again:
/* we did not subtract the same things? */
audit_tf = 1;
}
+
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
}
@@ -791,6 +793,7 @@ start_again:
(uint32_t)(uintptr_t)chk->whoTo,
chk->rec.data.tsn);
}
+
sctp_flight_size_increase(chk);
sctp_total_flight_increase(stcb, chk);
}
@@ -911,6 +914,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
(net->flight_size == 0)) {
(*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net);
}
+
/*
* setup the sat loss recovery that prevents satellite cwnd advance.
*/
@@ -939,6 +943,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
}
+
/* Was it our primary? */
if ((stcb->asoc.primary_destination == net) && (alt != net)) {
/*
@@ -959,7 +964,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
* Special case for cookie-echo'ed case, we don't do output but must
* await the COOKIE-ACK before retransmission
*/
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/*
* Here we just reset the timer and start again since we
* have not established the asoc
@@ -1001,7 +1006,7 @@ sctp_t1init_timer(struct sctp_inpcb *inp,
sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
return (0);
}
- if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) {
return (0);
}
if (sctp_threshold_management(inp, stcb, net,
@@ -1049,7 +1054,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
}
}
if (cookie == NULL) {
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
/* FOOBAR! */
struct mbuf *op_err;
@@ -1061,7 +1066,7 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
#ifdef INVARIANTS
panic("Cookie timer expires in wrong state?");
#else
- SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
+ SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(stcb));
return (0);
#endif
}
@@ -1212,6 +1217,7 @@ sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asconf->whoTo = alt;
atomic_add_int(&alt->ref_count, 1);
}
+
/* See if an ECN Echo is also stranded */
TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
if ((chk->whoTo == net) &&
@@ -1554,16 +1560,15 @@ sctp_autoclose_timer(struct sctp_inpcb *inp,
* there is nothing queued to send, so I'm
* done...
*/
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) {
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h
index 8732219c..c91e0414 100644
--- a/freebsd/sys/netinet/sctp_uio.h
+++ b/freebsd/sys/netinet/sctp_uio.h
@@ -258,13 +258,14 @@ struct sctp_snd_all_completes {
/* for the endpoint */
/* The lower four bits is an enumeration of PR-SCTP policies */
-#define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */
-#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
-#define SCTP_PR_SCTP_PRIO 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_NONE 0x0000 /* Reliable transfer */
+#define SCTP_PR_SCTP_TTL 0x0001 /* Time based PR-SCTP */
+#define SCTP_PR_SCTP_PRIO 0x0002 /* Buffer based PR-SCTP */
#define SCTP_PR_SCTP_BUF SCTP_PR_SCTP_PRIO /* For backwards compatibility */
-#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+#define SCTP_PR_SCTP_RTX 0x0003 /* Number of retransmissions based
+ * PR-SCTP */
#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_RTX
-#define SCTP_PR_SCTP_ALL 0x000f/* Used for aggregated stats */
+#define SCTP_PR_SCTP_ALL 0x000f /* Used for aggregated stats */
#define PR_SCTP_POLICY(x) ((x) & 0x0f)
#define PR_SCTP_ENABLED(x) ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \
@@ -744,7 +745,7 @@ struct sctp_prstatus {
struct sctp_cwnd_args {
struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
- uint32_t cwnd_new_value;/* cwnd in k */
+ uint32_t cwnd_new_value; /* cwnd in k */
uint32_t pseudo_cumack;
uint16_t inflight; /* flightsize in k */
uint16_t cwnd_augment; /* increment to it */
@@ -758,9 +759,9 @@ struct sctp_blk_args {
uint32_t onsb; /* in 1k bytes */
uint32_t sndlen; /* len of send being attempted */
uint32_t peer_rwnd; /* rwnd of peer */
- uint16_t send_sent_qcnt;/* chnk cnt */
+ uint16_t send_sent_qcnt; /* chnk cnt */
uint16_t stream_qcnt; /* chnk cnt */
- uint16_t chunks_on_oque;/* chunks out */
+ uint16_t chunks_on_oque; /* chunks out */
uint16_t flight_size; /* flight size in k */
};
@@ -952,7 +953,7 @@ struct sctpstat {
uint32_t sctps_collisionestab;
uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */
uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */
- uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */
+ uint32_t sctps_shutdown; /* sctpStats 5 (Counter32) */
uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */
uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */
uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */
@@ -971,12 +972,12 @@ struct sctpstat {
uint32_t sctps_recvdatagrams; /* total input datagrams */
uint32_t sctps_recvpktwithdata; /* total packets that had data */
uint32_t sctps_recvsacks; /* total input SACK chunks */
- uint32_t sctps_recvdata;/* total input DATA chunks */
+ uint32_t sctps_recvdata; /* total input DATA chunks */
uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */
uint32_t sctps_recvheartbeat; /* total input HB chunks */
uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */
- uint32_t sctps_recvecne;/* total input ECNE chunks */
- uint32_t sctps_recvauth;/* total input AUTH chunks */
+ uint32_t sctps_recvecne; /* total input ECNE chunks */
+ uint32_t sctps_recvauth; /* total input AUTH chunks */
uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */
uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids
* received */
@@ -993,7 +994,7 @@ struct sctpstat {
/* output statistics: */
uint32_t sctps_sendpackets; /* total output packets */
uint32_t sctps_sendsacks; /* total output SACKs */
- uint32_t sctps_senddata;/* total output DATA chunks */
+ uint32_t sctps_senddata; /* total output DATA chunks */
uint32_t sctps_sendretransdata; /* total output retransmitted DATA
* chunks */
uint32_t sctps_sendfastretrans; /* total output fast retransmitted
@@ -1003,8 +1004,8 @@ struct sctpstat {
* chunk (u-del multi-fr
* algo). */
uint32_t sctps_sendheartbeat; /* total output HB chunks */
- uint32_t sctps_sendecne;/* total output ECNE chunks */
- uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */
+ uint32_t sctps_sendecne; /* total output ECNE chunks */
+ uint32_t sctps_sendauth; /* total output AUTH chunks FIXME */
uint32_t sctps_senderrors; /* ip_output error counter */
uint32_t sctps_send_spare; /* formerly sctps_sendnocrc */
uint32_t sctps_sendswcrc;
@@ -1012,8 +1013,8 @@ struct sctpstat {
/* PCKDROPREP statistics: */
uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */
uint32_t sctps_pdrpfehos; /* P-drop from end host */
- uint32_t sctps_pdrpmbda;/* P-drops with data */
- uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
+ uint32_t sctps_pdrpmbda; /* P-drops with data */
+ uint32_t sctps_pdrpmbct; /* P-drops, non-data, non-endhost */
uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep
* only */
uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */
@@ -1024,16 +1025,17 @@ struct sctpstat {
uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */
uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */
uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */
- uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
- uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
+ uint32_t sctps_pdrpbadd; /* P-drop, data did not match TSN */
+ uint32_t sctps_pdrpmark; /* P-drop, TSN's marked for Fast
+ * Retran */
/* timeouts */
uint32_t sctps_timoiterator; /* Number of iterator timers that
* fired */
- uint32_t sctps_timodata;/* Number of T3 data time outs */
+ uint32_t sctps_timodata; /* Number of T3 data time outs */
uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers
* that fired */
- uint32_t sctps_timoinit;/* Number of INIT timers that fired */
- uint32_t sctps_timosack;/* Number of sack timers that fired */
+ uint32_t sctps_timoinit; /* Number of INIT timers that fired */
+ uint32_t sctps_timosack; /* Number of sack timers that fired */
uint32_t sctps_timoshutdown; /* Number of shutdown timers that
* fired */
uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that
@@ -1175,14 +1177,11 @@ struct xsctp_inpcb {
uint16_t local_port;
uint16_t qlen_old;
uint16_t maxqlen_old;
- void *socket;
+ uint16_t __spare16;
+ kvaddr_t socket;
uint32_t qlen;
uint32_t maxqlen;
-#if defined(__LP64__)
- uint32_t extra_padding[27]; /* future */
-#else
- uint32_t extra_padding[28]; /* future */
-#endif
+ uint32_t extra_padding[26]; /* future */
};
struct xsctp_tcb {
@@ -1192,7 +1191,7 @@ struct xsctp_tcb {
uint32_t state; /* sctpAssocEntry 8 */
uint32_t in_streams; /* sctpAssocEntry 9 */
uint32_t out_streams; /* sctpAssocEntry 10 */
- uint32_t max_nr_retrans;/* sctpAssocEntry 11 */
+ uint32_t max_nr_retrans; /* sctpAssocEntry 11 */
uint32_t primary_process; /* sctpAssocEntry 12 */
uint32_t T1_expireries; /* sctpAssocEntry 13 */
uint32_t T2_expireries; /* sctpAssocEntry 14 */
@@ -1305,37 +1304,37 @@ void sctp_freeladdrs(struct sockaddr *);
int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendmsg(int, const void *, size_t, const struct sockaddr *,
socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
/* deprecated */
-ssize_t
+ssize_t
sctp_send(int, const void *, size_t,
const struct sctp_sndrcvinfo *, int);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendx(int, const void *, size_t, struct sockaddr *,
int, struct sctp_sndrcvinfo *, int);
/* deprecated */
-ssize_t
+ssize_t
sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
/* deprecated */
-ssize_t
+ssize_t
sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
struct sctp_sndrcvinfo *, int *);
-ssize_t
+ssize_t
sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
int, void *, socklen_t, unsigned int, int);
-ssize_t
+ssize_t
sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
socklen_t *, void *, socklen_t *, unsigned int *, int *);
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index 071d44c2..b519971c 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -391,6 +391,7 @@ sctp_getcred(SYSCTL_HANDLER_ARGS)
SCTP_INP_DECR_REF(inp);
goto cred_can_cont;
}
+
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
error = ENOENT;
goto out;
@@ -431,6 +432,7 @@ sctp_abort(struct socket *so)
if (inp == NULL) {
return;
}
+
sctp_must_try_again:
flags = inp->sctp_flags;
#ifdef SCTP_LOG_CLOSING
@@ -704,8 +706,7 @@ sctp_disconnect(struct socket *so)
if (((so->so_options & SO_LINGER) &&
(so->so_linger == 0)) ||
(so->so_rcv.sb_cc > 0)) {
- if (SCTP_GET_STATE(asoc) !=
- SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) {
/* Left with Data unread */
struct mbuf *op_err;
@@ -714,8 +715,8 @@ sctp_disconnect(struct socket *so)
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
}
SCTP_INP_RUNLOCK(inp);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
@@ -730,17 +731,16 @@ sctp_disconnect(struct socket *so)
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
@@ -773,11 +773,11 @@ sctp_disconnect(struct socket *so)
netp = stcb->asoc.primary_destination;
}
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
netp);
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -789,8 +789,8 @@ sctp_disconnect(struct socket *so)
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
SCTP_INP_RUNLOCK(inp);
@@ -921,9 +921,9 @@ sctp_shutdown(struct socket *so)
SCTP_INP_RUNLOCK(inp);
return (0);
}
- if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
+ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_ECHOED) &&
+ (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN)) {
/*
* If we are not in or before ESTABLISHED, there is
* no protocol action required.
@@ -937,7 +937,7 @@ sctp_shutdown(struct socket *so)
} else {
netp = stcb->asoc.primary_destination;
}
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) &&
TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->stream_queue_cnt == 0)) {
@@ -946,8 +946,7 @@ sctp_shutdown(struct socket *so)
}
/* there is nothing queued to send, so I'm done... */
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT);
sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown(stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
@@ -957,9 +956,9 @@ sctp_shutdown(struct socket *so)
* We still got (or just got) data to send, so set
* SHUTDOWN_PENDING.
*/
- SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
- SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -1369,11 +1368,13 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
}
+
if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
(sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
SCTP_INP_RLOCK(inp);
stcb = LIST_FIRST(&inp->sctp_asoc_list);
@@ -1438,6 +1439,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
goto out_now;
}
}
+
/* FIX ME: do we want to pass in a vrf on the connect call? */
vrf_id = inp->def_vrf_id;
@@ -1457,7 +1459,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
/* move to second address */
switch (sa->sa_family) {
#ifdef INET
@@ -1549,6 +1551,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -2393,6 +2396,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
/* Applies to the specific association */
paddrp->spp_flags = 0;
@@ -3262,6 +3266,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
if (net != NULL) {
thlds->spt_pathmaxrxt = net->failure_threshold;
@@ -3374,6 +3379,7 @@ flags_out:
break;
}
}
+
if (stcb != NULL) {
if (net) {
encaps->sue_port = net->port;
@@ -4252,6 +4258,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
+ } else {
+ inp->auth_supported = 1;
}
SCTP_INP_WUNLOCK(inp);
break;
@@ -4397,6 +4405,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = EINVAL;
break;
}
+
hmaclist = sctp_alloc_hmaclist((uint16_t)shmac->shmac_number_of_idents);
if (hmaclist == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
@@ -4589,6 +4598,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_INP_RUNLOCK(inp);
}
+
}
break;
}
@@ -5272,12 +5282,14 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
if (stcb)
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
+
if (stcb != NULL) {
/************************TCB SPECIFIC SET ******************/
if (net != NULL) {
@@ -5413,6 +5425,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
net->failure_threshold = paddrp->spp_pathmaxrxt;
}
}
+
if (paddrp->spp_flags & SPP_HB_ENABLE) {
if (paddrp->spp_hbinterval != 0) {
stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
@@ -5523,6 +5536,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (paddrp->spp_pathmaxrxt != 0) {
inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
}
+
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
else if (paddrp->spp_hbinterval != 0) {
@@ -5530,6 +5544,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
}
+
if (paddrp->spp_flags & SPP_HB_ENABLE) {
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
@@ -6482,6 +6497,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
break;
}
}
+
if (stcb != NULL) {
if (net != NULL) {
net->port = encaps->sue_port;
@@ -6865,6 +6881,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return EINVAL;
}
+
switch (addr->sa_family) {
#ifdef INET6
case AF_INET6:
@@ -6970,6 +6987,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
error = EALREADY;
goto out_now;
}
+
vrf_id = inp->def_vrf_id;
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
@@ -6984,7 +7002,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
/* initialize authentication parameters for the assoc */
@@ -6996,6 +7014,7 @@ out_now:
if (create_lock_on) {
SCTP_ASOC_CREATE_UNLOCK(inp);
}
+
SCTP_INP_DECR_REF(inp);
return (error);
}
@@ -7134,6 +7153,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
return (EADDRINUSE);
}
}
+
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
@@ -7201,7 +7221,7 @@ sctp_accept(struct socket *so, struct sockaddr **addr)
SCTP_TCB_LOCK(stcb);
SCTP_INP_RUNLOCK(inp);
store = stcb->asoc.primary_destination->ro._l_addr;
- stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
SCTP_TCB_UNLOCK(stcb);
switch (store.sa.sa_family) {
#ifdef INET
@@ -7336,6 +7356,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
SCTP_TCB_UNLOCK(stcb);
goto notConn;
}
+
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp,
stcb,
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
index 84cbfc88..175888c3 100644
--- a/freebsd/sys/netinet/sctp_var.h
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -341,12 +341,12 @@ int sctp_input(struct mbuf **, int *, int);
void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
void sctp_drain(void);
void sctp_init(void);
-void
+void
sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
uint8_t, uint8_t, uint16_t, uint32_t);
int sctp_flush(struct socket *, int);
int sctp_shutdown(struct socket *);
-int
+int
sctp_bindx(struct socket *, int, struct sockaddr_storage *,
int, int, struct proc *);
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index aad1e19d..c3cb115e 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/in_kdtrace.h>
#include <sys/proc.h>
#ifdef INET6
#include <netinet/icmp6.h>
@@ -1016,7 +1017,7 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc = &stcb->asoc;
/* init all variables to a known value. */
- SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
+ SCTP_SET_STATE(stcb, SCTP_STATE_INUSE);
asoc->max_burst = inp->sctp_ep.max_burst;
asoc->fr_max_burst = inp->sctp_ep.fr_max_burst;
asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
@@ -1435,6 +1436,7 @@ select_a_new_ep:
atomic_add_int(&it->stcb->asoc.refcnt, -1);
iteration_count = 0;
}
+
/* run function on this one */
(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
@@ -1788,6 +1790,7 @@ sctp_timeout_handler(void *t)
if ((stcb == NULL) || (inp == NULL)) {
break;
}
+
if (sctp_cookie_timer(inp, stcb, net)) {
/* no need to unlock on tcb its gone */
goto out_decr;
@@ -1983,6 +1986,7 @@ out_decr:
if (inp) {
SCTP_INP_DECR_REF(inp);
}
+
out_no_decr:
SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type = %d)\n", type);
CURVNET_RESTORE();
@@ -2498,9 +2502,8 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
}
timevalsub(&now, old);
/* store the current RTT in us */
- net->rtt = (uint64_t)1000000 *(uint64_t)now.tv_sec +
- (uint64_t)now.tv_usec;
-
+ net->rtt = (uint64_t)1000000 * (uint64_t)now.tv_sec +
+ (uint64_t)now.tv_usec;
/* compute rtt in ms */
rtt = (int32_t)(net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
@@ -2522,6 +2525,7 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
net->lan_type = SCTP_LAN_LOCAL;
}
}
+
/***************************/
/* 2. update RTTVAR & SRTT */
/***************************/
@@ -2798,7 +2802,7 @@ set_error:
((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
SOCK_LOCK(stcb->sctp_socket);
if (from_peer) {
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
stcb->sctp_socket->so_error = ECONNREFUSED;
} else {
@@ -2806,8 +2810,8 @@ set_error:
stcb->sctp_socket->so_error = ECONNRESET;
}
} else {
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
stcb->sctp_socket->so_error = ETIMEDOUT;
} else {
@@ -2960,6 +2964,7 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
/* event not enabled */
return;
}
+
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
notifhdr_len = sizeof(struct sctp_send_failed_event);
} else {
@@ -3188,6 +3193,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3244,6 +3250,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3352,6 +3359,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3401,6 +3409,7 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
@@ -3557,6 +3566,7 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
+
m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
@@ -3691,8 +3701,8 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
return;
}
- if ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
- (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
(notification == SCTP_NOTIFY_INTERFACE_UP) ||
(notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -3766,16 +3776,16 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
}
case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
- if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
}
break;
case SCTP_NOTIFY_ASSOC_REM_ABORTED:
- if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
@@ -4019,7 +4029,7 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (stcb != NULL) {
/* We have a TCB to abort, send notification too */
sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
/* Ok, now lets free it */
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
@@ -4030,8 +4040,8 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
@@ -4130,13 +4140,13 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
}
return;
} else {
- stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_WAS_ABORTED);
}
/* notify the peer */
sctp_send_abort_tcb(stcb, op_err, so_locked);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
- if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
/* notify the ulp */
@@ -4971,6 +4981,7 @@ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
if (holds_lock == 0) {
SCTP_INP_RLOCK(inp);
}
+
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL)
continue;
@@ -5060,6 +5071,7 @@ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
SCTP_IPI_ADDR_RUNLOCK();
return (NULL);
}
+
hash_of_addr = sctp_get_ifa_hash_val(addr);
hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
@@ -5121,9 +5133,8 @@ sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t *freed_so_far, int hold_rlock,
atomic_add_int(&stcb->asoc.refcnt, 1);
- if (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED |
- SCTP_STATE_SHUTDOWN_RECEIVED |
- SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED | SCTP_STATE_SHUTDOWN_RECEIVED))) {
/* Pre-check If we are freeing no update */
goto no_lock;
}
@@ -5184,6 +5195,7 @@ out:
if (so && r_unlocked && hold_rlock) {
SCTP_INP_READ_LOCK(stcb->sctp_ep);
}
+
SCTP_INP_DECR_REF(stcb->sctp_ep);
no_lock:
atomic_add_int(&stcb->asoc.refcnt, -1);
@@ -5233,6 +5245,7 @@ sctp_sorecvmsg(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
return (EINVAL);
}
+
if (msg_flags) {
in_flags = *msg_flags;
if (in_flags & MSG_PEEK)
@@ -5276,6 +5289,8 @@ sctp_sorecvmsg(struct socket *so,
sctp_misc_ints(SCTP_SORECV_ENTERPL,
rwnd_req, block_allowed, so->so_rcv.sb_cc, (uint32_t)uio->uio_resid);
}
+
+
error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
if (error) {
goto release_unlocked;
@@ -5385,6 +5400,7 @@ restart_nosblocks:
hold_rlock = 0;
goto restart;
}
+
if ((control->length == 0) &&
(control->do_not_ref_stcb)) {
/*
@@ -5568,6 +5584,7 @@ found_one:
control->do_not_ref_stcb == 0) {
stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
}
+
/* First lets get off the sinfo and sockaddr info */
if ((sinfo != NULL) && (filling_sinfo != 0)) {
sinfo->sinfo_stream = control->sinfo_stream;
@@ -5729,6 +5746,7 @@ get_more_data:
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
goto release;
}
+
if ((control->do_not_ref_stcb == 0) && stcb &&
stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
no_rcv_needed = 1;
@@ -5941,6 +5959,7 @@ wait_some_more:
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
goto release;
}
+
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
goto release;
@@ -6069,6 +6088,7 @@ release:
SOCKBUF_UNLOCK(&so->so_rcv);
hold_sblock = 0;
}
+
sbunlock(&so->so_rcv);
sockbuf_lock = 0;
@@ -6106,6 +6126,7 @@ out:
if (sockbuf_lock) {
sbunlock(&so->so_rcv);
}
+
if (freecnt_applied) {
/*
* The lock on the socket buffer protects us so the free
@@ -6703,6 +6724,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb)
SCTP_IPI_ADDR_RUNLOCK();
return (0);
}
+
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
/*
* bound all case: go through all ifns on the vrf
@@ -7362,3 +7384,49 @@ sctp_hc_get_mtu(union sctp_sockstore *addr, uint16_t fibnum)
}
return ((uint32_t)tcp_hc_getmtu(&inc));
}
+
+void
+sctp_set_state(struct sctp_tcb *stcb, int new_state)
+{
+#if defined(KDTRACE_HOOKS)
+ int old_state = stcb->asoc.state;
+#endif
+
+ KASSERT((new_state & ~SCTP_STATE_MASK) == 0,
+ ("sctp_set_state: Can't set substate (new_state = %x)",
+ new_state));
+ stcb->asoc.state = (stcb->asoc.state & ~SCTP_STATE_MASK) | new_state;
+ if ((new_state == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (new_state == SCTP_STATE_SHUTDOWN_SENT) ||
+ (new_state == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING);
+ }
+#if defined(KDTRACE_HOOKS)
+ if (((old_state & SCTP_STATE_MASK) != new_state) &&
+ !(((old_state & SCTP_STATE_MASK) == SCTP_STATE_EMPTY) &&
+ (new_state == SCTP_STATE_INUSE))) {
+ SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
+ }
+#endif
+}
+
+void
+sctp_add_substate(struct sctp_tcb *stcb, int substate)
+{
+#if defined(KDTRACE_HOOKS)
+ int old_state = stcb->asoc.state;
+#endif
+
+ KASSERT((substate & SCTP_STATE_MASK) == 0,
+ ("sctp_add_substate: Can't set state (substate = %x)",
+ substate));
+ stcb->asoc.state |= substate;
+#if defined(KDTRACE_HOOKS)
+ if (((substate & SCTP_STATE_ABOUT_TO_BE_FREED) &&
+ ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) ||
+ ((substate & SCTP_STATE_SHUTDOWN_PENDING) &&
+ ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) {
+ SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state);
+ }
+#endif
+}
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index 61d34591..c12fb210 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -72,11 +72,9 @@ int32_t
uint32_t
sctp_get_ifa_hash_val(struct sockaddr *addr);
-struct sctp_ifa *
- sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
+struct sctp_ifa *sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
-struct sctp_ifa *
- sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
+struct sctp_ifa *sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
@@ -147,13 +145,11 @@ struct sctp_paramhdr *
sctp_get_next_param(struct mbuf *, int,
struct sctp_paramhdr *, int);
-struct mbuf *
- sctp_add_pad_tombuf(struct mbuf *, int);
+struct mbuf *sctp_add_pad_tombuf(struct mbuf *, int);
-struct mbuf *
- sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+struct mbuf *sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
-void
+void
sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -168,7 +164,7 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
-void
+void
sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
SCTP_UNUSED
@@ -177,7 +173,7 @@ sctp_report_all_outbound(struct sctp_tcb *, uint16_t, int, int
int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
-void
+void
sctp_abort_notification(struct sctp_tcb *, uint8_t, uint16_t,
struct sctp_abort_chunk *, int
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
@@ -203,7 +199,7 @@ sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *,
#endif
);
-void
+void
sctp_handle_ootb(struct mbuf *, int, int,
struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_inpcb *,
@@ -211,7 +207,7 @@ sctp_handle_ootb(struct mbuf *, int, int,
uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-int
+int
sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
int totaddr, int *error);
@@ -224,8 +220,7 @@ int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
#ifdef INET6
uint32_t sctp_is_same_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
-struct sockaddr_in6 *
- sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+struct sockaddr_in6 *sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
#define sctp_recover_scope_mac(addr, store) do { \
if ((addr->sin6_family == AF_INET6) && \
@@ -258,11 +253,11 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
struct mbuf *sctp_generate_cause(uint16_t, char *);
struct mbuf *sctp_generate_no_user_data_cause(uint32_t);
-void
+void
sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
struct sockaddr *sa, sctp_assoc_t assoc_id,
uint32_t vrf_id, int *error, void *p);
-void
+void
sctp_bindx_delete_address(struct sctp_inpcb *inp,
struct sockaddr *sa, sctp_assoc_t assoc_id,
uint32_t vrf_id, int *error);
@@ -393,5 +388,7 @@ void sctp_audit_log(uint8_t, uint8_t);
uint32_t sctp_min_mtu(uint32_t, uint32_t, uint32_t);
void sctp_hc_set_mtu(union sctp_sockstore *, uint16_t, uint32_t);
uint32_t sctp_hc_get_mtu(union sctp_sockstore *, uint16_t);
+void sctp_set_state(struct sctp_tcb *, int);
+void sctp_add_substate(struct sctp_tcb *, int);
#endif /* _KERNEL */
#endif
diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c
index d1de3f33..f2e3d875 100644
--- a/freebsd/sys/netinet/tcp_hostcache.c
+++ b/freebsd/sys/netinet/tcp_hostcache.c
@@ -114,10 +114,10 @@ __FBSDID("$FreeBSD$");
#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */
#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */
-static VNET_DEFINE(struct tcp_hostcache, tcp_hostcache);
+VNET_DEFINE_STATIC(struct tcp_hostcache, tcp_hostcache);
#define V_tcp_hostcache VNET(tcp_hostcache)
-static VNET_DEFINE(struct callout, tcp_hc_callout);
+VNET_DEFINE_STATIC(struct callout, tcp_hc_callout);
#define V_tcp_hc_callout VNET(tcp_hc_callout)
static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
diff --git a/freebsd/sys/netinet/tcp_hpts.h b/freebsd/sys/netinet/tcp_hpts.h
index c52a1d78..04c86769 100644
--- a/freebsd/sys/netinet/tcp_hpts.h
+++ b/freebsd/sys/netinet/tcp_hpts.h
@@ -238,10 +238,10 @@ int
#define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
void
tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked);
+ int32_t tlen, int32_t drop_hdrlen, uint8_t iptos);
int
__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line);
+ int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line);
#define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__)
uint16_t tcp_hpts_delayedby(struct inpcb *inp);
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index 20bea2de..2c6c3048 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -585,6 +585,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int rstreason = 0; /* For badport_bandlim accounting purposes */
uint8_t iptos;
struct m_tag *fwd_tag = NULL;
+ struct epoch_tracker et;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
@@ -775,7 +776,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
@@ -962,25 +963,10 @@ findpcb:
*
* XXXRW: It may be time to rethink timewait locking.
*/
-relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -991,7 +977,7 @@ relocked:
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1028,23 +1014,8 @@ relocked:
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
!IS_FASTOPEN(tp->t_flags)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- inp = NULL;
- goto findpcb;
- } else if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- inp = NULL;
- goto findpcb;
- }
- goto relocked;
- } else
- ti_locked = TI_RLOCKED;
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ ti_locked = TI_RLOCKED;
}
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
@@ -1082,6 +1053,8 @@ relocked:
#ifdef INET6
if (isipv6) {
inc.inc_flags |= INC_ISIPV6;
+ if (inp->inp_inc.inc_flags & INC_IPV6MINMTU)
+ inc.inc_flags |= INC_IPV6MINMTU;
inc.inc6_faddr = ip6->ip6_src;
inc.inc6_laddr = ip6->ip6_dst;
} else
@@ -1176,9 +1149,11 @@ tfo_socket_result:
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
+ TCP_PROBE5(receive, NULL, tp, m, tp, th);
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
- iptos, ti_locked);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ iptos);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1382,7 +1357,7 @@ tfo_socket_result:
* Only the listen socket is unlocked by syncache_add().
*/
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1416,15 +1391,16 @@ tfo_socket_result:
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1448,7 +1424,7 @@ dropunlock:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1535,8 +1511,7 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
- struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
- int ti_locked)
+ struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
{
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win;
@@ -1562,7 +1537,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
-
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
@@ -1571,19 +1545,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
- "SYN/FIN/RST/!EST", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- } else {
-#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
}
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
@@ -1717,10 +1679,19 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(to.to_flags & TOF_SACKPERM) == 0)
tp->t_flags &= ~TF_SACK_PERMIT;
if (IS_FASTOPEN(tp->t_flags)) {
- if (to.to_flags & TOF_FASTOPEN)
- tcp_fastopen_update_cache(tp, to.to_mss,
+ if (to.to_flags & TOF_FASTOPEN) {
+ uint16_t mss;
+
+ if (to.to_flags & TOF_MSS)
+ mss = to.to_mss;
+ else
+ if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
+ mss = TCP6_MSS;
+ else
+ mss = TCP_MSS;
+ tcp_fastopen_update_cache(tp, mss,
to.to_tfo_len, to.to_tfo_cookie);
- else
+ } else
tcp_fastopen_disable_path(tp);
}
}
@@ -1767,7 +1738,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_nxt == tp->snd_max &&
tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
- LIST_EMPTY(&tp->t_segq) &&
+ SEGQ_EMPTY(tp) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
@@ -1792,10 +1763,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
TCPSTAT_INC(tcps_predack);
/*
@@ -1899,10 +1866,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
/* Clean receiver SACK report if present */
if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
@@ -2104,8 +2067,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
- "ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -2180,9 +2141,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED,
- ("%s: TH_RST ti_locked %d, th %p tp %p",
- __func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -2225,8 +2183,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
- KASSERT(ti_locked == TI_RLOCKED,
- ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn);
@@ -2340,8 +2296,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
- "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
@@ -2457,6 +2411,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* SYN-RECEIVED* -> FIN-WAIT-1
*/
tp->t_starttime = ticks;
+ if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2464,16 +2428,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_state_change(tp, TCPS_ESTABLISHED);
TCP_PROBE5(accept__established, NULL, tp,
m, tp, th);
- if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
- tcp_fastopen_decrement_counter(tp->t_tfo_pending);
- tp->t_tfo_pending = NULL;
-
- /*
- * Account for the ACK of our SYN prior to
- * regular ACK processing below.
- */
- tp->snd_una++;
- }
/*
* TFO connections call cc_conn_init() during SYN
* processing. Calling it again here for such
@@ -2490,7 +2444,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* later; if not, do so now to pass queued data to user.
*/
if (tlen == 0 && (thflags & TH_FIN) == 0)
- (void) tcp_reass(tp, (struct tcphdr *)0, 0,
+ (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
/* FALLTHROUGH */
@@ -2931,7 +2885,6 @@ process_ACK:
if (ourfinisacked) {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -3068,7 +3021,7 @@ dodata: /* XXX */
* fast retransmit can work).
*/
if (th->th_seq == tp->rcv_nxt &&
- LIST_EMPTY(&tp->t_segq) &&
+ SEGQ_EMPTY(tp) &&
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn)
@@ -3093,7 +3046,7 @@ dodata: /* XXX */
* m_adj() doesn't actually frees any mbufs
* when trimming from the head.
*/
- thflags = tcp_reass(tp, th, &tlen, m);
+ thflags = tcp_reass(tp, th, &save_start, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
@@ -3163,19 +3116,11 @@ dodata: /* XXX */
*/
case TCPS_FIN_WAIT_2:
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
- "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
- ti_locked));
tcp_twstart(tp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
@@ -3190,9 +3135,6 @@ dodata: /* XXX */
(void) tp->t_fb->tfb_tcp_output(tp);
check_delack:
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
- __func__, ti_locked));
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) {
@@ -3230,10 +3172,6 @@ dropafterack:
&tcp_savetcp, 0);
#endif
TCP_PROBE3(debug__input, tp, th, m);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
tp->t_flags |= TF_ACKNOW;
(void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
@@ -3241,10 +3179,6 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
-
if (tp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(tp->t_inpcb);
@@ -3253,15 +3187,6 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
-#endif
-
/*
* Drop space held by incoming segment and return.
*/
diff --git a/freebsd/sys/netinet/tcp_log_buf.h b/freebsd/sys/netinet/tcp_log_buf.h
index 58713fe5..e569395a 100644
--- a/freebsd/sys/netinet/tcp_log_buf.h
+++ b/freebsd/sys/netinet/tcp_log_buf.h
@@ -94,7 +94,7 @@ struct tcp_log_bbr {
uint16_t flex7;
uint8_t bbr_state;
uint8_t bbr_substate;
- uint8_t inpacer;
+ uint8_t inhpts;
uint8_t ininput;
uint8_t use_lt_bw;
uint8_t flex8;
@@ -217,7 +217,9 @@ enum tcp_log_events {
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
- TCP_LOG_END /* End (keep at end) 49 */
+ BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
+ TCP_LOG_REASS, /* Reassembly buffer logging 50 */
+ TCP_LOG_END /* End (keep at end) 51 */
};
enum tcp_log_states {
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index bdbfe984..8f83440d 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -145,18 +145,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat, CTLFLAG_VNET | CTLFLAG_R
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
-#ifdef TCP_HHOOK
-static void inline hhook_run_tcp_est_out(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to,
- uint32_t len, int tso);
-#endif
static void inline cc_after_idle(struct tcpcb *tp);
#ifdef TCP_HHOOK
/*
* Wrapper for the TCP established output helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
struct tcpopt *to, uint32_t len, int tso)
{
@@ -197,6 +192,8 @@ tcp_output(struct tcpcb *tp)
int32_t len;
uint32_t recwin, sendwin;
int off, flags, error = 0; /* Keep compiler happy */
+ u_int if_hw_tsomaxsegcount = 0;
+ u_int if_hw_tsomaxsegsize;
struct mbuf *m;
struct ip *ip = NULL;
#ifdef TCPDEBUG
@@ -233,13 +230,15 @@ tcp_output(struct tcpcb *tp)
#endif
/*
- * For TFO connections in SYN_RECEIVED, only allow the initial
- * SYN|ACK and those sent by the retransmit timer.
+ * For TFO connections in SYN_SENT or SYN_RECEIVED,
+ * only allow the initial SYN or SYN|ACK and those sent
+ * by the retransmit timer.
*/
if (IS_FASTOPEN(tp->t_flags) &&
- (tp->t_state == TCPS_SYN_RECEIVED) &&
- SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
- (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED)) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
return (0);
/*
@@ -867,9 +866,6 @@ send:
if (tso) {
u_int if_hw_tsomax;
- u_int if_hw_tsomaxsegcount;
- u_int if_hw_tsomaxsegsize;
- struct mbuf *mb;
u_int moff;
int max_len;
@@ -901,65 +897,6 @@ send:
len = max_len;
}
}
-
- /*
- * Check if we should limit by maximum segment
- * size and count:
- */
- if (if_hw_tsomaxsegcount != 0 &&
- if_hw_tsomaxsegsize != 0) {
- /*
- * Subtract one segment for the LINK
- * and TCP/IP headers mbuf that will
- * be prepended to this mbuf chain
- * after the code in this section
- * limits the number of mbufs in the
- * chain to if_hw_tsomaxsegcount.
- */
- if_hw_tsomaxsegcount -= 1;
- max_len = 0;
- mb = sbsndmbuf(&so->so_snd, off, &moff);
-
- while (mb != NULL && max_len < len) {
- u_int mlen;
- u_int frags;
-
- /*
- * Get length of mbuf fragment
- * and how many hardware frags,
- * rounded up, it would use:
- */
- mlen = (mb->m_len - moff);
- frags = howmany(mlen,
- if_hw_tsomaxsegsize);
-
- /* Handle special case: Zero Length Mbuf */
- if (frags == 0)
- frags = 1;
-
- /*
- * Check if the fragment limit
- * will be reached or exceeded:
- */
- if (frags >= if_hw_tsomaxsegcount) {
- max_len += min(mlen,
- if_hw_tsomaxsegcount *
- if_hw_tsomaxsegsize);
- break;
- }
- max_len += mlen;
- if_hw_tsomaxsegcount -= frags;
- moff = 0;
- mb = mb->m_next;
- }
- if (max_len <= 0) {
- len = 0;
- } else if (len > max_len) {
- sendalot = 1;
- len = max_len;
- }
- }
-
/*
* Prevent the last segment from being
* fractional unless the send sockbuf can be
@@ -994,7 +931,6 @@ send:
*/
if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
-
} else {
len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
@@ -1029,6 +965,7 @@ send:
*/
if (len) {
struct mbuf *mb;
+ struct sockbuf *msb;
u_int moff;
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
@@ -1062,14 +999,30 @@ send:
* Start the m_copy functions from the closest mbuf
* to the offset in the socket buffer chain.
*/
- mb = sbsndptr(&so->so_snd, off, len, &moff);
-
+ mb = sbsndptr_noadv(&so->so_snd, off, &moff);
if (len <= MHLEN - hdrlen - max_linkhdr) {
m_copydata(mb, moff, len,
mtod(m, caddr_t) + hdrlen);
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ sbsndptr_adv(&so->so_snd, mb, len);
m->m_len += len;
} else {
- m->m_next = m_copym(mb, moff, len, M_NOWAIT);
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ msb = NULL;
+ else
+ msb = &so->so_snd;
+ m->m_next = tcp_m_copym(mb, moff,
+ &len, if_hw_tsomaxsegcount,
+ if_hw_tsomaxsegsize, msb);
+ if (len <= (tp->t_maxseg - optlen)) {
+ /*
+ * Must have ran out of mbufs for the copy
+ * shorten it to no longer need tso. Lets
+ * not put on sendalot since we are low on
+ * mbufs.
+ */
+ tso = 0;
+ }
if (m->m_next == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
@@ -1853,6 +1806,144 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
return (optlen);
}
+/*
+ * This is a copy of m_copym(), taking the TSO segment size/limit
+ * constraints into account, and advancing the sndptr as it goes.
+ */
+struct mbuf *
+tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb)
+{
+ struct mbuf *n, **np;
+ struct mbuf *top;
+ int32_t off = off0;
+ int32_t len = *plen;
+ int32_t fragsize;
+ int32_t len_cp = 0;
+ int32_t *pkthdrlen;
+ uint32_t mlen, frags;
+ bool copyhdr;
+
+
+ KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
+ KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
+ if (off == 0 && m->m_flags & M_PKTHDR)
+ copyhdr = true;
+ else
+ copyhdr = false;
+ while (off > 0) {
+ KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
+ if (off < m->m_len)
+ break;
+ off -= m->m_len;
+ if ((sb) && (m == sb->sb_sndptr)) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ m = m->m_next;
+ }
+ np = &top;
+ top = NULL;
+ pkthdrlen = NULL;
+ while (len > 0) {
+ if (m == NULL) {
+ KASSERT(len == M_COPYALL,
+ ("tcp_m_copym, length > size of mbuf chain"));
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+ mlen = min(len, m->m_len - off);
+ if (seglimit) {
+ /*
+ * For M_NOMAP mbufs, add 3 segments
+ * + 1 in case we are crossing page boundaries
+ * + 2 in case the TLS hdr/trailer are used
+ * It is cheaper to just add the segments
+ * than it is to take the cache miss to look
+ * at the mbuf ext_pgs state in detail.
+ */
+ if (m->m_flags & M_NOMAP) {
+ fragsize = min(segsize, PAGE_SIZE);
+ frags = 3;
+ } else {
+ fragsize = segsize;
+ frags = 0;
+ }
+
+ /* Break if we really can't fit anymore. */
+ if ((frags + 1) >= seglimit) {
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+
+ /*
+ * Reduce size if you can't copy the whole
+ * mbuf. If we can't copy the whole mbuf, also
+ * adjust len so the loop will end after this
+ * mbuf.
+ */
+ if ((frags + howmany(mlen, fragsize)) >= seglimit) {
+ mlen = (seglimit - frags - 1) * fragsize;
+ len = mlen;
+ *plen = len_cp + len;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = *plen;
+ }
+ frags += howmany(mlen, fragsize);
+ if (frags == 0)
+ frags++;
+ seglimit -= frags;
+ KASSERT(seglimit > 0,
+ ("%s: seglimit went too low", __func__));
+ }
+ if (copyhdr)
+ n = m_gethdr(M_NOWAIT, m->m_type);
+ else
+ n = m_get(M_NOWAIT, m->m_type);
+ *np = n;
+ if (n == NULL)
+ goto nospace;
+ if (copyhdr) {
+ if (!m_dup_pkthdr(n, m, M_NOWAIT))
+ goto nospace;
+ if (len == M_COPYALL)
+ n->m_pkthdr.len -= off0;
+ else
+ n->m_pkthdr.len = len;
+ pkthdrlen = &n->m_pkthdr.len;
+ copyhdr = false;
+ }
+ n->m_len = mlen;
+ len_cp += n->m_len;
+ if (m->m_flags & M_EXT) {
+ n->m_data = m->m_data + off;
+ mb_dupcl(n, m);
+ } else
+ bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+ (u_int)n->m_len);
+
+ if (sb && (sb->sb_sndptr == m) &&
+ ((n->m_len + off) >= m->m_len) && m->m_next) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ off = 0;
+ if (len != M_COPYALL) {
+ len -= n->m_len;
+ }
+ m = m->m_next;
+ np = &n->m_next;
+ }
+ return (top);
+nospace:
+ m_freem(top);
+ return (NULL);
+}
+
void
tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
{
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index dbb61299..4776a808 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -74,15 +74,37 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_log_buf.h>
+#include <netinet/tcp_hpts.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+#define TCP_R_LOG_ADD 1
+#define TCP_R_LOG_LIMIT_REACHED 2
+#define TCP_R_LOG_APPEND 3
+#define TCP_R_LOG_PREPEND 4
+#define TCP_R_LOG_REPLACE 5
+#define TCP_R_LOG_MERGE_INTO 6
+#define TCP_R_LOG_NEW_ENTRY 7
+#define TCP_R_LOG_READ 8
+#define TCP_R_LOG_ZERO 9
+#define TCP_R_LOG_DUMP 10
+#define TCP_R_LOG_TRIM 11
+
+/* For debugging we want counters and BB logging */
+/* #define TCP_REASS_COUNTERS 1 */
+/* #define TCP_REASS_LOGGING 1 */
+
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
+static SYSCTL_NODE(_net_inet_tcp_reass, OID_AUTO, stats, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly stats");
+
+
static int tcp_reass_maxseg = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
&tcp_reass_maxseg, 0,
@@ -93,6 +115,77 @@ SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, 0,
&tcp_reass_zone,
"Global number of TCP Segments currently in Reassembly Queue");
+/*
+ * Ceiling on the number of entries in one connection's reassembly queue.
+ * tcp_reass() uses min((sb_hiwat / t_maxseg) + 1, tcp_reass_maxqueuelen)
+ * as the t_segqlen limit when tcp_new_limits is zero.
+ */
+static u_int tcp_reass_maxqueuelen = 100;
+SYSCTL_UINT(_net_inet_tcp_reass, OID_AUTO, maxqueuelen, CTLFLAG_RWTUN,
+ &tcp_reass_maxqueuelen, 0,
+ "Maximum number of TCP Segments per Reassembly Queue");
+
+/*
+ * Selects the limit applied before a new queue entry is created in
+ * tcp_reass(): non-zero uses the tcp_reass_queue_guard check, zero uses
+ * the sb_hiwat / tcp_reass_maxqueuelen segment-count limit.
+ */
+static int tcp_new_limits = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, new_limit, CTLFLAG_RWTUN,
+ &tcp_new_limits, 0,
+ "Do we use the new limit method we are discussing?");
+
+/*
+ * Guard threshold used when tcp_new_limits is set: once t_segqlen is
+ * above this value, a segment that needs a new queue entry and is
+ * shorter than MSIZE bytes is dropped.
+ * NOTE(review): the sibling sysctls pass 0 as the argument after the
+ * pointer; confirm the 16 here is intentional.
+ */
+static u_int tcp_reass_queue_guard = 16;
+SYSCTL_UINT(_net_inet_tcp_reass, OID_AUTO, queueguard, CTLFLAG_RWTUN,
+ &tcp_reass_queue_guard, 16,
+ "Number of TCP Segments in Reassembly Queue where we flip over to guard mode");
+
+#ifdef TCP_REASS_COUNTERS
+
+/*
+ * Debug counters for the reassembly code, exported read-only below the
+ * "stats" sysctl node.  They are allocated in tcp_reass_global_init() and
+ * only exist when TCP_REASS_COUNTERS is defined.  The path counters follow
+ * the branches of tcp_reass():
+ *   entry    - tcp_reass() was called with a segment (th != NULL)
+ *   path1    - segment starts inside, or right at the end of, the last entry
+ *   path2    - segment starts beyond the end of the last entry
+ *   path3    - segment is prepended to the first entry
+ *   path4    - segment overlaps or abuts the preceding entry
+ *   path5    - the overlapping front of the segment was trimmed (within path4)
+ *   path6    - segment runs over the following entry instead
+ *   path7    - segment covers the following entry completely (within path6)
+ *   fullwalk - the queue had to be walked to find the insertion point
+ *   nospace  - segment dropped by the sb_mbmax room check
+ */
+
+counter_u64_t reass_entry;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, entry, CTLFLAG_RD,
+ &reass_entry, "A segment entered reassembly ");
+
+counter_u64_t reass_path1;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path1, CTLFLAG_RD,
+ &reass_path1, "Took path 1");
+
+counter_u64_t reass_path2;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path2, CTLFLAG_RD,
+ &reass_path2, "Took path 2");
+
+counter_u64_t reass_path3;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path3, CTLFLAG_RD,
+ &reass_path3, "Took path 3");
+
+counter_u64_t reass_path4;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path4, CTLFLAG_RD,
+ &reass_path4, "Took path 4");
+
+counter_u64_t reass_path5;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path5, CTLFLAG_RD,
+ &reass_path5, "Took path 5");
+
+counter_u64_t reass_path6;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path6, CTLFLAG_RD,
+ &reass_path6, "Took path 6");
+
+counter_u64_t reass_path7;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, path7, CTLFLAG_RD,
+ &reass_path7, "Took path 7");
+
+counter_u64_t reass_fullwalk;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, fullwalk, CTLFLAG_RD,
+ &reass_fullwalk, "Took a full walk ");
+
+counter_u64_t reass_nospace;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, nospace, CTLFLAG_RD,
+ &reass_nospace, "Had no mbuf capacity ");
+
+/* Incremented at the end of tcp_reass_merge_forward() when ent had a successor. */
+counter_u64_t merge_fwd;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, merge_fwd, CTLFLAG_RD,
+ &merge_fwd, "Ran merge fwd");
+
+/* Incremented on every call to tcp_reass_merge_into(). */
+counter_u64_t merge_into;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, merge_into, CTLFLAG_RD,
+ &merge_into, "Ran merge into");
+
+/* Incremented when tcp_reass() drops a zero-length segment without FIN. */
+counter_u64_t tcp_zero_input;
+SYSCTL_COUNTER_U64(_net_inet_tcp_reass_stats, OID_AUTO, zero_input, CTLFLAG_RD,
+ &tcp_zero_input, "The reassembly buffer saw a zero len segment etc");
+
+#endif
+
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
@@ -104,6 +197,77 @@ tcp_reass_zone_change(void *tag)
tcp_reass_maxseg);
}
+#ifdef TCP_REASS_LOGGING
+
+/*
+ * Record one TCP_LOG_REASS log event describing a reassembly queue
+ * operation.  Nothing is logged when tp->t_logstate is
+ * TCP_LOG_STATE_OFF.
+ *
+ *  tp        connection the event belongs to
+ *  q, p      queue entries involved; either may be NULL.  For a non-NULL
+ *            entry its start, length, mbuf count and flags are copied
+ *            into the record, and the pointer value itself is recorded.
+ *  seq       sequence number, stored in flex1
+ *  len       length handed to TCP_LOG_EVENTP()
+ *  action    one of the TCP_R_LOG_* codes, stored in flex8
+ *  instance  caller-supplied value, stored in flex7
+ */
+static void
+tcp_log_reassm(struct tcpcb *tp, struct tseg_qent *q, struct tseg_qent *p,
+    tcp_seq seq, int len, uint8_t action, int instance)
+{
+	union tcp_log_stackspecific log;
+	struct timeval tv;
+	uint32_t cts;
+
+	if (tp->t_logstate == TCP_LOG_STATE_OFF)
+		return;
+
+	memset(&log, 0, sizeof(log));
+	cts = tcp_get_usecs(&tv);
+	log.u_bbr.flex1 = seq;
+	/*
+	 * Convert the pointers through uintptr_t: casting a pointer
+	 * straight to uint64_t is a size-mismatched cast on targets whose
+	 * pointers are narrower than 64 bits.
+	 */
+	log.u_bbr.cur_del_rate = (uint64_t)(uintptr_t)q;
+	log.u_bbr.delRate = (uint64_t)(uintptr_t)p;
+	if (q != NULL) {
+		log.u_bbr.flex2 = q->tqe_start;
+		log.u_bbr.flex3 = q->tqe_len;
+		log.u_bbr.flex4 = q->tqe_mbuf_cnt;
+		log.u_bbr.hptsi_gain = q->tqe_flags;
+	}
+	if (p != NULL) {
+		log.u_bbr.flex5 = p->tqe_start;
+		log.u_bbr.pkts_out = p->tqe_len;
+		log.u_bbr.epoch = p->tqe_mbuf_cnt;
+		log.u_bbr.cwnd_gain = p->tqe_flags;
+	}
+	log.u_bbr.flex6 = tp->t_segqmbuflen;
+	log.u_bbr.flex7 = instance;
+	log.u_bbr.flex8 = action;
+	log.u_bbr.timeStamp = cts;
+	TCP_LOG_EVENTP(tp, NULL,
+	    &tp->t_inpcb->inp_socket->so_rcv,
+	    &tp->t_inpcb->inp_socket->so_snd,
+	    TCP_LOG_REASS, 0,
+	    len, &log, false, &tv);
+}
+
+/*
+ * Log every entry currently on tp's reassembly queue as a
+ * TCP_R_LOG_DUMP event, in queue order.  Does nothing when
+ * tp->t_logstate is TCP_LOG_STATE_OFF.
+ */
+static void
+tcp_reass_log_dump(struct tcpcb *tp)
+{
+	struct tseg_qent *q;
+
+	if (tp->t_logstate == TCP_LOG_STATE_OFF)
+		return;
+	TAILQ_FOREACH(q, &tp->t_segq, tqe_q) {
+		tcp_log_reassm(tp, q, NULL, q->tqe_start, q->tqe_len,
+		    TCP_R_LOG_DUMP, 0);
+	}
+}
+
+/*
+ * Log an event for the mbuf chain m: the m_len values of the whole chain
+ * are summed and passed to tcp_log_reassm() as its "instance" argument,
+ * with logval as the action code and q as the queue entry.
+ */
+static void
+tcp_reass_log_new_in(struct tcpcb *tp, tcp_seq seq, int len, struct mbuf *m,
+    int logval, struct tseg_qent *q)
+{
+	struct mbuf *cur;
+	int chain_bytes = 0;
+
+	for (cur = m; cur != NULL; cur = cur->m_next)
+		chain_bytes += cur->m_len;
+	tcp_log_reassm(tp, q, NULL, seq, len, logval, chain_bytes);
+}
+
+#endif
+
void
tcp_reass_global_init(void)
{
@@ -116,8 +280,24 @@ tcp_reass_global_init(void)
/* Set the zone limit and read back the effective value. */
tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
tcp_reass_maxseg);
+#ifdef TCP_REASS_COUNTERS
+ reass_path1 = counter_u64_alloc(M_WAITOK);
+ reass_path2 = counter_u64_alloc(M_WAITOK);
+ reass_path3 = counter_u64_alloc(M_WAITOK);
+ reass_path4 = counter_u64_alloc(M_WAITOK);
+ reass_path5 = counter_u64_alloc(M_WAITOK);
+ reass_path6 = counter_u64_alloc(M_WAITOK);
+ reass_path7 = counter_u64_alloc(M_WAITOK);
+ reass_fullwalk = counter_u64_alloc(M_WAITOK);
+ reass_nospace = counter_u64_alloc(M_WAITOK);
+ reass_entry = counter_u64_alloc(M_WAITOK);
+ merge_fwd = counter_u64_alloc(M_WAITOK);
+ merge_into = counter_u64_alloc(M_WAITOK);
+ tcp_zero_input = counter_u64_alloc(M_WAITOK);
+#endif
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+
}
void
@@ -127,32 +307,237 @@ tcp_reass_flush(struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
- while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
- LIST_REMOVE(qe, tqe_q);
+ while ((qe = TAILQ_FIRST(&tp->t_segq)) != NULL) {
+ TAILQ_REMOVE(&tp->t_segq, qe, tqe_q);
m_freem(qe->tqe_m);
uma_zfree(tcp_reass_zone, qe);
tp->t_segqlen--;
}
-
+ tp->t_segqmbuflen = 0;
KASSERT((tp->t_segqlen == 0),
("TCP reass queue %p segment count is %d instead of 0 after flush.",
tp, tp->t_segqlen));
}
+/*
+ * Extend queue entry "last" with the mbuf chain m..mlast, which carries
+ * tlen bytes of data and lenofoh bytes of mbuf overhead.  A FIN on the
+ * new segment is remembered in the entry and the out-of-order packet and
+ * byte statistics are updated.  tp->t_segqmbuflen is not changed here.
+ */
+static void
+tcp_reass_append(struct tcpcb *tp, struct tseg_qent *last,
+    struct mbuf *m, struct tcphdr *th, int tlen,
+    struct mbuf *mlast, int lenofoh)
+{
+
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, last, NULL, th->th_seq, tlen, TCP_R_LOG_APPEND, 0);
+#endif
+	/* Hook the new chain onto the current tail, then move the tail. */
+	last->tqe_last->m_next = m;
+	last->tqe_last = mlast;
+
+	/* Account for the added payload and mbuf overhead. */
+	last->tqe_len += tlen;
+	last->tqe_m->m_pkthdr.len += tlen;
+	last->tqe_mbuf_cnt += lenofoh;
+
+	/* Keep a FIN carried by the appended segment. */
+	last->tqe_flags |= (th->th_flags & TH_FIN);
+
+	TCPSTAT_INC(tcps_rcvoopack);
+	TCPSTAT_ADD(tcps_rcvoobyte, tlen);
+	tp->t_rcvoopack++;
+#ifdef TCP_REASS_LOGGING
+	tcp_reass_log_new_in(tp, last->tqe_start, lenofoh, last->tqe_m,
+	    TCP_R_LOG_APPEND,
+	    last);
+#endif
+}
+
+/*
+ * Put the mbuf chain m..mlast (tlen bytes starting at th->th_seq, lenofoh
+ * bytes of mbuf overhead) in front of queue entry "first".  If the new
+ * data reaches past first->tqe_start, the overlapping bytes are trimmed
+ * from the front of the old chain before the two are joined.
+ * tp->t_segqmbuflen is not changed here.
+ */
+static void
+tcp_reass_prepend(struct tcpcb *tp, struct tseg_qent *first, struct mbuf *m, struct tcphdr *th,
+    int tlen, struct mbuf *mlast, int lenofoh)
+{
+	tcp_seq new_end;
+	int overlap;
+
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, first, NULL, th->th_seq, tlen, TCP_R_LOG_PREPEND, 0);
+#endif
+	new_end = th->th_seq + tlen;
+	if (SEQ_GT(new_end, first->tqe_start)) {
+		/* New data runs into the old entry: drop the duplicate front. */
+		overlap = new_end - first->tqe_start;
+#ifdef TCP_REASS_LOGGING
+		tcp_log_reassm(tp, first, NULL, 0, overlap, TCP_R_LOG_TRIM, 1);
+#endif
+		m_adj(first->tqe_m, overlap);
+		first->tqe_start += overlap;
+		first->tqe_len -= overlap;
+	}
+	/* Chain the old data behind the new data; m becomes the head. */
+	mlast->m_next = first->tqe_m;
+	first->tqe_m = m;
+	first->tqe_start = th->th_seq;
+	first->tqe_len += tlen;
+	m->m_pkthdr.len = first->tqe_len;
+	first->tqe_mbuf_cnt += lenofoh;
+
+	TCPSTAT_INC(tcps_rcvoopack);
+	TCPSTAT_ADD(tcps_rcvoobyte, tlen);
+	tp->t_rcvoopack++;
+#ifdef TCP_REASS_LOGGING
+	tcp_reass_log_new_in(tp, first->tqe_start, lenofoh, first->tqe_m,
+	    TCP_R_LOG_PREPEND,
+	    first);
+#endif
+}
+
+/*
+ * Discard the data held by queue entry q and make the entry describe the
+ * new segment instead: chain m..mlast, len bytes starting at seq, mbufoh
+ * bytes of mbuf overhead.  tp->t_segqmbuflen is adjusted from the old
+ * overhead to the new one.  Only the FIN bit of flags is kept.
+ */
+static void
+tcp_reass_replace(struct tcpcb *tp, struct tseg_qent *q, struct mbuf *m,
+    tcp_seq seq, int len, struct mbuf *mlast, int mbufoh, uint8_t flags)
+{
+	int grown;
+
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, q, NULL, seq, len, TCP_R_LOG_REPLACE, 0);
+#endif
+	/* Release the old chain and take its overhead out of the total. */
+	m_freem(q->tqe_m);
+	KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt,
+	    ("Tp:%p seg queue goes negative", tp));
+	tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+
+	/* Only bytes beyond the old length count as new out-of-order data. */
+	grown = (len > q->tqe_len) ? (len - q->tqe_len) : 0;
+
+	q->tqe_m = m;
+	q->tqe_last = mlast;
+	q->tqe_start = seq;
+	q->tqe_len = len;
+	q->tqe_mbuf_cnt = mbufoh;
+	q->tqe_flags = (flags & TH_FIN);
+	m->m_pkthdr.len = len;
+
+	tp->t_rcvoopack++;
+	TCPSTAT_INC(tcps_rcvoopack);
+	TCPSTAT_ADD(tcps_rcvoobyte, grown);
+	tp->t_segqmbuflen += mbufoh;
+
+}
+
+/*
+ * Fold queue entry q into ent: q's mbuf chain is linked behind ent's,
+ * q's length, mbuf count and FIN flag are added to ent, and q is then
+ * unlinked from the queue and returned to tcp_reass_zone.
+ * tp->t_segqmbuflen is not changed here.
+ */
+static void
+tcp_reass_merge_into(struct tcpcb *tp, struct tseg_qent *ent,
+    struct tseg_qent *q)
+{
+#ifdef TCP_REASS_LOGGING
+	tcp_log_reassm(tp, q, ent, 0, 0, TCP_R_LOG_MERGE_INTO, 0);
+#endif
+#ifdef TCP_REASS_COUNTERS
+	counter_u64_add(merge_into, 1);
+#endif
+	/* Carry q's accounting and FIN flag over to ent. */
+	ent->tqe_len += q->tqe_len;
+	ent->tqe_m->m_pkthdr.len += q->tqe_len;
+	ent->tqe_mbuf_cnt += q->tqe_mbuf_cnt;
+	ent->tqe_flags |= (q->tqe_flags & TH_FIN);
+
+	/* Splice q's chain onto ent's tail; q's tail becomes ent's tail. */
+	ent->tqe_last->m_next = q->tqe_m;
+	ent->tqe_last = q->tqe_last;
+
+	/* q no longer owns any data: drop it from the queue and free it. */
+	TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+	tp->t_segqlen--;
+	uma_zfree(tcp_reass_zone, q);
+
+}
+
+/*
+ * Absorb into ent every following queue entry that ent reaches.  Entries
+ * that lie completely inside ent's range are freed; an entry that is only
+ * partly covered has its overlapping front trimmed and is then merged
+ * into ent with tcp_reass_merge_into().  The walk stops at the first
+ * entry that starts beyond the current end of ent.
+ */
+static void
+tcp_reass_merge_forward(struct tcpcb *tp, struct tseg_qent *ent)
+{
+ struct tseg_qent *q, *qtmp;
+ int i;
+ tcp_seq max;
+ /*
+ * Given an entry merge forward anyplace
+ * that ent overlaps forward.
+ */
+
+ /* max is the sequence number just past the data held by ent. */
+ max = ent->tqe_start + ent->tqe_len;
+ q = TAILQ_NEXT(ent, tqe_q);
+ /*
+ * NOTE(review): presumably this early return also keeps the
+ * _FROM_SAFE iteration below from being started with a NULL
+ * cursor - confirm against queue(3).
+ */
+ if (q == NULL) {
+ /* Nothing left */
+ return;
+ }
+ /* The _SAFE variant is used because q may be removed inside the loop. */
+ TAILQ_FOREACH_FROM_SAFE(q, &tp->t_segq, tqe_q, qtmp) {
+ if (SEQ_GT(q->tqe_start, max)) {
+ /* Beyond q */
+ break;
+ }
+ /* We have some or all that are overlapping */
+ if (SEQ_GEQ(max, (q->tqe_start + q->tqe_len))) {
+ /* It consumes it all */
+ /* q's data is dropped, so its overhead leaves the total. */
+ tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+ m_freem(q->tqe_m);
+ TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ continue;
+ }
+ /*
+ * Trim the q entry to dovetail to this one
+ * and then merge q into ent updating max
+ * in the process.
+ */
+ /* i is the number of bytes at the front of q already held by ent. */
+ i = max - q->tqe_start;
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, q, NULL, 0, i, TCP_R_LOG_TRIM, 2);
+#endif
+ m_adj(q->tqe_m, i);
+ q->tqe_len -= i;
+ q->tqe_start += i;
+ tcp_reass_merge_into(tp, ent, q);
+ max = ent->tqe_start + ent->tqe_len;
+ }
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(merge_fwd, 1);
+#endif
+}
+
+/*
+ * Add up the memory charged for the mbuf chain starting at m: MSIZE for
+ * every mbuf, plus m_ext.ext_size for each mbuf that has M_EXT set.
+ * The last mbuf of the chain is stored in *mlast.  m is dereferenced
+ * unconditionally, so it must not be NULL.
+ */
+static int
+tcp_reass_overhead_of_chain(struct mbuf *m, struct mbuf **mlast)
+{
+	struct mbuf *cur = m;
+	int overhead = 0;
+
+	for (;;) {
+		overhead += MSIZE;
+		if (cur->m_flags & M_EXT)
+			overhead += cur->m_ext.ext_size;
+		if (cur->m_next == NULL)
+			break;
+		cur = cur->m_next;
+	}
+	*mlast = cur;
+	return (overhead);
+}
+
+
+/*
+ * NOTE!!! the new tcp-reassembly code *must not* use
+ * m_adj() with a negative index. That alters the chain
+ * of mbufs (by possibly chopping trailing mbufs). At
+ * the front of tcp_reass we count the mbuf overhead
+ * and setup the tail pointer. If we use m_adj(m, -5)
+ * we could corrupt the tail pointer. Currently the
+ * code only uses m_adj(m, positive-num). If this
+ * changes appropriate changes to update mlast would
+ * be needed.
+ */
int
-tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
+ int *tlenp, struct mbuf *m)
{
- struct tseg_qent *q;
+ struct tseg_qent *q, *last, *first;
struct tseg_qent *p = NULL;
- struct tseg_qent *nq;
+ struct tseg_qent *nq = NULL;
struct tseg_qent *te = NULL;
+ struct tseg_qent tqs;
+ struct mbuf *mlast = NULL;
+ struct sockbuf *sb;
struct socket *so = tp->t_inpcb->inp_socket;
char *s = NULL;
- int flags;
- struct tseg_qent tqs;
+ int flags, i, lenofoh;
INP_WLOCK_ASSERT(tp->t_inpcb);
-
/*
* XXX: tcp_reass() is rather inefficient with its data structures
* and should be rewritten (see NetBSD for optimizations).
@@ -164,149 +549,475 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
*/
if (th == NULL)
goto present;
-
+ KASSERT(SEQ_GEQ(th->th_seq, tp->rcv_nxt),
+ ("Attempt to add old entry to reassembly queue (th=%p, tp=%p)",
+ th, tp));
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, th->th_seq, *tlenp, m, TCP_R_LOG_ADD, NULL);
+#endif
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_entry, 1);
+#endif
/*
- * Limit the number of segments that can be queued to reduce the
- * potential for mbuf exhaustion. For best performance, we want to be
- * able to queue a full window's worth of segments. The size of the
- * socket receive buffer determines our advertised window and grows
- * automatically when socket buffer autotuning is enabled. Use it as the
- * basis for our queue limit.
- * Always let the missing segment through which caused this queue.
- * NB: Access to the socket buffer is left intentionally unlocked as we
- * can tolerate stale information here.
- *
- * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
- * should work but causes packets to be dropped when they shouldn't.
- * Investigate why and re-evaluate the below limit after the behaviour
- * is understood.
+ * Check for zero length data.
+ */
+ if ((*tlenp == 0) && ((th->th_flags & TH_FIN) == 0)) {
+ /*
+ * A zero length segment does no
+ * one any good. We could check
+ * the rcv_nxt <-> rcv_wnd but that's
+ * already done for us by the caller.
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(tcp_zero_input, 1);
+#endif
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
+ }
+ /*
+ * Will it fit?
*/
- if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
- tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
+ lenofoh = tcp_reass_overhead_of_chain(m, &mlast);
+ sb = &tp->t_inpcb->inp_socket->so_rcv;
+ if ((sb->sb_mbcnt + tp->t_segqmbuflen + lenofoh) > sb->sb_mbmax) {
+ /* No room */
TCPSTAT_INC(tcps_rcvreassfull);
- *tlenp = 0;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
- log(LOG_DEBUG, "%s; %s: queue limit reached, "
- "segment dropped\n", s, __func__);
- free(s, M_TCPLOG);
- }
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_nospace, 1);
+#endif
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, NULL, NULL, th->th_seq, lenofoh, TCP_R_LOG_LIMIT_REACHED, 0);
+#endif
m_freem(m);
+ *tlenp = 0;
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
return (0);
}
-
/*
- * Allocate a new queue entry. If we can't, or hit the zone limit
- * just drop the pkt.
- *
- * Use a temporary structure on the stack for the missing segment
- * when the zone is exhausted. Otherwise we may get stuck.
+ * First let's deal with two common cases, the
+ * segment appends to the back of our collected
+ * segments. Or the segment is the next in line.
*/
- te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
- if (te == NULL) {
- if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
- TCPSTAT_INC(tcps_rcvmemdrop);
- m_freem(m);
+ last = TAILQ_LAST_FAST(&tp->t_segq, tseg_qent, tqe_q);
+ if (last != NULL) {
+ if ((th->th_flags & TH_FIN) &&
+ SEQ_LT((th->th_seq + *tlenp), (last->tqe_start + last->tqe_len))) {
+ /*
+ * Someone is trying to game us, dump
+ * the segment.
+ */
*tlenp = 0;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
- NULL))) {
- log(LOG_DEBUG, "%s; %s: global zone limit "
- "reached, segment dropped\n", s, __func__);
- free(s, M_TCPLOG);
+ m_freem(m);
+ return (0);
+ }
+ if ((SEQ_GEQ(th->th_seq, last->tqe_start)) &&
+ (SEQ_GEQ((last->tqe_start + last->tqe_len), th->th_seq))) {
+ /* Common case, trailing segment is added */
+ /**
+ * +--last
+ * v
+ * reassembly buffer |---| |---| |---|
+ * new segment |---|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path1, 1);
+#endif
+ if (SEQ_GT((last->tqe_start + last->tqe_len), th->th_seq)) {
+ i = (last->tqe_start + last->tqe_len) - th->th_seq;
+ if (i < *tlenp) {
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, last, NULL, 0, i, TCP_R_LOG_TRIM, 3);
+ th->th_seq += i;
+#endif
+ m_adj(m, i);
+ *tlenp -= i;
+ } else {
+ /* Complete overlap */
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
+ m_freem(m);
+ *tlenp = last->tqe_len;
+ *seq_start = last->tqe_start;
+ return (0);
+ }
+ }
+ if (last->tqe_flags & TH_FIN) {
+ /*
+ * We have data after the FIN on the last?
+ */
+ *tlenp = 0;
+ m_freem(m);
+ return(0);
}
+ tcp_reass_append(tp, last, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
+ *seq_start = last->tqe_start;
+ *tlenp = last->tqe_len;
return (0);
- } else {
- bzero(&tqs, sizeof(struct tseg_qent));
- te = &tqs;
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
- NULL))) {
- log(LOG_DEBUG,
- "%s; %s: global zone limit reached, using "
- "stack for missing segment\n", s, __func__);
- free(s, M_TCPLOG);
+ } else if (SEQ_GT(th->th_seq, (last->tqe_start + last->tqe_len))) {
+ /*
+ * Second common case, we missed
+ * another one and have something more
+ * for the end.
+ */
+ /**
+ * +--last
+ * v
+ * reassembly buffer |---| |---| |---|
+ * new segment |---|
+ */
+ if (last->tqe_flags & TH_FIN) {
+ /*
+ * We have data after the FIN on the last?
+ */
+ *tlenp = 0;
+ m_freem(m);
+ return(0);
}
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path2, 1);
+#endif
+ p = last;
+ goto new_entry;
}
+ } else {
+ /* First segment (it's NULL). */
+ goto new_entry;
}
- tp->t_segqlen++;
+ first = TAILQ_FIRST(&tp->t_segq);
+ if (SEQ_LT(th->th_seq, first->tqe_start) &&
+ SEQ_GEQ((th->th_seq + *tlenp),first->tqe_start) &&
+ SEQ_LT((th->th_seq + *tlenp), (first->tqe_start + first->tqe_len))) {
+ /*
+ * The head of the queue is prepended by this and
+ * it may be the one I want most.
+ */
+ /**
+ * first-------+
+ * v
+ * rea: |---| |---| |---|
+ * new: |---|
+ * Note the case we do not deal with here is:
+ * rea= |---| |---| |---|
+ * new= |----|
+ * Due to the fact that it could be
+ * new |--------------------|
+ * And we might need to merge forward.
+ */
+#ifdef INVARIANTS
+ struct mbuf *firstmbuf;
+#endif
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path3, 1);
+#endif
+ if (SEQ_LT(th->th_seq, tp->rcv_nxt)) {
+ /*
+ * The resend was even before
+ * what we have. We need to trim it.
+ * Note TSNH (it should be trimmed
+ * before the call to tcp_reass()).
+ */
+#ifdef INVARIANTS
+ panic("th->th_seq:%u rcv_nxt:%u tp:%p not pre-trimmed",
+ th->th_seq, tp->rcv_nxt, tp);
+#else
+ i = tp->rcv_nxt - th->th_seq;
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, first, NULL, 0, i, TCP_R_LOG_TRIM, 4);
+#endif
+ m_adj(m, i);
+ th->th_seq += i;
+ *tlenp -= i;
+#endif
+ }
+#ifdef INVARIANTS
+ firstmbuf = first->tqe_m;
+#endif
+ tcp_reass_prepend(tp, first, m, th, *tlenp, mlast, lenofoh);
+#ifdef INVARIANTS
+ if (firstmbuf == first->tqe_m) {
+ panic("First stayed same m:%p foobar:%p first->tqe_m:%p tp:%p first:%p",
+ m, firstmbuf, first->tqe_m, tp, first);
+ } else if (first->tqe_m != m) {
+ panic("First did not change to m:%p foobar:%p first->tqe_m:%p tp:%p first:%p",
+ m, firstmbuf, first->tqe_m, tp, first);
+ }
+#endif
+ tp->t_segqmbuflen += lenofoh;
+ *seq_start = first->tqe_start;
+ *tlenp = first->tqe_len;
+ goto present;
+ } else if (SEQ_LT((th->th_seq + *tlenp), first->tqe_start)) {
+ /* New segment is before our earliest segment. */
+ /**
+ * first---->+
+ * v
+ * rea= |---| ....
+ * new" |---|
+ *
+ */
+ goto new_entry;
+ }
/*
* Find a segment which begins after this one does.
*/
- LIST_FOREACH(q, &tp->t_segq, tqe_q) {
- if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_fullwalk, 1);
+#endif
+ TAILQ_FOREACH(q, &tp->t_segq, tqe_q) {
+ if (SEQ_GT(q->tqe_start, th->th_seq))
break;
- p = q;
}
-
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
+ p = TAILQ_PREV(q, tsegqe_head, tqe_q);
+ /**
+ * Now is this fit just in-between only?
+ * i.e.:
+ * p---+ +----q
+ * v v
+ * res= |--| |--| |--|
+ * nee |-|
+ */
+ if (SEQ_LT((th->th_seq + *tlenp), q->tqe_start) &&
+ ((p == NULL) || (SEQ_GT(th->th_seq, (p->tqe_start + p->tqe_len))))) {
+ /* Yep no overlap */
+ goto new_entry;
+ }
+ /**
+ * If we reach here we have some (possibly all) overlap
+ * such as:
+ * res= |--| |--| |--|
+ * new= |----|
+ * or new= |-----------------|
+ * or new= |--------|
+ * or new= |---|
+ * or new= |-----------|
*/
- if (p != NULL) {
- int i;
+ if ((p != NULL) &&
+ (SEQ_LEQ(th->th_seq, (p->tqe_start + p->tqe_len)))) {
/* conversion to int (in i) handles seq wraparound */
- i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
- if (i > 0) {
+
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path4, 1);
+#endif
+ i = p->tqe_start + p->tqe_len - th->th_seq;
+ if (i >= 0) {
if (i >= *tlenp) {
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |---|
+ * new segment |-|
+ */
TCPSTAT_INC(tcps_rcvduppack);
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
+ *tlenp = p->tqe_len;
+ *seq_start = p->tqe_start;
m_freem(m);
- if (te != &tqs)
- uma_zfree(tcp_reass_zone, te);
- tp->t_segqlen--;
/*
* Try to present any queued data
* at the left window edge to the user.
* This is needed after the 3-WHS
- * completes.
+ * completes. Note this probably
+ * will not work and we will return.
*/
- goto present; /* ??? */
+ return (0);
}
- m_adj(m, i);
- *tlenp -= i;
- th->th_seq += i;
+ if (i > 0) {
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |---|
+ * new segment |-----|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path5, 1);
+#endif
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, p, NULL, 0, i, TCP_R_LOG_TRIM, 5);
+#endif
+ m_adj(m, i);
+ *tlenp -= i;
+ th->th_seq += i;
+ }
+ }
+ if (th->th_seq == (p->tqe_start + p->tqe_len)) {
+ /*
+ * If dovetails in with this one
+ * append it.
+ */
+ /**
+ * prev seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |--|
+ * (note: it was trimmed above if it overlapped)
+ */
+ tcp_reass_append(tp, p, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
+ } else {
+#ifdef INVARIANTS
+ panic("Impossible cut th_seq:%u p->seq:%u(%d) p:%p tp:%p",
+ th->th_seq, p->tqe_start, p->tqe_len,
+ p, tp);
+#endif
+ *tlenp = 0;
+ m_freem(m);
+ return (0);
+ }
+ q = p;
+ } else {
+ /*
+ * The new data runs over the
+ * top of previously sack'd data (in q).
+ * It may be partially overlapping, or
+ * it may overlap the entire segment.
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path6, 1);
+#endif
+ if (SEQ_GEQ((th->th_seq + *tlenp), (q->tqe_start + q->tqe_len))) {
+ /* It consumes it all */
+ /**
+ * next seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |----------|
+ */
+#ifdef TCP_REASS_COUNTERS
+ counter_u64_add(reass_path7, 1);
+#endif
+ tcp_reass_replace(tp, q, m, th->th_seq, *tlenp, mlast, lenofoh, th->th_flags);
+ } else {
+ /*
+ * We just need to prepend the data
+ * to this. It does not overrun
+ * the end.
+ */
+ /**
+ * next seg---->+
+ * v
+ * reassembly buffer |--| |---|
+ * new segment |----------|
+ */
+ tcp_reass_prepend(tp, q, m, th, *tlenp, mlast, lenofoh);
+ tp->t_segqmbuflen += lenofoh;
}
}
- tp->t_rcvoopack++;
- TCPSTAT_INC(tcps_rcvoopack);
- TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
+ /* Now does it go further than that? */
+ tcp_reass_merge_forward(tp, q);
+ *seq_start = q->tqe_start;
+ *tlenp = q->tqe_len;
+ goto present;
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
+ /*
+ * When we reach here we can't combine it
+ * with any existing segment.
+ *
+ * Limit the number of segments that can be queued to reduce the
+ * potential for mbuf exhaustion. For best performance, we want to be
+ * able to queue a full window's worth of segments. The size of the
+ * socket receive buffer determines our advertised window and grows
+ * automatically when socket buffer autotuning is enabled. Use it as the
+ * basis for our queue limit.
+ *
+ * However, allow the user to specify a ceiling for the number of
+ * segments in each queue.
+ *
+ * Always let the missing segment through which caused this queue.
+ * NB: Access to the socket buffer is left intentionally unlocked as we
+ * can tolerate stale information here.
+ *
+ * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
+ * should work but causes packets to be dropped when they shouldn't.
+ * Investigate why and re-evaluate the below limit after the behaviour
+ * is understood.
*/
- while (q) {
- int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
- if (i <= 0)
- break;
- if (i < q->tqe_len) {
- q->tqe_th->th_seq += i;
- q->tqe_len -= i;
- m_adj(q->tqe_m, i);
- break;
+new_entry:
+ if (tcp_new_limits) {
+ if ((tp->t_segqlen > tcp_reass_queue_guard) &&
+ (*tlenp < MSIZE)) {
+ /*
+ * This is really a lie, we are not full but
+ * are getting a segment that is above
+ * guard threshold. If it is and its below
+ * a mbuf size (256) we drop it if it
+ * can't fill in some place.
+ */
+ TCPSTAT_INC(tcps_rcvreassfull);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: queue limit reached, "
+ "segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
}
+ } else {
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- m_freem(q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- q = nq;
+ if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
+ tp->t_segqlen >= min((so->so_rcv.sb_hiwat / tp->t_maxseg) + 1,
+ tcp_reass_maxqueuelen)) {
+ TCPSTAT_INC(tcps_rcvreassfull);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: queue limit reached, "
+ "segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ m_freem(m);
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
+ return (0);
+ }
}
-
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
+ if (te == NULL) {
+ TCPSTAT_INC(tcps_rcvmemdrop);
+ m_freem(m);
+ *tlenp = 0;
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
+ NULL))) {
+ log(LOG_DEBUG, "%s; %s: global zone limit "
+ "reached, segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ return (0);
+ }
+ tp->t_segqlen++;
+ tp->t_rcvoopack++;
+ TCPSTAT_INC(tcps_rcvoopack);
+ TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
/* Insert the new segment queue entry into place. */
te->tqe_m = m;
- te->tqe_th = th;
+ te->tqe_flags = th->th_flags;
te->tqe_len = *tlenp;
-
+ te->tqe_start = th->th_seq;
+ te->tqe_last = mlast;
+ te->tqe_mbuf_cnt = lenofoh;
+ tp->t_segqmbuflen += te->tqe_mbuf_cnt;
if (p == NULL) {
- LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+ TAILQ_INSERT_HEAD(&tp->t_segq, te, tqe_q);
} else {
- KASSERT(te != &tqs, ("%s: temporary stack based entry not "
- "first element in queue", __func__));
- LIST_INSERT_AFTER(p, te, tqe_q);
+ TAILQ_INSERT_AFTER(&tp->t_segq, p, te, tqe_q);
}
-
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, th->th_seq, *tlenp, m, TCP_R_LOG_NEW_ENTRY, te);
+#endif
present:
/*
* Present data to user, advancing rcv_nxt through
@@ -314,24 +1025,56 @@ present:
*/
if (!TCPS_HAVEESTABLISHED(tp->t_state))
return (0);
- q = LIST_FIRST(&tp->t_segq);
- if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+ q = TAILQ_FIRST(&tp->t_segq);
+ KASSERT(q == NULL || SEQ_GEQ(q->tqe_start, tp->rcv_nxt),
+ ("Reassembly queue for %p has stale entry at head", tp));
+ if (!q || q->tqe_start != tp->rcv_nxt) {
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
return (0);
+ }
SOCKBUF_LOCK(&so->so_rcv);
do {
tp->rcv_nxt += q->tqe_len;
- flags = q->tqe_th->th_flags & TH_FIN;
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ flags = q->tqe_flags & TH_FIN;
+ nq = TAILQ_NEXT(q, tqe_q);
+ TAILQ_REMOVE(&tp->t_segq, q, tqe_q);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
m_freem(q->tqe_m);
- else
+ } else {
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_new_in(tp, q->tqe_start, q->tqe_len, q->tqe_m, TCP_R_LOG_READ, q);
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 1);
+#endif
sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
- if (q != &tqs)
+ }
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 2);
+#endif
+ KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt,
+ ("tp:%p seg queue goes negative", tp));
+ tp->t_segqmbuflen -= q->tqe_mbuf_cnt;
+ if (q != &tqs)
uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
- } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+ } while (q && q->tqe_start == tp->rcv_nxt);
+ if (TAILQ_EMPTY(&tp->t_segq) &&
+ (tp->t_segqmbuflen != 0)) {
+#ifdef INVARIANTS
+ panic("tp:%p segq:%p len:%d queue empty",
+ tp, &tp->t_segq, tp->t_segqmbuflen);
+#else
+#ifdef TCP_REASS_LOGGING
+ tcp_log_reassm(tp, NULL, NULL, th->th_seq, *tlenp, TCP_R_LOG_ZERO, 0);
+#endif
+ tp->t_segqmbuflen = 0;
+#endif
+ }
+#ifdef TCP_REASS_LOGGING
+ tcp_reass_log_dump(tp);
+#endif
sorwakeup_locked(so);
return (flags);
}
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index 787213b0..4852ffaf 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -216,13 +216,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
-static VNET_DEFINE(int, icmp_may_rst) = 1;
+VNET_DEFINE_STATIC(int, icmp_may_rst) = 1;
#define V_icmp_may_rst VNET(icmp_may_rst)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_may_rst), 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
-static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
+VNET_DEFINE_STATIC(int, tcp_isn_reseed_interval) = 0;
#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_isn_reseed_interval), 0,
@@ -239,6 +239,10 @@ VNET_DEFINE(uma_zone_t, sack_hole_zone);
VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
#endif
+#define TS_OFFSET_SECRET_LENGTH 32
+VNET_DEFINE_STATIC(u_char, ts_offset_secret[TS_OFFSET_SECRET_LENGTH]);
+#define V_ts_offset_secret VNET(ts_offset_secret)
+
static int tcp_default_fb_init(struct tcpcb *tp);
static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged);
static int tcp_default_handoff_ok(struct tcpcb *tp);
@@ -701,7 +705,7 @@ struct tcpcb_mem {
#endif
};
-static VNET_DEFINE(uma_zone_t, tcpcb_zone);
+VNET_DEFINE_STATIC(uma_zone_t, tcpcb_zone);
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
@@ -949,11 +953,10 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
rw_wunlock(&tcp_function_lock);
VNET_LIST_RLOCK();
- /* XXX handle */
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
if (inp->inp_flags & INP_TIMEWAIT) {
INP_WUNLOCK(inp);
@@ -1099,6 +1102,7 @@ tcp_init(void)
/* Initialize the TCP logging data. */
tcp_log_init();
#endif
+ arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0);
if (tcp_soreceive_stream) {
#ifdef INET
@@ -1629,7 +1633,7 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_vnet = inp->inp_vnet;
#endif
tp->t_timers = &tm->tt;
- /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
+ TAILQ_INIT(&tp->t_segq);
tp->t_maxseg =
#ifdef INET6
isipv6 ? V_tcp_v6mssdflt :
@@ -1723,7 +1727,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
* therefore don't enter the loop below until the connection
* list has stabilised.
*/
- LIST_FOREACH(inp, &V_tcb, inp_list) {
+ CK_LIST_FOREACH(inp, &V_tcb, inp_list) {
INP_WLOCK(inp);
/* Important to skip tcptw structs. */
if (!(inp->inp_flags & INP_TIMEWAIT) &&
@@ -1737,11 +1741,18 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
*/
if (CC_ALGO(tp) == unload_algo) {
tmpalgo = CC_ALGO(tp);
- /* NewReno does not require any init. */
- CC_ALGO(tp) = &newreno_cc_algo;
- /* XXX defer to epoch_call */
if (tmpalgo->cb_destroy != NULL)
tmpalgo->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
+ /*
+ * NewReno may allocate memory on
+ * demand for certain stateful
+ * configuration as needed, but is
+ * coded to never fail on memory
+ * allocation failure so it is a safe
+ * fallback.
+ */
+ CC_ALGO(tp) = &newreno_cc_algo;
}
}
INP_WUNLOCK(inp);
@@ -1893,6 +1904,7 @@ tcp_discardcb(struct tcpcb *tp)
/* Allow the CC algorithm to clean up after itself. */
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
@@ -1922,10 +1934,11 @@ tcp_timer_discard(void *ptp)
{
struct inpcb *inp;
struct tcpcb *tp;
+ struct epoch_tracker et;
tp = (struct tcpcb *)ptp;
CURVNET_SET(tp->t_vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp));
@@ -1945,13 +1958,13 @@ tcp_timer_discard(void *ptp)
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return;
}
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
}
@@ -2024,10 +2037,12 @@ tcp_drain(void)
* useful.
*/
INP_INFO_WLOCK(&V_tcbinfo);
- LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
- if (inpb->inp_flags & INP_TIMEWAIT)
- continue;
+ CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inpb);
+ if (inpb->inp_flags & INP_TIMEWAIT) {
+ INP_WUNLOCK(inpb);
+ continue;
+ }
if ((tcpb = intotcpcb(inpb)) != NULL) {
tcp_reass_flush(tcpb);
tcp_clean_sackreport(tcpb);
@@ -2110,10 +2125,10 @@ static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, m, n, pcb_count;
- struct in_pcblist *il;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the TCB list is too time-consuming and
@@ -2157,12 +2172,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
INP_INFO_WLOCK(&V_tcbinfo);
- for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
- inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
+ for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
+ inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt) {
/*
@@ -2201,10 +2215,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
-
- il->il_count = n;
- il->il_pcbinfo = &V_tcbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (!error) {
/*
@@ -2221,6 +2239,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
@@ -2342,6 +2361,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp;
struct in_conninfo inc;
+ struct epoch_tracker et;
tcp_seq icmp_tcp_seq;
int mtu;
@@ -2373,7 +2393,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2438,7 +2458,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET */
@@ -2456,6 +2476,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
struct in_conninfo inc;
+ struct epoch_tracker et;
struct tcp_ports {
uint16_t th_sport;
uint16_t th_dport;
@@ -2517,7 +2538,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
}
bzero(&t_ports, sizeof(struct tcp_ports));
m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
&ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2589,10 +2610,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET6 */
+/*
+ * Produce a 32-bit keyed hash of a connection's endpoints.
+ *
+ * The foreign port, local port, foreign address and local address taken
+ * from 'inc' are fed to MD5 in that order, followed by the 'len' bytes at
+ * 'key'.  The first 32-bit word of the resulting digest is returned.
+ */
+static uint32_t
+tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len)
+{
+	MD5_CTX md5;
+	uint32_t digest[4];
+
+	MD5Init(&md5);
+	MD5Update(&md5, &inc->inc_fport, sizeof(uint16_t));
+	MD5Update(&md5, &inc->inc_lport, sizeof(uint16_t));
+	/* Exactly one of the two address families below applies. */
+#ifdef INET6
+	if ((inc->inc_flags & INC_ISIPV6) != 0) {
+		MD5Update(&md5, &inc->inc6_faddr, sizeof(struct in6_addr));
+		MD5Update(&md5, &inc->inc6_laddr, sizeof(struct in6_addr));
+	}
+#endif
+#ifdef INET
+	if ((inc->inc_flags & INC_ISIPV6) == 0) {
+		MD5Update(&md5, &inc->inc_faddr, sizeof(struct in_addr));
+		MD5Update(&md5, &inc->inc_laddr, sizeof(struct in_addr));
+	}
+#endif
+	/* The key is mixed in last, after the endpoint data. */
+	MD5Update(&md5, key, len);
+	MD5Final((unsigned char *)digest, &md5);
+
+	return (digest[0]);
+}
+
+/*
+ * Compute the timestamp offset for the connection described by 'inc' by
+ * hashing its endpoints together with the per-vnet ts_offset_secret.
+ */
+uint32_t
+tcp_new_ts_offset(struct in_conninfo *inc)
+{
+	uint32_t offset;
+
+	offset = tcp_keyed_hash(inc, V_ts_offset_secret,
+	    sizeof(V_ts_offset_secret));
+	return (offset);
+}
/*
* Following is where TCP initial sequence number generation occurs.
@@ -2634,19 +2690,20 @@ out:
* as reseeding should not be necessary.
*
* Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
- * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In
+ * isn_offset_old, and isn_ctx is performed using the ISN lock. In
* general, this means holding an exclusive (write) lock.
*/
#define ISN_BYTES_PER_SECOND 1048576
#define ISN_STATIC_INCREMENT 4096
#define ISN_RANDOM_INCREMENT (4096 - 1)
+#define ISN_SECRET_LENGTH 32
-static VNET_DEFINE(u_char, isn_secret[32]);
-static VNET_DEFINE(int, isn_last);
-static VNET_DEFINE(int, isn_last_reseed);
-static VNET_DEFINE(u_int32_t, isn_offset);
-static VNET_DEFINE(u_int32_t, isn_offset_old);
+VNET_DEFINE_STATIC(u_char, isn_secret[ISN_SECRET_LENGTH]);
+VNET_DEFINE_STATIC(int, isn_last);
+VNET_DEFINE_STATIC(int, isn_last_reseed);
+VNET_DEFINE_STATIC(u_int32_t, isn_offset);
+VNET_DEFINE_STATIC(u_int32_t, isn_offset_old);
#define V_isn_secret VNET(isn_secret)
#define V_isn_last VNET(isn_last)
@@ -2655,45 +2712,23 @@ static VNET_DEFINE(u_int32_t, isn_offset_old);
#define V_isn_offset_old VNET(isn_offset_old)
tcp_seq
-tcp_new_isn(struct tcpcb *tp)
+tcp_new_isn(struct in_conninfo *inc)
{
- MD5_CTX isn_ctx;
- u_int32_t md5_buffer[4];
tcp_seq new_isn;
u_int32_t projected_offset;
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
ISN_LOCK();
/* Seed if this is the first use, reseed if requested. */
if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
(((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
< (u_int)ticks))) {
- read_random(&V_isn_secret, sizeof(V_isn_secret));
+ arc4rand(&V_isn_secret, sizeof(V_isn_secret), 0);
V_isn_last_reseed = ticks;
}
/* Compute the md5 hash and return the ISN. */
- MD5Init(&isn_ctx);
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
-#ifdef INET6
- if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
- sizeof(struct in6_addr));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
- sizeof(struct in6_addr));
- } else
-#endif
- {
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
- sizeof(struct in_addr));
- MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
- sizeof(struct in_addr));
- }
- MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
- MD5Final((u_char *) &md5_buffer, &isn_ctx);
- new_isn = (tcp_seq) md5_buffer[0];
+ new_isn = (tcp_seq)tcp_keyed_hash(inc, V_isn_secret,
+ sizeof(V_isn_secret));
V_isn_offset += ISN_STATIC_INCREMENT +
(arc4random() & ISN_RANDOM_INCREMENT);
if (ticks != V_isn_last) {
@@ -2840,6 +2875,9 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
+ if (inc->inc_flags & INC_IPV6MINMTU)
+ return (IPV6_MMTU);
+
if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0,
@@ -2928,6 +2966,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
struct tcpcb *tp;
struct tcptw *tw;
struct sockaddr_in *fin, *lin;
+ struct epoch_tracker et;
#ifdef INET6
struct sockaddr_in6 *fin6, *lin6;
#endif
@@ -2987,7 +3026,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3026,7 +3065,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index e163aa54..6fdd859d 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
@@ -104,19 +105,19 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static VNET_DEFINE(int, tcp_syncookies) = 1;
+VNET_DEFINE_STATIC(int, tcp_syncookies) = 1;
#define V_tcp_syncookies VNET(tcp_syncookies)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookies), 0,
"Use TCP SYN cookies if the syncache overflows");
-static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
+VNET_DEFINE_STATIC(int, tcp_syncookiesonly) = 0;
#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1;
+VNET_DEFINE_STATIC(int, functions_inherit_listen_socket_stack) = 1;
#define V_functions_inherit_listen_socket_stack \
VNET(functions_inherit_listen_socket_stack)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack,
@@ -164,7 +165,7 @@ static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
#define TCP_SYNCACHE_HASHSIZE 512
#define TCP_SYNCACHE_BUCKETLIMIT 30
-static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
+VNET_DEFINE_STATIC(struct tcp_syncache, tcp_syncache);
#define V_tcp_syncache VNET(tcp_syncache)
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
@@ -185,8 +186,27 @@ SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_R
&VNET_NAME(tcp_syncache.hashsize), 0,
"Size of TCP syncache hashtable");
-SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW,
+/*
+ * Sysctl handler for the syncache retransmit limit.
+ *
+ * Reports the current V_tcp_syncache.rexmt_limit and, when a new value is
+ * supplied, stores it only if it does not exceed TCP_MAXRXTSHIFT;
+ * larger values are rejected with EINVAL and the limit is left unchanged.
+ */
+static int
+sysctl_net_inet_tcp_syncache_rexmtlimit_check(SYSCTL_HANDLER_ARGS)
+{
+	u_int limit;
+	int error;
+
+	/* Work on a copy so a rejected value never reaches the live limit. */
+	limit = V_tcp_syncache.rexmt_limit;
+	error = sysctl_handle_int(oidp, &limit, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (limit > TCP_MAXRXTSHIFT)
+		return (EINVAL);
+
+	V_tcp_syncache.rexmt_limit = limit;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
&VNET_NAME(tcp_syncache.rexmt_limit), 0,
+ sysctl_net_inet_tcp_syncache_rexmtlimit_check, "UI",
"Limit on SYN/ACK retransmissions");
VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
@@ -398,8 +418,14 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
static void
syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
{
- sc->sc_rxttime = ticks +
- TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
+ int rexmt;
+
+ if (sc->sc_rxmits == 0)
+ rexmt = TCPTV_RTOBASE;
+ else
+ TCPT_RANGESET(rexmt, TCPTV_RTOBASE * tcp_syn_backoff[sc->sc_rxmits],
+ tcp_rexmit_min, TCPTV_REXMTMAX);
+ sc->sc_rxttime = ticks + rexmt;
sc->sc_rxmits++;
if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
sch->sch_nextc = sc->sc_rxttime;
@@ -746,10 +772,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
goto abort;
}
#ifdef INET6
- if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ if (inp->inp_vflag & INP_IPV6PROTO) {
struct inpcb *oinp = sotoinpcb(lso);
- struct in6_addr laddr6;
- struct sockaddr_in6 sin6;
+
/*
* Inherit socket options from the listening socket.
* Note that in6p_inputopts are not (and should not be)
@@ -763,6 +788,11 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (oinp->in6p_outputopts)
inp->in6p_outputopts =
ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
+ }
+
+ if (sc->sc_inc.inc_flags & INC_ISIPV6) {
+ struct in6_addr laddr6;
+ struct sockaddr_in6 sin6;
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
@@ -1153,25 +1183,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
- /*
- * If timestamps were negotiated, the reflected timestamp
- * must be equal to what we actually sent in the SYN|ACK
- * except in the case of 0. Some boxes are known for sending
- * broken timestamp replies during the 3whs (and potentially
- * during the connection also).
- *
- * Accept the final ACK of 3whs with reflected timestamp of 0
- * instead of sending a RST and deleting the syncache entry.
- */
- if ((to->to_flags & TOF_TS) && to->to_tsecr &&
- to->to_tsecr != sc->sc_ts) {
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
- log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
- "segment rejected\n",
- s, __func__, to->to_tsecr, sc->sc_ts);
- goto failed;
- }
-
*lsop = syncache_socket(sc, *lsop, m);
if (*lsop == NULL)
@@ -1404,6 +1415,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
mac_syncache_destroy(&maclabel);
#endif
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
/* Retransmit SYN|ACK and reset retransmit count. */
if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
@@ -1418,7 +1430,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
TCPSTAT_INC(tcps_sndtotal);
}
SCH_UNLOCK(sch);
- goto done;
+ goto donenoprobe;
}
if (tfo_cookie_valid) {
@@ -1498,8 +1510,8 @@ skip_alloc:
*/
if (to->to_flags & TOF_TS) {
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = tcp_ts_getticks();
sc->sc_flags |= SCF_TIMESTAMP;
+ sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if (to->to_flags & TOF_SCALE) {
int wscale = 0;
@@ -1571,6 +1583,7 @@ skip_alloc:
goto tfo_expanded;
}
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
/*
* Do a standard 3-way handshake.
*/
@@ -1586,8 +1599,11 @@ skip_alloc:
syncache_free(sc);
TCPSTAT_INC(tcps_sc_dropped);
}
+ goto donenoprobe;
done:
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
+donenoprobe:
if (m) {
*lsop = NULL;
m_freem(m);
@@ -1727,8 +1743,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
to.to_flags |= TOF_SCALE;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
- /* Virgin timestamp or TCP cookie enhanced one. */
- to.to_tsval = sc->sc_ts;
+ to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
to.to_tsecr = sc->sc_tsreflect;
to.to_flags |= TOF_TS;
}
@@ -1799,6 +1814,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
return (error);
}
#endif
+ TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
#endif
@@ -1819,6 +1835,7 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
return (error);
}
#endif
+ TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
@@ -2033,12 +2050,6 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc)
iss = hash & ~0xff;
iss |= cookie.cookie ^ (hash >> 24);
- /* Randomize the timestamp. */
- if (sc->sc_flags & SCF_TIMESTAMP) {
- sc->sc_ts = arc4random();
- sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
- }
-
TCPSTAT_INC(tcps_sc_sendcookie);
return (iss);
}
@@ -2125,8 +2136,7 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
if (to->to_flags & TOF_TS) {
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = to->to_tsecr;
- sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
+ sc->sc_tsoff = tcp_new_ts_offset(inc);
}
if (to->to_flags & TOF_SIGNATURE)
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
index 92a7c7c9..0104e528 100644
--- a/freebsd/sys/netinet/tcp_syncache.h
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -56,7 +56,6 @@ struct syncache {
int sc_rxttime; /* retransmit time */
u_int16_t sc_rxmits; /* retransmit counter */
u_int32_t sc_tsreflect; /* timestamp to reflect */
- u_int32_t sc_ts; /* our timestamp to send */
u_int32_t sc_tsoff; /* ts offset w/ syncookies */
u_int32_t sc_flowlabel; /* IPv6 flowlabel */
tcp_seq sc_irs; /* seq from peer */
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index 422e5122..c50af2bb 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_seq.h>
#include <netinet/cc/cc.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
@@ -141,7 +142,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
/* max idle probes */
int tcp_maxpersistidle;
-static int tcp_rexmit_drop_options = 0;
+int tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
@@ -176,18 +177,13 @@ static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
-#if 0
-#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
- ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
-#endif
-
/*
* Map the given inp to a CPU id.
*
* This queries RSS if it's compiled in, else it defaults to the current
* CPU ID.
*/
-static inline int
+inline int
inp_to_cpuid(struct inpcb *inp)
{
u_int cpuid;
@@ -245,7 +241,7 @@ int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
-static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
+int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
/*
* TCP timer processing.
@@ -280,55 +276,10 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
}
-/*
- * When a timer wants to remove a TCB it must
- * hold the INP_INFO_RLOCK(). The timer function
- * should only have grabbed the INP_WLOCK() when
- * it entered. To safely switch to holding both the
- * INP_INFO_RLOCK() and the INP_WLOCK() we must first
- * grab a reference on the inp, which will hold the inp
- * so that it can't be removed. We then unlock the INP_WLOCK(),
- * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
- * we proceed again to get the INP_WLOCK() (this preserves proper
- * lock order). After acquiring the INP_WLOCK we must check if someone
- * else deleted the pcb i.e. the inp_flags check.
- * If so we return 1 otherwise we return 0.
- *
- * No matter what the tcp_inpinfo_lock_add() function
- * returns the caller must afterwards call tcp_inpinfo_lock_del()
- * to drop the locks and reference properly.
- */
-
-int
-tcp_inpinfo_lock_add(struct inpcb *inp)
-{
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- return(1);
- }
- return(0);
-
-}
-
void
tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
{
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (inp && (tp == NULL)) {
- /*
- * If tcp_close/drop() gets called and tp
- * returns NULL, then the function dropped
- * the inp lock, we hold a reference keeping
- * this around, so we must re-aquire the
- * INP_WLOCK() in order to proceed with
- * our dropping the inp reference.
- */
- INP_WLOCK(inp);
- }
- if (inp && in_pcbrele_wlocked(inp) == 0)
+ if (inp && tp != NULL)
INP_WUNLOCK(inp);
}
@@ -337,6 +288,7 @@ tcp_timer_2msl(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -383,11 +335,13 @@ tcp_timer_2msl(void *xtp)
tp->t_inpcb && tp->t_inpcb->inp_socket &&
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
} else {
@@ -395,15 +349,17 @@ tcp_timer_2msl(void *xtp)
callout_reset(&tp->t_timers->tt_2msl,
TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
} else {
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_close(tp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- }
+ }
#ifdef TCPDEBUG
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
@@ -424,6 +380,7 @@ tcp_timer_keep(void *xtp)
struct tcpcb *tp = xtp;
struct tcptemp *t_template;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -517,11 +474,11 @@ tcp_timer_keep(void *xtp)
dropit:
TCPSTAT_INC(tcps_keepdrops);
-
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
@@ -530,8 +487,9 @@ dropit:
PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
-out:
+ out:
CURVNET_RESTORE();
}
@@ -540,6 +498,7 @@ tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ struct epoch_tracker et;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -579,11 +538,13 @@ tcp_timer_persist(void *xtp)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
TCPSTAT_INC(tcps_persistdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -594,11 +555,13 @@ tcp_timer_persist(void *xtp)
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -624,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
CURVNET_SET(tp->t_vnet);
int rexmt;
struct inpcb *inp;
+ struct epoch_tracker et;
#ifdef TCPDEBUG
int ostate;
@@ -660,11 +624,13 @@ tcp_timer_rexmt(void * xtp)
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop);
- if (tcp_inpinfo_lock_add(inp)) {
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
tp = tcp_drop(tp, ETIMEDOUT);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -950,6 +916,111 @@ tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
return callout_active(t_callout);
}
+/*
+ * Suspend one TCP timer.
+ *
+ * The matching TT_*_SUS bit is set in the connection's timer flags and
+ * the timer's callout is then stopped.  The flag is meant to force the
+ * timer never to run: without it a race could leave the callout running
+ * and let the timer restart itself (the keep and persist timers in
+ * particular do this).
+ *
+ * Returns the result of callout_stop() for that timer; panics on an
+ * unknown timer_type.
+ */
+int
+tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type)
+{
+	struct callout *co;
+	uint32_t sus_flag;
+
+	switch (timer_type) {
+	case TT_DELACK:
+		co = &tp->t_timers->tt_delack;
+		sus_flag = TT_DELACK_SUS;
+		break;
+	case TT_REXMT:
+		co = &tp->t_timers->tt_rexmt;
+		sus_flag = TT_REXMT_SUS;
+		break;
+	case TT_PERSIST:
+		co = &tp->t_timers->tt_persist;
+		sus_flag = TT_PERSIST_SUS;
+		break;
+	case TT_KEEP:
+		co = &tp->t_timers->tt_keep;
+		sus_flag = TT_KEEP_SUS;
+		break;
+	case TT_2MSL:
+		co = &tp->t_timers->tt_2msl;
+		sus_flag = TT_2MSL_SUS;
+		break;
+	default:
+		panic("tp:%p bad timer_type 0x%x", tp, timer_type);
+	}
+
+	/* Mark the timer suspended before stopping its callout. */
+	tp->t_timers->tt_flags |= sus_flag;
+	return (callout_stop(co));
+}
+
+/*
+ * Undo tcp_timer_suspend() for one timer.
+ *
+ * If the timer's TT_*_SUS bit is set in tt_flags it is cleared, and the
+ * timer is re-armed when the connection state checked in the matching
+ * case calls for it.  Nothing happens if the timer was not suspended.
+ * Panics on an unknown timer_type.
+ */
+void
+tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type)
+{
+	switch (timer_type) {
+	case TT_DELACK:
+		if (tp->t_timers->tt_flags & TT_DELACK_SUS) {
+			tp->t_timers->tt_flags &= ~TT_DELACK_SUS;
+			if (tp->t_flags & TF_DELACK) {
+				/* The delayed ACK timer should be running; activate it. */
+				tp->t_flags &= ~TF_DELACK;
+				tcp_timer_activate(tp, TT_DELACK,
+				    tcp_delacktime);
+			}
+		}
+		break;
+	case TT_REXMT:
+		if (tp->t_timers->tt_flags & TT_REXMT_SUS) {
+			tp->t_timers->tt_flags &= ~TT_REXMT_SUS;
+			if (SEQ_GT(tp->snd_max, tp->snd_una) &&
+			    (tcp_timer_active((tp), TT_PERSIST) == 0) &&
+			    tp->snd_wnd) {
+				/* We have outstanding data; activate a timer. */
+				tcp_timer_activate(tp, TT_REXMT,
+				    tp->t_rxtcur);
+			}
+		}
+		break;
+	case TT_PERSIST:
+		if (tp->t_timers->tt_flags & TT_PERSIST_SUS) {
+			tp->t_timers->tt_flags &= ~TT_PERSIST_SUS;
+			if (tp->snd_wnd == 0) {
+				/* Activate the persist timer. */
+				tp->t_rxtshift = 0;
+				tcp_setpersist(tp);
+			}
+		}
+		break;
+	case TT_KEEP:
+		if (tp->t_timers->tt_flags & TT_KEEP_SUS) {
+			tp->t_timers->tt_flags &= ~TT_KEEP_SUS;
+			tcp_timer_activate(tp, TT_KEEP,
+			    TCPS_HAVEESTABLISHED(tp->t_state) ?
+			    TP_KEEPIDLE(tp) : TP_KEEPINIT(tp));
+		}
+		break;
+	case TT_2MSL:
+		/*
+		 * Test the bit with '&', not '&='.  A compound assignment
+		 * here would clear every other bit in tt_flags (the other
+		 * TT_*_SUS bits and TT_STOPPED included) as a side effect
+		 * of the check.
+		 */
+		if (tp->t_timers->tt_flags & TT_2MSL_SUS) {
+			tp->t_timers->tt_flags &= ~TT_2MSL_SUS;
+			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+			    ((tp->t_inpcb->inp_socket == NULL) ||
+			    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE))) {
+				/* Start the 2MSL timer. */
+				tcp_timer_activate(tp, TT_2MSL,
+				    (tcp_fast_finwait2_recycle) ?
+				    tcp_finwait2_timeout : TP_MAXIDLE(tp));
+			}
+		}
+		break;
+	default:
+		panic("tp:%p bad timer_type 0x%x", tp, timer_type);
+	}
+}
+
void
tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
{
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index b0ff3809..a2ab6ca5 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -168,11 +168,15 @@ struct tcp_timer {
#define TT_2MSL 0x0010
#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
-#define TT_DELACK_RST 0x0100
-#define TT_REXMT_RST 0x0200
-#define TT_PERSIST_RST 0x0400
-#define TT_KEEP_RST 0x0800
-#define TT_2MSL_RST 0x1000
+/*
+ * Suspend flags - used when suspending a timer
+ * from ever running again.
+ */
+#define TT_DELACK_SUS 0x0100
+#define TT_REXMT_SUS 0x0200
+#define TT_PERSIST_SUS 0x0400
+#define TT_KEEP_SUS 0x0800
+#define TT_2MSL_SUS 0x1000
#define TT_STOPPED 0x00010000
@@ -196,6 +200,8 @@ extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
extern int tcp_syn_backoff[];
+extern int tcp_totbackoff;
+extern int tcp_rexmit_drop_options;
extern int tcp_always_keepalive;
extern int tcp_finwait2_timeout;
@@ -208,7 +214,6 @@ VNET_DECLARE(int, tcp_pmtud_blackhole_mss);
VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss);
#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
-int tcp_inpinfo_lock_add(struct inpcb *inp);
void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp);
void tcp_timer_init(void);
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
index afadf7cd..8a28283f 100644
--- a/freebsd/sys/netinet/tcp_timewait.c
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -98,7 +99,7 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static VNET_DEFINE(uma_zone_t, tcptw_zone);
+VNET_DEFINE_STATIC(uma_zone_t, tcptw_zone);
#define V_tcptw_zone VNET(tcptw_zone)
static int maxtcptw;
@@ -113,11 +114,11 @@ static int maxtcptw;
* - a tcptw relies on its inpcb reference counting for memory stability
* - a tcptw is dereferenceable only while its inpcb is locked
*/
-static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
+VNET_DEFINE_STATIC(TAILQ_HEAD(, tcptw), twq_2msl);
#define V_twq_2msl VNET(twq_2msl)
/* Global timewait lock */
-static VNET_DEFINE(struct rwlock, tw_lock);
+VNET_DEFINE_STATIC(struct rwlock, tw_lock);
#define V_tw_lock VNET(tw_lock)
#define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0)
@@ -174,7 +175,7 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
&maxtcptw, 0, sysctl_maxtcptw, "IU",
"Maximum number of compressed TCP TIME_WAIT entries");
-static VNET_DEFINE(int, nolocaltimewait) = 0;
+VNET_DEFINE_STATIC(int, nolocaltimewait) = 0;
#define V_nolocaltimewait VNET(nolocaltimewait)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nolocaltimewait), 0,
@@ -208,11 +209,12 @@ void
tcp_tw_destroy(void)
{
struct tcptw *tw;
+ struct epoch_tracker et;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
@@ -230,6 +232,7 @@ tcp_twstart(struct tcpcb *tp)
struct tcptw twlocal, *tw;
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
+ uint32_t recwin;
bool acknow, local;
#ifdef INET6
bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
@@ -292,10 +295,16 @@ tcp_twstart(struct tcpcb *tp)
/*
* Recover last window size sent.
*/
- if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
- tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
- else
- tw->last_win = 0;
+ so = inp->inp_socket;
+ recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
+ (long)TCP_MAXWIN << tp->rcv_scale);
+ if (recwin < (so->so_rcv.sb_hiwat / 4) &&
+ recwin < tp->t_maxseg)
+ recwin = 0;
+ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
+ recwin < (tp->rcv_adv - tp->rcv_nxt))
+ recwin = (tp->rcv_adv - tp->rcv_nxt);
+ tw->last_win = htons((u_short)(recwin >> tp->rcv_scale));
/*
* Set t_recent if timestamps are used on the connection.
@@ -332,7 +341,6 @@ tcp_twstart(struct tcpcb *tp)
* and might not be needed here any longer.
*/
tcp_discardcb(tp);
- so = inp->inp_socket;
soisdisconnected(so);
tw->tw_so_options = so->so_options;
inp->inp_flags |= INP_TIMEWAIT;
@@ -451,9 +459,14 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
* Acknowledge the segment if it has data or is not a duplicate ACK.
*/
if (thflags != TH_ACK || tlen != 0 ||
- th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+ th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt) {
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
tcp_twrespond(tw, TH_ACK);
+ goto dropnoprobe;
+ }
drop:
+ TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
+dropnoprobe:
INP_WUNLOCK(inp);
m_freem(m);
return (0);
@@ -599,6 +612,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, inp->in6p_outputopts, NULL,
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
}
@@ -614,6 +628,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);
+ TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
error = ip_output(m, inp->inp_options, NULL,
((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, inp);
@@ -676,6 +691,7 @@ tcp_tw_2msl_scan(int reuse)
{
struct tcptw *tw;
struct inpcb *inp;
+ struct epoch_tracker et;
#ifdef INVARIANTS
if (reuse) {
@@ -709,54 +725,46 @@ tcp_tw_2msl_scan(int reuse)
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
-
- INP_WLOCK(inp);
- tw = intotw(inp);
- if (in_pcbrele_wlocked(inp)) {
- if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
- } else {
- /* This should not happen as in TIMEWAIT
- * state the inp should not be destroyed
- * before its tcptw. If INVARIANTS is
- * defined panic.
- */
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ if (__predict_true(tw == NULL)) {
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ continue;
+ } else {
+ /* This should not happen as in TIMEWAIT
+ * state the inp should not be destroyed
+ * before its tcptw. If INVARIANTS is
+ * defined panic.
+ */
#ifdef INVARIANTS
- panic("%s: Panic before an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ panic("%s: Panic before an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#else
- log(LOG_ERR, "%s: Avoid an infinite "
- "loop: INP_TIMEWAIT && (INP_FREED "
- "|| inp last reference) && tw != "
- "NULL", __func__);
+ log(LOG_ERR, "%s: Avoid an infinite "
+ "loop: INP_TIMEWAIT && (INP_FREED "
+ "|| inp last reference) && tw != "
+ "NULL", __func__);
#endif
- INP_INFO_RUNLOCK(&V_tcbinfo);
- break;
- }
- }
-
- if (tw == NULL) {
- /* tcp_twclose() has already been called */
- INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- continue;
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ break;
}
+ }
- tcp_twclose(tw, reuse);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- if (reuse)
- return tw;
- } else {
- /* INP_INFO lock is busy, continue later. */
- INP_WLOCK(inp);
- if (!in_pcbrele_wlocked(inp))
- INP_WUNLOCK(inp);
- break;
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ continue;
}
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ if (reuse)
+ return tw;
}
return NULL;
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index bf2cff4c..617f60d0 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -278,11 +278,12 @@ tcp_usr_detach(struct socket *so)
{
struct inpcb *inp;
int rlock = 0;
+ struct epoch_tracker et;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
rlock = 1;
}
INP_WLOCK(inp);
@@ -290,7 +291,7 @@ tcp_usr_detach(struct socket *so)
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
if (rlock)
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#ifdef INET
@@ -379,6 +380,11 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6p);
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
error = in_pcbbind(inp, (struct sockaddr *)&sin,
@@ -608,6 +614,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
}
in6_sin6_2_sin(&sin, sin6p);
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
if ((error = prison_remote_ip4(td->td_ucred,
@@ -670,10 +680,11 @@ tcp_usr_disconnect(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
int error = 0;
TCPDEBUG0;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -690,7 +701,7 @@ out:
TCPDEBUG2(PRU_DISCONNECT);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
@@ -749,6 +760,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
struct tcpcb *tp = NULL;
struct in_addr addr;
struct in6_addr addr6;
+ struct epoch_tracker et;
in_port_t port = 0;
int v4 = 0;
TCPDEBUG0;
@@ -758,7 +770,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -785,7 +797,7 @@ out:
TCPDEBUG2(PRU_ACCEPT);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -805,9 +817,10 @@ tcp_usr_shutdown(struct socket *so)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -826,7 +839,7 @@ out:
TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
@@ -889,6 +902,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker net_et;
+#ifdef INET
+#ifdef INET6
+ struct sockaddr_in sin;
+#endif
+ struct sockaddr_in *sinp;
+#endif
#ifdef INET6
int isipv6;
#endif
@@ -899,7 +919,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -915,11 +935,124 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
error = ECONNRESET;
goto out;
}
-#ifdef INET6
- isipv6 = nam && nam->sa_family == AF_INET6;
-#endif /* INET6 */
tp = intotcpcb(inp);
TCPDEBUG1();
+ if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
+ switch (nam->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sinp = (struct sockaddr_in *)nam;
+ if (sinp->sin_len != sizeof(struct sockaddr_in)) {
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+ if ((inp->inp_vflag & INP_IPV6) != 0) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if ((error = prison_remote_ip4(td->td_ucred,
+ &sinp->sin_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+#ifdef INET6
+ isipv6 = 0;
+#endif
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6p;
+
+ sin6p = (struct sockaddr_in6 *)nam;
+ if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
+ if (m)
+ m_freem(m);
+ error = EINVAL;
+ goto out;
+ }
+ if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+#ifdef INET
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
+ error = EINVAL;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ if ((inp->inp_vflag & INP_IPV4) == 0) {
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ inp->inp_vflag &= ~INP_IPV6;
+ sinp = &sin;
+ in6_sin6_2_sin(sinp, sin6p);
+ if (IN_MULTICAST(
+ ntohl(sinp->sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ if ((error = prison_remote_ip4(td->td_ucred,
+ &sinp->sin_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ isipv6 = 0;
+#else /* !INET */
+ error = EAFNOSUPPORT;
+ if (m)
+ m_freem(m);
+ goto out;
+#endif /* INET */
+ } else {
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ inp->inp_vflag &= ~INP_IPV4;
+ inp->inp_inc.inc_flags |= INC_ISIPV6;
+ if ((error = prison_remote_ip6(td->td_ucred,
+ &sin6p->sin6_addr))) {
+ if (m)
+ m_freem(m);
+ goto out;
+ }
+ isipv6 = 1;
+ }
+ break;
+ }
+#endif /* INET6 */
+ default:
+ if (m)
+ m_freem(m);
+ error = EAFNOSUPPORT;
+ goto out;
+ }
+ }
if (control) {
/* TCP doesn't do control messages (rights, creds, etc) */
if (control->m_len) {
@@ -947,7 +1080,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
else
#endif
#ifdef INET
- error = tcp_connect(tp, nam, td);
+ error = tcp_connect(tp,
+ (struct sockaddr *)sinp, td);
#endif
if (error)
goto out;
@@ -1016,7 +1150,8 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
else
#endif
#ifdef INET
- error = tcp_connect(tp, nam, td);
+ error = tcp_connect(tp,
+ (struct sockaddr *)sinp, td);
#endif
if (error)
goto out;
@@ -1042,7 +1177,7 @@ out:
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
return (error);
}
@@ -1081,12 +1216,13 @@ tcp_usr_abort(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -1112,7 +1248,7 @@ tcp_usr_abort(struct socket *so)
}
INP_WUNLOCK(inp);
dropped:
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
/*
@@ -1123,12 +1259,13 @@ tcp_usr_close(struct socket *so)
{
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ struct epoch_tracker et;
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -1152,7 +1289,7 @@ tcp_usr_close(struct socket *so)
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
/*
@@ -1304,7 +1441,9 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tcp_state_change(tp, TCPS_SYN_SENT);
- tp->iss = tcp_new_isn(tp);
+ tp->iss = tcp_new_isn(&inp->inp_inc);
+ if (tp->t_flags & TF_REQ_TSTMP)
+ tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
return 0;
@@ -1343,7 +1482,9 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(inp->inp_socket);
TCPSTAT_INC(tcps_connattempt);
tcp_state_change(tp, TCPS_SYN_SENT);
- tp->iss = tcp_new_isn(tp);
+ tp->iss = tcp_new_isn(&inp->inp_inc);
+ if (tp->t_flags & TF_REQ_TSTMP)
+ tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
tcp_sendseqinit(tp);
return 0;
@@ -1445,6 +1586,42 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (inp->inp_vflag & INP_IPV6PROTO) {
INP_WUNLOCK(inp);
error = ip6_ctloutput(so, sopt);
+ /*
+ * In case of the IPV6_USE_MIN_MTU socket option,
+ * set the INC_IPV6MINMTU flag to announce a corresponding
+ * MSS during the initial handshake.
+ * If the TCP connection is not in the front states,
+ * just reduce the MSS being used.
+ * This avoids the sending of TCP segments which will
+ * be fragmented at the IPv6 layer.
+ */
+ if ((error == 0) &&
+ (sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_level == IPPROTO_IPV6) &&
+ (sopt->sopt_name == IPV6_USE_MIN_MTU)) {
+ INP_WLOCK(inp);
+ if ((inp->inp_flags &
+ (INP_TIMEWAIT | INP_DROPPED))) {
+ INP_WUNLOCK(inp);
+ return (ECONNRESET);
+ }
+ inp->inp_inc.inc_flags |= INC_IPV6MINMTU;
+ tp = intotcpcb(inp);
+ if ((tp->t_state >= TCPS_SYN_SENT) &&
+ (inp->inp_inc.inc_flags & INC_ISIPV6)) {
+ struct ip6_pktopts *opt;
+
+ opt = inp->in6p_outputopts;
+ if ((opt != NULL) &&
+ (opt->ip6po_minmtu ==
+ IP6PO_MINMTU_ALL)) {
+ if (tp->t_maxseg > TCP6_MSS) {
+ tp->t_maxseg = TCP6_MSS;
+ }
+ }
+ }
+ INP_WUNLOCK(inp);
+ }
}
#endif /* INET6 */
#if defined(INET6) && defined(INET)
@@ -1487,7 +1664,6 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
return (0);
}
if (tp->t_state != TCPS_CLOSED) {
- int error=EINVAL;
/*
* The user has advanced the state
* past the initial point, we may not
@@ -1500,7 +1676,8 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
* still be possible?
*/
error = (*blk->tfb_tcp_handoff_ok)(tp);
- }
+ } else
+ error = EINVAL;
if (error) {
refcount_release(&blk->tfb_refcnt);
INP_WUNLOCK(inp);
@@ -1724,6 +1901,7 @@ unlock_and_done:
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ CC_DATA(tp) = NULL;
CC_ALGO(tp) = algo;
/*
* If something goes pear shaped initialising the new
@@ -2045,6 +2223,7 @@ tcp_attach(struct socket *so)
{
struct tcpcb *tp;
struct inpcb *inp;
+ struct epoch_tracker et;
int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -2054,10 +2233,10 @@ tcp_attach(struct socket *so)
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (error);
}
inp = sotoinpcb(so);
@@ -2075,12 +2254,12 @@ tcp_attach(struct socket *so)
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
TCPSTATES_INC(TCPS_CLOSED);
return (0);
}
@@ -2106,7 +2285,8 @@ tcp_disconnect(struct tcpcb *tp)
* Neither tcp_close() nor tcp_drop() should return NULL, as the
* socket is still open.
*/
- if (tp->t_state < TCPS_ESTABLISHED) {
+ if (tp->t_state < TCPS_ESTABLISHED &&
+ !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
tp = tcp_close(tp);
KASSERT(tp != NULL,
("tcp_disconnect: tcp_close() returned NULL"));
@@ -2383,7 +2563,7 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
db_print_indent(indent);
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
- LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+ TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index adaaff61..2fbe07ad 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -46,12 +46,15 @@
#if defined(_KERNEL) || defined(_WANT_TCPCB)
/* TCP segment queue entry */
struct tseg_qent {
- LIST_ENTRY(tseg_qent) tqe_q;
+ TAILQ_ENTRY(tseg_qent) tqe_q;
+ struct mbuf *tqe_m; /* mbuf contains packet */
+ struct mbuf *tqe_last; /* last mbuf in chain */
+ tcp_seq tqe_start; /* TCP Sequence number start */
int tqe_len; /* TCP segment data length */
- struct tcphdr *tqe_th; /* a pointer to tcp header */
- struct mbuf *tqe_m; /* mbuf contains packet */
+ uint32_t tqe_flags; /* The flags from the th->th_flags */
+ uint32_t tqe_mbuf_cnt; /* Count of mbuf overhead */
};
-LIST_HEAD(tsegqe_head, tseg_qent);
+TAILQ_HEAD(tsegqe_head, tseg_qent);
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
@@ -79,6 +82,8 @@ struct sackhint {
uint64_t _pad[1]; /* TBD */
};
+#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
+
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
/*
@@ -93,8 +98,11 @@ struct tcpcb {
void *t_fb_ptr; /* Pointer to t_fb specific data */
uint32_t t_maxseg:24, /* maximum segment size */
t_logstate:8; /* State of "black box" logging */
- uint32_t t_state:4, /* state of this connection */
- bits_spare : 24;
+ uint32_t t_port:16, /* Tunneling (over udp) port */
+ t_state:4, /* state of this connection */
+ t_idle_reduce : 1,
+ t_delayed_ack: 7, /* Delayed ack variable */
+ bits_spare : 4;
u_int t_flags;
tcp_seq snd_una; /* sent but unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -104,7 +112,7 @@ struct tcpcb {
tcp_seq snd_up; /* send urgent pointer */
uint32_t snd_wnd; /* send window */
uint32_t snd_cwnd; /* congestion-controlled window */
- uint32_t cl1_spare; /* Spare to round out CL 1 */
+ uint32_t t_peakrate_thr; /* pre-calculated peak rate threshold */
/* Cache line 2 */
u_int32_t ts_offset; /* our timestamp offset */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
@@ -128,6 +136,7 @@ struct tcpcb {
/* Cache line 3 */
tcp_seq rcv_up; /* receive urgent pointer */
int t_segqlen; /* segment reassembly queue length */
+ uint32_t t_segqmbuflen; /* Count of bytes mbufs on all entries */
struct tsegqe_head t_segq; /* segment reassembly queue */
struct mbuf *t_in_pkt;
struct mbuf *t_tail_pkt;
@@ -189,6 +198,7 @@ struct tcpcb {
struct cc_var *ccv; /* congestion control specific vars */
struct osd *osd; /* storage for Khelp module data */
int t_bytes_acked; /* # bytes acked during current RTT */
+ u_int t_maxunacktime;
u_int t_keepinit; /* time to establish connection */
u_int t_keepidle; /* time before keepalive probes begin */
u_int t_keepintvl; /* interval between keepalives */
@@ -260,12 +270,11 @@ struct tcp_function_block {
int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *);
void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
- int, int, uint8_t,
- int);
+ int, int, uint8_t);
void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
int, int, uint8_t,
- int, int, struct timeval *);
+ int, struct timeval *);
int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
@@ -361,6 +370,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
#define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */
+#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
/*
* Structure to hold TCP options that are only used during segment
@@ -649,6 +659,11 @@ struct tcp_hhook_data {
int tso;
tcp_seq curack;
};
+#ifdef TCP_HHOOK
+void hhook_run_tcp_est_out(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to,
+ uint32_t len, int tso);
+#endif
#endif
/*
@@ -668,7 +683,7 @@ struct tcp_hhook_data {
*/
#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
struct xtcpcb {
- size_t xt_len; /* length of this structure */
+ ksize_t xt_len; /* length of this structure */
struct xinpcb xt_inp;
char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (s) */
char xt_logid[TCP_LOG_ID_LEN]; /* (s) */
@@ -801,6 +816,9 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)
#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail)
#define V_tcp_sendspace VNET(tcp_sendspace)
+#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead)
+#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port)
+
#ifdef TCP_HHOOK
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
@@ -825,7 +843,7 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
-int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
+int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
void tcp_dooptions(struct tcpopt *, u_char *, int, int);
@@ -849,8 +867,7 @@ int tcp_input(struct mbuf **, int *, int);
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
+ struct socket *, struct tcpcb *, int, int, uint8_t);
int register_tcp_functions(struct tcp_function_block *blk, int wait);
int register_tcp_functions_as_names(struct tcp_function_block *blk,
@@ -893,9 +910,12 @@ struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int tcp_timer_suspend(struct tcpcb *, uint32_t);
+void tcp_timers_unsuspend(struct tcpcb *, uint32_t);
int tcp_timer_active(struct tcpcb *, uint32_t);
void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
+int inp_to_cpuid(struct inpcb *inp);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
@@ -909,7 +929,9 @@ void tcp_hc_updatemtu(struct in_conninfo *, uint32_t);
void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
-tcp_seq tcp_new_isn(struct tcpcb *);
+
+uint32_t tcp_new_ts_offset(struct in_conninfo *);
+tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
@@ -921,6 +943,10 @@ void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
int tcp_compute_pipe(struct tcpcb *);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
+struct mbuf *
+ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb);
+
static inline void
tcp_fields_to_host(struct tcphdr *th)
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index 178a8d5e..9557c154 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -150,7 +150,7 @@ VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
VNET_DEFINE(struct inpcbinfo, udbinfo);
VNET_DEFINE(struct inpcbhead, ulitecb);
VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
-static VNET_DEFINE(uma_zone_t, udpcb_zone);
+VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
#define V_udpcb_zone VNET(udpcb_zone)
#ifndef UDBHASHSIZE
@@ -405,6 +405,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct sockaddr_in udp_in[2];
struct mbuf *m;
struct m_tag *fwd_tag;
+ struct epoch_tracker et;
int cscov_partial, iphlen;
m = *mp;
@@ -535,10 +536,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist;
struct ip_moptions *imo;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_RLOCK_ET(pcbinfo, et);
pcblist = udp_get_pcblist(proto);
last = NULL;
- LIST_FOREACH(inp, pcblist, inp_list) {
+ CK_LIST_FOREACH(inp, pcblist, inp_list) {
if (inp->inp_lport != uh->uh_dport)
continue;
#ifdef INET6
@@ -599,8 +600,12 @@ udp_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
NULL) {
- UDP_PROBE(receive, NULL, last, ip,
- last, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip,
+ last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip, last,
+ uh);
if (udp_append(last, ip, n, iphlen,
udp_in)) {
goto inp_lost;
@@ -618,7 +623,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
* will never clear these options after setting them.
*/
if ((last->inp_socket->so_options &
- (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
break;
}
@@ -631,14 +636,17 @@ udp_input(struct mbuf **mp, int *offp, int proto)
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
goto badunlocked;
}
- UDP_PROBE(receive, NULL, last, ip, last, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip, last, uh);
if (udp_append(last, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(last);
inp_lost:
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
@@ -690,6 +698,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
}
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, NULL, ip, NULL, uh);
+ else
+ UDP_PROBE(receive, NULL, NULL, ip, NULL, uh);
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
UDPSTAT_INC(udps_noportbcast);
@@ -709,6 +721,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
*/
INP_RLOCK_ASSERT(inp);
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
INP_RUNLOCK(inp);
m_freem(m);
return (IPPROTO_DONE);
@@ -724,7 +740,10 @@ udp_input(struct mbuf **mp, int *offp, int proto)
}
}
- UDP_PROBE(receive, NULL, inp, ip, inp, uh);
+ if (proto == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
if (udp_append(inp, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
@@ -808,14 +827,15 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
struct udpcb *up;
+ void *ctx;
+ udp_tun_icmp_t func;
up = intoudpcb(inp);
- if (up->u_icmp_func != NULL) {
- INP_RUNLOCK(inp);
- (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx);
- } else {
- INP_RUNLOCK(inp);
- }
+ ctx = up->u_tun_ctx;
+ func = up->u_icmp_func;
+ INP_RUNLOCK(inp);
+ if (func != NULL)
+ (*func)(cmd, sa, vip, ctx);
}
}
} else
@@ -842,9 +862,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
struct inpcb *inp, **inp_list;
- struct in_pcblist *il;
inp_gen_t gencnt;
struct xinpgen xig;
+ struct epoch_tracker et;
/*
* The process of preparing the PCB list is too time-consuming and
@@ -863,10 +883,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
gencnt = V_udbinfo.ipi_gencnt;
n = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ n * sizeof(struct xinpcb));
@@ -880,12 +900,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- il = malloc(sizeof(struct in_pcblist) + n * sizeof(struct inpcb *), M_TEMP, M_WAITOK|M_ZERO_INVARIANTS);
- inp_list = il->il_inp_list;
- INP_INFO_RLOCK(&V_udbinfo);
- for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+ if (inp_list == NULL)
+ return (ENOMEM);
+
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
+ for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
if (inp->inp_gencnt <= gencnt &&
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
@@ -894,7 +916,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
n = i;
error = 0;
@@ -910,9 +932,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- il->il_count = n;
- il->il_pcbinfo = &V_udbinfo;
- epoch_call(net_epoch_preempt, &il->il_epoch_ctx, in_pcblist_rele_rlocked);
+ INP_INFO_WLOCK(&V_udbinfo);
+ for (i = 0; i < n; i++) {
+ inp = inp_list[i];
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
+ }
+ INP_INFO_WUNLOCK(&V_udbinfo);
if (!error) {
/*
@@ -921,13 +948,14 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
+ free(inp_list, M_TEMP);
return (error);
}
@@ -1106,6 +1134,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct cmsghdr *cm;
struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin, src;
+ struct epoch_tracker et;
int cscov_partial = 0;
int error = 0;
int ipflags;
@@ -1262,7 +1291,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- INP_HASH_RLOCK(pcbinfo);
+ INP_HASH_RLOCK_ET(pcbinfo, et);
unlock_udbinfo = UH_RLOCKED;
} else
unlock_udbinfo = UH_UNLOCKED;
@@ -1390,6 +1419,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
+ ui->ui_v = IPVERSION << 4;
ui->ui_pr = pr;
ui->ui_src = laddr;
ui->ui_dst = faddr;
@@ -1412,8 +1442,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* the entire UDPLite packet is covered by the checksum.
*/
cscov_partial = (cscov == 0) ? 0 : 1;
- } else
- ui->ui_v = IPVERSION << 4;
+ }
/*
* Set the Don't Fragment bit in the IP header.
@@ -1518,8 +1547,11 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK(pcbinfo);
- UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ if (pr == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
+ else
+ UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options,
(unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
inp->inp_moptions, inp);
@@ -1538,7 +1570,7 @@ release:
} else if (unlock_udbinfo == UH_RLOCKED) {
KASSERT(unlock_inp == UH_RLOCKED,
("%s: shared udbinfo lock, excl inp lock", __func__));
- INP_HASH_RUNLOCK(pcbinfo);
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
INP_RUNLOCK(inp);
} else if (unlock_inp == UH_WLOCKED)
INP_WUNLOCK(inp);
@@ -1719,7 +1751,6 @@ udp_detach(struct socket *so)
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
- /* XXX defer to epoch_call */
inp->inp_ppcb = NULL;
in_pcbdetach(inp);
in_pcbfree(inp);
diff --git a/freebsd/sys/netinet/udplite.h b/freebsd/sys/netinet/udplite.h
index 0e23cd70..57a1422a 100644
--- a/freebsd/sys/netinet/udplite.h
+++ b/freebsd/sys/netinet/udplite.h
@@ -29,6 +29,17 @@
#ifndef _NETINET_UDPLITE_H_
#define _NETINET_UDPLITE_H_
+/*
+ * UDP-Lite protocol header.
+ * Per RFC 3828, July, 2004.
+ */
+struct udplitehdr {
+ u_short udplite_sport; /* UDP-Lite source port */
+ u_short udplite_dport; /* UDP-Lite destination port */
+ u_short udplite_coverage; /* UDP-Lite checksum coverage */
+ u_short udplite_checksum; /* UDP-Lite checksum */
+};
+
/*
* User-settable options (used with setsockopt).
*/
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 70103fe3..0b0c7b91 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/hash.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
@@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/syslog.h>
+#include <machine/atomic.h>
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
@@ -67,58 +70,110 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
-static void frag6_deq(struct ip6asfrag *);
-static void frag6_insque(struct ip6q *, struct ip6q *);
-static void frag6_remque(struct ip6q *);
-static void frag6_freef(struct ip6q *);
-
-static struct mtx ip6qlock;
/*
- * These fields all protected by ip6qlock.
+ * Reassembly headers are stored in hash buckets.
*/
-static VNET_DEFINE(u_int, frag6_nfragpackets);
-static VNET_DEFINE(u_int, frag6_nfrags);
-static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */
+#define IP6REASS_NHASH_LOG2 10
+#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
+#define IP6REASS_HMASK (IP6REASS_NHASH - 1)
+
+static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
+ uint32_t bucket __unused);
+static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
+static void frag6_insque_head(struct ip6q *, struct ip6q *,
+ uint32_t bucket);
+static void frag6_remque(struct ip6q *, uint32_t bucket);
+static void frag6_freef(struct ip6q *, uint32_t bucket);
+
+struct ip6qbucket {
+ struct ip6q ip6q;
+ struct mtx lock;
+ int count;
+};
+
+VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
+volatile u_int frag6_nfrags = 0;
+VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
+VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
#define V_frag6_nfragpackets VNET(frag6_nfragpackets)
-#define V_frag6_nfrags VNET(frag6_nfrags)
#define V_ip6q VNET(ip6q)
+#define V_ip6q_hashseed VNET(ip6q_hashseed)
-#define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
-#define IP6Q_LOCK() mtx_lock(&ip6qlock)
-#define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock)
-#define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED)
-#define IP6Q_UNLOCK() mtx_unlock(&ip6qlock)
+#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
+#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
+#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
+#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
+#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
/*
+ * By default, limit the number of IP6 fragments across all reassembly
+ * queues to 1/32 of the total number of mbuf clusters.
+ *
+ * Limit the total number of reassembly queues per VNET to the
+ * IP6 fragment limit, but ensure the limit will not allow any bucket
+ * to grow above 100 items. (The bucket limit is
+ * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so the 50 is the correct
+ * multiplier to reach a 100-item limit.)
+ * The 100-item limit was chosen as brief testing seems to show that
+ * this produces "reasonable" performance on some subset of systems
+ * under DoS attack.
+ */
+#define IP6_MAXFRAGS (nmbclusters / 32)
+#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
+
+/*
* Initialise reassembly queue and fragment identifier.
*/
+void
+frag6_set_bucketsize()
+{
+ int i;
+
+ if ((i = V_ip6_maxfragpackets) > 0)
+ V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
+}
+
static void
frag6_change(void *tag)
{
+ VNET_ITERATOR_DECL(vnet_iter);
- V_ip6_maxfragpackets = nmbclusters / 4;
- V_ip6_maxfrags = nmbclusters / 4;
+ ip6_maxfrags = IP6_MAXFRAGS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
void
frag6_init(void)
{
-
- V_ip6_maxfragpackets = nmbclusters / 4;
- V_ip6_maxfrags = nmbclusters / 4;
- V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
-
+ struct ip6q *q6;
+ int i;
+
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ q6 = IP6Q_HEAD(i);
+ q6->ip6q_next = q6->ip6q_prev = q6;
+ mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
+ V_ip6q[i].count = 0;
+ }
+ V_ip6q_hashseed = arc4random();
+ V_ip6_maxfragsperpacket = 64;
if (!IS_DEFAULT_VNET(curvnet))
return;
+ ip6_maxfrags = IP6_MAXFRAGS;
EVENTHANDLER_REGISTER(nmbclusters_change,
frag6_change, NULL, EVENTHANDLER_PRI_ANY);
-
- IP6Q_LOCK_INIT();
}
/*
@@ -159,12 +214,15 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp, *t;
struct ip6_hdr *ip6;
struct ip6_frag *ip6f;
- struct ip6q *q6;
+ struct ip6q *head, *q6;
struct ip6asfrag *af6, *ip6af, *af6dwn;
struct in6_ifaddr *ia;
int offset = *offp, nxt, i, next;
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
+ uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
+ sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
+ uint32_t hash, *hashkeyp;
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
#ifdef RSS
@@ -233,19 +291,38 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
return (ip6f->ip6f_nxt);
}
- IP6Q_LOCK();
+ /* Get fragment length and discard 0-byte fragments. */
+ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
+ if (frgpartlen == 0) {
+ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen));
+ in6_ifstat_inc(dstifp, ifs6_reass_fail);
+ IP6STAT_INC(ip6s_fragdropped);
+ return IPPROTO_DONE;
+ }
+
+ hashkeyp = hashkey;
+ memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ *hashkeyp = ip6f->ip6f_ident;
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
+ hash &= IP6REASS_HMASK;
+ head = IP6Q_HEAD(hash);
+ IP6Q_LOCK(hash);
/*
* Enforce upper bound on number of fragments.
* If maxfrag is 0, never accept fragments.
* If maxfrag is -1, accept all fragments without limitation.
*/
- if (V_ip6_maxfrags < 0)
+ if (ip6_maxfrags < 0)
;
- else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
+ else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
goto dropfrag;
- for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
+ for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
if (ip6f->ip6f_ident == q6->ip6q_ident &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
@@ -255,7 +332,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
)
break;
- if (q6 == &V_ip6q) {
+ if (q6 == head) {
/*
* the first fragment to arrive, create a reassembly queue.
*/
@@ -270,9 +347,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
*/
if (V_ip6_maxfragpackets < 0)
;
- else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
+ else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
+ atomic_load_int(&V_frag6_nfragpackets) >=
+ (u_int)V_ip6_maxfragpackets)
goto dropfrag;
- V_frag6_nfragpackets++;
+ atomic_add_int(&V_frag6_nfragpackets, 1);
q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
M_NOWAIT);
if (q6 == NULL)
@@ -285,7 +364,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
mac_ip6q_create(m, q6);
#endif
- frag6_insque(q6, &V_ip6q);
+ frag6_insque_head(q6, head, hash);
/* ip6q_nxt will be filled afterwards, from 1st fragment */
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
@@ -319,21 +398,20 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
* in size.
* If it would exceed, discard the fragment and return an ICMP error.
*/
- frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
if (q6->ip6q_unfrglen >= 0) {
/* The 1st fragment has already arrived. */
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
return (IPPROTO_DONE);
}
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
return (IPPROTO_DONE);
}
/*
@@ -352,7 +430,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
int erroff = af6->ip6af_offset;
/* dequeue the fragment. */
- frag6_deq(af6);
+ frag6_deq(af6, hash);
free(af6, M_FTABLE);
/* adjust pointer. */
@@ -450,7 +528,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
af6 = af6->ip6af_down;
m_freem(IP6_REASS_MBUF(af6->ip6af_up));
- frag6_deq(af6->ip6af_up);
+ frag6_deq(af6->ip6af_up, hash);
}
#else
/*
@@ -499,29 +577,38 @@ insert:
/*
* Stick new segment in its place;
* check for complete reassembly.
+ * If not complete, check fragment limit.
* Move to front of packet queue, as we are
* the most recently active fragmented packet.
*/
- frag6_enq(ip6af, af6->ip6af_up);
- V_frag6_nfrags++;
+ frag6_enq(ip6af, af6->ip6af_up, hash);
+ atomic_add_int(&frag6_nfrags, 1);
q6->ip6q_nfrag++;
#if 0 /* xxx */
- if (q6 != V_ip6q.ip6q_next) {
- frag6_remque(q6);
- frag6_insque(q6, &V_ip6q);
+ if (q6 != head->ip6q_next) {
+ frag6_remque(q6, hash);
+ frag6_insque_head(q6, head, hash);
}
#endif
next = 0;
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down) {
if (af6->ip6af_off != next) {
- IP6Q_UNLOCK();
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
+ IP6STAT_INC(ip6s_fragdropped);
+ frag6_freef(q6, hash);
+ }
+ IP6Q_UNLOCK(hash);
return IPPROTO_DONE;
}
next += af6->ip6af_frglen;
}
if (af6->ip6af_up->ip6af_mff) {
- IP6Q_UNLOCK();
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
+ IP6STAT_INC(ip6s_fragdropped);
+ frag6_freef(q6, hash);
+ }
+ IP6Q_UNLOCK(hash);
return IPPROTO_DONE;
}
@@ -531,7 +618,7 @@ insert:
ip6af = q6->ip6q_down;
t = m = IP6_REASS_MBUF(ip6af);
af6 = ip6af->ip6af_down;
- frag6_deq(ip6af);
+ frag6_deq(ip6af, hash);
while (af6 != (struct ip6asfrag *)q6) {
m->m_pkthdr.csum_flags &=
IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
@@ -539,7 +626,7 @@ insert:
IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
af6dwn = af6->ip6af_down;
- frag6_deq(af6);
+ frag6_deq(af6, hash);
while (t->m_next)
t = t->m_next;
m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
@@ -566,13 +653,13 @@ insert:
#endif
if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, hash);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
goto dropfrag;
}
@@ -583,14 +670,14 @@ insert:
m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
(caddr_t)&nxt);
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, hash);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_reassemble(q6, m);
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
int plen = 0;
@@ -612,7 +699,7 @@ insert:
m_tag_prepend(m, mtag);
#endif
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
@@ -634,7 +721,7 @@ insert:
return nxt;
dropfrag:
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
m_freem(m);
@@ -645,19 +732,19 @@ insert:
* Free a fragment reassembly header and all
* associated datagrams.
*/
-void
-frag6_freef(struct ip6q *q6)
+static void
+frag6_freef(struct ip6q *q6, uint32_t bucket)
{
struct ip6asfrag *af6, *down6;
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = down6) {
struct mbuf *m = IP6_REASS_MBUF(af6);
down6 = af6->ip6af_down;
- frag6_deq(af6);
+ frag6_deq(af6, bucket);
/*
* Return ICMP time exceeded error for the 1st fragment.
@@ -679,24 +766,25 @@ frag6_freef(struct ip6q *q6)
m_freem(m);
free(af6, M_FTABLE);
}
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, bucket);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
}
/*
* Put an ip fragment on a reassembly chain.
* Like insque, but pointers in middle of structure.
*/
-void
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
+static void
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
+ uint32_t bucket __unused)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
af6->ip6af_up = up6;
af6->ip6af_down = up6->ip6af_down;
@@ -707,36 +795,41 @@ frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
/*
* To frag6_enq as remque is to insque.
*/
-void
-frag6_deq(struct ip6asfrag *af6)
+static void
+frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
af6->ip6af_up->ip6af_down = af6->ip6af_down;
af6->ip6af_down->ip6af_up = af6->ip6af_up;
}
-void
-frag6_insque(struct ip6q *new, struct ip6q *old)
+static void
+frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
+ KASSERT(IP6Q_HEAD(bucket) == old,
+ ("%s: attempt to insert at head of wrong bucket"
+ " (bucket=%u, old=%p)", __func__, bucket, old));
new->ip6q_prev = old;
new->ip6q_next = old->ip6q_next;
old->ip6q_next->ip6q_prev= new;
old->ip6q_next = new;
+ V_ip6q[bucket].count++;
}
-void
-frag6_remque(struct ip6q *p6)
+static void
+frag6_remque(struct ip6q *p6, uint32_t bucket)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
p6->ip6q_prev->ip6q_next = p6->ip6q_next;
p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
+ V_ip6q[bucket].count--;
}
/*
@@ -748,37 +841,72 @@ void
frag6_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- struct ip6q *q6;
+ struct ip6q *head, *q6;
+ int i;
VNET_LIST_RLOCK_NOSLEEP();
- IP6Q_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- q6 = V_ip6q.ip6q_next;
- if (q6)
- while (q6 != &V_ip6q) {
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ IP6Q_LOCK(i);
+ head = IP6Q_HEAD(i);
+ q6 = head->ip6q_next;
+ if (q6 == NULL) {
+ /*
+ * XXXJTL: This should never happen. This
+ * should turn into an assertion.
+ */
+ IP6Q_UNLOCK(i);
+ continue;
+ }
+ while (q6 != head) {
--q6->ip6q_ttl;
q6 = q6->ip6q_next;
if (q6->ip6q_prev->ip6q_ttl == 0) {
IP6STAT_INC(ip6s_fragtimeout);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(q6->ip6q_prev);
+ frag6_freef(q6->ip6q_prev, i);
}
}
+ /*
+ * If we are over the maximum number of fragments
+ * (due to the limit being lowered), drain off
+ * enough to get down to the new limit.
+ * Note that we drain all reassembly queues if
+ * maxfragpackets is 0 (fragmentation is disabled),
+ * and don't enforce a limit when maxfragpackets
+ * is negative.
+ */
+ while ((V_ip6_maxfragpackets == 0 ||
+ (V_ip6_maxfragpackets > 0 &&
+ V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
+ head->ip6q_prev != head) {
+ IP6STAT_INC(ip6s_fragoverflow);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_prev, i);
+ }
+ IP6Q_UNLOCK(i);
+ }
/*
- * If we are over the maximum number of fragments
- * (due to the limit being lowered), drain off
- * enough to get down to the new limit.
+ * If we are still over the maximum number of fragmented
+ * packets, drain off enough to get down to the new limit.
*/
- while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
- V_ip6q.ip6q_prev) {
- IP6STAT_INC(ip6s_fragoverflow);
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(V_ip6q.ip6q_prev);
+ i = 0;
+ while (V_ip6_maxfragpackets >= 0 &&
+ atomic_load_int(&V_frag6_nfragpackets) >
+ (u_int)V_ip6_maxfragpackets) {
+ IP6Q_LOCK(i);
+ head = IP6Q_HEAD(i);
+ if (head->ip6q_prev != head) {
+ IP6STAT_INC(ip6s_fragoverflow);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_prev, i);
+ }
+ IP6Q_UNLOCK(i);
+ i = (i + 1) % IP6REASS_NHASH;
}
CURVNET_RESTORE();
}
- IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
@@ -789,22 +917,25 @@ void
frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
+ struct ip6q *head;
+ int i;
VNET_LIST_RLOCK_NOSLEEP();
- if (IP6Q_TRYLOCK() == 0) {
- VNET_LIST_RUNLOCK_NOSLEEP();
- return;
- }
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- while (V_ip6q.ip6q_next != &V_ip6q) {
- IP6STAT_INC(ip6s_fragdropped);
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(V_ip6q.ip6q_next);
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ if (IP6Q_TRYLOCK(i) == 0)
+ continue;
+ head = IP6Q_HEAD(i);
+ while (head->ip6q_next != head) {
+ IP6STAT_INC(ip6s_fragdropped);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_next, i);
+ }
+ IP6Q_UNLOCK(i);
}
CURVNET_RESTORE();
}
- IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 4d06ca16..2b080169 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -126,8 +126,8 @@ VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
VNET_DECLARE(struct inpcbhead, ripcb);
VNET_DECLARE(int, icmp6errppslim);
-static VNET_DEFINE(int, icmp6errpps_count) = 0;
-static VNET_DEFINE(struct timeval, icmp6errppslim_last);
+VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
+VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
VNET_DECLARE(int, icmp6_nodeinfo);
#define V_ripcbinfo VNET(ripcbinfo)
@@ -1910,6 +1910,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
+ struct epoch_tracker et;
struct mbuf *opts = NULL;
#ifndef PULLDOWN_TEST
@@ -1936,8 +1937,8 @@ icmp6_rip6_input(struct mbuf **mp, int off)
return (IPPROTO_DONE);
}
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
if ((in6p->inp_vflag & INP_IPV6) == 0)
continue;
if (in6p->inp_ip_p != IPPROTO_ICMPV6)
@@ -2014,7 +2015,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
}
last = in6p;
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (last->inp_flags & INP_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 3ed80c9c..c415cf78 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -2139,9 +2139,6 @@ in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
lltable_unlink_entry(llt, lle);
}
- if (callout_stop(&lle->lle_timer) > 0)
- LLE_REMREF(lle);
-
llentry_free(lle);
}
diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c
index cf79797d..e5e8a161 100644
--- a/freebsd/sys/netinet6/in6_fib.c
+++ b/freebsd/sys/netinet6/in6_fib.c
@@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -173,6 +173,7 @@ int
fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
@@ -222,6 +223,7 @@ int
fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c
index 160a0929..66b4c63a 100644
--- a/freebsd/sys/netinet6/in6_gif.c
+++ b/freebsd/sys/netinet6/in6_gif.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,20 +41,19 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/kernel.h>
-#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
-#include <sys/protosw.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@@ -63,52 +63,189 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#ifdef INET
#include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
#endif
#include <netinet/ip_encap.h>
-#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
-#endif
-#include <netinet/ip_ecn.h>
-#ifdef INET6
+#include <netinet6/scope6_var.h>
#include <netinet6/ip6_ecn.h>
#include <netinet6/in6_fib.h>
-#endif
#include <net/if_gif.h>
#define GIF_HLIM 30
-static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+VNET_DEFINE_STATIC(int, ip6_gif_hlim) = GIF_HLIM;
#define V_ip6_gif_hlim VNET(ip6_gif_hlim)
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip6_gif_hlim), 0, "");
-
-static int in6_gif_input(struct mbuf **, int *, int);
-
-extern struct domain inet6domain;
-static struct protosw in6_gif_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inet6domain,
- .pr_protocol = 0, /* IPPROTO_IPV[46] */
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in6_gif_input,
- .pr_output = rip6_output,
- .pr_ctloutput = rip6_ctloutput,
- .pr_usrreqs = &rip6_usrreqs
-};
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0,
+ "Default hop limit for encapsulated packets");
+
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
+ */
+VNET_DEFINE_STATIC(struct gif_list *, ipv6_hashtbl) = NULL;
+VNET_DEFINE_STATIC(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER();
+#define V_ipv6_hashtbl VNET(ipv6_hashtbl)
+#define V_ipv6_list VNET(ipv6_list)
+
+#define GIF_HASH(src, dst) (V_ipv6_hashtbl[\
+ in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\
+ &(sc)->gif_ip6hdr->ip6_dst)
+static uint32_t
+in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT);
+ return (fnv_32_buf(dst, sizeof(*dst), ret));
+}
+
+static int
+in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src,
+ const struct in6_addr *dst)
+{
+ struct gif_softc *tmp;
+
+ if (sc->gif_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst))
+ return (EEXIST);
+
+ CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) {
+ if (tmp == sc)
+ continue;
+ if (IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_src, src) &&
+ IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_dst, dst))
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
+
+static void
+in6_gif_attach(struct gif_softc *sc)
+{
+
+ if (sc->gif_options & GIF_IGNORE_SOURCE)
+ CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain);
+ else
+ CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
+}
+
+int
+in6_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ MPASS(sc->gif_family == AF_INET6);
+ MPASS(sc->gif_options != options);
+
+ if ((options & GIF_IGNORE_SOURCE) !=
+ (sc->gif_options & GIF_IGNORE_SOURCE)) {
+ CK_LIST_REMOVE(sc, chain);
+ sc->gif_options = options;
+ in6_gif_attach(sc);
+ }
+ return (0);
+}
+
+int
+in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+ struct in6_ifreq *ifr = (struct in6_ifreq *)data;
+ struct sockaddr_in6 *dst, *src;
+ struct ip6_hdr *ip6;
+ int error;
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ error = EINVAL;
+ switch (cmd) {
+ case SIOCSIFPHYADDR_IN6:
+ src = &((struct in6_aliasreq *)data)->ifra_addr;
+ dst = &((struct in6_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin6_family != dst->sin6_family ||
+ src->sin6_family != AF_INET6 ||
+ src->sin6_len != dst->sin6_len ||
+ src->sin6_len != sizeof(*src))
+ break;
+ if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ if ((error = sa6_embedscope(src, 0)) != 0 ||
+ (error = sa6_embedscope(dst, 0)) != 0)
+ break;
+
+ if (V_ipv6_hashtbl == NULL)
+ V_ipv6_hashtbl = gif_hashinit();
+ error = in6_gif_checkdup(sc, &src->sin6_addr,
+ &dst->sin6_addr);
+ if (error == EADDRNOTAVAIL)
+ break;
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO);
+ ip6->ip6_src = src->sin6_addr;
+ ip6->ip6_dst = dst->sin6_addr;
+ ip6->ip6_vfc = IPV6_VERSION;
+ if (sc->gif_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GIF_WAIT();
+ free(sc->gif_hdr, M_GIF);
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gif_family = AF_INET6;
+ sc->gif_ip6hdr = ip6;
+ in6_gif_attach(sc);
+ break;
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gif_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in6 *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin6_family = AF_INET6;
+ src->sin6_len = sizeof(*src);
+ src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ?
+ sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ if (error == 0)
+ error = sa6_recoverscope(src);
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ }
+ return (error);
+}
int
in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
- GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip6_hdr *ip6;
int len;
/* prepend new IP header */
+ MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@@ -128,14 +265,8 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
#endif
ip6 = mtod(m, struct ip6_hdr *);
- GIF_RLOCK(sc);
- if (sc->gif_family != AF_INET6) {
- m_freem(m);
- GIF_RUNLOCK(sc);
- return (ENETDOWN);
- }
+ MPASS(sc->gif_family == AF_INET6);
bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
- GIF_RUNLOCK(sc);
ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
ip6->ip6_nxt = proto;
@@ -149,15 +280,14 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
static int
-in6_gif_input(struct mbuf **mp, int *offp, int proto)
+in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct mbuf *m = *mp;
+ struct gif_softc *sc = arg;
struct ifnet *gifp;
- struct gif_softc *sc;
struct ip6_hdr *ip6;
uint8_t ecn;
- sc = encap_getarg(m);
+ MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
@@ -167,7 +297,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
if ((gifp->if_flags & IFF_UP) != 0) {
ip6 = mtod(m, struct ip6_hdr *);
ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- m_adj(m, *offp);
+ m_adj(m, off);
gif_input(m, gifp, proto, ecn);
} else {
m_freem(m);
@@ -176,59 +306,126 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- */
-int
-in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+static int
+in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip6_hdr *ip6;
struct gif_softc *sc;
int ret;
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
- GIF_RLOCK_ASSERT(sc);
+ if (V_ipv6_hashtbl == NULL)
+ return (0);
+ MPASS(in_epoch(net_epoch_preempt));
/*
- * Check for address match. Note that the check is for an incoming
- * packet. We should compare the *source* address in our configuration
- * and the *destination* address of the packet, and vice versa.
+ * NOTE: it is safe to iterate without any locking here, because softc
+ * can be reclaimed only when we are not within net_epoch_preempt
+ * section, but ip_encap lookup+input are executed in epoch section.
*/
ip6 = mtod(m, const struct ip6_hdr *);
- if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+ ret = 0;
+ CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) {
+ /*
+ * This is an inbound packet, its ip6_dst is source address
+ * in softc.
+ */
+ if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
+ &ip6->ip6_dst) &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst,
+ &ip6->ip6_src)) {
+ ret = ENCAP_DRV_LOOKUP;
+ goto done;
+ }
+ }
+ /*
+ * No exact match.
+ * Check the list of interfaces with GIF_IGNORE_SOURCE flag.
+ */
+ CK_LIST_FOREACH(sc, &V_ipv6_list, chain) {
+ if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
+ &ip6->ip6_dst)) {
+ ret = 128 + 8; /* src + proto */
+ goto done;
+ }
+ }
+ return (0);
+done:
+ if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
- ret = 128;
- if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
- if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
- return (0);
- } else
- ret += 128;
-
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop6_basic nh6;
- /* XXX empty scope id */
- if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
- &nh6) != 0)
+ if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src,
+ ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0)
return (0);
if (nh6.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
+ *arg = sc;
return (ret);
}
-int
-in6_gif_attach(struct gif_softc *sc)
+static struct {
+ const struct encap_config encap;
+ const struct encaptab *cookie;
+} ipv6_encap_cfg[] = {
+#ifdef INET
+ {
+ .encap = {
+ .proto = IPPROTO_IPV4,
+ .min_length = sizeof(struct ip6_hdr) +
+ sizeof(struct ip),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ },
+#endif
+ {
+ .encap = {
+ .proto = IPPROTO_IPV6,
+ .min_length = 2 * sizeof(struct ip6_hdr),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ },
+ {
+ .encap = {
+ .proto = IPPROTO_ETHERIP,
+ .min_length = sizeof(struct ip6_hdr) +
+ sizeof(struct etherip_header) +
+ sizeof(struct ether_header),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ }
+};
+
+void
+in6_gif_init(void)
{
+ int i;
- KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
- sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (void *)&in6_gif_protosw, sc);
- if (sc->gif_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+ for (i = 0; i < nitems(ipv6_encap_cfg); i++)
+ ipv6_encap_cfg[i].cookie = ip6_encap_attach(
+ &ipv6_encap_cfg[i].encap, NULL, M_WAITOK);
+}
+
+void
+in6_gif_uninit(void)
+{
+ int i;
+
+ if (IS_DEFAULT_VNET(curvnet)) {
+ for (i = 0; i < nitems(ipv6_encap_cfg); i++)
+ ip6_encap_detach(ipv6_encap_cfg[i].cookie);
+ }
+ if (V_ipv6_hashtbl != NULL)
+ gif_hashdestroy(V_ipv6_hashtbl);
}
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index 81182b4e..1cab31d1 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -759,7 +759,6 @@ _in6_ifdetach(struct ifnet *ifp, int purgeulp)
/*
* nuke any of IPv6 addresses we have
- * XXX: all addresses should be already removed
*/
CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -874,6 +873,7 @@ in6_purgemaddrs(struct ifnet *ifp)
ifma->ifma_protospec == NULL)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
+ in6m_disconnect(inm);
in6m_rele_locked(&purgeinms, inm);
if (__predict_false(ifma6_restart)) {
ifma6_restart = false;
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index 32660c89..3824645d 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -540,6 +540,8 @@ in6m_release(struct in6_multi *inm)
CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma);
KASSERT(ifma->ifma_protospec == NULL,
("%s: ifma_protospec != NULL", __func__));
+ if (ifp == NULL)
+ ifp = ifma->ifma_ifp;
if (ifp != NULL) {
CURVNET_SET(ifp->if_vnet);
@@ -564,8 +566,13 @@ static void in6m_init(void)
taskqgroup_config_gtask_init(NULL, &free_gtask, in6m_release_task, "in6m release task");
}
+#ifdef EARLY_AP_STARTUP
SYSINIT(in6m_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
in6m_init, NULL);
+#else
+SYSINIT(in6m_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_SECOND,
+ in6m_init, NULL);
+#endif
void
@@ -589,11 +596,20 @@ in6m_disconnect(struct in6_multi *inm)
struct ifmultiaddr *ifma, *ll_ifma;
ifp = inm->in6m_ifp;
+
+ if (ifp == NULL)
+ return;
+ inm->in6m_ifp = NULL;
IF_ADDR_WLOCK_ASSERT(ifp);
ifma = inm->in6m_ifma;
+ if (ifma == NULL)
+ return;
if_ref(ifp);
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
if ((ll_ifma = ifma->ifma_llifma) != NULL) {
MPASS(ifma != ll_ifma);
@@ -602,7 +618,10 @@ in6m_disconnect(struct in6_multi *inm)
MPASS(ll_ifma->ifma_ifp == ifp);
if (--ll_ifma->ifma_refcount == 0) {
ifma6_restart = true;
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
if_freemulti(ll_ifma);
}
@@ -629,7 +648,7 @@ in6m_release_deferred(struct in6_multi *inm)
IN6_MULTI_LIST_LOCK_ASSERT();
KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm));
if (--inm->in6m_refcount == 0) {
- in6m_disconnect(inm);
+ MPASS(inm->in6m_ifp == NULL);
SLIST_INIT(&tmp);
inm->in6m_ifma->ifma_protospec = NULL;
MPASS(inm->in6m_ifma->ifma_llifma == NULL);
@@ -1307,6 +1326,7 @@ out_in6m_release:
break;
}
}
+ in6m_disconnect(inm);
in6m_release_deferred(inm);
IF_ADDR_RUNLOCK(ifp);
} else {
@@ -1386,13 +1406,17 @@ in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
+ error = 0;
+ if (ifp)
+ error = mld_change_state(inm, 0);
if (error)
CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
if (ifp)
IF_ADDR_WLOCK(ifp);
+ if (inm->in6m_refcount == 1 && inm->in6m_ifp != NULL)
+ in6m_disconnect(inm);
in6m_release_deferred(inm);
if (ifp)
IF_ADDR_WUNLOCK(ifp);
@@ -1626,16 +1650,13 @@ in6p_findmoptions(struct inpcb *inp)
*/
static void
-inp_gcmoptions(epoch_context_t ctx)
+inp_gcmoptions(struct ip6_moptions *imo)
{
- struct ip6_moptions *imo;
struct in6_mfilter *imf;
struct in6_multi *inm;
struct ifnet *ifp;
size_t idx, nmships;
- imo = __containerof(ctx, struct ip6_moptions, imo6_epoch_ctx);
-
nmships = imo->im6o_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL;
@@ -1665,7 +1686,7 @@ ip6_freemoptions(struct ip6_moptions *imo)
{
if (imo == NULL)
return;
- epoch_call(net_epoch_preempt, &imo->imo6_epoch_ctx, inp_gcmoptions);
+ inp_gcmoptions(imo);
}
/*
@@ -2159,6 +2180,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
IN6_MULTI_UNLOCK();
goto out_im6o_free;
}
+ in6m_acquire(inm);
imo->im6o_membership[idx] = inm;
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
@@ -2193,6 +2215,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
out_im6o_free:
if (error && is_new) {
+ inm = imo->im6o_membership[idx];
+ if (inm != NULL) {
+ IN6_MULTI_LIST_LOCK();
+ in6m_release_deferred(inm);
+ IN6_MULTI_LIST_UNLOCK();
+ }
imo->im6o_membership[idx] = NULL;
--imo->im6o_num_memberships;
}
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index 488cca86..a30cb98b 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -131,6 +131,12 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
int error, lookupflags = 0;
int reuseport = (so->so_options & SO_REUSEPORT);
+ /*
+ * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
+ * so that we don't have to add to the (already messy) code below.
+ */
+ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
+
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -138,7 +144,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
return (EADDRNOTAVAIL);
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
@@ -172,6 +178,13 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ /*
+ * XXX: How to deal with SO_REUSEPORT_LB here?
+ * Treat same as SO_REUSEPORT for now.
+ */
+ if ((so->so_options &
+ (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
+ reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct ifaddr *ifa;
@@ -221,7 +234,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) ||
+ (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
#ifndef __rtems__
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
@@ -279,9 +293,11 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
tw = intotw(t);
if (tw == NULL ||
- (reuseport & tw->tw_so_options) == 0)
+ ((reuseport & tw->tw_so_options) == 0 &&
+ (reuseport_lb & tw->tw_so_options) == 0))
return (EADDRINUSE);
- } else if (t && (reuseport & inp_so_options(t)) == 0) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0) {
return (EADDRINUSE);
}
#ifdef INET
@@ -291,22 +307,25 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
in6_sin6_2_sin(&sin, sin6);
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
- lport, lookupflags, cred);
+ lport, lookupflags, cred);
if (t && t->inp_flags & INP_TIMEWAIT) {
tw = intotw(t);
if (tw == NULL)
return (EADDRINUSE);
if ((reuseport & tw->tw_so_options) == 0
+ && (reuseport_lb & tw->tw_so_options) == 0
&& (ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || ((inp->inp_vflag &
- INP_IPV6PROTO) ==
- (t->inp_vflag & INP_IPV6PROTO))))
+ INADDR_ANY || ((inp->inp_vflag &
+ INP_IPV6PROTO) ==
+ (t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
} else if (t &&
(reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0 &&
(ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_vflag & INP_IPV6PROTO) != 0))
+ (t->inp_vflag & INP_IPV6PROTO) != 0)) {
return (EADDRINUSE);
+ }
}
#endif
}
@@ -644,7 +663,7 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
}
errno = inet6ctlerrmap[cmd];
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
+ CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
INP_WLOCK(inp);
if ((inp->inp_vflag & INP_IPV6) == 0) {
INP_WUNLOCK(inp);
@@ -721,7 +740,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -751,7 +770,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
*/
porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->ipi_porthashmask)];
- LIST_FOREACH(phd, porthash, phd_hash) {
+ CK_LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
}
@@ -760,7 +779,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
* Port is in use by one or more PCBs. Look for best
* fit.
*/
- LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+ CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
!prison_equal_ip6(cred->cr_prison,
@@ -802,7 +821,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
int i, gap;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
@@ -841,16 +860,10 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
* (by a redirect), time to try a default gateway again.
*/
void
-in6_losing(struct inpcb *in6p)
+in6_losing(struct inpcb *inp)
{
- if (in6p->inp_route6.ro_rt) {
- RTFREE(in6p->inp_route6.ro_rt);
- in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
- }
- if (in6p->inp_route.ro_lle)
- LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */
- return;
+ RO_INVALIDATE_CACHE(&inp->inp_route6);
}
/*
@@ -858,18 +871,67 @@ in6_losing(struct inpcb *in6p)
* and allocate a (hopefully) better one.
*/
struct inpcb *
-in6_rtchange(struct inpcb *inp, int errno)
+in6_rtchange(struct inpcb *inp, int errno __unused)
{
- if (inp->inp_route6.ro_rt) {
- RTFREE(inp->inp_route6.ro_rt);
- inp->inp_route6.ro_rt = (struct rtentry *)NULL;
- }
- if (inp->inp_route.ro_lle)
- LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
+ RO_INVALIDATE_CACHE(&inp->inp_route6);
return inp;
}
+/*
+ * Look up a PCB in the load-balancing groups hashed under lport.  A group
+ * bound to exactly laddr wins at once; otherwise the last group bound to
+ * the unspecified address is used, and only if lookupflags requests
+ * INPLOOKUP_WILDCARD.  Returns NULL when no group matches.
+ * NOTE(review): assumes il_inpcnt is never 0 for a listed group - confirm.
+ */
+static struct inpcb *
+in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcb *local_wild = NULL;
+	const struct inpcblbgrouphead *hdr;
+	struct inpcblbgroup *grp;
+	uint32_t idx;
+	int pkt_hash;
+
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
+	hdr = &pcbinfo->ipi_lbgrouphashbase[INP_PCBLBGROUP_PORTHASH(
+	    lport, pcbinfo->ipi_lbgrouphashmask)];
+
+	/*
+	 * Order of socket selection:
+	 * 1. non-wild.
+	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
+	 *
+	 * NOTE:
+	 * - Load balanced group does not contain jailed sockets.
+	 * - Nor does it contain IPv4 mapped INET6 wild sockets.
+	 */
+	CK_LIST_FOREACH(grp, hdr, il_list) {
+#ifdef INET
+		if (!(grp->il_vflag & INP_IPV6))
+			continue;
+#endif
+		if (grp->il_lport != lport)
+			continue;
+
+		/* Pick the group member from the (faddr, lport, fport) hash. */
+		pkt_hash = INP_PCBLBGROUP_PKTHASH(
+		    INP6_PCBHASHKEY(faddr), lport, fport);
+		idx = pkt_hash % grp->il_inpcnt;
+
+		if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr))
+			return (grp->il_inp[idx]);
+		if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
+		    (lookupflags & INPLOOKUP_WILDCARD))
+			local_wild = grp->il_inp[idx];
+	}
+	return (local_wild);
+}
+
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -891,7 +953,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
INP_GROUP_LOCK(pcbgroup);
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -932,7 +994,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
*/
head = &pcbgroup->ipg_hashbase[
INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -994,7 +1056,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1094,7 +1156,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
tmpinp = NULL;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1117,6 +1179,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
return (tmpinp);
/*
+ * Then look in lb group (for wildcard match).
+ */
+ if (pcbinfo->ipi_lbgrouphashbase != NULL &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
+ fport, lookupflags);
+ if (inp != NULL) {
+ return (inp);
+ }
+ }
+
+ /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -1134,7 +1208,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1192,40 +1266,35 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
struct ifnet *ifp)
{
struct inpcb *inp;
- bool locked;
INP_HASH_RLOCK(pcbinfo);
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- locked = INP_TRY_WLOCK(inp);
- else if (lookupflags & INPLOOKUP_RLOCKPCB)
- locked = INP_TRY_RLOCK(inp);
- else
- panic("%s: locking bug", __func__);
- if (!locked)
- in_pcbref(inp);
- INP_HASH_RUNLOCK(pcbinfo);
- if (!locked) {
- if (lookupflags & INPLOOKUP_WLOCKPCB) {
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (NULL);
- } else {
- INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (NULL);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
}
- }
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ inp = NULL;
+ }
+ } else
+ panic("%s: locking bug", __func__);
#ifdef INVARIANTS
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- INP_WLOCK_ASSERT(inp);
- else
- INP_RLOCK_ASSERT(inp);
+ if (inp != NULL) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WLOCK_ASSERT(inp);
+ else
+ INP_RLOCK_ASSERT(inp);
+ }
#endif
- } else
- INP_HASH_RUNLOCK(pcbinfo);
+ }
+ INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index 756ea48b..cf62e60c 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -173,7 +173,7 @@ struct protosw inet6sw[] = {
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_TCP,
- .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN,
+ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|PR_LISTEN,
.pr_input = tcp6_input,
.pr_ctlinput = tcp6_ctlinput,
.pr_ctloutput = tcp_ctloutput,
@@ -387,7 +387,9 @@ VNET_DEFINE(int, ip6_no_radr) = 0;
VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
+int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
+VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */
+VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
* process? */
@@ -474,6 +476,20 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
return (0);
}
+/*
+ * Handler for the net.inet6.ip6 "maxfragpackets" sysctl.  The current
+ * V_ip6_maxfragpackets is copied into a local and passed to
+ * sysctl_handle_int(); on error, or when the request carries no new value,
+ * that result is returned unchanged.  Otherwise the new value is stored and
+ * frag6_set_bucketsize() is called (presumably to rederive the per-bucket
+ * limit from it - confirm in frag6.c).
+ */
+static int
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = V_ip6_maxfragpackets;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
+ return (error);
+ V_ip6_maxfragpackets = val;
+ frag6_set_bucketsize();
+ return (0);
+}
+
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
"Enable forwarding of IPv6 packets between interfaces");
@@ -486,8 +502,9 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
ip6stat,
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+ sysctl_ip6_maxfragpackets, "I",
"Default maximum number of outstanding fragmented IPv6 packets. "
"A value of 0 means no fragmented packets will be accepted, while a "
"a value of -1 means no limit");
@@ -561,8 +578,16 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
"Use the default scope zone when none is specified");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
- "Maximum allowed number of outstanding IPv6 packet fragments");
+ CTLFLAG_RW, &ip6_maxfrags, 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments. "
+ "A value of 0 means no fragmented packets will be accepted, while a "
+ "value of -1 means no limit");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
+ "Maximum number of reassembly queues per hash bucket");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
+ "Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
"Enable path MTU discovery for multicast packets");
diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c
index 402d9e87..38c89b9b 100644
--- a/freebsd/sys/netinet6/in6_rmx.c
+++ b/freebsd/sys/netinet6/in6_rmx.c
@@ -161,7 +161,7 @@ struct mtuex_arg {
struct rib_head *rnh;
time_t nextstop;
};
-static VNET_DEFINE(struct callout, rtq_mtutimer);
+VNET_DEFINE_STATIC(struct callout, rtq_mtutimer);
#define V_rtq_mtutimer VNET(rtq_mtutimer)
static int
@@ -211,7 +211,7 @@ in6_mtutimo(void *rock)
/*
* Initialize our routing tree.
*/
-static VNET_DEFINE(int, _in6_rt_was_here);
+VNET_DEFINE_STATIC(int, _in6_rt_was_here);
#define V__in6_rt_was_here VNET(_in6_rt_was_here)
int
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index 92f7df4e..1cb71b88 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -129,7 +129,7 @@ static struct sx addrsel_sxlock;
#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock)
#define ADDR_LABEL_NOTAPP (-1)
-static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy);
+VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy);
#define V_defaultaddrpolicy VNET(defaultaddrpolicy)
VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
@@ -975,7 +975,7 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
return(error);
/* XXX: this is redundant when called from in6_pcbbind */
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
inp->inp_flags |= INP_ANONPORT;
@@ -1096,7 +1096,7 @@ struct addrsel_policyent {
TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
-static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab);
+VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab);
#define V_addrsel_policytab VNET(addrsel_policytab)
static void
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 6b4fe1ab..5ed0ae90 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -784,7 +784,7 @@ in6m_rele_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
IN6_MULTI_LIST_LOCK_ASSERT();
if (--inm->in6m_refcount == 0) {
- in6m_disconnect(inm);
+ MPASS(inm->in6m_ifp == NULL);
inm->in6m_ifma->ifma_protospec = NULL;
MPASS(inm->in6m_ifma->ifma_llifma == NULL);
SLIST_INSERT_HEAD(inmh, inm, in6m_nrele);
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 77e32da8..25ab624c 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -722,13 +722,15 @@ ip6_input(struct mbuf *m)
#endif
/*
* Try to forward the packet, but if we fail continue.
+ * ip6_tryforward() does not generate redirects, so fall
+ * through to normal processing if redirects are required.
* ip6_tryforward() does inbound and outbound packet firewall
* processing. If firewall has decided that destination becomes
* our local address, it sets M_FASTFWD_OURS flag. In this
* case skip another inbound firewall processing and update
* ip6 pointer.
*/
- if (V_ip6_forwarding != 0
+ if (V_ip6_forwarding != 0 && V_ip6_sendredirects == 0
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
&& (!IPSEC_ENABLED(ipv6) ||
IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0)
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index a4a8cdf9..c1f66028 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -111,7 +111,6 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -141,19 +140,19 @@ extern int in6_mcast_loop;
extern struct domain inet6domain;
static const struct encaptab *pim6_encap_cookie;
-static const struct protosw in6_pim_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inet6domain,
- .pr_protocol = IPPROTO_PIM,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim6_input,
- .pr_output = rip6_output,
- .pr_ctloutput = rip6_ctloutput,
- .pr_usrreqs = &rip6_usrreqs
-};
static int pim6_encapcheck(const struct mbuf *, int, int, void *);
+static int pim6_input(struct mbuf *, int, int, void *);
+
+/*
+ * Encapsulation configuration for IPPROTO_PIM, handed to ip6_encap_attach()
+ * from ip6_mroute_modevent().  min_length is one struct ip6_hdr plus
+ * PIM_MINLEN.  exact_match is 8, the same value pim6_encapcheck() returns
+ * when it claims a datagram.
+ */
+static const struct encap_config ipv6_encap_cfg = {
+ .proto = IPPROTO_PIM,
+ .min_length = sizeof(struct ip6_hdr) + PIM_MINLEN,
+ .exact_match = 8,
+ .check = pim6_encapcheck,
+ .input = pim6_input
+};
-static VNET_DEFINE(int, ip6_mrouter_ver) = 0;
+
+VNET_DEFINE_STATIC(int, ip6_mrouter_ver) = 0;
#define V_ip6_mrouter_ver VNET(ip6_mrouter_ver)
SYSCTL_DECL(_net_inet6);
@@ -238,7 +237,7 @@ static struct mtx mif6_mtx;
#define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx)
#ifdef MRT6DEBUG
-static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */
+VNET_DEFINE_STATIC(u_int, mrt6debug) = 0; /* debug level */
#define V_mrt6debug VNET(mrt6debug)
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
@@ -291,7 +290,7 @@ SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
"PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
#define PIM6STAT_INC(name) pim6stat.name += 1
-static VNET_DEFINE(int, pim6);
+VNET_DEFINE_STATIC(int, pim6);
#define V_pim6 VNET(pim6)
/*
@@ -1697,16 +1696,12 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
* into the kernel.
*/
static int
-pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim6_encapcheck(const struct mbuf *m __unused, int off __unused,
+ int proto __unused, void *arg __unused)
{
-#ifdef DIAGNOSTIC
KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
- if (proto != IPPROTO_PIM)
- return 0; /* not for us; reject the datagram. */
-
- return 64; /* claim the datagram. */
+ return (8); /* claim the datagram. */
}
/*
@@ -1716,20 +1711,18 @@ pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
* The only message processed is the REGISTER pim message; the pim header
* is stripped off, and the inner packet is passed to register_mforward.
*/
-int
-pim6_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim6_input(struct mbuf *m, int off, int proto, void *arg __unused)
{
struct pim *pim; /* pointer to a pim struct */
struct ip6_hdr *ip6;
int pimlen;
- struct mbuf *m = *mp;
int minlen;
- int off = *offp;
PIM6STAT_INC(pim6s_rcv_total);
ip6 = mtod(m, struct ip6_hdr *);
- pimlen = m->m_pkthdr.len - *offp;
+ pimlen = m->m_pkthdr.len - off;
/*
* Validate lengths
@@ -1906,8 +1899,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
* encapsulated ip6 header.
*/
pim6_input_to_daemon:
- rip6_input(&m, offp, proto);
- return (IPPROTO_DONE);
+ return (rip6_input(&m, &off, proto));
}
static int
@@ -1920,9 +1912,8 @@ ip6_mroute_modevent(module_t mod, int type, void *unused)
MFC6_LOCK_INIT();
MIF6_LOCK_INIT();
- pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
- pim6_encapcheck,
- (const struct protosw *)&in6_pim_protosw, NULL);
+ pim6_encap_cookie = ip6_encap_attach(&ipv6_encap_cfg,
+ NULL, M_WAITOK);
if (pim6_encap_cookie == NULL) {
printf("ip6_mroute: unable to attach pim6 encap\n");
MIF6_LOCK_DESTROY();
@@ -1943,7 +1934,7 @@ ip6_mroute_modevent(module_t mod, int type, void *unused)
return EINVAL;
if (pim6_encap_cookie) {
- encap_detach(pim6_encap_cookie);
+ ip6_encap_detach(pim6_encap_cookie);
pim6_encap_cookie = NULL;
}
X_ip6_mrouter_done();
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index 1841829a..d3e530a6 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -201,18 +201,10 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
csum = 0xffff;
offset += m->m_pkthdr.csum_data; /* checksum offset */
- if (offset + sizeof(u_short) > m->m_len) {
- printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
- "csum_flags=%b\n", __func__, m->m_len, plen, offset,
- (int)m->m_pkthdr.csum_flags, CSUM_BITS);
- /*
- * XXX this should not happen, but if it does, the correct
- * behavior may be to insert the checksum in the appropriate
- * next mbuf in the chain.
- */
- return;
- }
- *(u_short *)(m->m_data + offset) = csum;
+ if (offset + sizeof(csum) > m->m_len)
+ m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+ else
+ *(u_short *)mtodo(m, offset) = csum;
}
int
@@ -814,22 +806,16 @@ again:
error = netisr_queue(NETISR_IPV6, m);
goto done;
} else {
- RO_RTFREE(ro);
+ RO_INVALIDATE_CACHE(ro);
needfiblookup = 1; /* Redo the routing table lookup. */
- if (ro->ro_lle)
- LLE_FREE(ro->ro_lle); /* zeros ro_lle */
- ro->ro_lle = NULL;
}
}
/* See if fib was changed by packet filter. */
if (fibnum != M_GETFIB(m)) {
m->m_flags |= M_SKIP_FIREWALL;
fibnum = M_GETFIB(m);
- RO_RTFREE(ro);
+ RO_INVALIDATE_CACHE(ro);
needfiblookup = 1;
- if (ro->ro_lle)
- LLE_FREE(ro->ro_lle); /* zeros ro_lle */
- ro->ro_lle = NULL;
}
if (needfiblookup)
goto again;
@@ -1456,6 +1442,15 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(in6p);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(in6p);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ in6p->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(in6p);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(in6p);
in6p->inp_inc.inc_fibnum = so->so_fibnum;
@@ -1637,11 +1632,17 @@ do { \
error = EINVAL;
break;
}
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(IPV6_HOPLIMIT,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
+ INP_WUNLOCK(in6p);
break;
}
@@ -1751,11 +1752,17 @@ do { \
break;
{
struct ip6_pktopts **optp;
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
+ INP_WUNLOCK(in6p);
break;
}
@@ -1837,10 +1844,16 @@ do { \
break;
optlen = sopt->sopt_valsize;
optbuf = optbuf_storage;
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname, optbuf, optlen,
optp, (td != NULL) ? td->td_ucred : NULL,
uproto);
+ INP_WUNLOCK(in6p);
break;
}
#undef OPTSET
@@ -2287,7 +2300,9 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
if (*pktopt == NULL) {
*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
- M_WAITOK);
+ M_NOWAIT);
+ if (*pktopt == NULL)
+ return (ENOBUFS);
ip6_initpktopts(*pktopt);
}
opt = *pktopt;
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index 74b5f89c..f235572d 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -301,8 +301,10 @@ VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */
VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */
VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
* queue */
-VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly
+extern int ip6_maxfrags; /* Maximum fragments in reassembly
* queue */
+VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */
+VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
@@ -317,7 +319,8 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_mrouter VNET(ip6_mrouter)
#define V_ip6_sendredirects VNET(ip6_sendredirects)
#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
-#define V_ip6_maxfrags VNET(ip6_maxfrags)
+#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
+#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
@@ -404,6 +407,7 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
int route6_input(struct mbuf **, int *, int);
+void frag6_set_bucketsize(void);
void frag6_init(void);
int frag6_input(struct mbuf **, int *, int);
void frag6_slowtimo(void);
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index 0c82d5ff..b00f03ef 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -209,11 +209,11 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state");
/*
* VIMAGE-wide globals.
*/
-static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0};
-static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head);
-static VNET_DEFINE(int, interface_timers_running6);
-static VNET_DEFINE(int, state_change_timers_running6);
-static VNET_DEFINE(int, current_state_timers_running6);
+VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
+VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
+VNET_DEFINE_STATIC(int, interface_timers_running6);
+VNET_DEFINE_STATIC(int, state_change_timers_running6);
+VNET_DEFINE_STATIC(int, current_state_timers_running6);
#define V_mld_gsrdelay VNET(mld_gsrdelay)
#define V_mli_head VNET(mli_head)
@@ -559,6 +559,7 @@ mld_ifdetach(struct ifnet *ifp)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
if (inm->in6m_state == MLD_LEAVING_MEMBER) {
+ in6m_disconnect(inm);
in6m_rele_locked(&inmh, inm);
ifma->ifma_protospec = NULL;
}
@@ -1485,6 +1486,7 @@ mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
case MLD_REPORTING_MEMBER:
if (report_timer_expired) {
inm->in6m_state = MLD_IDLE_MEMBER;
+ in6m_disconnect(inm);
in6m_rele_locked(inmh, inm);
}
break;
@@ -1609,6 +1611,7 @@ mld_v2_process_group_timers(struct in6_multi_head *inmh,
if (inm->in6m_state == MLD_LEAVING_MEMBER &&
inm->in6m_scrv == 0) {
inm->in6m_state = MLD_NOT_MEMBER;
+ in6m_disconnect(inm);
in6m_rele_locked(inmh, inm);
}
}
@@ -1681,7 +1684,8 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
IF_ADDR_WLOCK(ifp);
restart:
CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6)
+ if (ifma->ifma_addr->sa_family != AF_INET6 ||
+ ifma->ifma_protospec == NULL)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
switch (inm->in6m_state) {
@@ -1698,6 +1702,7 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
* version, we need to release the final
* reference held for issuing the INCLUDE {}.
*/
+ in6m_disconnect(inm);
in6m_rele_locked(&inmh, inm);
ifma->ifma_protospec = NULL;
/* FALLTHROUGH */
@@ -1795,8 +1800,11 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
IN6_MULTI_LIST_LOCK_ASSERT();
MLD_LOCK_ASSERT();
-
+
ifp = in6m->in6m_ifp;
+ /* in process of being freed */
+ if (ifp == NULL)
+ return (0);
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
/* ia may be NULL if link-local address is tentative. */
@@ -1894,16 +1902,15 @@ mld_change_state(struct in6_multi *inm, const int delay)
*/
KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
ifp = inm->in6m_ifma->ifma_ifp;
- if (ifp != NULL) {
- /*
- * Sanity check that netinet6's notion of ifp is the
- * same as net's.
- */
- KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
- }
+ if (ifp == NULL)
+ return (0);
+ /*
+ * Sanity check that netinet6's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
MLD_LOCK();
-
mli = MLD_IFINFO(ifp);
KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
@@ -1997,9 +2004,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
* group around for the final INCLUDE {} enqueue.
*/
if (mli->mli_version == MLD_VERSION_2 &&
- inm->in6m_state == MLD_LEAVING_MEMBER)
- in6m_release_deferred(inm);
-
+ inm->in6m_state == MLD_LEAVING_MEMBER) {
+ inm->in6m_refcount--;
+ }
inm->in6m_state = MLD_REPORTING_MEMBER;
switch (mli->mli_version) {
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index 6a36803f..f065815c 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -100,11 +100,11 @@ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage
* collection timer */
/* preventing too many loops in ND option parsing */
-static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
+VNET_DEFINE_STATIC(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper
* layer hints */
-static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
+VNET_DEFINE_STATIC(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
* ND entries */
#define V_nd6_maxndopt VNET(nd6_maxndopt)
#define V_nd6_maxqueuelen VNET(nd6_maxqueuelen)
@@ -144,7 +144,7 @@ static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
static int nd6_need_cache(struct ifnet *);
-static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
+VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
VNET_DEFINE(struct callout, nd6_timer_ch);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index d4ab38af..49810020 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -101,7 +101,7 @@ static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
-static VNET_DEFINE(int, dad_enhanced) = 1;
+VNET_DEFINE_STATIC(int, dad_enhanced) = 1;
#define V_dad_enhanced VNET(dad_enhanced)
SYSCTL_DECL(_net_inet6_ip6);
@@ -109,7 +109,7 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(dad_enhanced), 0,
"Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
-static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to
+VNET_DEFINE_STATIC(int, dad_maxtry) = 15; /* max # of *tries* to
transmit DAD packet */
#define V_dad_maxtry VNET(dad_maxtry)
@@ -1122,8 +1122,8 @@ struct dadq {
bool dad_ondadq; /* on dadq? Protected by DADQ_WLOCK. */
};
-static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-static VNET_DEFINE(struct rwlock, dad_rwlock);
+VNET_DEFINE_STATIC(TAILQ_HEAD(, dadq), dadq);
+VNET_DEFINE_STATIC(struct rwlock, dad_rwlock);
#define V_dadq VNET(dadq)
#define V_dad_rwlock VNET(dad_rwlock)
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index fab7c7c2..a60e7c66 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -96,7 +96,7 @@ static int rt6_deleteroute(const struct rtentry *, void *);
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
-static VNET_DEFINE(struct ifnet *, nd6_defifp);
+VNET_DEFINE_STATIC(struct ifnet *, nd6_defifp);
VNET_DEFINE(int, nd6_defifindex);
#define V_nd6_defifp VNET(nd6_defifp)
diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h
index 7afe89b9..7288c67e 100644
--- a/freebsd/sys/netinet6/pim6_var.h
+++ b/freebsd/sys/netinet6/pim6_var.h
@@ -53,10 +53,6 @@ struct pim6stat {
uint64_t pim6s_snd_registers; /* sent registers */
};
-#if (defined(KERNEL)) || (defined(_KERNEL))
-int pim6_input(struct mbuf **, int*, int);
-#endif /* KERNEL */
-
/*
* Identifiers for PIM sysctl nodes
*/
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index c05399b3..9c3d7a61 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -167,6 +167,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
+ struct epoch_tracker et;
RIP6STAT_INC(rip6s_ipackets);
@@ -174,8 +175,8 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif;
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
/* XXX inp locking */
if ((in6p->inp_vflag & INP_IPV6) == 0)
continue;
@@ -293,7 +294,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
}
last = in6p;
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* Check AH/ESP integrity.
diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c
index 40218287..64b866dd 100644
--- a/freebsd/sys/netinet6/scope6.c
+++ b/freebsd/sys/netinet6/scope6.c
@@ -78,7 +78,7 @@ static struct mtx scope6_lock;
#define SCOPE6_UNLOCK() mtx_unlock(&scope6_lock)
#define SCOPE6_LOCK_ASSERT() mtx_assert(&scope6_lock, MA_OWNED)
-static VNET_DEFINE(struct scope6_id, sid_default);
+VNET_DEFINE_STATIC(struct scope6_id, sid_default);
#define V_sid_default VNET(sid_default)
#define SID(ifp) \
@@ -455,7 +455,7 @@ in6_clearscope(struct in6_addr *in6)
* Return the scope identifier or zero.
*/
uint16_t
-in6_getscope(struct in6_addr *in6)
+in6_getscope(const struct in6_addr *in6)
{
if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h
index a2a9137d..f4e59a19 100644
--- a/freebsd/sys/netinet6/scope6_var.h
+++ b/freebsd/sys/netinet6/scope6_var.h
@@ -63,7 +63,7 @@ int sa6_checkzone(struct sockaddr_in6 *);
int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *);
int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
int in6_clearscope(struct in6_addr *);
-uint16_t in6_getscope(struct in6_addr *);
+uint16_t in6_getscope(const struct in6_addr *);
uint32_t in6_getscopezone(const struct ifnet *, int);
void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *);
struct ifnet* in6_getlinkifnet(uint32_t);
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index fd963fb3..6a3391ee 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -273,6 +273,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
pktdst->sa_len != sizeof(struct sockaddr_in6)) {
return;
}
+
if ((unsigned)cmd >= PRC_NCMDS) {
return;
}
@@ -296,6 +297,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
if (ip6cp->ip6c_m == NULL) {
return;
}
+
/*
* Check if we can safely examine the ports and the
* verification tag of the SCTP common header.
@@ -304,6 +306,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
(int32_t)(ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) {
return;
}
+
/* Copy out the port numbers and the verification tag. */
memset(&sh, 0, sizeof(sh));
m_copydata(ip6cp->ip6c_m,
@@ -529,6 +532,7 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace));
if (error)
@@ -569,6 +573,7 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
if (addr) {
switch (addr->sa_family) {
#ifdef INET
@@ -918,7 +923,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- stcb->asoc.state = SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
/* initialize authentication parameters for the assoc */
@@ -1105,6 +1110,7 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
/* allow v6 addresses precedence */
error = sctp6_getaddr(so, nam);
#ifdef INET
@@ -1140,6 +1146,7 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
/* allow v6 addresses precedence */
error = sctp6_peeraddr(so, nam);
#ifdef INET
diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h
index e3c4359a..4ad0ca28 100644
--- a/freebsd/sys/netinet6/sctp6_var.h
+++ b/freebsd/sys/netinet6/sctp6_var.h
@@ -45,11 +45,11 @@ extern struct pr_usrreqs sctp6_usrreqs;
int sctp6_input(struct mbuf **, int *, int);
int sctp6_input_with_port(struct mbuf **, int *, uint16_t);
-int
+int
sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct proc *);
void sctp6_ctlinput(int, struct sockaddr *, void *);
-void
+void
sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
uint8_t, uint8_t, uint32_t);
#endif
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index c2b32eb1..67ed0e35 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -216,6 +216,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int off = *offp;
int cscov_partial;
int plen, ulen;
+ struct epoch_tracker et;
struct sockaddr_in6 fromsa[2];
struct m_tag *fwd_tag;
uint16_t uh_sum;
@@ -302,7 +303,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_RLOCK_ET(pcbinfo, et);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -320,7 +321,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
*/
pcblist = udp_get_pcblist(nxt);
last = NULL;
- LIST_FOREACH(inp, pcblist, inp_list) {
+ CK_LIST_FOREACH(inp, pcblist, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
if (inp->inp_lport != uh->uh_dport)
@@ -357,6 +358,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int blocked;
INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ continue;
+ }
bzero(&mcaddr, sizeof(struct sockaddr_in6));
mcaddr.sin6_len = sizeof(struct sockaddr_in6);
@@ -384,10 +389,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
NULL) {
INP_RLOCK(last);
- UDP_PROBE(receive, NULL, last, ip6,
- last, uh);
- if (udp6_append(last, n, off, fromsa))
- goto inp_lost;
+ if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last,
+ ip6, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last,
+ ip6, last, uh);
+ if (udp6_append(last, n, off, fromsa))
+ goto inp_lost;
+ }
INP_RUNLOCK(last);
}
}
@@ -401,7 +412,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* will never clear these options after setting them.
*/
if ((last->inp_socket->so_options &
- (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
break;
}
@@ -416,10 +427,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
goto badheadlocked;
}
INP_RLOCK(last);
- INP_INFO_RUNLOCK(pcbinfo);
- UDP_PROBE(receive, NULL, last, ip6, last, uh);
- if (udp6_append(last, m, off, fromsa) == 0)
+ if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip6, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip6, last, uh);
+ if (udp6_append(last, m, off, fromsa) == 0)
+ INP_RUNLOCK(last);
+ } else
INP_RUNLOCK(last);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
inp_lost:
return (IPPROTO_DONE);
}
@@ -475,6 +492,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ntohs(uh->uh_sport));
}
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, NULL, ip6, NULL, uh);
+ else
+ UDP_PROBE(receive, NULL, NULL, ip6, NULL, uh);
UDPSTAT_INC(udps_noport);
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
@@ -495,13 +516,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
}
- UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip6, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
if (udp6_append(inp, m, off, fromsa) == 0)
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
badheadlocked:
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
badunlocked:
if (m)
m_freem(m);
@@ -657,35 +681,38 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
-udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
- struct mbuf *control, struct thread *td)
+udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
+ struct sockaddr *addr6, struct mbuf *control, struct thread *td)
{
- u_int32_t ulen = m->m_pkthdr.len;
- u_int32_t plen = sizeof(struct udphdr) + ulen;
+ struct inpcbinfo *pcbinfo;
+ struct inpcb *inp;
struct ip6_hdr *ip6;
struct udphdr *udp6;
struct in6_addr *laddr, *faddr, in6a;
- struct sockaddr_in6 *sin6 = NULL;
- int cscov_partial = 0;
- int scope_ambiguous = 0;
- u_short fport;
- int error = 0;
- uint8_t nxt;
- uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
- int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
- int flags;
- struct sockaddr_in6 tmp;
+ struct sockaddr_in6 *sin6, tmp;
+ struct epoch_tracker et;
+ int cscov_partial, error, flags, hlen, scope_ambiguous;
+ u_int32_t ulen, plen;
+ uint16_t cscov;
+ u_short fport;
+ uint8_t nxt, unlock_udbinfo;
- INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
+ /* addr6 has been validated in udp6_send(). */
+ sin6 = (struct sockaddr_in6 *)addr6;
- if (addr6) {
- /* addr6 has been validated in udp6_send(). */
- sin6 = (struct sockaddr_in6 *)addr6;
+ /*
+ * In contrast to IPv4 we do not validate the max. packet length
+ * here due to IPv6 Jumbograms (RFC2675).
+ */
- /* protect *sin6 from overwrites */
+ scope_ambiguous = 0;
+ if (sin6) {
+ /* Protect *addr6 from overwrites. */
tmp = *sin6;
sin6 = &tmp;
@@ -699,22 +726,86 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
*/
if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
scope_ambiguous = 1;
- if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
+ if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) {
+ if (control)
+ m_freem(control);
+ m_freem(m);
return (error);
+ }
}
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_RLOCK(inp);
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
+
+#ifdef INET
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+ int hasv4addr;
+
+ if (sin6 == NULL)
+ hasv4addr = (inp->inp_vflag & INP_IPV4);
+ else
+ hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
+ ? 1 : 0;
+ if (hasv4addr) {
+ struct pr_usrreqs *pru;
+
+ /*
+ * XXXRW: We release UDP-layer locks before calling
+ * udp_send() in order to avoid recursion. However,
+ * this does mean there is a short window where inp's
+ * fields are unstable. Could this lead to a
+ * potential race in which the factors causing us to
+ * select the UDPv4 output routine are invalidated?
+ */
+ INP_RUNLOCK(inp);
+ if (sin6)
+ in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
+ /* addr will just be freed in sendit(). */
+ return ((*pru->pru_send)(so, flags_arg, m,
+ (struct sockaddr *)sin6, control, td));
+ }
+ }
+#endif
+
if (control) {
if ((error = ip6_setpktopts(control, &opt,
- inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
- goto release;
+ inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
+ INP_RUNLOCK(inp);
+ ip6_clearpktopts(&opt, -1);
+ if (control)
+ m_freem(control);
+ m_freem(m);
+ return (error);
+ }
optp = &opt;
} else
optp = inp->in6p_outputopts;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
+ if (sin6 != NULL &&
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
+ INP_RUNLOCK(inp);
+ /*
+ * XXX there is a short window here which could lead to a race;
+ * should we re-check that what got us here is still valid?
+ */
+ INP_WLOCK(inp);
+ INP_HASH_WLOCK(pcbinfo);
+ unlock_udbinfo = UH_WLOCKED;
+ } else if (sin6 != NULL &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+ inp->inp_lport == 0)) {
+ INP_HASH_RLOCK_ET(pcbinfo, et);
+ unlock_udbinfo = UH_RLOCKED;
+ } else
+ unlock_udbinfo = UH_UNLOCKED;
+
if (sin6) {
- faddr = &sin6->sin6_addr;
/*
* Since we saw no essential reason for calling in_pcbconnect,
@@ -733,85 +824,47 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
goto release;
}
- fport = sin6->sin6_port; /* allow 0 port */
+ /*
+ * Given we handle the v4mapped case in the INET block above
+ * assert here that it must not happen anymore.
+ */
+ KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr),
+ ("%s: sin6(%p)->sin6_addr is v4mapped which we "
+ "should have handled.", __func__, sin6));
- if (IN6_IS_ADDR_V4MAPPED(faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * I believe we should explicitly discard the
- * packet when mapped addresses are disabled,
- * rather than send the packet as an IPv6 one.
- * If we chose the latter approach, the packet
- * might be sent out on the wire based on the
- * default route, the situation which we'd
- * probably want to avoid.
- * (20010421 jinmei@kame.net)
- */
- error = EINVAL;
- goto release;
- }
- if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
- !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
- /*
- * when remote addr is an IPv4-mapped address,
- * local addr should not be an IPv6 address,
- * since you cannot determine how to map IPv6
- * source address to IPv4.
- */
- error = EINVAL;
- goto release;
- }
+ /* This only requires read-locking. */
+ error = in6_selectsrc_socket(sin6, optp, inp,
+ td->td_ucred, scope_ambiguous, &in6a, NULL);
+ if (error)
+ goto release;
+ laddr = &in6a;
- af = AF_INET;
- }
+ if (inp->inp_lport == 0) {
- if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
- error = in6_selectsrc_socket(sin6, optp, inp,
- td->td_ucred, scope_ambiguous, &in6a, NULL);
- if (error)
+ INP_WLOCK_ASSERT(inp);
+ error = in6_pcbsetport(laddr, inp, td->td_ucred);
+ if (error != 0) {
+ /* Undo an address bind that may have occurred. */
+ inp->in6p_laddr = in6addr_any;
goto release;
- laddr = &in6a;
- } else
- laddr = &inp->in6p_laddr; /* XXX */
- if (laddr == NULL) {
- if (error == 0)
- error = EADDRNOTAVAIL;
- goto release;
- }
- if (inp->inp_lport == 0 &&
- (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
- /* Undo an address bind that may have occurred. */
- inp->in6p_laddr = in6addr_any;
- goto release;
+ }
}
+ faddr = &sin6->sin6_addr;
+ fport = sin6->sin6_port; /* allow 0 port */
+
} else {
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto release;
}
- if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * XXX: this case would happen when the
- * application sets the V6ONLY flag after
- * connecting the foreign address.
- * Such applications should be fixed,
- * so we bark here.
- */
- log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
- "option was set for a connected socket\n");
- error = EINVAL;
- goto release;
- } else
- af = AF_INET;
- }
laddr = &inp->in6p_laddr;
faddr = &inp->in6p_faddr;
fport = inp->inp_fport;
}
- if (af == AF_INET)
- hlen = sizeof(struct ip);
+ ulen = m->m_pkthdr.len;
+ plen = sizeof(struct udphdr) + ulen;
+ hlen = sizeof(struct ip6_hdr);
/*
* Calculate data length and get a mbuf
@@ -826,6 +879,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
/*
* Stuff checksum and output datagram.
*/
+ cscov = cscov_partial = 0;
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
@@ -848,59 +902,59 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
udp6->uh_ulen = 0;
udp6->uh_sum = 0;
- switch (af) {
- case AF_INET6:
- ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)plen);
- ip6->ip6_nxt = nxt;
- ip6->ip6_hlim = in6_selecthlim(inp, NULL);
- ip6->ip6_src = *laddr;
- ip6->ip6_dst = *faddr;
-
- if (cscov_partial) {
- if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
- sizeof(struct ip6_hdr), plen, cscov)) == 0)
- udp6->uh_sum = 0xffff;
- } else {
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- }
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_plen = htons((u_short)plen);
+ ip6->ip6_nxt = nxt;
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ ip6->ip6_src = *laddr;
+ ip6->ip6_dst = *faddr;
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+ flags = 0;
#ifdef RSS
- {
- uint32_t hash_val, hash_type;
- uint8_t pr;
+ {
+ uint32_t hash_val, hash_type;
+ uint8_t pr;
- pr = inp->inp_socket->so_proto->pr_protocol;
- /*
- * Calculate an appropriate RSS hash for UDP and
- * UDP Lite.
- *
- * The called function will take care of figuring out
- * whether a 2-tuple or 4-tuple hash is required based
- * on the currently configured scheme.
- *
- * Later later on connected socket values should be
- * cached in the inpcb and reused, rather than constantly
- * re-calculating it.
- *
- * UDP Lite is a different protocol number and will
- * likely end up being hashed as a 2-tuple until
- * RSS / NICs grow UDP Lite protocol awareness.
- */
- if (rss_proto_software_hash_v6(faddr, laddr, fport,
- inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
- m->m_pkthdr.flowid = hash_val;
- M_HASHTYPE_SET(m, hash_type);
- }
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later on, connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v6(faddr, laddr, fport,
+ inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
}
-#endif
- flags = 0;
-#ifdef RSS
+
/*
* Don't override with the inp cached flowid.
*
@@ -908,27 +962,46 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
* be incorrect.
*/
flags |= IP_NODEFAULTFLOWID;
+ }
#endif
+ UDPSTAT_INC(udps_opackets);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(pcbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
+ else
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
- UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, &inp->inp_route6, flags,
- inp->in6p_moptions, NULL, inp);
- break;
- case AF_INET:
- error = EAFNOSUPPORT;
- goto release;
+ error = ip6_output(m, optp, &inp->inp_route6, flags,
+ inp->in6p_moptions, NULL, inp);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+
+ if (control) {
+ ip6_clearpktopts(&opt, -1);
+ m_freem(control);
}
- goto releaseopt;
+ return (error);
release:
- m_freem(m);
-
-releaseopt:
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(pcbinfo);
+ INP_WUNLOCK(inp);
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ INP_RUNLOCK(inp);
+ } else
+ INP_RUNLOCK(inp);
if (control) {
ip6_clearpktopts(&opt, -1);
m_freem(control);
}
+ m_freem(m);
+
return (error);
}
@@ -1232,15 +1305,8 @@ static int
udp6_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
- struct inpcb *inp;
- struct inpcbinfo *pcbinfo;
- int error = 0;
-
- pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
+ int error;
- INP_WLOCK(inp);
if (addr) {
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
error = EINVAL;
@@ -1252,53 +1318,11 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
}
}
-#ifdef INET
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
- int hasv4addr;
- struct sockaddr_in6 *sin6 = NULL;
-
- if (addr == NULL)
- hasv4addr = (inp->inp_vflag & INP_IPV4);
- else {
- sin6 = (struct sockaddr_in6 *)addr;
- hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
- ? 1 : 0;
- }
- if (hasv4addr) {
- struct pr_usrreqs *pru;
- uint8_t nxt;
-
- nxt = (inp->inp_socket->so_proto->pr_protocol ==
- IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
- /*
- * XXXRW: We release UDP-layer locks before calling
- * udp_send() in order to avoid recursion. However,
- * this does mean there is a short window where inp's
- * fields are unstable. Could this lead to a
- * potential race in which the factors causing us to
- * select the UDPv4 output routine are invalidated?
- */
- INP_WUNLOCK(inp);
- if (sin6)
- in6_sin6_2_sin_in_sock(addr);
- pru = inetsw[ip_protox[nxt]].pr_usrreqs;
- /* addr will just be freed in sendit(). */
- return ((*pru->pru_send)(so, flags, m, addr, control,
- td));
- }
- }
-#endif
-#ifdef MAC
- mac_inpcb_create_mbuf(inp, m);
-#endif
- INP_HASH_WLOCK(pcbinfo);
- error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- return (error);
+ return (udp6_output(so, flags, m, addr, control, td));
bad:
- INP_WUNLOCK(inp);
+ if (control)
+ m_freem(control);
m_freem(m);
return (error);
}
diff --git a/freebsd/sys/netipsec/ipsec.c b/freebsd/sys/netipsec/ipsec.c
index 24a6df5b..116557ed 100644
--- a/freebsd/sys/netipsec/ipsec.c
+++ b/freebsd/sys/netipsec/ipsec.c
@@ -121,11 +121,11 @@ VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE;
/* ECN ignore(-1)/forbidden(0)/allowed(1) */
VNET_DEFINE(int, ip4_ipsec_ecn) = 0;
-static VNET_DEFINE(int, ip4_filtertunnel) = 0;
+VNET_DEFINE_STATIC(int, ip4_filtertunnel) = 0;
#define V_ip4_filtertunnel VNET(ip4_filtertunnel)
-static VNET_DEFINE(int, check_policy_history) = 0;
+VNET_DEFINE_STATIC(int, check_policy_history) = 0;
#define V_check_policy_history VNET(check_policy_history)
-static VNET_DEFINE(struct secpolicy *, def_policy) = NULL;
+VNET_DEFINE_STATIC(struct secpolicy *, def_policy) = NULL;
#define V_def_policy VNET(def_policy)
static int
sysctl_def_policy(SYSCTL_HANDLER_ARGS)
@@ -251,7 +251,7 @@ VNET_DEFINE(int, ip6_ah_trans_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE;
VNET_DEFINE(int, ip6_ipsec_ecn) = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */
-static VNET_DEFINE(int, ip6_filtertunnel) = 0;
+VNET_DEFINE_STATIC(int, ip6_filtertunnel) = 0;
#define V_ip6_filtertunnel VNET(ip6_filtertunnel)
SYSCTL_DECL(_net_inet6_ipsec6);
@@ -1324,9 +1324,10 @@ ok:
}
int
-ipsec_updateid(struct secasvar *sav, uint64_t *new, uint64_t *old)
+ipsec_updateid(struct secasvar *sav, crypto_session_t *new,
+ crypto_session_t *old)
{
- uint64_t tmp;
+ crypto_session_t tmp;
/*
* tdb_cryptoid is initialized by xform_init().
@@ -1352,8 +1353,8 @@ ipsec_updateid(struct secasvar *sav, uint64_t *new, uint64_t *old)
* XXXAE: check this more carefully.
*/
KEYDBG(IPSEC_STAMP,
- printf("%s: SA(%p) moves cryptoid %jd -> %jd\n",
- __func__, sav, (uintmax_t)(*old), (uintmax_t)(*new)));
+ printf("%s: SA(%p) moves cryptoid %p -> %p\n",
+ __func__, sav, *old, *new));
KEYDBG(IPSEC_DATA, kdebug_secasv(sav));
SECASVAR_LOCK(sav);
if (sav->tdb_cryptoid != *old) {
diff --git a/freebsd/sys/netipsec/ipsec.h b/freebsd/sys/netipsec/ipsec.h
index 936e7bca..eed2d077 100644
--- a/freebsd/sys/netipsec/ipsec.h
+++ b/freebsd/sys/netipsec/ipsec.h
@@ -332,7 +332,7 @@ int udp_ipsec_pcbctl(struct inpcb *, struct sockopt *);
int ipsec_chkreplay(uint32_t, struct secasvar *);
int ipsec_updatereplay(uint32_t, struct secasvar *);
-int ipsec_updateid(struct secasvar *, uint64_t *, uint64_t *);
+int ipsec_updateid(struct secasvar *, crypto_session_t *, crypto_session_t *);
int ipsec_initialized(void);
void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, u_int);
diff --git a/freebsd/sys/netipsec/key.c b/freebsd/sys/netipsec/key.c
index fbf12f41..9bd3f234 100644
--- a/freebsd/sys/netipsec/key.c
+++ b/freebsd/sys/netipsec/key.c
@@ -115,20 +115,20 @@
*/
VNET_DEFINE(u_int32_t, key_debug_level) = 0;
-static VNET_DEFINE(u_int, key_spi_trycnt) = 1000;
-static VNET_DEFINE(u_int32_t, key_spi_minval) = 0x100;
-static VNET_DEFINE(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */
-static VNET_DEFINE(u_int32_t, policy_id) = 0;
+VNET_DEFINE_STATIC(u_int, key_spi_trycnt) = 1000;
+VNET_DEFINE_STATIC(u_int32_t, key_spi_minval) = 0x100;
+VNET_DEFINE_STATIC(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */
+VNET_DEFINE_STATIC(u_int32_t, policy_id) = 0;
/*interval to initialize randseed,1(m)*/
-static VNET_DEFINE(u_int, key_int_random) = 60;
+VNET_DEFINE_STATIC(u_int, key_int_random) = 60;
/* interval to expire acquiring, 30(s)*/
-static VNET_DEFINE(u_int, key_larval_lifetime) = 30;
+VNET_DEFINE_STATIC(u_int, key_larval_lifetime) = 30;
/* counter for blocking SADB_ACQUIRE.*/
-static VNET_DEFINE(int, key_blockacq_count) = 10;
+VNET_DEFINE_STATIC(int, key_blockacq_count) = 10;
/* lifetime for blocking SADB_ACQUIRE.*/
-static VNET_DEFINE(int, key_blockacq_lifetime) = 20;
+VNET_DEFINE_STATIC(int, key_blockacq_lifetime) = 20;
/* preferred old sa rather than new sa.*/
-static VNET_DEFINE(int, key_preferred_oldsa) = 1;
+VNET_DEFINE_STATIC(int, key_preferred_oldsa) = 1;
#define V_key_spi_trycnt VNET(key_spi_trycnt)
#define V_key_spi_minval VNET(key_spi_minval)
#define V_key_spi_maxval VNET(key_spi_maxval)
@@ -139,17 +139,17 @@ static VNET_DEFINE(int, key_preferred_oldsa) = 1;
#define V_key_blockacq_lifetime VNET(key_blockacq_lifetime)
#define V_key_preferred_oldsa VNET(key_preferred_oldsa)
-static VNET_DEFINE(u_int32_t, acq_seq) = 0;
+VNET_DEFINE_STATIC(u_int32_t, acq_seq) = 0;
#define V_acq_seq VNET(acq_seq)
-static VNET_DEFINE(uint32_t, sp_genid) = 0;
+VNET_DEFINE_STATIC(uint32_t, sp_genid) = 0;
#define V_sp_genid VNET(sp_genid)
/* SPD */
TAILQ_HEAD(secpolicy_queue, secpolicy);
LIST_HEAD(secpolicy_list, secpolicy);
-static VNET_DEFINE(struct secpolicy_queue, sptree[IPSEC_DIR_MAX]);
-static VNET_DEFINE(struct secpolicy_queue, sptree_ifnet[IPSEC_DIR_MAX]);
+VNET_DEFINE_STATIC(struct secpolicy_queue, sptree[IPSEC_DIR_MAX]);
+VNET_DEFINE_STATIC(struct secpolicy_queue, sptree_ifnet[IPSEC_DIR_MAX]);
static struct rmlock sptree_lock;
#define V_sptree VNET(sptree)
#define V_sptree_ifnet VNET(sptree_ifnet)
@@ -165,8 +165,8 @@ static struct rmlock sptree_lock;
#define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED)
/* Hash table for lookup SP using unique id */
-static VNET_DEFINE(struct secpolicy_list *, sphashtbl);
-static VNET_DEFINE(u_long, sphash_mask);
+VNET_DEFINE_STATIC(struct secpolicy_list *, sphashtbl);
+VNET_DEFINE_STATIC(u_long, sphash_mask);
#define V_sphashtbl VNET(sphashtbl)
#define V_sphash_mask VNET(sphash_mask)
@@ -186,19 +186,19 @@ LIST_HEAD(spdcache_entry_list, spdcache_entry);
#define SPDCACHE_MAX_ENTRIES_PER_HASH 8
-static VNET_DEFINE(u_int, key_spdcache_maxentries) = 0;
+VNET_DEFINE_STATIC(u_int, key_spdcache_maxentries) = 0;
#define V_key_spdcache_maxentries VNET(key_spdcache_maxentries)
-static VNET_DEFINE(u_int, key_spdcache_threshold) = 32;
+VNET_DEFINE_STATIC(u_int, key_spdcache_threshold) = 32;
#define V_key_spdcache_threshold VNET(key_spdcache_threshold)
-static VNET_DEFINE(unsigned long, spd_size) = 0;
+VNET_DEFINE_STATIC(unsigned long, spd_size) = 0;
#define V_spd_size VNET(spd_size)
#define SPDCACHE_ENABLED() (V_key_spdcache_maxentries != 0)
#define SPDCACHE_ACTIVE() \
(SPDCACHE_ENABLED() && V_spd_size >= V_key_spdcache_threshold)
-static VNET_DEFINE(struct spdcache_entry_list *, spdcachehashtbl);
-static VNET_DEFINE(u_long, spdcachehash_mask);
+VNET_DEFINE_STATIC(struct spdcache_entry_list *, spdcachehashtbl);
+VNET_DEFINE_STATIC(u_long, spdcachehash_mask);
#define V_spdcachehashtbl VNET(spdcachehashtbl)
#define V_spdcachehash_mask VNET(spdcachehash_mask)
@@ -207,7 +207,7 @@ static VNET_DEFINE(u_long, spdcachehash_mask);
V_spdcachehash_mask)
/* Each cache line is protected by a mutex */
-static VNET_DEFINE(struct mtx *, spdcache_lock);
+VNET_DEFINE_STATIC(struct mtx *, spdcache_lock);
#define V_spdcache_lock VNET(spdcache_lock)
#define SPDCACHE_LOCK_INIT(a) \
@@ -220,7 +220,7 @@ static VNET_DEFINE(struct mtx *, spdcache_lock);
/* SAD */
TAILQ_HEAD(secashead_queue, secashead);
LIST_HEAD(secashead_list, secashead);
-static VNET_DEFINE(struct secashead_queue, sahtree);
+VNET_DEFINE_STATIC(struct secashead_queue, sahtree);
static struct rmlock sahtree_lock;
#define V_sahtree VNET(sahtree)
#define SAHTREE_LOCK_INIT() rm_init(&sahtree_lock, "sahtree")
@@ -235,8 +235,8 @@ static struct rmlock sahtree_lock;
#define SAHTREE_UNLOCK_ASSERT() rm_assert(&sahtree_lock, RA_UNLOCKED)
/* Hash table for lookup in SAD using SA addresses */
-static VNET_DEFINE(struct secashead_list *, sahaddrhashtbl);
-static VNET_DEFINE(u_long, sahaddrhash_mask);
+VNET_DEFINE_STATIC(struct secashead_list *, sahaddrhashtbl);
+VNET_DEFINE_STATIC(u_long, sahaddrhash_mask);
#define V_sahaddrhashtbl VNET(sahaddrhashtbl)
#define V_sahaddrhash_mask VNET(sahaddrhash_mask)
@@ -250,8 +250,8 @@ static VNET_DEFINE(u_long, sahaddrhash_mask);
/* Hash table for lookup in SAD using SPI */
LIST_HEAD(secasvar_list, secasvar);
-static VNET_DEFINE(struct secasvar_list *, savhashtbl);
-static VNET_DEFINE(u_long, savhash_mask);
+VNET_DEFINE_STATIC(struct secasvar_list *, savhashtbl);
+VNET_DEFINE_STATIC(u_long, savhash_mask);
#define V_savhashtbl VNET(savhashtbl)
#define V_savhash_mask VNET(savhash_mask)
#define SAVHASH_NHASH_LOG2 7
@@ -300,7 +300,7 @@ key_u32hash(uint32_t val)
}
/* registed list */
-static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]);
+VNET_DEFINE_STATIC(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]);
#define V_regtree VNET(regtree)
static struct mtx regtree_lock;
#define REGTREE_LOCK_INIT() \
@@ -312,7 +312,7 @@ static struct mtx regtree_lock;
/* Acquiring list */
LIST_HEAD(secacq_list, secacq);
-static VNET_DEFINE(struct secacq_list, acqtree);
+VNET_DEFINE_STATIC(struct secacq_list, acqtree);
#define V_acqtree VNET(acqtree)
static struct mtx acq_lock;
#define ACQ_LOCK_INIT() \
@@ -323,14 +323,14 @@ static struct mtx acq_lock;
#define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED)
/* Hash table for lookup in ACQ list using SA addresses */
-static VNET_DEFINE(struct secacq_list *, acqaddrhashtbl);
-static VNET_DEFINE(u_long, acqaddrhash_mask);
+VNET_DEFINE_STATIC(struct secacq_list *, acqaddrhashtbl);
+VNET_DEFINE_STATIC(u_long, acqaddrhash_mask);
#define V_acqaddrhashtbl VNET(acqaddrhashtbl)
#define V_acqaddrhash_mask VNET(acqaddrhash_mask)
/* Hash table for lookup in ACQ list using SEQ number */
-static VNET_DEFINE(struct secacq_list *, acqseqhashtbl);
-static VNET_DEFINE(u_long, acqseqhash_mask);
+VNET_DEFINE_STATIC(struct secacq_list *, acqseqhashtbl);
+VNET_DEFINE_STATIC(u_long, acqseqhash_mask);
#define V_acqseqhashtbl VNET(acqseqhashtbl)
#define V_acqseqhash_mask VNET(acqseqhash_mask)
@@ -346,7 +346,7 @@ static VNET_DEFINE(u_long, acqseqhash_mask);
#define ACQSEQHASH_HASH(seq) \
&V_acqseqhashtbl[ACQSEQHASH_HASHVAL(seq)]
/* SP acquiring list */
-static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree);
+VNET_DEFINE_STATIC(LIST_HEAD(_spacqtree, secspacq), spacqtree);
#define V_spacqtree VNET(spacqtree)
static struct mtx spacq_lock;
#define SPACQ_LOCK_INIT() \
@@ -435,9 +435,9 @@ _Static_assert(sizeof(maxsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mi
((_mhp)->extlen[(_ext)] > maxsize[(_ext)])))
#define SADB_CHECKHDR(_mhp, _ext) ((_mhp)->ext[(_ext)] == NULL)
-static VNET_DEFINE(int, ipsec_esp_keymin) = 256;
-static VNET_DEFINE(int, ipsec_esp_auth) = 0;
-static VNET_DEFINE(int, ipsec_ah_keymin) = 128;
+VNET_DEFINE_STATIC(int, ipsec_esp_keymin) = 256;
+VNET_DEFINE_STATIC(int, ipsec_esp_auth) = 0;
+VNET_DEFINE_STATIC(int, ipsec_ah_keymin) = 128;
#define V_ipsec_esp_keymin VNET(ipsec_esp_keymin)
#define V_ipsec_esp_auth VNET(ipsec_esp_auth)
@@ -533,7 +533,7 @@ MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire");
MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire");
MALLOC_DEFINE(M_IPSEC_SPDCACHE, "ipsec-spdcache", "ipsec SPD cache");
-static VNET_DEFINE(uma_zone_t, key_lft_zone);
+VNET_DEFINE_STATIC(uma_zone_t, key_lft_zone);
#define V_key_lft_zone VNET(key_lft_zone)
static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER();
@@ -2959,7 +2959,7 @@ key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx,
goto done;
}
mtx_init(sav->lock, "ipsec association", NULL, MTX_DEF);
- sav->lft_c = uma_zalloc(V_key_lft_zone, M_NOWAIT);
+ sav->lft_c = uma_zalloc_pcpu(V_key_lft_zone, M_NOWAIT);
if (sav->lft_c == NULL) {
*errp = ENOBUFS;
goto done;
@@ -3051,7 +3051,7 @@ done:
free(sav->lock, M_IPSEC_MISC);
}
if (sav->lft_c != NULL)
- uma_zfree(V_key_lft_zone, sav->lft_c);
+ uma_zfree_pcpu(V_key_lft_zone, sav->lft_c);
free(sav, M_IPSEC_SA), sav = NULL;
}
if (sah != NULL)
diff --git a/freebsd/sys/netipsec/key_debug.c b/freebsd/sys/netipsec/key_debug.c
index 12cfe34e..07eec79e 100644
--- a/freebsd/sys/netipsec/key_debug.c
+++ b/freebsd/sys/netipsec/key_debug.c
@@ -87,6 +87,85 @@ static void kdebug_sadb_x_natt(struct sadb_ext *);
/* NOTE: host byte order */
+static const char*
+kdebug_sadb_type(uint8_t type)
+{
+#define SADB_NAME(n) case SADB_ ## n: return (#n)
+
+ switch (type) {
+ SADB_NAME(RESERVED);
+ SADB_NAME(GETSPI);
+ SADB_NAME(UPDATE);
+ SADB_NAME(ADD);
+ SADB_NAME(DELETE);
+ SADB_NAME(GET);
+ SADB_NAME(ACQUIRE);
+ SADB_NAME(REGISTER);
+ SADB_NAME(EXPIRE);
+ SADB_NAME(FLUSH);
+ SADB_NAME(DUMP);
+ SADB_NAME(X_PROMISC);
+ SADB_NAME(X_PCHANGE);
+ SADB_NAME(X_SPDUPDATE);
+ SADB_NAME(X_SPDADD);
+ SADB_NAME(X_SPDDELETE);
+ SADB_NAME(X_SPDGET);
+ SADB_NAME(X_SPDACQUIRE);
+ SADB_NAME(X_SPDDUMP);
+ SADB_NAME(X_SPDFLUSH);
+ SADB_NAME(X_SPDSETIDX);
+ SADB_NAME(X_SPDEXPIRE);
+ SADB_NAME(X_SPDDELETE2);
+ default:
+ return ("UNKNOWN");
+ }
+#undef SADB_NAME
+}
+
+static const char*
+kdebug_sadb_exttype(uint16_t type)
+{
+#define EXT_NAME(n) case SADB_EXT_ ## n: return (#n)
+#define X_NAME(n) case SADB_X_EXT_ ## n: return (#n)
+
+ switch (type) {
+ EXT_NAME(RESERVED);
+ EXT_NAME(SA);
+ EXT_NAME(LIFETIME_CURRENT);
+ EXT_NAME(LIFETIME_HARD);
+ EXT_NAME(LIFETIME_SOFT);
+ EXT_NAME(ADDRESS_SRC);
+ EXT_NAME(ADDRESS_DST);
+ EXT_NAME(ADDRESS_PROXY);
+ EXT_NAME(KEY_AUTH);
+ EXT_NAME(KEY_ENCRYPT);
+ EXT_NAME(IDENTITY_SRC);
+ EXT_NAME(IDENTITY_DST);
+ EXT_NAME(SENSITIVITY);
+ EXT_NAME(PROPOSAL);
+ EXT_NAME(SUPPORTED_AUTH);
+ EXT_NAME(SUPPORTED_ENCRYPT);
+ EXT_NAME(SPIRANGE);
+ X_NAME(KMPRIVATE);
+ X_NAME(POLICY);
+ X_NAME(SA2);
+ X_NAME(NAT_T_TYPE);
+ X_NAME(NAT_T_SPORT);
+ X_NAME(NAT_T_DPORT);
+ X_NAME(NAT_T_OAI);
+ X_NAME(NAT_T_OAR);
+ X_NAME(NAT_T_FRAG);
+ X_NAME(SA_REPLAY);
+ X_NAME(NEW_ADDRESS_SRC);
+ X_NAME(NEW_ADDRESS_DST);
+ default:
+ return ("UNKNOWN");
+ };
+#undef EXT_NAME
+#undef X_NAME
+}
+
+
/* %%%: about struct sadb_msg */
void
kdebug_sadb(struct sadb_msg *base)
@@ -98,8 +177,9 @@ kdebug_sadb(struct sadb_msg *base)
if (base == NULL)
panic("%s: NULL pointer was passed.\n", __func__);
- printf("sadb_msg{ version=%u type=%u errno=%u satype=%u\n",
+ printf("sadb_msg{ version=%u type=%u(%s) errno=%u satype=%u\n",
base->sadb_msg_version, base->sadb_msg_type,
+ kdebug_sadb_type(base->sadb_msg_type),
base->sadb_msg_errno, base->sadb_msg_satype);
printf(" len=%u reserved=%u seq=%u pid=%u\n",
base->sadb_msg_len, base->sadb_msg_reserved,
@@ -109,8 +189,9 @@ kdebug_sadb(struct sadb_msg *base)
ext = (struct sadb_ext *)((caddr_t)base + sizeof(struct sadb_msg));
while (tlen > 0) {
- printf("sadb_ext{ len=%u type=%u }\n",
- ext->sadb_ext_len, ext->sadb_ext_type);
+ printf("sadb_ext{ len=%u type=%u(%s) }\n",
+ ext->sadb_ext_len, ext->sadb_ext_type,
+ kdebug_sadb_exttype(ext->sadb_ext_type));
if (ext->sadb_ext_len == 0) {
printf("%s: invalid ext_len=0 was passed.\n", __func__);
diff --git a/freebsd/sys/netipsec/keydb.h b/freebsd/sys/netipsec/keydb.h
index 19eae767..6993b4e4 100644
--- a/freebsd/sys/netipsec/keydb.h
+++ b/freebsd/sys/netipsec/keydb.h
@@ -41,6 +41,7 @@
#include <sys/mutex.h>
#include <netipsec/key_var.h>
+#include <opencrypto/_cryptodev.h>
#ifndef _SOCKADDR_UNION_DEFINED
#define _SOCKADDR_UNION_DEFINED
@@ -162,7 +163,7 @@ struct secasvar {
const struct enc_xform *tdb_encalgxform;/* encoding algorithm */
const struct auth_hash *tdb_authalgxform;/* authentication algorithm */
const struct comp_algo *tdb_compalgxform;/* compression algorithm */
- uint64_t tdb_cryptoid; /* crypto session id */
+ crypto_session_t tdb_cryptoid; /* crypto session */
uint8_t alg_auth; /* Authentication Algorithm Identifier*/
uint8_t alg_enc; /* Cipher Algorithm Identifier */
diff --git a/freebsd/sys/netipsec/keysock.c b/freebsd/sys/netipsec/keysock.c
index 170335bc..9ea1d8f1 100644
--- a/freebsd/sys/netipsec/keysock.c
+++ b/freebsd/sys/netipsec/keysock.c
@@ -73,7 +73,7 @@ struct key_cb {
int key_count;
int any_count;
};
-static VNET_DEFINE(struct key_cb, key_cb);
+VNET_DEFINE_STATIC(struct key_cb, key_cb);
#define V_key_cb VNET(key_cb)
static struct sockaddr key_src = { 2, PF_KEY, };
diff --git a/freebsd/sys/netipsec/xform.h b/freebsd/sys/netipsec/xform.h
index 2720f72a..389d0b66 100644
--- a/freebsd/sys/netipsec/xform.h
+++ b/freebsd/sys/netipsec/xform.h
@@ -71,7 +71,7 @@ struct xform_history {
struct xform_data {
struct secpolicy *sp; /* security policy */
struct secasvar *sav; /* related SA */
- uint64_t cryptoid; /* used crypto session id */
+ crypto_session_t cryptoid; /* used crypto session */
u_int idx; /* IPsec request index */
int protoff; /* current protocol offset */
int skip; /* data offset */
diff --git a/freebsd/sys/netipsec/xform_ah.c b/freebsd/sys/netipsec/xform_ah.c
index 13999f41..84ba6c16 100644
--- a/freebsd/sys/netipsec/xform_ah.c
+++ b/freebsd/sys/netipsec/xform_ah.c
@@ -149,11 +149,21 @@ ah_hdrsiz(struct secasvar *sav)
size_t size;
if (sav != NULL) {
- int authsize;
+ int authsize, rplen, align;
+
IPSEC_ASSERT(sav->tdb_authalgxform != NULL, ("null xform"));
/*XXX not right for null algorithm--does it matter??*/
+
+ /* RFC4302: use the correct alignment. */
+ align = sizeof(uint32_t);
+#ifdef INET6
+ if (sav->sah->saidx.dst.sa.sa_family == AF_INET6) {
+ align = sizeof(uint64_t);
+ }
+#endif
+ rplen = HDRSIZE(sav);
authsize = AUTHSIZE(sav);
- size = roundup(authsize, sizeof (u_int32_t)) + HDRSIZE(sav);
+ size = roundup(rplen + authsize, align);
} else {
/* default guess */
size = sizeof (struct ah) + sizeof (u_int32_t) + 16;
@@ -237,16 +247,15 @@ ah_init(struct secasvar *sav, struct xformsw *xsp)
int
ah_zeroize(struct secasvar *sav)
{
- int err;
if (sav->key_auth)
bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth));
- err = crypto_freesession(sav->tdb_cryptoid);
- sav->tdb_cryptoid = 0;
+ crypto_freesession(sav->tdb_cryptoid);
+ sav->tdb_cryptoid = NULL;
sav->tdb_authalgxform = NULL;
sav->tdb_xform = NULL;
- return err;
+ return 0;
}
/*
@@ -536,8 +545,8 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
struct cryptop *crp;
struct xform_data *xd;
struct newah *ah;
- uint64_t cryptoid;
- int hl, rplen, authsize, error;
+ crypto_session_t cryptoid;
+ int hl, rplen, authsize, ahsize, error;
IPSEC_ASSERT(sav != NULL, ("null SA"));
IPSEC_ASSERT(sav->key_auth != NULL, ("null authentication key"));
@@ -571,23 +580,24 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
SECASVAR_UNLOCK(sav);
/* Verify AH header length. */
- hl = ah->ah_len * sizeof (u_int32_t);
+ hl = sizeof(struct ah) + (ah->ah_len * sizeof (u_int32_t));
ahx = sav->tdb_authalgxform;
authsize = AUTHSIZE(sav);
- if (hl != authsize + rplen - sizeof (struct ah)) {
+ ahsize = ah_hdrsiz(sav);
+ if (hl != ahsize) {
DPRINTF(("%s: bad authenticator length %u (expecting %lu)"
" for packet in SA %s/%08lx\n", __func__, hl,
- (u_long) (authsize + rplen - sizeof (struct ah)),
+ (u_long)ahsize,
ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
(u_long) ntohl(sav->spi)));
AHSTAT_INC(ahs_badauthl);
error = EACCES;
goto bad;
}
- if (skip + authsize + rplen > m->m_pkthdr.len) {
+ if (skip + ahsize > m->m_pkthdr.len) {
DPRINTF(("%s: bad mbuf length %u (expecting %lu)"
" for packet in SA %s/%08lx\n", __func__,
- m->m_pkthdr.len, (u_long) (skip + authsize + rplen),
+ m->m_pkthdr.len, (u_long)(skip + ahsize),
ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
(u_long) ntohl(sav->spi)));
AHSTAT_INC(ahs_badauthl);
@@ -660,7 +670,7 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER;
crp->crp_buf = (caddr_t) m;
crp->crp_callback = ah_input_cb;
- crp->crp_sid = cryptoid;
+ crp->crp_session = cryptoid;
crp->crp_opaque = (caddr_t) xd;
/* These are passed as-is to the callback. */
@@ -690,8 +700,8 @@ ah_input_cb(struct cryptop *crp)
struct secasvar *sav;
struct secasindex *saidx;
caddr_t ptr;
- uint64_t cryptoid;
- int authsize, rplen, error, skip, protoff;
+ crypto_session_t cryptoid;
+ int authsize, rplen, ahsize, error, skip, protoff;
uint8_t nxt;
m = (struct mbuf *) crp->crp_buf;
@@ -711,9 +721,9 @@ ah_input_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
@@ -738,6 +748,7 @@ ah_input_cb(struct cryptop *crp)
/* Figure out header size. */
rplen = HDRSIZE(sav);
authsize = AUTHSIZE(sav);
+ ahsize = ah_hdrsiz(sav);
/* Copy authenticator off the packet. */
m_copydata(m, skip + rplen, authsize, calc);
@@ -786,7 +797,7 @@ ah_input_cb(struct cryptop *crp)
/*
* Remove the AH header and authenticator from the mbuf.
*/
- error = m_striphdr(m, skip, rplen + authsize);
+ error = m_striphdr(m, skip, ahsize);
if (error) {
DPRINTF(("%s: mangled mbuf chain for SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst, buf, sizeof(buf)),
@@ -839,9 +850,9 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
struct mbuf *mi;
struct cryptop *crp;
struct newah *ah;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
uint16_t iplen;
- int error, rplen, authsize, maxpacketsize, roff;
+ int error, rplen, authsize, ahsize, maxpacketsize, roff;
uint8_t prot;
IPSEC_ASSERT(sav != NULL, ("null SA"));
@@ -852,6 +863,8 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
/* Figure out header size. */
rplen = HDRSIZE(sav);
+ authsize = AUTHSIZE(sav);
+ ahsize = ah_hdrsiz(sav);
/* Check for maximum packet size violations. */
switch (sav->sah->saidx.dst.sa.sa_family) {
@@ -875,13 +888,12 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
error = EPFNOSUPPORT;
goto bad;
}
- authsize = AUTHSIZE(sav);
- if (rplen + authsize + m->m_pkthdr.len > maxpacketsize) {
+ if (ahsize + m->m_pkthdr.len > maxpacketsize) {
DPRINTF(("%s: packet in SA %s/%08lx got too big "
"(len %u, max len %u)\n", __func__,
ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
(u_long) ntohl(sav->spi),
- rplen + authsize + m->m_pkthdr.len, maxpacketsize));
+ ahsize + m->m_pkthdr.len, maxpacketsize));
AHSTAT_INC(ahs_toobig);
error = EMSGSIZE;
goto bad;
@@ -901,11 +913,10 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
}
/* Inject AH header. */
- mi = m_makespace(m, skip, rplen + authsize, &roff);
+ mi = m_makespace(m, skip, ahsize, &roff);
if (mi == NULL) {
DPRINTF(("%s: failed to inject %u byte AH header for SA "
- "%s/%08lx\n", __func__,
- rplen + authsize,
+ "%s/%08lx\n", __func__, ahsize,
ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)),
(u_long) ntohl(sav->spi)));
AHSTAT_INC(ahs_hdrops); /*XXX differs from openbsd */
@@ -921,13 +932,17 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
/* Initialize the AH header. */
m_copydata(m, protoff, sizeof(u_int8_t), (caddr_t) &ah->ah_nxt);
- ah->ah_len = (rplen + authsize - sizeof(struct ah)) / sizeof(u_int32_t);
+ ah->ah_len = (ahsize - sizeof(struct ah)) / sizeof(u_int32_t);
ah->ah_reserve = 0;
ah->ah_spi = sav->spi;
/* Zeroize authenticator. */
m_copyback(m, skip + rplen, authsize, ipseczeroes);
+ /* Zeroize padding */
+ m_copyback(m, skip + rplen + authsize, ahsize - (rplen + authsize),
+ ipseczeroes);
+
/* Insert packet replay counter, as requested. */
SECASVAR_LOCK(sav);
if (sav->replay) {
@@ -996,7 +1011,7 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
bcopy(((caddr_t)(xd + 1)) +
offsetof(struct ip, ip_len),
(caddr_t) &iplen, sizeof(u_int16_t));
- iplen = htons(ntohs(iplen) + rplen + authsize);
+ iplen = htons(ntohs(iplen) + ahsize);
m_copyback(m, offsetof(struct ip, ip_len),
sizeof(u_int16_t), (caddr_t) &iplen);
break;
@@ -1007,7 +1022,7 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
bcopy(((caddr_t)(xd + 1)) +
offsetof(struct ip6_hdr, ip6_plen),
(caddr_t) &iplen, sizeof(uint16_t));
- iplen = htons(ntohs(iplen) + rplen + authsize);
+ iplen = htons(ntohs(iplen) + ahsize);
m_copyback(m, offsetof(struct ip6_hdr, ip6_plen),
sizeof(uint16_t), (caddr_t) &iplen);
break;
@@ -1038,7 +1053,7 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER;
crp->crp_buf = (caddr_t) m;
crp->crp_callback = ah_output_cb;
- crp->crp_sid = cryptoid;
+ crp->crp_session = cryptoid;
crp->crp_opaque = (caddr_t) xd;
/* These are passed as-is to the callback. */
@@ -1068,7 +1083,7 @@ ah_output_cb(struct cryptop *crp)
struct secpolicy *sp;
struct secasvar *sav;
struct mbuf *m;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
caddr_t ptr;
u_int idx;
int skip, error;
@@ -1087,9 +1102,9 @@ ah_output_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
diff --git a/freebsd/sys/netipsec/xform_esp.c b/freebsd/sys/netipsec/xform_esp.c
index 49b08ba6..f8473575 100644
--- a/freebsd/sys/netipsec/xform_esp.c
+++ b/freebsd/sys/netipsec/xform_esp.c
@@ -273,7 +273,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
struct cryptop *crp;
struct newesp *esp;
uint8_t *ivp;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
int alen, error, hlen, plen;
IPSEC_ASSERT(sav != NULL, ("null SA"));
@@ -391,7 +391,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER;
crp->crp_buf = (caddr_t) m;
crp->crp_callback = esp_input_cb;
- crp->crp_sid = cryptoid;
+ crp->crp_session = cryptoid;
crp->crp_opaque = (caddr_t) xd;
/* These are passed as-is to the callback */
@@ -450,7 +450,7 @@ esp_input_cb(struct cryptop *crp)
struct secasvar *sav;
struct secasindex *saidx;
caddr_t ptr;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
int hlen, skip, protoff, error, alen;
crd = crp->crp_desc;
@@ -470,9 +470,9 @@ esp_input_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
@@ -639,7 +639,8 @@ esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
struct secasindex *saidx;
unsigned char *pad;
uint8_t *ivp;
- uint64_t cntr, cryptoid;
+ uint64_t cntr;
+ crypto_session_t cryptoid;
int hlen, rlen, padding, blks, alen, i, roff;
int error, maxpacketsize;
uint8_t prot;
@@ -854,7 +855,7 @@ esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
crp->crp_buf = (caddr_t) m;
crp->crp_callback = esp_output_cb;
crp->crp_opaque = (caddr_t) xd;
- crp->crp_sid = cryptoid;
+ crp->crp_session = cryptoid;
if (esph) {
/* Authentication descriptor. */
@@ -885,7 +886,7 @@ esp_output_cb(struct cryptop *crp)
struct secpolicy *sp;
struct secasvar *sav;
struct mbuf *m;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
u_int idx;
int error;
@@ -901,9 +902,9 @@ esp_output_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
diff --git a/freebsd/sys/netipsec/xform_ipcomp.c b/freebsd/sys/netipsec/xform_ipcomp.c
index b3fdee49..86addc87 100644
--- a/freebsd/sys/netipsec/xform_ipcomp.c
+++ b/freebsd/sys/netipsec/xform_ipcomp.c
@@ -120,7 +120,7 @@ ipcomp_encapcheck(union sockaddr_union *src, union sockaddr_union *dst)
}
static int
-ipcomp_nonexp_input(struct mbuf **mp, int *offp, int proto)
+ipcomp_nonexp_input(struct mbuf *m, int off, int proto, void *arg __unused)
{
int isr;
@@ -137,13 +137,13 @@ ipcomp_nonexp_input(struct mbuf **mp, int *offp, int proto)
#endif
default:
IPCOMPSTAT_INC(ipcomps_nopf);
- m_freem(*mp);
+ m_freem(m);
return (IPPROTO_DONE);
}
- m_adj(*mp, *offp);
- IPCOMPSTAT_ADD(ipcomps_ibytes, (*mp)->m_pkthdr.len);
+ m_adj(m, off);
+ IPCOMPSTAT_ADD(ipcomps_ibytes, m->m_pkthdr.len);
IPCOMPSTAT_INC(ipcomps_input);
- netisr_dispatch(isr, *mp);
+ netisr_dispatch(isr, m);
return (IPPROTO_DONE);
}
@@ -180,11 +180,10 @@ ipcomp_init(struct secasvar *sav, struct xformsw *xsp)
static int
ipcomp_zeroize(struct secasvar *sav)
{
- int err;
- err = crypto_freesession(sav->tdb_cryptoid);
- sav->tdb_cryptoid = 0;
- return err;
+ crypto_freesession(sav->tdb_cryptoid);
+ sav->tdb_cryptoid = NULL;
+ return 0;
}
/*
@@ -260,7 +259,7 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
xd->vnet = curvnet;
SECASVAR_LOCK(sav);
- crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid;
+ crp->crp_session = xd->cryptoid = sav->tdb_cryptoid;
SECASVAR_UNLOCK(sav);
return crypto_dispatch(crp);
@@ -282,7 +281,7 @@ ipcomp_input_cb(struct cryptop *crp)
struct secasvar *sav;
struct secasindex *saidx;
caddr_t addr;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
int hlen = IPCOMP_HLENGTH, error, clen;
int skip, protoff;
uint8_t nproto;
@@ -303,9 +302,9 @@ ipcomp_input_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
@@ -510,7 +509,7 @@ ipcomp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
crp->crp_opaque = (caddr_t) xd;
SECASVAR_LOCK(sav);
- crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid;
+ crp->crp_session = xd->cryptoid = sav->tdb_cryptoid;
SECASVAR_UNLOCK(sav);
return crypto_dispatch(crp);
@@ -533,7 +532,7 @@ ipcomp_output_cb(struct cryptop *crp)
struct secpolicy *sp;
struct secasvar *sav;
struct mbuf *m;
- uint64_t cryptoid;
+ crypto_session_t cryptoid;
u_int idx;
int error, skip, protoff;
@@ -551,9 +550,9 @@ ipcomp_output_cb(struct cryptop *crp)
if (crp->crp_etype) {
if (crp->crp_etype == EAGAIN) {
/* Reset the session ID */
- if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0)
+ if (ipsec_updateid(sav, &crp->crp_session, &cryptoid) != 0)
crypto_freesession(cryptoid);
- xd->cryptoid = crp->crp_sid;
+ xd->cryptoid = crp->crp_session;
CURVNET_RESTORE();
return (crypto_dispatch(crp));
}
@@ -664,19 +663,6 @@ bad:
}
#ifdef INET
-static const struct encaptab *ipe4_cookie = NULL;
-extern struct domain inetdomain;
-static struct protosw ipcomp4_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = 0 /* IPPROTO_IPV[46] */,
- .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR,
- .pr_input = ipcomp_nonexp_input,
- .pr_output = rip_output,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
static int
ipcomp4_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
void *arg __unused)
@@ -697,21 +683,17 @@ ipcomp4_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
dst.sin.sin_addr = ip->ip_dst;
return (ipcomp_encapcheck(&src, &dst));
}
+
+static const struct encaptab *ipe4_cookie = NULL;
+static const struct encap_config ipv4_encap_cfg = {
+ .proto = -1,
+ .min_length = sizeof(struct ip),
+ .exact_match = sizeof(in_addr_t) << 4,
+ .check = ipcomp4_nonexp_encapcheck,
+ .input = ipcomp_nonexp_input
+};
#endif
#ifdef INET6
-static const struct encaptab *ipe6_cookie = NULL;
-extern struct domain inet6domain;
-static struct protosw ipcomp6_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inet6domain,
- .pr_protocol = 0 /* IPPROTO_IPV[46] */,
- .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR,
- .pr_input = ipcomp_nonexp_input,
- .pr_output = rip6_output,
- .pr_ctloutput = rip6_ctloutput,
- .pr_usrreqs = &rip6_usrreqs
-};
-
static int
ipcomp6_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
void *arg __unused)
@@ -744,6 +726,15 @@ ipcomp6_nonexp_encapcheck(const struct mbuf *m, int off, int proto,
}
return (ipcomp_encapcheck(&src, &dst));
}
+
+static const struct encaptab *ipe6_cookie = NULL;
+static const struct encap_config ipv6_encap_cfg = {
+ .proto = -1,
+ .min_length = sizeof(struct ip6_hdr),
+ .exact_match = sizeof(struct in6_addr) << 4,
+ .check = ipcomp6_nonexp_encapcheck,
+ .input = ipcomp_nonexp_input
+};
#endif
static struct xformsw ipcomp_xformsw = {
@@ -760,12 +751,10 @@ ipcomp_attach(void)
{
#ifdef INET
- ipe4_cookie = encap_attach_func(AF_INET, -1,
- ipcomp4_nonexp_encapcheck, &ipcomp4_protosw, NULL);
+ ipe4_cookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK);
#endif
#ifdef INET6
- ipe6_cookie = encap_attach_func(AF_INET6, -1,
- ipcomp6_nonexp_encapcheck, &ipcomp6_protosw, NULL);
+ ipe6_cookie = ip6_encap_attach(&ipv6_encap_cfg, NULL, M_WAITOK);
#endif
xform_attach(&ipcomp_xformsw);
}
@@ -775,10 +764,10 @@ ipcomp_detach(void)
{
#ifdef INET
- encap_detach(ipe4_cookie);
+ ip_encap_detach(ipe4_cookie);
#endif
#ifdef INET6
- encap_detach(ipe6_cookie);
+ ip6_encap_detach(ipe6_cookie);
#endif
xform_detach(&ipcomp_xformsw);
}
diff --git a/freebsd/sys/netipsec/xform_tcp.c b/freebsd/sys/netipsec/xform_tcp.c
index 9310cf2c..f9cd3964 100644
--- a/freebsd/sys/netipsec/xform_tcp.c
+++ b/freebsd/sys/netipsec/xform_tcp.c
@@ -82,23 +82,24 @@ tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt)
struct tcpcb *tp;
int error, optval;
- INP_WLOCK_ASSERT(inp);
if (sopt->sopt_name != TCP_MD5SIG) {
- INP_WUNLOCK(inp);
return (ENOPROTOOPT);
}
- tp = intotcpcb(inp);
if (sopt->sopt_dir == SOPT_GET) {
+ INP_RLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_RUNLOCK(inp);
+ return (ECONNRESET);
+ }
+ tp = intotcpcb(inp);
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
- INP_WUNLOCK(inp);
+ INP_RUNLOCK(inp);
/* On success return with released INP_WLOCK */
return (sooptcopyout(sopt, &optval, sizeof(optval)));
}
- INP_WUNLOCK(inp);
-
error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
if (error != 0)
return (error);
@@ -109,12 +110,13 @@ tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt)
INP_WUNLOCK(inp);
return (ECONNRESET);
}
+ tp = intotcpcb(inp);
if (optval > 0)
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
- /* On success return with acquired INP_WLOCK */
+ INP_WUNLOCK(inp);
return (error);
}
diff --git a/freebsd/sys/netpfil/pf/if_pflog.c b/freebsd/sys/netpfil/pf/if_pflog.c
index 53cf94c8..3da5d8c0 100644
--- a/freebsd/sys/netpfil/pf/if_pflog.c
+++ b/freebsd/sys/netpfil/pf/if_pflog.c
@@ -98,7 +98,7 @@ static void pflog_clone_destroy(struct ifnet *);
static const char pflogname[] = "pflog";
-static VNET_DEFINE(struct if_clone *, pflog_cloner);
+VNET_DEFINE_STATIC(struct if_clone *, pflog_cloner);
#define V_pflog_cloner VNET(pflog_cloner)
VNET_DEFINE(struct ifnet *, pflogifs[PFLOGIFS_MAX]); /* for fast access */
diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c
index 9b457818..dae091db 100644
--- a/freebsd/sys/netpfil/pf/if_pfsync.c
+++ b/freebsd/sys/netpfil/pf/if_pfsync.c
@@ -231,13 +231,13 @@ struct pfsync_softc {
static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
-static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
+VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL;
#define V_pfsyncif VNET(pfsyncif)
-static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
+VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie)
-static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
+VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
#define V_pfsyncstats VNET(pfsyncstats)
-static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
+VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define V_pfsync_carp_adj VNET(pfsync_carp_adj)
static void pfsync_timeout(void *);
@@ -871,7 +871,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
st = pf_find_state_byid(sp->id, sp->creatorid);
if (st == NULL) {
/* insert the update */
- if (pfsync_state_import(sp, 0))
+ if (pfsync_state_import(sp, pkt->flags))
V_pfsyncstats.pfsyncs_badstate++;
continue;
}
diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c
index 3cc4ff11..f765350d 100644
--- a/freebsd/sys/netpfil/pf/pf.c
+++ b/freebsd/sys/netpfil/pf/pf.c
@@ -137,7 +137,7 @@ VNET_DEFINE(int, pf_tcp_iss_off);
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
-static VNET_DEFINE(uint32_t, pf_purge_idx);
+VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
#define V_pf_purge_idx VNET(pf_purge_idx)
/*
@@ -161,7 +161,7 @@ struct pf_send_entry {
};
STAILQ_HEAD(pf_send_head, pf_send_entry);
-static VNET_DEFINE(struct pf_send_head, pf_sendqueue);
+VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define V_pf_sendqueue VNET(pf_sendqueue)
static struct mtx pf_sendqueue_mtx;
@@ -181,9 +181,9 @@ struct pf_overload_entry {
};
SLIST_HEAD(pf_overload_head, pf_overload_entry);
-static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
+VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue VNET(pf_overloadqueue)
-static VNET_DEFINE(struct task, pf_overloadtask);
+VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define V_pf_overloadtask VNET(pf_overloadtask)
static struct mtx pf_overloadqueue_mtx;
@@ -197,7 +197,7 @@ struct mtx pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
MTX_DEF);
-static VNET_DEFINE(uma_zone_t, pf_sources_z);
+VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z);
#define V_pf_sources_z VNET(pf_sources_z)
uma_zone_t pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
@@ -297,14 +297,14 @@ static void pf_mtag_free(struct m_tag *);
#ifdef INET
static void pf_route(struct mbuf **, struct pf_rule *, int,
struct ifnet *, struct pf_state *,
- struct pf_pdesc *);
+ struct pf_pdesc *, struct inpcb *);
#endif /* INET */
#ifdef INET6
static void pf_change_a6(struct pf_addr *, u_int16_t *,
struct pf_addr *, u_int8_t);
static void pf_route6(struct mbuf **, struct pf_rule *, int,
struct ifnet *, struct pf_state *,
- struct pf_pdesc *);
+ struct pf_pdesc *, struct inpcb *);
#endif /* INET6 */
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
@@ -1721,24 +1721,28 @@ pf_purge_expired_states(u_int i, int maxcheck)
while (maxcheck > 0) {
ih = &V_pf_idhash[i];
+
+ /* only take the lock if we expect to do work */
+ if (!LIST_EMPTY(&ih->states)) {
relock:
- PF_HASHROW_LOCK(ih);
- LIST_FOREACH(s, &ih->states, entry) {
- if (pf_state_expires(s) <= time_uptime) {
- V_pf_status.states -=
- pf_unlink_state(s, PF_ENTER_LOCKED);
- goto relock;
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (pf_state_expires(s) <= time_uptime) {
+ V_pf_status.states -=
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ goto relock;
+ }
+ s->rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->nat_rule.ptr != NULL)
+ s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->anchor.ptr != NULL)
+ s->anchor.ptr->rule_flag |= PFRULE_REFS;
+ s->kif->pfik_flags |= PFI_IFLAG_REFS;
+ if (s->rt_kif)
+ s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
}
- s->rule.ptr->rule_flag |= PFRULE_REFS;
- if (s->nat_rule.ptr != NULL)
- s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
- if (s->anchor.ptr != NULL)
- s->anchor.ptr->rule_flag |= PFRULE_REFS;
- s->kif->pfik_flags |= PFI_IFLAG_REFS;
- if (s->rt_kif)
- s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
+ PF_HASHROW_UNLOCK(ih);
}
- PF_HASHROW_UNLOCK(ih);
/* Return when we hit end of hash. */
if (++i > pf_hashmask) {
@@ -2501,6 +2505,81 @@ pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
pf_send(pfse);
}
+static void
+pf_return(struct pf_rule *r, struct pf_rule *nr, struct pf_pdesc *pd,
+ struct pf_state_key *sk, int off, struct mbuf *m, struct tcphdr *th,
+ struct pfi_kif *kif, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen,
+ u_short *reason)
+{
+ struct pf_addr * const saddr = pd->src;
+ struct pf_addr * const daddr = pd->dst;
+ sa_family_t af = pd->af;
+
+ /* undo NAT changes, if they have taken place */
+ if (nr != NULL) {
+ PF_ACPY(saddr, &sk->addr[pd->sidx], af);
+ PF_ACPY(daddr, &sk->addr[pd->didx], af);
+ if (pd->sport)
+ *pd->sport = sk->port[pd->sidx];
+ if (pd->dport)
+ *pd->dport = sk->port[pd->didx];
+ if (pd->proto_sum)
+ *pd->proto_sum = bproto_sum;
+ if (pd->ip_sum)
+ *pd->ip_sum = bip_sum;
+ m_copyback(m, off, hdrlen, pd->hdr.any);
+ }
+ if (pd->proto == IPPROTO_TCP &&
+ ((r->rule_flag & PFRULE_RETURNRST) ||
+ (r->rule_flag & PFRULE_RETURN)) &&
+ !(th->th_flags & TH_RST)) {
+ u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+ int len = 0;
+#ifdef INET
+ struct ip *h4;
+#endif
+#ifdef INET6
+ struct ip6_hdr *h6;
+#endif
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ h4 = mtod(m, struct ip *);
+ len = ntohs(h4->ip_len) - off;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ h6 = mtod(m, struct ip6_hdr *);
+ len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
+ break;
+#endif
+ }
+
+ if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
+ REASON_SET(reason, PFRES_PROTCKSUM);
+ else {
+ if (th->th_flags & TH_SYN)
+ ack++;
+ if (th->th_flags & TH_FIN)
+ ack++;
+ pf_send_tcp(m, r, af, pd->dst,
+ pd->src, th->th_dport, th->th_sport,
+ ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
+ r->return_ttl, 1, 0, kif->pfik_ifp);
+ }
+ } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
+ r->return_icmp)
+ pf_send_icmp(m, r->return_icmp >> 8,
+ r->return_icmp & 255, af, r);
+ else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
+ r->return_icmp6)
+ pf_send_icmp(m, r->return_icmp6 >> 8,
+ r->return_icmp6 & 255, af, r);
+}
+
+
static int
pf_ieee8021q_setpcp(struct mbuf *m, u_int8_t prio)
{
@@ -3475,68 +3554,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
((r->rule_flag & PFRULE_RETURNRST) ||
(r->rule_flag & PFRULE_RETURNICMP) ||
(r->rule_flag & PFRULE_RETURN))) {
- /* undo NAT changes, if they have taken place */
- if (nr != NULL) {
- PF_ACPY(saddr, &sk->addr[pd->sidx], af);
- PF_ACPY(daddr, &sk->addr[pd->didx], af);
- if (pd->sport)
- *pd->sport = sk->port[pd->sidx];
- if (pd->dport)
- *pd->dport = sk->port[pd->didx];
- if (pd->proto_sum)
- *pd->proto_sum = bproto_sum;
- if (pd->ip_sum)
- *pd->ip_sum = bip_sum;
- m_copyback(m, off, hdrlen, pd->hdr.any);
- }
- if (pd->proto == IPPROTO_TCP &&
- ((r->rule_flag & PFRULE_RETURNRST) ||
- (r->rule_flag & PFRULE_RETURN)) &&
- !(th->th_flags & TH_RST)) {
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
- int len = 0;
-#ifdef INET
- struct ip *h4;
-#endif
-#ifdef INET6
- struct ip6_hdr *h6;
-#endif
-
- switch (af) {
-#ifdef INET
- case AF_INET:
- h4 = mtod(m, struct ip *);
- len = ntohs(h4->ip_len) - off;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- h6 = mtod(m, struct ip6_hdr *);
- len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
- break;
-#endif
- }
-
- if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
- REASON_SET(&reason, PFRES_PROTCKSUM);
- else {
- if (th->th_flags & TH_SYN)
- ack++;
- if (th->th_flags & TH_FIN)
- ack++;
- pf_send_tcp(m, r, af, pd->dst,
- pd->src, th->th_dport, th->th_sport,
- ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
- r->return_ttl, 1, 0, kif->pfik_ifp);
- }
- } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
- r->return_icmp)
- pf_send_icmp(m, r->return_icmp >> 8,
- r->return_icmp & 255, af, r);
- else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
- r->return_icmp6)
- pf_send_icmp(m, r->return_icmp6 >> 8,
- r->return_icmp6 & 255, af, r);
+ pf_return(r, nr, pd, sk, off, m, th, kif, bproto_sum,
+ bip_sum, hdrlen, &reason);
}
if (r->action == PF_DROP)
@@ -3555,8 +3574,13 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
hdrlen);
- if (action != PF_PASS)
+ if (action != PF_PASS) {
+ if (action == PF_DROP &&
+ (r->rule_flag & PFRULE_RETURN))
+ pf_return(r, nr, pd, sk, off, m, th, kif,
+ bproto_sum, bip_sum, hdrlen, &reason);
return (action);
+ }
} else {
if (sk != NULL)
uma_zfree(V_pf_state_key_z, sk);
@@ -5454,7 +5478,7 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
#ifdef INET
static void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
- struct pf_state *s, struct pf_pdesc *pd)
+ struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *m1;
struct sockaddr_in dst;
@@ -5522,7 +5546,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
goto bad;
if (oifp != ifp) {
- if (pf_test(PF_OUT, 0, ifp, &m0, NULL) != PF_PASS)
+ if (pf_test(PF_OUT, 0, ifp, &m0, inp) != PF_PASS)
goto bad;
else if (m0 == NULL)
goto done;
@@ -5615,7 +5639,7 @@ bad:
#ifdef INET6
static void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
- struct pf_state *s, struct pf_pdesc *pd)
+ struct pf_state *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0;
struct sockaddr_in6 dst;
@@ -5684,7 +5708,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
goto bad;
if (oifp != ifp) {
- if (pf_test6(PF_OUT, PFIL_FWD, ifp, &m0, NULL) != PF_PASS)
+ if (pf_test6(PF_OUT, PFIL_FWD, ifp, &m0, inp) != PF_PASS)
goto bad;
else if (m0 == NULL)
goto done;
@@ -6248,7 +6272,7 @@ done:
default:
/* pf_route() returns unlocked. */
if (r->rt) {
- pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
+ pf_route(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
return (action);
}
break;
@@ -6645,7 +6669,7 @@ done:
default:
/* pf_route6() returns unlocked. */
if (r->rt) {
- pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
+ pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
return (action);
}
break;
diff --git a/freebsd/sys/netpfil/pf/pf_altq.h b/freebsd/sys/netpfil/pf/pf_altq.h
index f6d578d3..35d2d5cb 100644
--- a/freebsd/sys/netpfil/pf/pf_altq.h
+++ b/freebsd/sys/netpfil/pf/pf_altq.h
@@ -57,7 +57,7 @@ struct priq_opts {
int flags;
};
-struct hfsc_opts {
+struct hfsc_opts_v0 {
/* real-time service curve */
u_int rtsc_m1; /* slope of the 1st segment in bps */
u_int rtsc_d; /* the x-projection of m1 in msec */
@@ -73,6 +73,31 @@ struct hfsc_opts {
int flags;
};
+struct hfsc_opts_v1 {
+ /* real-time service curve */
+ u_int64_t rtsc_m1; /* slope of the 1st segment in bps */
+ u_int rtsc_d; /* the x-projection of m1 in msec */
+ u_int64_t rtsc_m2; /* slope of the 2nd segment in bps */
+ /* link-sharing service curve */
+ u_int64_t lssc_m1;
+ u_int lssc_d;
+ u_int64_t lssc_m2;
+ /* upper-limit service curve */
+ u_int64_t ulsc_m1;
+ u_int ulsc_d;
+ u_int64_t ulsc_m2;
+ int flags;
+};
+
+/*
+ * struct hfsc_opts doesn't have a version indicator macro or
+ * backwards-compat and convenience macros because both in the kernel and
+ * the pfctl parser, there are struct hfsc_opts instances named 'hfsc_opts'.
+ * It is believed that only in-tree code uses struct hfsc_opts, so
+ * backwards-compat macros are not necessary. The few in-tree uses can just
+ * be updated to the latest versioned struct tag.
+ */
+
/*
* XXX this needs some work
*/
@@ -87,11 +112,22 @@ struct fairq_opts {
u_int lssc_m2;
};
-struct pf_altq {
+/*
+ * struct pf_altq_v0, struct pf_altq_v1, etc. are the ioctl argument
+ * structures corresponding to struct pfioc_altq_v0, struct pfioc_altq_v1,
+ * etc.
+ *
+ */
+struct pf_altq_v0 {
char ifname[IFNAMSIZ];
- void *altq_disc; /* discipline-specific state */
- TAILQ_ENTRY(pf_altq) entries;
+ /*
+ * This member is a holdover from when the kernel state structure
+ * was reused as the ioctl argument structure, and remains to
+ * preserve the size and layout of this struct for backwards compat.
+ */
+ void *unused1;
+ TAILQ_ENTRY(pf_altq_v0) entries;
/* scheduler spec */
uint8_t scheduler; /* scheduler type */
@@ -113,11 +149,110 @@ struct pf_altq {
struct cbq_opts cbq_opts;
struct codel_opts codel_opts;
struct priq_opts priq_opts;
- struct hfsc_opts hfsc_opts;
+ struct hfsc_opts_v0 hfsc_opts;
+ struct fairq_opts fairq_opts;
+ } pq_u;
+
+ uint32_t qid; /* return value */
+};
+
+struct pf_altq_v1 {
+ char ifname[IFNAMSIZ];
+
+ TAILQ_ENTRY(pf_altq_v1) entries;
+
+ /* scheduler spec */
+ uint8_t scheduler; /* scheduler type */
+ uint32_t tbrsize; /* tokenbucket regulator size */
+ uint64_t ifbandwidth; /* interface bandwidth */
+
+ /* queue spec */
+ char qname[PF_QNAME_SIZE]; /* queue name */
+ char parent[PF_QNAME_SIZE]; /* parent name */
+ uint32_t parent_qid; /* parent queue id */
+ uint64_t bandwidth; /* queue bandwidth */
+ uint8_t priority; /* priority */
+ uint8_t local_flags; /* dynamic interface, see _v0 */
+
+ uint16_t qlimit; /* queue size limit */
+ uint16_t flags; /* misc flags */
+ union {
+ struct cbq_opts cbq_opts;
+ struct codel_opts codel_opts;
+ struct priq_opts priq_opts;
+ struct hfsc_opts_v1 hfsc_opts;
struct fairq_opts fairq_opts;
} pq_u;
uint32_t qid; /* return value */
};
+/* Latest version of struct pf_altq_vX */
+#define PF_ALTQ_VERSION 1
+
+#ifdef _KERNEL
+struct pf_kaltq {
+ char ifname[IFNAMSIZ];
+
+ void *altq_disc; /* discipline-specific state */
+ TAILQ_ENTRY(pf_kaltq) entries;
+
+ /* scheduler spec */
+ uint8_t scheduler; /* scheduler type */
+ uint32_t tbrsize; /* tokenbucket regulator size */
+ uint64_t ifbandwidth; /* interface bandwidth */
+
+ /* queue spec */
+ char qname[PF_QNAME_SIZE]; /* queue name */
+ char parent[PF_QNAME_SIZE]; /* parent name */
+ uint32_t parent_qid; /* parent queue id */
+ uint64_t bandwidth; /* queue bandwidth */
+ uint8_t priority; /* priority */
+ uint8_t local_flags; /* dynamic interface, see _v0 */
+
+ uint16_t qlimit; /* queue size limit */
+ uint16_t flags; /* misc flags */
+ union {
+ struct cbq_opts cbq_opts;
+ struct codel_opts codel_opts;
+ struct priq_opts priq_opts;
+ struct hfsc_opts_v1 hfsc_opts;
+ struct fairq_opts fairq_opts;
+ } pq_u;
+
+ uint32_t qid; /* return value */
+};
+#endif /* _KERNEL */
+
+/*
+ * Compatibility and convenience macros
+ */
+#ifdef _KERNEL
+/*
+ * Avoid a patch with 100+ lines of name substitution.
+ */
+#define pf_altq pf_kaltq
+
+#else /* _KERNEL */
+
+#ifdef PFIOC_USE_LATEST
+/*
+ * Maintaining in-tree consumers of the ioctl interface is easier when that
+ * code can be written in terms of old names that refer to the latest interface
+ * version as that reduces the required changes in the consumers to those
+ * that are functionally necessary to accommodate a new interface version.
+ */
+#define pf_altq __CONCAT(pf_altq_v, PF_ALTQ_VERSION)
+
+#else /* PFIOC_USE_LATEST */
+/*
+ * When building out-of-tree code that is written for the old interface,
+ * such as may exist in ports for example, resolve the old pf_altq struct
+ * tag to the v0 version.
+ */
+#define pf_altq __CONCAT(pf_altq_v, 0)
+
+#endif /* PFIOC_USE_LATEST */
+#endif /* _KERNEL */
+
#endif /* _NET_PF_ALTQ_H_ */
diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c
index 2ac76ff2..2c321118 100644
--- a/freebsd/sys/netpfil/pf/pf_if.c
+++ b/freebsd/sys/netpfil/pf/pf_if.c
@@ -57,16 +57,16 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
VNET_DEFINE(struct pfi_kif *, pfi_all);
-static VNET_DEFINE(long, pfi_update);
+VNET_DEFINE_STATIC(long, pfi_update);
#define V_pfi_update VNET(pfi_update)
#define PFI_BUFFER_MAX 0x10000
VNET_DECLARE(int, pf_vnet_active);
#define V_pf_vnet_active VNET(pf_vnet_active)
-static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
-static VNET_DEFINE(int, pfi_buffer_cnt);
-static VNET_DEFINE(int, pfi_buffer_max);
+VNET_DEFINE_STATIC(struct pfr_addr *, pfi_buffer);
+VNET_DEFINE_STATIC(int, pfi_buffer_cnt);
+VNET_DEFINE_STATIC(int, pfi_buffer_max);
#define V_pfi_buffer VNET(pfi_buffer)
#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt)
#define V_pfi_buffer_max VNET(pfi_buffer_max)
@@ -100,14 +100,14 @@ static void pfi_ifaddr_event(void * __unused, struct ifnet *);
RB_HEAD(pfi_ifhead, pfi_kif);
static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
-static VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
+VNET_DEFINE_STATIC(struct pfi_ifhead, pfi_ifs);
#define V_pfi_ifs VNET(pfi_ifs)
#define PFI_BUFFER_MAX 0x10000
MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database");
LIST_HEAD(pfi_list, pfi_kif);
-static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
+VNET_DEFINE_STATIC(struct pfi_list, pfi_unlinked_kifs);
#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs)
static struct mtx pfi_unlnkdkifs_mtx;
MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
@@ -299,11 +299,16 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
if (rule_kif == NULL || rule_kif == packet_kif)
return (1);
- if (rule_kif->pfik_group != NULL)
- /* XXXGL: locking? */
+ if (rule_kif->pfik_group != NULL) {
+ IF_ADDR_RLOCK(packet_kif->pfik_ifp);
CK_STAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
- if (p->ifgl_group == rule_kif->pfik_group)
+ if (p->ifgl_group == rule_kif->pfik_group) {
+ IF_ADDR_RUNLOCK(packet_kif->pfik_ifp);
return (1);
+ }
+ IF_ADDR_RUNLOCK(packet_kif->pfik_ifp);
+ }
+
return (0);
}
@@ -737,6 +742,7 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
static int
pfi_skip_if(const char *filter, struct pfi_kif *p)
{
+ struct ifg_list *i;
int n;
if (filter == NULL || !*filter)
@@ -747,10 +753,19 @@ pfi_skip_if(const char *filter, struct pfi_kif *p)
if (n < 1 || n >= IFNAMSIZ)
return (1); /* sanity check */
if (filter[n-1] >= '0' && filter[n-1] <= '9')
- return (1); /* only do exact match in that case */
- if (strncmp(p->pfik_name, filter, n))
- return (1); /* prefix doesn't match */
- return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9');
+ return (1); /* group names may not end in a digit */
+ if (p->pfik_ifp != NULL) {
+ IF_ADDR_RLOCK(p->pfik_ifp);
+ CK_STAILQ_FOREACH(i, &p->pfik_ifp->if_groups, ifgl_next) {
+ if (!strncmp(i->ifgl_group->ifg_group, filter,
+ IFNAMSIZ)) {
+ IF_ADDR_RUNLOCK(p->pfik_ifp);
+ return (0); /* iface is in group "filter" */
+ }
+ }
+ IF_ADDR_RUNLOCK(p->pfik_ifp);
+ }
+ return (1);
}
int
diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c
index 837ad31c..9ca15a41 100644
--- a/freebsd/sys/netpfil/pf/pf_ioctl.c
+++ b/freebsd/sys/netpfil/pf/pf_ioctl.c
@@ -115,11 +115,17 @@ static int pf_commit_rules(u_int32_t, int, char *);
static int pf_addr_setup(struct pf_ruleset *,
struct pf_addr_wrap *, sa_family_t);
static void pf_addr_copyout(struct pf_addr_wrap *);
+#ifdef ALTQ
+static int pf_export_kaltq(struct pf_altq *,
+ struct pfioc_altq_v1 *, size_t);
+static int pf_import_kaltq(struct pfioc_altq_v1 *,
+ struct pf_altq *, size_t);
+#endif /* ALTQ */
VNET_DEFINE(struct pf_rule, pf_default_rule);
#ifdef ALTQ
-static VNET_DEFINE(int, pf_altq_running);
+VNET_DEFINE_STATIC(int, pf_altq_running);
#define V_pf_altq_running VNET(pf_altq_running)
#endif
@@ -189,7 +195,7 @@ static struct cdevsw pf_cdevsw = {
.d_version = D_VERSION,
};
-static volatile VNET_DEFINE(int, pf_pfil_hooked);
+volatile VNET_DEFINE_STATIC(int, pf_pfil_hooked);
#define V_pf_pfil_hooked VNET(pf_pfil_hooked)
/*
@@ -992,6 +998,222 @@ pf_addr_copyout(struct pf_addr_wrap *addr)
}
}
+#ifdef ALTQ
+/*
+ * Handle export of struct pf_kaltq to user binaries that may be using any
+ * version of struct pf_altq.
+ */
+static int
+pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
+{
+ u_int32_t version;
+
+ if (ioc_size == sizeof(struct pfioc_altq_v0))
+ version = 0;
+ else
+ version = pa->version;
+
+ if (version > PFIOC_ALTQ_VERSION)
+ return (EINVAL);
+
+#define ASSIGN(x) exported_q->x = q->x
+#define COPY(x) \
+ bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
+#define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
+#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)
+
+ switch (version) {
+ case 0: {
+ struct pf_altq_v0 *exported_q =
+ &((struct pfioc_altq_v0 *)pa)->altq;
+
+ COPY(ifname);
+
+ ASSIGN(scheduler);
+ ASSIGN(tbrsize);
+ exported_q->tbrsize = SATU16(q->tbrsize);
+ exported_q->ifbandwidth = SATU32(q->ifbandwidth);
+
+ COPY(qname);
+ COPY(parent);
+ ASSIGN(parent_qid);
+ exported_q->bandwidth = SATU32(q->bandwidth);
+ ASSIGN(priority);
+ ASSIGN(local_flags);
+
+ ASSIGN(qlimit);
+ ASSIGN(flags);
+
+ if (q->scheduler == ALTQT_HFSC) {
+#define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
+#define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
+ SATU32(q->pq_u.hfsc_opts.x)
+
+ ASSIGN_OPT_SATU32(rtsc_m1);
+ ASSIGN_OPT(rtsc_d);
+ ASSIGN_OPT_SATU32(rtsc_m2);
+
+ ASSIGN_OPT_SATU32(lssc_m1);
+ ASSIGN_OPT(lssc_d);
+ ASSIGN_OPT_SATU32(lssc_m2);
+
+ ASSIGN_OPT_SATU32(ulsc_m1);
+ ASSIGN_OPT(ulsc_d);
+ ASSIGN_OPT_SATU32(ulsc_m2);
+
+ ASSIGN_OPT(flags);
+
+#undef ASSIGN_OPT
+#undef ASSIGN_OPT_SATU32
+ } else
+ COPY(pq_u);
+
+ ASSIGN(qid);
+ break;
+ }
+ case 1: {
+ struct pf_altq_v1 *exported_q =
+ &((struct pfioc_altq_v1 *)pa)->altq;
+
+ COPY(ifname);
+
+ ASSIGN(scheduler);
+ ASSIGN(tbrsize);
+ ASSIGN(ifbandwidth);
+
+ COPY(qname);
+ COPY(parent);
+ ASSIGN(parent_qid);
+ ASSIGN(bandwidth);
+ ASSIGN(priority);
+ ASSIGN(local_flags);
+
+ ASSIGN(qlimit);
+ ASSIGN(flags);
+ COPY(pq_u);
+
+ ASSIGN(qid);
+ break;
+ }
+ default:
+ panic("%s: unhandled struct pfioc_altq version", __func__);
+ break;
+ }
+
+#undef ASSIGN
+#undef COPY
+#undef SATU16
+#undef SATU32
+
+ return (0);
+}
+
+/*
+ * Handle import to struct pf_kaltq of struct pf_altq from user binaries
+ * that may be using any version of it.
+ */
+static int
+pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
+{
+ u_int32_t version;
+
+ if (ioc_size == sizeof(struct pfioc_altq_v0))
+ version = 0;
+ else
+ version = pa->version;
+
+ if (version > PFIOC_ALTQ_VERSION)
+ return (EINVAL);
+
+#define ASSIGN(x) q->x = imported_q->x
+#define COPY(x) \
+ bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))
+
+ switch (version) {
+ case 0: {
+ struct pf_altq_v0 *imported_q =
+ &((struct pfioc_altq_v0 *)pa)->altq;
+
+ COPY(ifname);
+
+ ASSIGN(scheduler);
+ ASSIGN(tbrsize); /* 16-bit -> 32-bit */
+ ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */
+
+ COPY(qname);
+ COPY(parent);
+ ASSIGN(parent_qid);
+ ASSIGN(bandwidth); /* 32-bit -> 64-bit */
+ ASSIGN(priority);
+ ASSIGN(local_flags);
+
+ ASSIGN(qlimit);
+ ASSIGN(flags);
+
+ if (imported_q->scheduler == ALTQT_HFSC) {
+#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x
+
+ /*
+ * The m1 and m2 parameters are being copied from
+ * 32-bit to 64-bit.
+ */
+ ASSIGN_OPT(rtsc_m1);
+ ASSIGN_OPT(rtsc_d);
+ ASSIGN_OPT(rtsc_m2);
+
+ ASSIGN_OPT(lssc_m1);
+ ASSIGN_OPT(lssc_d);
+ ASSIGN_OPT(lssc_m2);
+
+ ASSIGN_OPT(ulsc_m1);
+ ASSIGN_OPT(ulsc_d);
+ ASSIGN_OPT(ulsc_m2);
+
+ ASSIGN_OPT(flags);
+
+#undef ASSIGN_OPT
+ } else
+ COPY(pq_u);
+
+ ASSIGN(qid);
+ break;
+ }
+ case 1: {
+ struct pf_altq_v1 *imported_q =
+ &((struct pfioc_altq_v1 *)pa)->altq;
+
+ COPY(ifname);
+
+ ASSIGN(scheduler);
+ ASSIGN(tbrsize);
+ ASSIGN(ifbandwidth);
+
+ COPY(qname);
+ COPY(parent);
+ ASSIGN(parent_qid);
+ ASSIGN(bandwidth);
+ ASSIGN(priority);
+ ASSIGN(local_flags);
+
+ ASSIGN(qlimit);
+ ASSIGN(flags);
+ COPY(pq_u);
+
+ ASSIGN(qid);
+ break;
+ }
+ default:
+ panic("%s: unhandled struct pfioc_altq version", __func__);
+ break;
+ }
+
+#undef ASSIGN
+#undef COPY
+
+ return (0);
+}
+#endif /* ALTQ */
+
static int
pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
{
@@ -1015,9 +1237,12 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
case DIOCGETTIMEOUT:
case DIOCCLRRULECTRS:
case DIOCGETLIMIT:
- case DIOCGETALTQS:
- case DIOCGETALTQ:
- case DIOCGETQSTATS:
+ case DIOCGETALTQSV0:
+ case DIOCGETALTQSV1:
+ case DIOCGETALTQV0:
+ case DIOCGETALTQV1:
+ case DIOCGETQSTATSV0:
+ case DIOCGETQSTATSV1:
case DIOCGETRULESETS:
case DIOCGETRULESET:
case DIOCRGETTABLES:
@@ -1035,7 +1260,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
case DIOCGETSRCNODES:
case DIOCCLRSRCNODES:
case DIOCIGETIFACES:
- case DIOCGIFSPEED:
+ case DIOCGIFSPEEDV0:
+ case DIOCGIFSPEEDV1:
case DIOCSETIFFLAG:
case DIOCCLRIFFLAG:
break;
@@ -1061,9 +1287,12 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
case DIOCGETSTATES:
case DIOCGETTIMEOUT:
case DIOCGETLIMIT:
- case DIOCGETALTQS:
- case DIOCGETALTQ:
- case DIOCGETQSTATS:
+ case DIOCGETALTQSV0:
+ case DIOCGETALTQSV1:
+ case DIOCGETALTQV0:
+ case DIOCGETALTQV1:
+ case DIOCGETQSTATSV0:
+ case DIOCGETQSTATSV1:
case DIOCGETRULESETS:
case DIOCGETRULESET:
case DIOCNATLOOK:
@@ -1075,7 +1304,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
case DIOCOSFPGET:
case DIOCGETSRCNODES:
case DIOCIGETIFACES:
- case DIOCGIFSPEED:
+ case DIOCGIFSPEEDV1:
+ case DIOCGIFSPEEDV0:
break;
case DIOCRCLRTABLES:
case DIOCRADDTABLES:
@@ -2013,18 +2243,22 @@ DIOCGETSTATES_full:
break;
}
- case DIOCGIFSPEED: {
- struct pf_ifspeed *psp = (struct pf_ifspeed *)addr;
- struct pf_ifspeed ps;
+ case DIOCGIFSPEEDV0:
+ case DIOCGIFSPEEDV1: {
+ struct pf_ifspeed_v1 *psp = (struct pf_ifspeed_v1 *)addr;
+ struct pf_ifspeed_v1 ps;
struct ifnet *ifp;
if (psp->ifname[0] != 0) {
/* Can we completely trust user-land? */
strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
ifp = ifunit(ps.ifname);
- if (ifp != NULL)
- psp->baudrate = ifp->if_baudrate;
- else
+ if (ifp != NULL) {
+ psp->baudrate32 =
+ (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
+ if (cmd == DIOCGIFSPEEDV1)
+ psp->baudrate = ifp->if_baudrate;
+ } else
error = EINVAL;
} else
error = EINVAL;
@@ -2072,13 +2306,16 @@ DIOCGETSTATES_full:
break;
}
- case DIOCADDALTQ: {
- struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ case DIOCADDALTQV0:
+ case DIOCADDALTQV1: {
+ struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr;
struct pf_altq *altq, *a;
struct ifnet *ifp;
- altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK);
- bcopy(&pa->altq, altq, sizeof(struct pf_altq));
+ altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
+ error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
+ if (error) {
+ /* altq is not linked anywhere yet; free it or it leaks. */
+ free(altq, M_PFALTQ);
+ break;
+ }
altq->local_flags = 0;
PF_RULES_WLOCK();
@@ -2122,13 +2359,15 @@ DIOCGETSTATES_full:
}
TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
- bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ /* version error check done on import above */
+ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
PF_RULES_WUNLOCK();
break;
}
- case DIOCGETALTQS: {
- struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ case DIOCGETALTQSV0:
+ case DIOCGETALTQSV1: {
+ struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr;
struct pf_altq *altq;
PF_RULES_RLOCK();
@@ -2140,8 +2379,9 @@ DIOCGETSTATES_full:
break;
}
- case DIOCGETALTQ: {
- struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ case DIOCGETALTQV0:
+ case DIOCGETALTQV1: {
+ struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr;
struct pf_altq *altq;
u_int32_t nr;
@@ -2162,21 +2402,24 @@ DIOCGETSTATES_full:
error = EBUSY;
break;
}
- bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
PF_RULES_RUNLOCK();
break;
}
- case DIOCCHANGEALTQ:
+ case DIOCCHANGEALTQV0:
+ case DIOCCHANGEALTQV1:
/* CHANGEALTQ not supported yet! */
error = ENODEV;
break;
- case DIOCGETQSTATS: {
- struct pfioc_qstats *pq = (struct pfioc_qstats *)addr;
+ case DIOCGETQSTATSV0:
+ case DIOCGETQSTATSV1: {
+ struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr;
struct pf_altq *altq;
u_int32_t nr;
int nbytes;
+ u_int32_t version;
PF_RULES_RLOCK();
if (pq->ticket != V_ticket_altqs_active) {
@@ -2203,7 +2446,11 @@ DIOCGETSTATES_full:
break;
}
PF_RULES_RUNLOCK();
- error = altq_getqstats(altq, pq->buf, &nbytes);
+ if (cmd == DIOCGETQSTATSV0)
+ version = 0; /* DIOCGETQSTATSV0 means stats struct v0 */
+ else
+ version = pq->version;
+ error = altq_getqstats(altq, pq->buf, &nbytes, version);
if (error == 0) {
pq->scheduler = altq->scheduler;
pq->nbytes = nbytes;
@@ -3963,7 +4210,6 @@ pf_unload_vnet(void)
V_pf_vnet_active = 0;
V_pf_status.running = 0;
- swi_remove(V_pf_swi_cookie);
error = dehook_pf();
if (error) {
/*
@@ -3979,6 +4225,8 @@ pf_unload_vnet(void)
shutdown_pf();
PF_RULES_WUNLOCK();
+ swi_remove(V_pf_swi_cookie);
+
pf_unload_vnet_purge();
pf_normalize_cleanup();
diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c
index 61da5e4f..0f98c669 100644
--- a/freebsd/sys/netpfil/pf/pf_norm.c
+++ b/freebsd/sys/netpfil/pf/pf_norm.c
@@ -93,8 +93,10 @@ struct pf_fragment {
TAILQ_ENTRY(pf_fragment) frag_next;
uint32_t fr_timeout;
uint16_t fr_maxlen; /* maximum length of single fragment */
+ uint16_t fr_entries; /* Total number of pf_frent entries */
TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
};
+#define PF_MAX_FRENT_PER_FRAGMENT 64
struct pf_fragment_tag {
uint16_t ft_hdrlen; /* header length of reassembled pkt */
@@ -111,17 +113,17 @@ MTX_SYSINIT(pf_frag_mtx, &pf_frag_mtx, "pf fragments", MTX_DEF);
VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */
-static VNET_DEFINE(uma_zone_t, pf_frent_z);
+VNET_DEFINE_STATIC(uma_zone_t, pf_frent_z);
#define V_pf_frent_z VNET(pf_frent_z)
-static VNET_DEFINE(uma_zone_t, pf_frag_z);
+VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z);
#define V_pf_frag_z VNET(pf_frag_z)
TAILQ_HEAD(pf_fragqueue, pf_fragment);
TAILQ_HEAD(pf_cachequeue, pf_fragment);
-static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue);
+VNET_DEFINE_STATIC(struct pf_fragqueue, pf_fragqueue);
#define V_pf_fragqueue VNET(pf_fragqueue)
RB_HEAD(pf_frag_tree, pf_fragment);
-static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree);
+VNET_DEFINE_STATIC(struct pf_frag_tree, pf_frag_tree);
#define V_pf_frag_tree VNET(pf_frag_tree)
static int pf_frag_compare(struct pf_fragment *,
struct pf_fragment *);
@@ -386,6 +388,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
*(struct pf_fragment_cmp *)frag = *key;
frag->fr_timeout = time_uptime;
frag->fr_maxlen = frent->fe_len;
+ frag->fr_entries = 0;
TAILQ_INIT(&frag->fr_queue);
RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
@@ -397,6 +400,9 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
return (frag);
}
+ if (frag->fr_entries >= PF_MAX_FRENT_PER_FRAGMENT)
+ goto bad_fragment;
+
KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
/* Remember maximum fragment len for refragmentation. */
@@ -469,6 +475,8 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
else
TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+ frag->fr_entries++;
+
return (frag);
bad_fragment:
diff --git a/freebsd/sys/netpfil/pf/pf_osfp.c b/freebsd/sys/netpfil/pf/pf_osfp.c
index b87d39bd..8723830c 100644
--- a/freebsd/sys/netpfil/pf/pf_osfp.c
+++ b/freebsd/sys/netpfil/pf/pf_osfp.c
@@ -49,7 +49,7 @@ static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints")
printf(format , ##x)
SLIST_HEAD(pf_osfp_list, pf_os_fingerprint);
-static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) =
+VNET_DEFINE_STATIC(struct pf_osfp_list, pf_osfp_list) =
SLIST_HEAD_INITIALIZER();
#define V_pf_osfp_list VNET(pf_osfp_list)
diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c
index 04a275d9..1fadd38c 100644
--- a/freebsd/sys/netpfil/pf/pf_table.c
+++ b/freebsd/sys/netpfil/pf/pf_table.c
@@ -124,9 +124,9 @@ struct pfr_walktree {
#define senderr(e) do { rv = (e); goto _bad; } while (0)
static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures");
-static VNET_DEFINE(uma_zone_t, pfr_kentry_z);
+VNET_DEFINE_STATIC(uma_zone_t, pfr_kentry_z);
#define V_pfr_kentry_z VNET(pfr_kentry_z)
-static VNET_DEFINE(uma_zone_t, pfr_kcounters_z);
+VNET_DEFINE_STATIC(uma_zone_t, pfr_kcounters_z);
#define V_pfr_kcounters_z VNET(pfr_kcounters_z)
static struct pf_addr pfr_ffaddr = {
@@ -186,13 +186,13 @@ static struct pfr_kentry
static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
-static VNET_DEFINE(struct pfr_ktablehead, pfr_ktables);
+VNET_DEFINE_STATIC(struct pfr_ktablehead, pfr_ktables);
#define V_pfr_ktables VNET(pfr_ktables)
-static VNET_DEFINE(struct pfr_table, pfr_nulltable);
+VNET_DEFINE_STATIC(struct pfr_table, pfr_nulltable);
#define V_pfr_nulltable VNET(pfr_nulltable)
-static VNET_DEFINE(int, pfr_ktable_cnt);
+VNET_DEFINE_STATIC(int, pfr_ktable_cnt);
#define V_pfr_ktable_cnt VNET(pfr_ktable_cnt)
void
diff --git a/freebsd/sys/opencrypto/_cryptodev.h b/freebsd/sys/opencrypto/_cryptodev.h
new file mode 100644
index 00000000..d13b41da
--- /dev/null
+++ b/freebsd/sys/opencrypto/_cryptodev.h
@@ -0,0 +1,8 @@
+/*
+ * This trivial work is released to the public domain, or licensed under the
+ * terms of the CC0, at your option.
+ * $FreeBSD$
+ */
+#pragma once
+
+typedef struct crypto_session *crypto_session_t;
diff --git a/freebsd/sys/opencrypto/crypto.c b/freebsd/sys/opencrypto/crypto.c
index 5db2e872..d66fe5d5 100644
--- a/freebsd/sys/opencrypto/crypto.c
+++ b/freebsd/sys/opencrypto/crypto.c
@@ -91,6 +91,13 @@ __FBSDID("$FreeBSD$");
#include <machine/pcb.h>
#endif
+struct crypto_session {
+ device_t parent;
+ void *softc;
+ uint32_t hid;
+ uint32_t capabilities;
+};
+
SDT_PROVIDER_DEFINE(opencrypto);
/*
@@ -127,6 +134,7 @@ struct cryptocap {
#define CRYPTOCAP_F_CLEANUP 0x80000000 /* needs resource cleanup */
int cc_qblocked; /* (q) symmetric q blocked */
int cc_kqblocked; /* (q) asymmetric q blocked */
+ size_t cc_session_size;
};
static struct cryptocap *crypto_drivers = NULL;
static int crypto_drivers_num = 0;
@@ -187,6 +195,7 @@ SYSCTL_INT(_kern, OID_AUTO, crypto_workers_num, CTLFLAG_RDTUN,
static uma_zone_t cryptop_zone;
static uma_zone_t cryptodesc_zone;
+static uma_zone_t cryptoses_zone;
int crypto_userasymcrypto = 1; /* userland may do asym crypto reqs */
SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW,
@@ -205,6 +214,7 @@ static void crypto_ret_proc(struct crypto_ret_worker *ret_worker);
static void crypto_destroy(void);
static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint);
static int crypto_kinvoke(struct cryptkop *krp, int flags);
+static void crypto_remove(struct cryptocap *cap);
static void crypto_task_invoke(void *ctx, int pending);
static void crypto_batch_enqueue(struct cryptop *crp);
@@ -268,7 +278,12 @@ crypto_init(void)
cryptodesc_zone = uma_zcreate("cryptodesc", sizeof (struct cryptodesc),
0, 0, 0, 0,
UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
- if (cryptodesc_zone == NULL || cryptop_zone == NULL) {
+ cryptoses_zone = uma_zcreate("crypto_session",
+ sizeof(struct crypto_session), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
+
+ if (cryptodesc_zone == NULL || cryptop_zone == NULL ||
+ cryptoses_zone == NULL) {
printf("crypto_init: cannot setup crypto zones\n");
error = ENOMEM;
goto bad;
@@ -395,6 +410,8 @@ crypto_destroy(void)
if (crypto_drivers != NULL)
free(crypto_drivers, M_CRYPTO_DATA);
+ if (cryptoses_zone != NULL)
+ uma_zdestroy(cryptoses_zone);
if (cryptodesc_zone != NULL)
uma_zdestroy(cryptodesc_zone);
if (cryptop_zone != NULL)
@@ -408,6 +425,24 @@ crypto_destroy(void)
mtx_destroy(&crypto_drivers_mtx);
}
+uint32_t
+crypto_ses2hid(crypto_session_t crypto_session)
+{
+ return (crypto_session->hid);
+}
+
+uint32_t
+crypto_ses2caps(crypto_session_t crypto_session)
+{
+ return (crypto_session->capabilities);
+}
+
+void *
+crypto_get_driver_session(crypto_session_t crypto_session)
+{
+ return (crypto_session->softc);
+}
+
static struct cryptocap *
crypto_checkdriver(u_int32_t hid)
{
@@ -495,12 +530,19 @@ again:
* must be capable of the requested crypto algorithms.
*/
int
-crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid)
+crypto_newsession(crypto_session_t *cses, struct cryptoini *cri, int crid)
{
+ crypto_session_t res;
+ void *softc_mem;
struct cryptocap *cap;
- u_int32_t hid, lid;
+ u_int32_t hid;
+ size_t softc_size;
int err;
+restart:
+ res = NULL;
+ softc_mem = NULL;
+
CRYPTO_DRIVER_LOCK();
if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) {
/*
@@ -520,24 +562,53 @@ crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid)
* XXX layer right about here.
*/
}
- if (cap != NULL) {
- /* Call the driver initialization routine. */
- hid = cap - crypto_drivers;
- lid = hid; /* Pass the driver ID. */
- err = CRYPTODEV_NEWSESSION(cap->cc_dev, &lid, cri);
- if (err == 0) {
- (*sid) = (cap->cc_flags & 0xff000000)
- | (hid & 0x00ffffff);
- (*sid) <<= 32;
- (*sid) |= (lid & 0xffffffff);
- cap->cc_sessions++;
- } else
- CRYPTDEB("dev newsession failed: %d", err);
- } else {
+ if (cap == NULL) {
CRYPTDEB("no driver");
err = EOPNOTSUPP;
+ goto out;
+ }
+ cap->cc_sessions++;
+ softc_size = cap->cc_session_size;
+ hid = cap - crypto_drivers;
+ cap = NULL;
+ CRYPTO_DRIVER_UNLOCK();
+
+ softc_mem = malloc(softc_size, M_CRYPTO_DATA, M_WAITOK | M_ZERO);
+ res = uma_zalloc(cryptoses_zone, M_WAITOK | M_ZERO);
+ res->softc = softc_mem;
+
+ CRYPTO_DRIVER_LOCK();
+ cap = crypto_checkdriver(hid);
+ if (cap != NULL && (cap->cc_flags & CRYPTOCAP_F_CLEANUP) != 0) {
+ cap->cc_sessions--;
+ crypto_remove(cap);
+ cap = NULL;
+ }
+ if (cap == NULL) {
+ free(softc_mem, M_CRYPTO_DATA);
+ uma_zfree(cryptoses_zone, res);
+ CRYPTO_DRIVER_UNLOCK();
+ goto restart;
+ }
+
+ /* Call the driver initialization routine. */
+ err = CRYPTODEV_NEWSESSION(cap->cc_dev, res, cri);
+ if (err != 0) {
+ CRYPTDEB("dev newsession failed: %d", err);
+ goto out;
}
+
+ res->capabilities = cap->cc_flags & 0xff000000;
+ res->hid = hid;
+ *cses = res;
+
+out:
CRYPTO_DRIVER_UNLOCK();
+ if (err != 0) {
+ free(softc_mem, M_CRYPTO_DATA);
+ if (res != NULL)
+ uma_zfree(cryptoses_zone, res);
+ }
return err;
}
@@ -554,41 +625,41 @@ crypto_remove(struct cryptocap *cap)
* Delete an existing session (or a reserved session on an unregistered
* driver).
*/
-int
-crypto_freesession(u_int64_t sid)
+void
+crypto_freesession(crypto_session_t cses)
{
struct cryptocap *cap;
+ void *ses;
+ size_t ses_size;
u_int32_t hid;
- int err;
- CRYPTO_DRIVER_LOCK();
-
- if (crypto_drivers == NULL) {
- err = EINVAL;
- goto done;
- }
+ if (cses == NULL)
+ return;
- /* Determine two IDs. */
- hid = CRYPTO_SESID2HID(sid);
+ CRYPTO_DRIVER_LOCK();
- if (hid >= crypto_drivers_num) {
- err = ENOENT;
- goto done;
- }
+ hid = crypto_ses2hid(cses);
+ KASSERT(hid < crypto_drivers_num,
+ ("bogus crypto_session %p hid %u", cses, hid));
cap = &crypto_drivers[hid];
+ ses = cses->softc;
+ ses_size = cap->cc_session_size;
+
if (cap->cc_sessions)
cap->cc_sessions--;
/* Call the driver cleanup routine, if available. */
- err = CRYPTODEV_FREESESSION(cap->cc_dev, sid);
+ CRYPTODEV_FREESESSION(cap->cc_dev, cses);
+
+ explicit_bzero(ses, ses_size);
+ free(ses, M_CRYPTO_DATA);
+ uma_zfree(cryptoses_zone, cses);
if (cap->cc_flags & CRYPTOCAP_F_CLEANUP)
crypto_remove(cap);
-done:
CRYPTO_DRIVER_UNLOCK();
- return err;
}
/*
@@ -596,7 +667,7 @@ done:
* support for the algorithms they handle.
*/
int32_t
-crypto_get_driverid(device_t dev, int flags)
+crypto_get_driverid(device_t dev, size_t sessionsize, int flags)
{
struct cryptocap *newdrv;
int i;
@@ -646,6 +717,7 @@ crypto_get_driverid(device_t dev, int flags)
crypto_drivers[i].cc_sessions = 1; /* Mark */
crypto_drivers[i].cc_dev = dev;
crypto_drivers[i].cc_flags = flags;
+ crypto_drivers[i].cc_session_size = sessionsize;
if (bootverbose)
printf("crypto: assign %s driver id %u, flags 0x%x\n",
device_get_nameunit(dev), i, flags);
@@ -903,7 +975,7 @@ crypto_dispatch(struct cryptop *crp)
binuptime(&crp->crp_tstamp);
#endif
- crp->crp_retw_id = crp->crp_sid % crypto_workers_num;
+ crp->crp_retw_id = ((uintptr_t)crp->crp_session) % crypto_workers_num;
if (CRYPTOP_ASYNC(crp)) {
if (crp->crp_flags & CRYPTO_F_ASYNC_KEEPORDER) {
@@ -922,7 +994,7 @@ crypto_dispatch(struct cryptop *crp)
}
if ((crp->crp_flags & CRYPTO_F_BATCH) == 0) {
- hid = CRYPTO_SESID2HID(crp->crp_sid);
+ hid = crypto_ses2hid(crp->crp_session);
/*
* Caller marked the request to be processed
@@ -1143,7 +1215,7 @@ crypto_task_invoke(void *ctx, int pending)
crp = (struct cryptop *)ctx;
- hid = CRYPTO_SESID2HID(crp->crp_sid);
+ hid = crypto_ses2hid(crp->crp_session);
cap = crypto_checkdriver(hid);
result = crypto_invoke(cap, crp, 0);
@@ -1169,7 +1241,7 @@ crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint)
#endif
if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) {
struct cryptodesc *crd;
- u_int64_t nid;
+ crypto_session_t nses;
/*
* Driver has unregistered; migrate the session and return
@@ -1178,15 +1250,15 @@ crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint)
* XXX: What if there are more already queued requests for this
* session?
*/
- crypto_freesession(crp->crp_sid);
+ crypto_freesession(crp->crp_session);
for (crd = crp->crp_desc; crd->crd_next; crd = crd->crd_next)
crd->CRD_INI.cri_next = &(crd->crd_next->CRD_INI);
/* XXX propagate flags from initial session? */
- if (crypto_newsession(&nid, &(crp->crp_desc->CRD_INI),
+ if (crypto_newsession(&nses, &(crp->crp_desc->CRD_INI),
CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE) == 0)
- crp->crp_sid = nid;
+ crp->crp_session = nses;
crp->crp_etype = EAGAIN;
crypto_done(crp);
@@ -1292,7 +1364,7 @@ crypto_done(struct cryptop *crp)
if (!CRYPTOP_ASYNC_KEEPORDER(crp) &&
((crp->crp_flags & CRYPTO_F_CBIMM) ||
((crp->crp_flags & CRYPTO_F_CBIFSYNC) &&
- (CRYPTO_SESID2CAPS(crp->crp_sid) & CRYPTOCAP_F_SYNC)))) {
+ (crypto_ses2caps(crp->crp_session) & CRYPTOCAP_F_SYNC)))) {
/*
* Do the callback directly. This is ok when the
* callback routine does very little (e.g. the
@@ -1454,7 +1526,7 @@ crypto_proc(void)
submit = NULL;
hint = 0;
TAILQ_FOREACH(crp, &crp_q, crp_next) {
- hid = CRYPTO_SESID2HID(crp->crp_sid);
+ hid = crypto_ses2hid(crp->crp_session);
cap = crypto_checkdriver(hid);
/*
* Driver cannot disappeared when there is an active
@@ -1478,7 +1550,7 @@ crypto_proc(void)
* better to just use a per-driver
* queue instead.
*/
- if (CRYPTO_SESID2HID(submit->crp_sid) == hid)
+ if (crypto_ses2hid(submit->crp_session) == hid)
hint = CRYPTO_HINT_MORE;
break;
} else {
@@ -1491,7 +1563,7 @@ crypto_proc(void)
}
if (submit != NULL) {
TAILQ_REMOVE(&crp_q, submit, crp_next);
- hid = CRYPTO_SESID2HID(submit->crp_sid);
+ hid = crypto_ses2hid(submit->crp_session);
cap = crypto_checkdriver(hid);
KASSERT(cap != NULL, ("%s:%u Driver disappeared.",
__func__, __LINE__));
@@ -1507,7 +1579,7 @@ crypto_proc(void)
* it at the end does not work.
*/
/* XXX validate sid again? */
- crypto_drivers[CRYPTO_SESID2HID(submit->crp_sid)].cc_qblocked = 1;
+ crypto_drivers[crypto_ses2hid(submit->crp_session)].cc_qblocked = 1;
TAILQ_INSERT_HEAD(&crp_q, submit, crp_next);
cryptostats.cs_blocks++;
}
@@ -1696,8 +1768,8 @@ DB_SHOW_COMMAND(crypto, db_show_crypto)
"Desc", "Callback");
TAILQ_FOREACH(crp, &crp_q, crp_next) {
db_printf("%4u %08x %4u %4u %4u %04x %8p %8p\n"
- , (int) CRYPTO_SESID2HID(crp->crp_sid)
- , (int) CRYPTO_SESID2CAPS(crp->crp_sid)
+ , (int) crypto_ses2hid(crp->crp_session)
+ , (int) crypto_ses2caps(crp->crp_session)
, crp->crp_ilen, crp->crp_olen
, crp->crp_etype
, crp->crp_flags
@@ -1712,7 +1784,7 @@ DB_SHOW_COMMAND(crypto, db_show_crypto)
TAILQ_FOREACH(crp, &ret_worker->crp_ret_q, crp_next) {
db_printf("%8td %4u %4u %04x %8p\n"
, CRYPTO_RETW_ID(ret_worker)
- , (int) CRYPTO_SESID2HID(crp->crp_sid)
+ , (int) crypto_ses2hid(crp->crp_session)
, crp->crp_etype
, crp->crp_flags
, crp->crp_callback
diff --git a/freebsd/sys/opencrypto/cryptodev.c b/freebsd/sys/opencrypto/cryptodev.c
index 162a247c..b569cbf7 100644
--- a/freebsd/sys/opencrypto/cryptodev.c
+++ b/freebsd/sys/opencrypto/cryptodev.c
@@ -267,7 +267,7 @@ crypt_kop_to_32(const struct crypt_kop *from, struct crypt_kop32 *to)
struct csession {
TAILQ_ENTRY(csession) next;
- u_int64_t sid;
+ crypto_session_t cses;
u_int32_t ses;
struct mtx lock; /* for op submission */
@@ -326,10 +326,10 @@ static const rtems_filesystem_file_handlers_r cryptofops;
static struct csession *csefind(struct fcrypt *, u_int);
static int csedelete(struct fcrypt *, struct csession *);
static struct csession *cseadd(struct fcrypt *, struct csession *);
-static struct csession *csecreate(struct fcrypt *, u_int64_t, caddr_t,
+static struct csession *csecreate(struct fcrypt *, crypto_session_t, caddr_t,
u_int64_t, caddr_t, u_int64_t, u_int32_t, u_int32_t, struct enc_xform *,
struct auth_hash *);
-static int csefree(struct csession *);
+static void csefree(struct csession *);
static int cryptodev_op(struct csession *, struct crypt_op *,
struct ucred *, struct thread *td);
@@ -384,7 +384,7 @@ cryptof_ioctl(
struct enc_xform *txform = NULL;
struct auth_hash *thash = NULL;
struct crypt_kop *kop;
- u_int64_t sid;
+ crypto_session_t cses;
u_int32_t ses;
int error = 0, crid;
#ifdef COMPAT_FREEBSD32
@@ -463,9 +463,15 @@ cryptof_ioctl(
case CRYPTO_MD5_HMAC:
thash = &auth_hash_hmac_md5;
break;
+ case CRYPTO_POLY1305:
+ thash = &auth_hash_poly1305;
+ break;
case CRYPTO_SHA1_HMAC:
thash = &auth_hash_hmac_sha1;
break;
+ case CRYPTO_SHA2_224_HMAC:
+ thash = &auth_hash_hmac_sha2_224;
+ break;
case CRYPTO_SHA2_256_HMAC:
thash = &auth_hash_hmac_sha2_256;
break;
@@ -492,10 +498,23 @@ cryptof_ioctl(
case CRYPTO_MD5:
thash = &auth_hash_md5;
break;
+#endif
case CRYPTO_SHA1:
thash = &auth_hash_sha1;
break;
-#endif
+ case CRYPTO_SHA2_224:
+ thash = &auth_hash_sha2_224;
+ break;
+ case CRYPTO_SHA2_256:
+ thash = &auth_hash_sha2_256;
+ break;
+ case CRYPTO_SHA2_384:
+ thash = &auth_hash_sha2_384;
+ break;
+ case CRYPTO_SHA2_512:
+ thash = &auth_hash_sha2_512;
+ break;
+
case CRYPTO_NULL_HMAC:
thash = &auth_hash_null;
break;
@@ -582,19 +601,19 @@ cryptof_ioctl(
}
} else
crid = CRYPTOCAP_F_HARDWARE;
- error = crypto_newsession(&sid, (txform ? &crie : &cria), crid);
+ error = crypto_newsession(&cses, (txform ? &crie : &cria), crid);
if (error) {
CRYPTDEB("crypto_newsession");
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);
goto bail;
}
- cse = csecreate(fcr, sid, crie.cri_key, crie.cri_klen,
+ cse = csecreate(fcr, cses, crie.cri_key, crie.cri_klen,
cria.cri_key, cria.cri_klen, sop->cipher, sop->mac, txform,
thash);
if (cse == NULL) {
- crypto_freesession(sid);
+ crypto_freesession(cses);
error = EINVAL;
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);
CRYPTDEB("csecreate");
@@ -607,7 +626,7 @@ cryptof_ioctl(
#endif
) {
/* return hardware/driver id */
- SES2(sop)->crid = CRYPTO_SESID2HID(cse->sid);
+ SES2(sop)->crid = crypto_ses2hid(cse->cses);
}
bail:
if (error) {
@@ -634,7 +653,7 @@ bail:
return (EINVAL);
}
csedelete(fcr, cse);
- error = csefree(cse);
+ csefree(cse);
break;
case CIOCCRYPT:
#ifdef COMPAT_FREEBSD32
@@ -867,7 +886,7 @@ cryptodev_op(
| (cop->flags & COP_F_BATCH);
crp->crp_uio = &cod->uio;
crp->crp_callback = cryptodev_cb;
- crp->crp_sid = cse->sid;
+ crp->crp_session = cse->cses;
crp->crp_opaque = cod;
if (cop->iv) {
@@ -1043,7 +1062,7 @@ cryptodev_aead(
| (caead->flags & COP_F_BATCH);
crp->crp_uio = &cod->uio;
crp->crp_callback = cryptodev_cb;
- crp->crp_sid = cse->sid;
+ crp->crp_session = cse->cses;
crp->crp_opaque = cod;
if (caead->iv) {
@@ -1318,7 +1337,7 @@ cryptof_close(struct file *fp, struct thread *td)
while ((cse = TAILQ_FIRST(&fcr->csessions))) {
TAILQ_REMOVE(&fcr->csessions, cse, next);
- (void)csefree(cse);
+ csefree(cse);
}
free(fcr, M_XDATA);
fp->f_data = NULL;
@@ -1389,7 +1408,7 @@ cseadd(struct fcrypt *fcr, struct csession *cse)
}
struct csession *
-csecreate(struct fcrypt *fcr, u_int64_t sid, caddr_t key, u_int64_t keylen,
+csecreate(struct fcrypt *fcr, crypto_session_t cses, caddr_t key, u_int64_t keylen,
caddr_t mackey, u_int64_t mackeylen, u_int32_t cipher, u_int32_t mac,
struct enc_xform *txform, struct auth_hash *thash)
{
@@ -1403,7 +1422,7 @@ csecreate(struct fcrypt *fcr, u_int64_t sid, caddr_t key, u_int64_t keylen,
cse->keylen = keylen/8;
cse->mackey = mackey;
cse->mackeylen = mackeylen/8;
- cse->sid = sid;
+ cse->cses = cses;
cse->cipher = cipher;
cse->mac = mac;
cse->txform = txform;
@@ -1412,19 +1431,17 @@ csecreate(struct fcrypt *fcr, u_int64_t sid, caddr_t key, u_int64_t keylen,
return (cse);
}
-static int
+static void
csefree(struct csession *cse)
{
- int error;
- error = crypto_freesession(cse->sid);
+ crypto_freesession(cse->cses);
mtx_destroy(&cse->lock);
if (cse->key)
free(cse->key, M_XDATA);
if (cse->mackey)
free(cse->mackey, M_XDATA);
free(cse, M_XDATA);
- return (error);
}
static int
diff --git a/freebsd/sys/opencrypto/cryptodev.h b/freebsd/sys/opencrypto/cryptodev.h
index 65422541..b3f81563 100644
--- a/freebsd/sys/opencrypto/cryptodev.h
+++ b/freebsd/sys/opencrypto/cryptodev.h
@@ -65,6 +65,10 @@
#include <sys/ioccom.h>
#include <sys/_task.h>
+#ifdef _KERNEL
+#include <opencrypto/_cryptodev.h>
+#endif
+
/* Some initial values */
#define CRYPTO_DRIVERS_INITIAL 4
#define CRYPTO_SW_SESSIONS 32
@@ -74,25 +78,29 @@
#define MD5_HASH_LEN 16
#define SHA1_HASH_LEN 20
#define RIPEMD160_HASH_LEN 20
+#define SHA2_224_HASH_LEN 28
#define SHA2_256_HASH_LEN 32
#define SHA2_384_HASH_LEN 48
#define SHA2_512_HASH_LEN 64
#define MD5_KPDK_HASH_LEN 16
#define SHA1_KPDK_HASH_LEN 20
#define AES_GMAC_HASH_LEN 16
+#define POLY1305_HASH_LEN 16
/* Maximum hash algorithm result length */
#define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */
+#define MD5_BLOCK_LEN 64
+#define SHA1_BLOCK_LEN 64
+#define RIPEMD160_BLOCK_LEN 64
+#define SHA2_224_BLOCK_LEN 64
+#define SHA2_256_BLOCK_LEN 64
+#define SHA2_384_BLOCK_LEN 128
+#define SHA2_512_BLOCK_LEN 128
+
/* HMAC values */
#define NULL_HMAC_BLOCK_LEN 64
-#define MD5_HMAC_BLOCK_LEN 64
-#define SHA1_HMAC_BLOCK_LEN 64
-#define RIPEMD160_HMAC_BLOCK_LEN 64
-#define SHA2_256_HMAC_BLOCK_LEN 64
-#define SHA2_384_HMAC_BLOCK_LEN 128
-#define SHA2_512_HMAC_BLOCK_LEN 128
/* Maximum HMAC block length */
-#define HMAC_MAX_BLOCK_LEN SHA2_512_HMAC_BLOCK_LEN /* Keep this updated */
+#define HMAC_MAX_BLOCK_LEN SHA2_512_BLOCK_LEN /* Keep this updated */
#define HMAC_IPAD_VAL 0x36
#define HMAC_OPAD_VAL 0x5C
/* HMAC Key Length */
@@ -100,6 +108,8 @@
#define AES_192_GMAC_KEY_LEN 24
#define AES_256_GMAC_KEY_LEN 32
+#define POLY1305_KEY_LEN 32
+
/* Encryption algorithm block sizes */
#define NULL_BLOCK_LEN 4 /* IPsec to maintain alignment */
#define DES_BLOCK_LEN 8
@@ -182,7 +192,14 @@
#define CRYPTO_BLAKE2B 29 /* Blake2b hash */
#define CRYPTO_BLAKE2S 30 /* Blake2s hash */
#define CRYPTO_CHACHA20 31 /* Chacha20 stream cipher */
-#define CRYPTO_ALGORITHM_MAX 31 /* Keep updated - see below */
+#define CRYPTO_SHA2_224_HMAC 32
+#define CRYPTO_RIPEMD160 33
+#define CRYPTO_SHA2_224 34
+#define CRYPTO_SHA2_256 35
+#define CRYPTO_SHA2_384 36
+#define CRYPTO_SHA2_512 37
+#define CRYPTO_POLY1305 38
+#define CRYPTO_ALGORITHM_MAX 38 /* Keep updated - see below */
#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \
(x) <= CRYPTO_ALGORITHM_MAX)
@@ -216,6 +233,11 @@ struct session_op {
u_int32_t ses; /* returns: session # */
};
+/*
+ * session and crypt _op structs are used by userspace programs to interact
+ * with /dev/crypto. Confusingly, the internal kernel interface is named
+ * "cryptop" (no underscore).
+ */
struct session2_op {
u_int32_t cipher; /* ie. CRYPTO_DES_CBC */
u_int32_t mac; /* ie. CRYPTO_MD5_HMAC */
@@ -399,7 +421,7 @@ struct cryptop {
struct task crp_task;
- u_int64_t crp_sid; /* Session ID */
+ crypto_session_t crp_session; /* Session */
int crp_ilen; /* Input data total length */
int crp_olen; /* Result total length */
@@ -408,7 +430,7 @@ struct cryptop {
* All error codes except EAGAIN
* indicate possible data corruption (as in,
* the data have been touched). On all
- * errors, the crp_sid may have changed
+ * errors, the crp_session may have changed
* (reset to a new one), so the caller
* should always check and use the new
* value on future requests.
@@ -450,7 +472,7 @@ struct cryptop {
#define CRYPTOP_ASYNC(crp) \
(((crp)->crp_flags & CRYPTO_F_ASYNC) && \
- CRYPTO_SESID2CAPS((crp)->crp_sid) & CRYPTOCAP_F_SYNC)
+ crypto_ses2caps((crp)->crp_session) & CRYPTOCAP_F_SYNC)
#define CRYPTOP_ASYNC_KEEPORDER(crp) \
(CRYPTOP_ASYNC(crp) && \
(crp)->crp_flags & CRYPTO_F_ASYNC_KEEPORDER)
@@ -480,25 +502,19 @@ struct cryptkop {
int (*krp_callback)(struct cryptkop *);
};
-/*
- * Session ids are 64 bits. The lower 32 bits contain a "local id" which
- * is a driver-private session identifier. The upper 32 bits contain a
- * "hardware id" used by the core crypto code to identify the driver and
- * a copy of the driver's capabilities that can be used by client code to
- * optimize operation.
- */
-#define CRYPTO_SESID2HID(_sid) (((_sid) >> 32) & 0x00ffffff)
-#define CRYPTO_SESID2CAPS(_sid) (((_sid) >> 32) & 0xff000000)
-#define CRYPTO_SESID2LID(_sid) (((u_int32_t) (_sid)) & 0xffffffff)
+uint32_t crypto_ses2hid(crypto_session_t crypto_session);
+uint32_t crypto_ses2caps(crypto_session_t crypto_session);
+void *crypto_get_driver_session(crypto_session_t crypto_session);
MALLOC_DECLARE(M_CRYPTO_DATA);
-extern int crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int hard);
-extern int crypto_freesession(u_int64_t sid);
+extern int crypto_newsession(crypto_session_t *cses, struct cryptoini *cri, int hard);
+extern void crypto_freesession(crypto_session_t cses);
#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE
#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE
#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */
-extern int32_t crypto_get_driverid(device_t dev, int flags);
+extern int32_t crypto_get_driverid(device_t dev, size_t session_size,
+ int flags);
extern int crypto_find_driver(const char *);
extern device_t crypto_find_device_byhid(int hid);
extern int crypto_getcaps(int hid);
diff --git a/freebsd/sys/opencrypto/cryptosoft.c b/freebsd/sys/opencrypto/cryptosoft.c
index 69993ae0..8dff61c1 100644
--- a/freebsd/sys/opencrypto/cryptosoft.c
+++ b/freebsd/sys/opencrypto/cryptosoft.c
@@ -64,10 +64,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/cryptodev_if.h>
static int32_t swcr_id;
-static struct swcr_data **swcr_sessions = NULL;
-static u_int32_t swcr_sesnum;
-/* Protects swcr_sessions pointer, not data. */
-static struct rwlock swcr_sessions_lock;
u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN];
u_int8_t hmac_opad_buffer[HMAC_MAX_BLOCK_LEN];
@@ -76,8 +72,7 @@ static int swcr_encdec(struct cryptodesc *, struct swcr_data *, caddr_t, int);
static int swcr_authcompute(struct cryptodesc *, struct swcr_data *, caddr_t, int);
static int swcr_authenc(struct cryptop *crp);
static int swcr_compdec(struct cryptodesc *, struct swcr_data *, caddr_t, int);
-static int swcr_freesession(device_t dev, u_int64_t tid);
-static int swcr_freesession_locked(device_t dev, u_int64_t tid);
+static void swcr_freesession(device_t dev, crypto_session_t cses);
/*
* Apply a symmetric encryption/decryption algorithm.
@@ -328,7 +323,7 @@ out:
return (error);
}
-static void
+static int __result_use_check
swcr_authprepare(struct auth_hash *axf, struct swcr_data *sw, u_char *key,
int klen)
{
@@ -339,6 +334,7 @@ swcr_authprepare(struct auth_hash *axf, struct swcr_data *sw, u_char *key,
switch (axf->type) {
case CRYPTO_MD5_HMAC:
case CRYPTO_SHA1_HMAC:
+ case CRYPTO_SHA2_224_HMAC:
case CRYPTO_SHA2_256_HMAC:
case CRYPTO_SHA2_384_HMAC:
case CRYPTO_SHA2_512_HMAC:
@@ -383,6 +379,12 @@ swcr_authprepare(struct auth_hash *axf, struct swcr_data *sw, u_char *key,
axf->Final(buf, sw->sw_ictx);
break;
}
+ case CRYPTO_POLY1305:
+ if (klen != POLY1305_KEY_LEN) {
+ CRYPTDEB("bad poly1305 key size %d", klen);
+ return EINVAL;
+ }
+ /* FALLTHROUGH */
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
axf->Setkey(sw->sw_ictx, key, klen);
@@ -391,7 +393,9 @@ swcr_authprepare(struct auth_hash *axf, struct swcr_data *sw, u_char *key,
default:
printf("%s: CRD_F_KEY_EXPLICIT flag given, but algorithm %d "
"doesn't use keys.\n", __func__, axf->type);
+ return EINVAL;
}
+ return 0;
}
/*
@@ -411,8 +415,11 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
axf = sw->sw_axf;
- if (crd->crd_flags & CRD_F_KEY_EXPLICIT)
- swcr_authprepare(axf, sw, crd->crd_key, crd->crd_klen);
+ if (crd->crd_flags & CRD_F_KEY_EXPLICIT) {
+ err = swcr_authprepare(axf, sw, crd->crd_key, crd->crd_klen);
+ if (err != 0)
+ return err;
+ }
bcopy(sw->sw_ictx, &ctx, axf->ctxsize);
@@ -422,8 +429,17 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
return err;
switch (sw->sw_alg) {
+ case CRYPTO_SHA1:
+ case CRYPTO_SHA2_224:
+ case CRYPTO_SHA2_256:
+ case CRYPTO_SHA2_384:
+ case CRYPTO_SHA2_512:
+ axf->Final(aalg, &ctx);
+ break;
+
case CRYPTO_MD5_HMAC:
case CRYPTO_SHA1_HMAC:
+ case CRYPTO_SHA2_224_HMAC:
case CRYPTO_SHA2_256_HMAC:
case CRYPTO_SHA2_384_HMAC:
case CRYPTO_SHA2_512_HMAC:
@@ -457,6 +473,7 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
case CRYPTO_NULL_HMAC:
+ case CRYPTO_POLY1305:
axf->Final(aalg, &ctx);
break;
}
@@ -482,6 +499,7 @@ swcr_authenc(struct cryptop *crp)
u_char uaalg[AALG_MAX_RESULT_LEN];
u_char iv[EALG_MAX_BLOCK_LEN];
union authctx ctx;
+ struct swcr_session *ses;
struct cryptodesc *crd, *crda = NULL, *crde = NULL;
struct swcr_data *sw, *swa, *swe = NULL;
struct auth_hash *axf = NULL;
@@ -492,14 +510,16 @@ swcr_authenc(struct cryptop *crp)
ivlen = blksz = iskip = oskip = 0;
+ ses = crypto_get_driver_session(crp->crp_session);
+
for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
- for (sw = swcr_sessions[crp->crp_sid & 0xffffffff];
- sw && sw->sw_alg != crd->crd_alg;
- sw = sw->sw_next)
+ for (i = 0; i < nitems(ses->swcr_algorithms) &&
+ ses->swcr_algorithms[i].sw_alg != crd->crd_alg; i++)
;
- if (sw == NULL)
+ if (i == nitems(ses->swcr_algorithms))
return (EINVAL);
+ sw = &ses->swcr_algorithms[i];
switch (sw->sw_alg) {
case CRYPTO_AES_NIST_GCM_16:
case CRYPTO_AES_NIST_GMAC:
@@ -732,68 +752,24 @@ swcr_compdec(struct cryptodesc *crd, struct swcr_data *sw,
* Generate a new software session.
*/
static int
-swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
+swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri)
{
- struct swcr_data **swd;
+ struct swcr_session *ses;
+ struct swcr_data *swd;
struct auth_hash *axf;
struct enc_xform *txf;
struct comp_algo *cxf;
- u_int32_t i;
+ size_t i;
int len;
int error;
- if (sid == NULL || cri == NULL)
+ if (cses == NULL || cri == NULL)
return EINVAL;
- rw_wlock(&swcr_sessions_lock);
- if (swcr_sessions) {
- for (i = 1; i < swcr_sesnum; i++)
- if (swcr_sessions[i] == NULL)
- break;
- } else
- i = 1; /* NB: to silence compiler warning */
-
- if (swcr_sessions == NULL || i == swcr_sesnum) {
- if (swcr_sessions == NULL) {
- i = 1; /* We leave swcr_sessions[0] empty */
- swcr_sesnum = CRYPTO_SW_SESSIONS;
- } else
- swcr_sesnum *= 2;
-
- swd = malloc(swcr_sesnum * sizeof(struct swcr_data *),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (swd == NULL) {
- /* Reset session number */
- if (swcr_sesnum == CRYPTO_SW_SESSIONS)
- swcr_sesnum = 0;
- else
- swcr_sesnum /= 2;
- rw_wunlock(&swcr_sessions_lock);
- return ENOBUFS;
- }
-
- /* Copy existing sessions */
- if (swcr_sessions != NULL) {
- bcopy(swcr_sessions, swd,
- (swcr_sesnum / 2) * sizeof(struct swcr_data *));
- free(swcr_sessions, M_CRYPTO_DATA);
- }
-
- swcr_sessions = swd;
- }
+ ses = crypto_get_driver_session(cses);
- rw_downgrade(&swcr_sessions_lock);
- swd = &swcr_sessions[i];
- *sid = i;
-
- while (cri) {
- *swd = malloc(sizeof(struct swcr_data),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (*swd == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
- return ENOBUFS;
- }
+ for (i = 0; cri != NULL && i < nitems(ses->swcr_algorithms); i++) {
+ swd = &ses->swcr_algorithms[i];
switch (cri->cri_alg) {
case CRYPTO_DES_CBC:
@@ -825,7 +801,7 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
goto enccommon;
case CRYPTO_AES_NIST_GMAC:
txf = &enc_xform_aes_nist_gmac;
- (*swd)->sw_exf = txf;
+ swd->sw_exf = txf;
break;
case CRYPTO_CAMELLIA_CBC:
txf = &enc_xform_camellia;
@@ -838,15 +814,14 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
goto enccommon;
enccommon:
if (cri->cri_key != NULL) {
- error = txf->setkey(&((*swd)->sw_kschedule),
+ error = txf->setkey(&swd->sw_kschedule,
cri->cri_key, cri->cri_klen / 8);
if (error) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ swcr_freesession(dev, cses);
return error;
}
}
- (*swd)->sw_exf = txf;
+ swd->sw_exf = txf;
break;
case CRYPTO_MD5_HMAC:
@@ -855,6 +830,9 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
case CRYPTO_SHA1_HMAC:
axf = &auth_hash_hmac_sha1;
goto authcommon;
+ case CRYPTO_SHA2_224_HMAC:
+ axf = &auth_hash_hmac_sha2_224;
+ goto authcommon;
case CRYPTO_SHA2_256_HMAC:
axf = &auth_hash_hmac_sha2_256;
goto authcommon;
@@ -870,29 +848,31 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
case CRYPTO_RIPEMD160_HMAC:
axf = &auth_hash_hmac_ripemd_160;
authcommon:
- (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_ictx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_ictx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
- (*swd)->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_octx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_octx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
if (cri->cri_key != NULL) {
- swcr_authprepare(axf, *swd, cri->cri_key,
- cri->cri_klen);
+ error = swcr_authprepare(axf, swd,
+ cri->cri_key, cri->cri_klen);
+ if (error != 0) {
+ swcr_freesession(dev, cses);
+ return error;
+ }
}
- (*swd)->sw_mlen = cri->cri_mlen;
- (*swd)->sw_axf = axf;
+ swd->sw_mlen = cri->cri_mlen;
+ swd->sw_axf = axf;
break;
case CRYPTO_MD5_KPDK:
@@ -902,52 +882,66 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
case CRYPTO_SHA1_KPDK:
axf = &auth_hash_key_sha1;
auth2common:
- (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_ictx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_ictx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
- (*swd)->sw_octx = malloc(cri->cri_klen / 8,
+ swd->sw_octx = malloc(cri->cri_klen / 8,
M_CRYPTO_DATA, M_NOWAIT);
- if ((*swd)->sw_octx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_octx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
/* Store the key so we can "append" it to the payload */
if (cri->cri_key != NULL) {
- swcr_authprepare(axf, *swd, cri->cri_key,
- cri->cri_klen);
+ error = swcr_authprepare(axf, swd,
+ cri->cri_key, cri->cri_klen);
+ if (error != 0) {
+ swcr_freesession(dev, cses);
+ return error;
+ }
}
- (*swd)->sw_mlen = cri->cri_mlen;
- (*swd)->sw_axf = axf;
+ swd->sw_mlen = cri->cri_mlen;
+ swd->sw_axf = axf;
break;
#ifdef notdef
case CRYPTO_MD5:
axf = &auth_hash_md5;
goto auth3common;
+#endif
case CRYPTO_SHA1:
axf = &auth_hash_sha1;
+ goto auth3common;
+ case CRYPTO_SHA2_224:
+ axf = &auth_hash_sha2_224;
+ goto auth3common;
+ case CRYPTO_SHA2_256:
+ axf = &auth_hash_sha2_256;
+ goto auth3common;
+ case CRYPTO_SHA2_384:
+ axf = &auth_hash_sha2_384;
+ goto auth3common;
+ case CRYPTO_SHA2_512:
+ axf = &auth_hash_sha2_512;
+
auth3common:
- (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_ictx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_ictx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
- axf->Init((*swd)->sw_ictx);
- (*swd)->sw_mlen = cri->cri_mlen;
- (*swd)->sw_axf = axf;
+ axf->Init(swd->sw_ictx);
+ swd->sw_mlen = cri->cri_mlen;
+ swd->sw_axf = axf;
break;
-#endif
case CRYPTO_AES_128_NIST_GMAC:
axf = &auth_hash_nist_gmac_aes_128;
@@ -962,21 +956,19 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
auth4common:
len = cri->cri_klen / 8;
if (len != 16 && len != 24 && len != 32) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ swcr_freesession(dev, cses);
return EINVAL;
}
- (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_ictx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_ictx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
- axf->Init((*swd)->sw_ictx);
- axf->Setkey((*swd)->sw_ictx, cri->cri_key, len);
- (*swd)->sw_axf = axf;
+ axf->Init(swd->sw_ictx);
+ axf->Setkey(swd->sw_ictx, cri->cri_key, len);
+ swd->sw_axf = axf;
break;
case CRYPTO_BLAKE2B:
@@ -984,70 +976,56 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
goto auth5common;
case CRYPTO_BLAKE2S:
axf = &auth_hash_blake2s;
+ goto auth5common;
+ case CRYPTO_POLY1305:
+ axf = &auth_hash_poly1305;
auth5common:
- (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
- if ((*swd)->sw_ictx == NULL) {
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ if (swd->sw_ictx == NULL) {
+ swcr_freesession(dev, cses);
return ENOBUFS;
}
- axf->Setkey((*swd)->sw_ictx, cri->cri_key,
+ axf->Setkey(swd->sw_ictx, cri->cri_key,
cri->cri_klen / 8);
- axf->Init((*swd)->sw_ictx);
- (*swd)->sw_axf = axf;
+ axf->Init(swd->sw_ictx);
+ swd->sw_axf = axf;
break;
case CRYPTO_DEFLATE_COMP:
cxf = &comp_algo_deflate;
- (*swd)->sw_cxf = cxf;
+ swd->sw_cxf = cxf;
break;
default:
- swcr_freesession_locked(dev, i);
- rw_runlock(&swcr_sessions_lock);
+ swcr_freesession(dev, cses);
return EINVAL;
}
- (*swd)->sw_alg = cri->cri_alg;
+ swd->sw_alg = cri->cri_alg;
cri = cri->cri_next;
- swd = &((*swd)->sw_next);
+ ses->swcr_nalgs++;
}
- rw_runlock(&swcr_sessions_lock);
- return 0;
-}
-static int
-swcr_freesession(device_t dev, u_int64_t tid)
-{
- int error;
-
- rw_rlock(&swcr_sessions_lock);
- error = swcr_freesession_locked(dev, tid);
- rw_runlock(&swcr_sessions_lock);
- return error;
+ if (cri != NULL) {
+ CRYPTDEB("Bogus session request for three or more algorithms");
+ return EINVAL;
+ }
+ return 0;
}
-/*
- * Free a session.
- */
-static int
-swcr_freesession_locked(device_t dev, u_int64_t tid)
+static void
+swcr_freesession(device_t dev, crypto_session_t cses)
{
+ struct swcr_session *ses;
struct swcr_data *swd;
struct enc_xform *txf;
struct auth_hash *axf;
- u_int32_t sid = CRYPTO_SESID2LID(tid);
-
- if (sid > swcr_sesnum || swcr_sessions == NULL ||
- swcr_sessions[sid] == NULL)
- return EINVAL;
+ size_t i;
- /* Silently accept and return */
- if (sid == 0)
- return 0;
+ ses = crypto_get_driver_session(cses);
- while ((swd = swcr_sessions[sid]) != NULL) {
- swcr_sessions[sid] = swd->sw_next;
+ for (i = 0; i < nitems(ses->swcr_algorithms); i++) {
+ swd = &ses->swcr_algorithms[i];
switch (swd->sw_alg) {
case CRYPTO_DES_CBC:
@@ -1071,6 +1049,7 @@ swcr_freesession_locked(device_t dev, u_int64_t tid)
case CRYPTO_MD5_HMAC:
case CRYPTO_SHA1_HMAC:
+ case CRYPTO_SHA2_224_HMAC:
case CRYPTO_SHA2_256_HMAC:
case CRYPTO_SHA2_384_HMAC:
case CRYPTO_SHA2_512_HMAC:
@@ -1105,7 +1084,12 @@ swcr_freesession_locked(device_t dev, u_int64_t tid)
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
case CRYPTO_MD5:
+ case CRYPTO_POLY1305:
case CRYPTO_SHA1:
+ case CRYPTO_SHA2_224:
+ case CRYPTO_SHA2_256:
+ case CRYPTO_SHA2_384:
+ case CRYPTO_SHA2_512:
axf = swd->sw_axf;
if (swd->sw_ictx) {
@@ -1118,10 +1102,7 @@ swcr_freesession_locked(device_t dev, u_int64_t tid)
/* Nothing to do */
break;
}
-
- free(swd, M_CRYPTO_DATA);
}
- return 0;
}
/*
@@ -1130,9 +1111,10 @@ swcr_freesession_locked(device_t dev, u_int64_t tid)
static int
swcr_process(device_t dev, struct cryptop *crp, int hint)
{
+ struct swcr_session *ses;
struct cryptodesc *crd;
struct swcr_data *sw;
- u_int32_t lid;
+ size_t i;
/* Sanity check */
if (crp == NULL)
@@ -1143,15 +1125,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
goto done;
}
- lid = CRYPTO_SESID2LID(crp->crp_sid);
- rw_rlock(&swcr_sessions_lock);
- if (swcr_sessions == NULL || lid >= swcr_sesnum || lid == 0 ||
- swcr_sessions[lid] == NULL) {
- rw_runlock(&swcr_sessions_lock);
- crp->crp_etype = ENOENT;
- goto done;
- }
- rw_runlock(&swcr_sessions_lock);
+ ses = crypto_get_driver_session(crp->crp_session);
/* Go through crypto descriptors, processing as we go */
for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
@@ -1165,23 +1139,16 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
* XXX between the various instances of an algorithm (so we can
* XXX locate the correct crypto context).
*/
- rw_rlock(&swcr_sessions_lock);
- if (swcr_sessions == NULL) {
- rw_runlock(&swcr_sessions_lock);
- crp->crp_etype = ENOENT;
- goto done;
- }
- for (sw = swcr_sessions[lid];
- sw && sw->sw_alg != crd->crd_alg;
- sw = sw->sw_next)
+ for (i = 0; i < nitems(ses->swcr_algorithms) &&
+ ses->swcr_algorithms[i].sw_alg != crd->crd_alg; i++)
;
- rw_runlock(&swcr_sessions_lock);
/* No such context ? */
- if (sw == NULL) {
+ if (i == nitems(ses->swcr_algorithms)) {
crp->crp_etype = EINVAL;
goto done;
}
+ sw = &ses->swcr_algorithms[i];
switch (sw->sw_alg) {
case CRYPTO_DES_CBC:
case CRYPTO_3DES_CBC:
@@ -1202,6 +1169,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
break;
case CRYPTO_MD5_HMAC:
case CRYPTO_SHA1_HMAC:
+ case CRYPTO_SHA2_224_HMAC:
case CRYPTO_SHA2_256_HMAC:
case CRYPTO_SHA2_384_HMAC:
case CRYPTO_SHA2_512_HMAC:
@@ -1211,8 +1179,13 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
case CRYPTO_SHA1_KPDK:
case CRYPTO_MD5:
case CRYPTO_SHA1:
+ case CRYPTO_SHA2_224:
+ case CRYPTO_SHA2_256:
+ case CRYPTO_SHA2_384:
+ case CRYPTO_SHA2_512:
case CRYPTO_BLAKE2B:
case CRYPTO_BLAKE2S:
+ case CRYPTO_POLY1305:
if ((crp->crp_etype = swcr_authcompute(crd, sw,
crp->crp_buf, crp->crp_flags)) != 0)
goto done;
@@ -1265,11 +1238,10 @@ swcr_probe(device_t dev)
static int
swcr_attach(device_t dev)
{
- rw_init(&swcr_sessions_lock, "swcr_sessions_lock");
memset(hmac_ipad_buffer, HMAC_IPAD_VAL, HMAC_MAX_BLOCK_LEN);
memset(hmac_opad_buffer, HMAC_OPAD_VAL, HMAC_MAX_BLOCK_LEN);
- swcr_id = crypto_get_driverid(dev,
+ swcr_id = crypto_get_driverid(dev, sizeof(struct swcr_session),
CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC);
if (swcr_id < 0) {
device_printf(dev, "cannot initialize!");
@@ -1285,6 +1257,7 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_NULL_CBC);
REGISTER(CRYPTO_MD5_HMAC);
REGISTER(CRYPTO_SHA1_HMAC);
+ REGISTER(CRYPTO_SHA2_224_HMAC);
REGISTER(CRYPTO_SHA2_256_HMAC);
REGISTER(CRYPTO_SHA2_384_HMAC);
REGISTER(CRYPTO_SHA2_512_HMAC);
@@ -1294,6 +1267,10 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_SHA1_KPDK);
REGISTER(CRYPTO_MD5);
REGISTER(CRYPTO_SHA1);
+ REGISTER(CRYPTO_SHA2_224);
+ REGISTER(CRYPTO_SHA2_256);
+ REGISTER(CRYPTO_SHA2_384);
+ REGISTER(CRYPTO_SHA2_512);
REGISTER(CRYPTO_RIJNDAEL128_CBC);
REGISTER(CRYPTO_AES_XTS);
REGISTER(CRYPTO_AES_ICM);
@@ -1307,6 +1284,7 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_BLAKE2B);
REGISTER(CRYPTO_BLAKE2S);
REGISTER(CRYPTO_CHACHA20);
+ REGISTER(CRYPTO_POLY1305);
#undef REGISTER
return 0;
@@ -1316,11 +1294,6 @@ static int
swcr_detach(device_t dev)
{
crypto_unregister_all(swcr_id);
- rw_wlock(&swcr_sessions_lock);
- free(swcr_sessions, M_CRYPTO_DATA);
- swcr_sessions = NULL;
- rw_wunlock(&swcr_sessions_lock);
- rw_destroy(&swcr_sessions_lock);
return 0;
}
diff --git a/freebsd/sys/opencrypto/cryptosoft.h b/freebsd/sys/opencrypto/cryptosoft.h
index af78dc18..d88b09d4 100644
--- a/freebsd/sys/opencrypto/cryptosoft.h
+++ b/freebsd/sys/opencrypto/cryptosoft.h
@@ -55,8 +55,11 @@ struct swcr_data {
#define sw_exf SWCR_UN.SWCR_ENC.SW_exf
#define sw_size SWCR_UN.SWCR_COMP.SW_size
#define sw_cxf SWCR_UN.SWCR_COMP.SW_cxf
+};
- struct swcr_data *sw_next;
+struct swcr_session {
+ struct swcr_data swcr_algorithms[2]; /* per-algorithm state; swcr_newsession() rejects a third algorithm */
+ unsigned swcr_nalgs; /* bumped once per algorithm set up in swcr_newsession() */
 };
#ifdef _KERNEL
diff --git a/freebsd/sys/opencrypto/xform_auth.h b/freebsd/sys/opencrypto/xform_auth.h
index 74c6d063..06183868 100644
--- a/freebsd/sys/opencrypto/xform_auth.h
+++ b/freebsd/sys/opencrypto/xform_auth.h
@@ -69,14 +69,21 @@ extern struct auth_hash auth_hash_key_sha1;
extern struct auth_hash auth_hash_hmac_md5;
extern struct auth_hash auth_hash_hmac_sha1;
extern struct auth_hash auth_hash_hmac_ripemd_160;
+extern struct auth_hash auth_hash_hmac_sha2_224;
extern struct auth_hash auth_hash_hmac_sha2_256;
extern struct auth_hash auth_hash_hmac_sha2_384;
extern struct auth_hash auth_hash_hmac_sha2_512;
+extern struct auth_hash auth_hash_sha1;
+extern struct auth_hash auth_hash_sha2_224;
+extern struct auth_hash auth_hash_sha2_256;
+extern struct auth_hash auth_hash_sha2_384;
+extern struct auth_hash auth_hash_sha2_512;
extern struct auth_hash auth_hash_nist_gmac_aes_128;
extern struct auth_hash auth_hash_nist_gmac_aes_192;
extern struct auth_hash auth_hash_nist_gmac_aes_256;
extern struct auth_hash auth_hash_blake2b;
extern struct auth_hash auth_hash_blake2s;
+extern struct auth_hash auth_hash_poly1305;
union authctx {
MD5_CTX md5ctx;
diff --git a/freebsd/sys/opencrypto/xform_md5.c b/freebsd/sys/opencrypto/xform_md5.c
index 47dfc75c..5611ee39 100644
--- a/freebsd/sys/opencrypto/xform_md5.c
+++ b/freebsd/sys/opencrypto/xform_md5.c
@@ -59,10 +59,10 @@ static int MD5Update_int(void *, const u_int8_t *, u_int16_t);
struct auth_hash auth_hash_hmac_md5 = {
.type = CRYPTO_MD5_HMAC,
.name = "HMAC-MD5",
- .keysize = MD5_HMAC_BLOCK_LEN,
+ .keysize = MD5_BLOCK_LEN,
.hashsize = MD5_HASH_LEN,
.ctxsize = sizeof(MD5_CTX),
- .blocksize = MD5_HMAC_BLOCK_LEN,
+ .blocksize = MD5_BLOCK_LEN,
.Init = (void (*) (void *)) MD5Init,
.Update = MD5Update_int,
.Final = (void (*) (u_int8_t *, void *)) MD5Final,
diff --git a/freebsd/sys/opencrypto/xform_poly1305.c b/freebsd/sys/opencrypto/xform_poly1305.c
new file mode 100644
index 00000000..a138a3f5
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_poly1305.c
@@ -0,0 +1,93 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* This file is in the public domain. */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/xform_auth.h>
+#include <opencrypto/xform_poly1305.h>
+
+#include <sodium/crypto_onetimeauth_poly1305.h>
+
+struct poly1305_xform_ctx {
+ struct crypto_onetimeauth_poly1305_state state;
+};
+CTASSERT(sizeof(union authctx) >= sizeof(struct poly1305_xform_ctx));
+
+CTASSERT(POLY1305_KEY_LEN == crypto_onetimeauth_poly1305_KEYBYTES);
+CTASSERT(POLY1305_HASH_LEN == crypto_onetimeauth_poly1305_BYTES);
+
+void
+Poly1305_Init(struct poly1305_xform_ctx *polyctx)
+{
+ /* Nop: state setup happens in Poly1305_Setkey(), which calls crypto_onetimeauth_poly1305_init(). */
+}
+
+void
+Poly1305_Setkey(struct poly1305_xform_ctx *polyctx,
+ const uint8_t key[__min_size(POLY1305_KEY_LEN)], size_t klen)
+{
+ int rc;
+
+ if (klen != POLY1305_KEY_LEN) /* klen is in bytes; only an exact-length key is accepted */
+ panic("%s: Bogus keylen: %u bytes", __func__, (unsigned)klen);
+
+ rc = crypto_onetimeauth_poly1305_init(&polyctx->state, key); /* keys the libsodium state */
+ if (rc != 0) /* a non-zero result from libsodium is treated as fatal */
+ panic("%s: Invariant violated: %d", __func__, rc);
+}
+
+static void
+xform_Poly1305_Setkey(void *ctx, const uint8_t *key, uint16_t klen)
+{
+ Poly1305_Setkey(ctx, key, klen); /* adapter: untyped auth_hash Setkey hook -> typed API */
+}
+
+int
+Poly1305_Update(struct poly1305_xform_ctx *polyctx, const void *data,
+ size_t len)
+{
+ int rc;
+
+ rc = crypto_onetimeauth_poly1305_update(&polyctx->state, data, len); /* feed len bytes of data into the MAC state */
+ if (rc != 0) /* a non-zero result from libsodium is treated as fatal */
+ panic("%s: Invariant violated: %d", __func__, rc);
+ return (0); /* the only value returned; the error path panics above */
+}
+
+static int
+xform_Poly1305_Update(void *ctx, const uint8_t *data, uint16_t len)
+{
+ return (Poly1305_Update(ctx, data, len)); /* adapter: untyped auth_hash Update hook -> typed API */
+}
+
+void
+Poly1305_Final(uint8_t digest[__min_size(POLY1305_HASH_LEN)],
+ struct poly1305_xform_ctx *polyctx)
+{
+ int rc;
+
+ rc = crypto_onetimeauth_poly1305_final(&polyctx->state, digest); /* digest must hold at least POLY1305_HASH_LEN bytes */
+ if (rc != 0) /* a non-zero result from libsodium is treated as fatal */
+ panic("%s: Invariant violated: %d", __func__, rc);
+}
+
+static void
+xform_Poly1305_Final(uint8_t *digest, void *ctx)
+{
+ Poly1305_Final(digest, ctx); /* adapter: untyped auth_hash Final hook -> typed API */
+}
+
+struct auth_hash auth_hash_poly1305 = {
+ .type = CRYPTO_POLY1305,
+ .name = "Poly-1305",
+ .keysize = POLY1305_KEY_LEN,
+ .hashsize = POLY1305_HASH_LEN,
+ .ctxsize = sizeof(struct poly1305_xform_ctx), /* size cryptosoft mallocs for sw_ictx */
+ .blocksize = crypto_onetimeauth_poly1305_BYTES,
+ .Init = (void *)Poly1305_Init, /* NOTE(review): cast through void *, unlike the three wrapped hooks below; consider an xform_ wrapper */
+ .Setkey = xform_Poly1305_Setkey,
+ .Update = xform_Poly1305_Update,
+ .Final = xform_Poly1305_Final,
+};
diff --git a/freebsd/sys/opencrypto/xform_poly1305.h b/freebsd/sys/opencrypto/xform_poly1305.h
new file mode 100644
index 00000000..7364ecde
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_poly1305.h
@@ -0,0 +1,16 @@
+/* This file is in the public domain. */
+/* $FreeBSD$ */
+#pragma once
+
+#include <sys/types.h>
+
+struct poly1305_xform_ctx;
+
+void Poly1305_Init(struct poly1305_xform_ctx *);
+
+void Poly1305_Setkey(struct poly1305_xform_ctx *,
+ const uint8_t [__min_size(32)], size_t);
+
+int Poly1305_Update(struct poly1305_xform_ctx *, const void *, size_t);
+
+void Poly1305_Final(uint8_t [__min_size(16)], struct poly1305_xform_ctx *);
diff --git a/freebsd/sys/opencrypto/xform_rmd160.c b/freebsd/sys/opencrypto/xform_rmd160.c
index ee344e80..98297308 100644
--- a/freebsd/sys/opencrypto/xform_rmd160.c
+++ b/freebsd/sys/opencrypto/xform_rmd160.c
@@ -59,10 +59,10 @@ static int RMD160Update_int(void *, const u_int8_t *, u_int16_t);
struct auth_hash auth_hash_hmac_ripemd_160 = {
.type = CRYPTO_RIPEMD160_HMAC,
.name = "HMAC-RIPEMD-160",
- .keysize = RIPEMD160_HMAC_BLOCK_LEN,
+ .keysize = RIPEMD160_BLOCK_LEN,
.hashsize = RIPEMD160_HASH_LEN,
.ctxsize = sizeof(RMD160_CTX),
- .blocksize = RIPEMD160_HMAC_BLOCK_LEN,
+ .blocksize = RIPEMD160_BLOCK_LEN,
.Init = (void (*)(void *)) RMD160Init,
.Update = RMD160Update_int,
.Final = (void (*)(u_int8_t *, void *)) RMD160Final,
diff --git a/freebsd/sys/opencrypto/xform_sha1.c b/freebsd/sys/opencrypto/xform_sha1.c
index 974dfb8a..44ac8c0e 100644
--- a/freebsd/sys/opencrypto/xform_sha1.c
+++ b/freebsd/sys/opencrypto/xform_sha1.c
@@ -57,14 +57,26 @@ static void SHA1Init_int(void *);
static int SHA1Update_int(void *, const u_int8_t *, u_int16_t);
static void SHA1Final_int(u_int8_t *, void *);
+/* Plain hash */
+struct auth_hash auth_hash_sha1 = {
+ .type = CRYPTO_SHA1,
+ .name = "SHA1",
+ .hashsize = SHA1_HASH_LEN,
+ .ctxsize = sizeof(SHA1_CTX),
+ .blocksize = SHA1_BLOCK_LEN,
+ .Init = SHA1Init_int,
+ .Update = SHA1Update_int,
+ .Final = SHA1Final_int,
+};
+
/* Authentication instances */
struct auth_hash auth_hash_hmac_sha1 = {
.type = CRYPTO_SHA1_HMAC,
.name = "HMAC-SHA1",
- .keysize = SHA1_HMAC_BLOCK_LEN,
+ .keysize = SHA1_BLOCK_LEN,
.hashsize = SHA1_HASH_LEN,
.ctxsize = sizeof(SHA1_CTX),
- .blocksize = SHA1_HMAC_BLOCK_LEN,
+ .blocksize = SHA1_BLOCK_LEN,
.Init = SHA1Init_int,
.Update = SHA1Update_int,
.Final = SHA1Final_int,
diff --git a/freebsd/sys/opencrypto/xform_sha2.c b/freebsd/sys/opencrypto/xform_sha2.c
index 7844b8ff..0775247a 100644
--- a/freebsd/sys/opencrypto/xform_sha2.c
+++ b/freebsd/sys/opencrypto/xform_sha2.c
@@ -50,23 +50,85 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <crypto/sha2/sha224.h>
#include <crypto/sha2/sha256.h>
#include <crypto/sha2/sha384.h>
#include <crypto/sha2/sha512.h>
#include <opencrypto/xform_auth.h>
+static int SHA224Update_int(void *, const u_int8_t *, u_int16_t);
static int SHA256Update_int(void *, const u_int8_t *, u_int16_t);
static int SHA384Update_int(void *, const u_int8_t *, u_int16_t);
static int SHA512Update_int(void *, const u_int8_t *, u_int16_t);
+/* Plain hashes */
+struct auth_hash auth_hash_sha2_224 = {
+ .type = CRYPTO_SHA2_224,
+ .name = "SHA2-224",
+ .hashsize = SHA2_224_HASH_LEN,
+ .ctxsize = sizeof(SHA224_CTX),
+ .blocksize = SHA2_224_BLOCK_LEN,
+ .Init = (void (*)(void *)) SHA224_Init,
+ .Update = SHA224Update_int,
+ .Final = (void (*)(u_int8_t *, void *)) SHA224_Final,
+};
+
+struct auth_hash auth_hash_sha2_256 = {
+ .type = CRYPTO_SHA2_256,
+ .name = "SHA2-256",
+ .keysize = SHA2_256_BLOCK_LEN, /* NOTE(review): the plain SHA1 and SHA2-224 entries set no keysize; confirm this is intended */
+ .hashsize = SHA2_256_HASH_LEN,
+ .ctxsize = sizeof(SHA256_CTX),
+ .blocksize = SHA2_256_BLOCK_LEN,
+ .Init = (void (*)(void *)) SHA256_Init,
+ .Update = SHA256Update_int,
+ .Final = (void (*)(u_int8_t *, void *)) SHA256_Final,
+};
+
+struct auth_hash auth_hash_sha2_384 = {
+ .type = CRYPTO_SHA2_384,
+ .name = "SHA2-384",
+ .keysize = SHA2_384_BLOCK_LEN, /* NOTE(review): the plain SHA1 and SHA2-224 entries set no keysize; confirm this is intended */
+ .hashsize = SHA2_384_HASH_LEN,
+ .ctxsize = sizeof(SHA384_CTX),
+ .blocksize = SHA2_384_BLOCK_LEN,
+ .Init = (void (*)(void *)) SHA384_Init,
+ .Update = SHA384Update_int,
+ .Final = (void (*)(u_int8_t *, void *)) SHA384_Final,
+};
+
+struct auth_hash auth_hash_sha2_512 = {
+ .type = CRYPTO_SHA2_512,
+ .name = "SHA2-512",
+ .keysize = SHA2_512_BLOCK_LEN, /* NOTE(review): the plain SHA1 and SHA2-224 entries set no keysize; confirm this is intended */
+ .hashsize = SHA2_512_HASH_LEN,
+ .ctxsize = sizeof(SHA512_CTX),
+ .blocksize = SHA2_512_BLOCK_LEN,
+ .Init = (void (*)(void *)) SHA512_Init,
+ .Update = SHA512Update_int,
+ .Final = (void (*)(u_int8_t *, void *)) SHA512_Final,
+};
+
/* Authentication instances */
+struct auth_hash auth_hash_hmac_sha2_224 = {
+ .type = CRYPTO_SHA2_224_HMAC,
+ .name = "HMAC-SHA2-224",
+ .keysize = SHA2_224_BLOCK_LEN,
+ .hashsize = SHA2_224_HASH_LEN,
+ .ctxsize = sizeof(SHA224_CTX),
+ .blocksize = SHA2_224_BLOCK_LEN,
+ .Init = (void (*)(void *)) SHA224_Init,
+ .Update = SHA224Update_int,
+ .Final = (void (*)(u_int8_t *, void *)) SHA224_Final,
+};
+
struct auth_hash auth_hash_hmac_sha2_256 = {
.type = CRYPTO_SHA2_256_HMAC,
.name = "HMAC-SHA2-256",
- .keysize = SHA2_256_HMAC_BLOCK_LEN,
+ .keysize = SHA2_256_BLOCK_LEN,
.hashsize = SHA2_256_HASH_LEN,
.ctxsize = sizeof(SHA256_CTX),
- .blocksize = SHA2_256_HMAC_BLOCK_LEN,
+ .blocksize = SHA2_256_BLOCK_LEN,
.Init = (void (*)(void *)) SHA256_Init,
.Update = SHA256Update_int,
.Final = (void (*)(u_int8_t *, void *)) SHA256_Final,
@@ -75,10 +137,10 @@ struct auth_hash auth_hash_hmac_sha2_256 = {
struct auth_hash auth_hash_hmac_sha2_384 = {
.type = CRYPTO_SHA2_384_HMAC,
.name = "HMAC-SHA2-384",
- .keysize = SHA2_384_HMAC_BLOCK_LEN,
+ .keysize = SHA2_384_BLOCK_LEN,
.hashsize = SHA2_384_HASH_LEN,
.ctxsize = sizeof(SHA384_CTX),
- .blocksize = SHA2_384_HMAC_BLOCK_LEN,
+ .blocksize = SHA2_384_BLOCK_LEN,
.Init = (void (*)(void *)) SHA384_Init,
.Update = SHA384Update_int,
.Final = (void (*)(u_int8_t *, void *)) SHA384_Final,
@@ -87,10 +149,10 @@ struct auth_hash auth_hash_hmac_sha2_384 = {
struct auth_hash auth_hash_hmac_sha2_512 = {
.type = CRYPTO_SHA2_512_HMAC,
.name = "HMAC-SHA2-512",
- .keysize = SHA2_512_HMAC_BLOCK_LEN,
+ .keysize = SHA2_512_BLOCK_LEN,
.hashsize = SHA2_512_HASH_LEN,
.ctxsize = sizeof(SHA512_CTX),
- .blocksize = SHA2_512_HMAC_BLOCK_LEN,
+ .blocksize = SHA2_512_BLOCK_LEN,
.Init = (void (*)(void *)) SHA512_Init,
.Update = SHA512Update_int,
.Final = (void (*)(u_int8_t *, void *)) SHA512_Final,
@@ -100,6 +162,13 @@ struct auth_hash auth_hash_hmac_sha2_512 = {
* And now for auth.
*/
static int
+SHA224Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ SHA224_Update(ctx, buf, len);
+ return 0;
+}
+
+static int
SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
{
SHA256_Update(ctx, buf, len);
diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h
index f4769c86..fba367cb 100644
--- a/freebsd/sys/powerpc/include/machine/spr.h
+++ b/freebsd/sys/powerpc/include/machine/spr.h
@@ -229,7 +229,6 @@
#define EPCR_DGTMI 0x00800000
#define EPCR_DMIUH 0x00400000
#define EPCR_PMGS 0x00200000
-#define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */
#define SPR_HSRR0 0x13a
#define SPR_HSRR1 0x13b
@@ -245,7 +244,7 @@
#define SPR_LPID 0x13f /* Logical Partitioning Control */
#define SPR_PTCR 0x1d0 /* Partition Table Control Register */
-#define SPR_IBAT0U 0x210 /* .68 Instruction BAT Reg 0 Upper */
+#define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */
#define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */
#define SPR_IBAT0L 0x211 /* .6. Instruction BAT Reg 0 Lower */
#define SPR_IBAT1U 0x212 /* .6. Instruction BAT Reg 1 Upper */
@@ -384,6 +383,7 @@
#define SPR_MD_RAM0 0x339 /* ..8 IMMU RAM entry read reg 0 */
#define SPR_MD_RAM1 0x33a /* ..8 IMMU RAM entry read reg 1 */
#define SPR_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
+#define SPR_PMCR 0x374 /* Processor Management Control Register */
#define SPR_UMMCR2 0x3a0 /* .6. User Monitor Mode Control Register 2 */
#define SPR_UMMCR0 0x3a8 /* .6. User Monitor Mode Control Register 0 */
#define SPR_USIA 0x3ab /* .6. User Sampled Instruction Address */
diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h
index f3c54f36..74a48f81 100644
--- a/freebsd/sys/sys/bus.h
+++ b/freebsd/sys/sys/bus.h
@@ -92,7 +92,9 @@ struct u_device {
#define DF_EXTERNALSOFTC 0x40 /* softc not allocated by us */
#define DF_REBID 0x80 /* Can rebid after attach */
#define DF_SUSPENDED 0x100 /* Device is suspended. */
-#define DF_QUIET_CHILDREN 0x200 /* Default to quiet for all my children */
+#define DF_QUIET_CHILDREN 0x200 /* Default to quiet for all my children */
+#define DF_ATTACHED_ONCE 0x400 /* Has been attached at least once */
+#define DF_NEEDNOMATCH 0x800 /* Has a pending NOMATCH event */
/**
* @brief Device request structure used for ioctl's.
@@ -126,6 +128,8 @@ struct devreq {
#define DEV_CLEAR_DRIVER _IOW('D', 8, struct devreq)
#define DEV_RESCAN _IOW('D', 9, struct devreq)
#define DEV_DELETE _IOW('D', 10, struct devreq)
+#define DEV_FREEZE _IOW('D', 11, struct devreq)
+#define DEV_THAW _IOW('D', 12, struct devreq)
/* Flags for DEV_DETACH and DEV_DISABLE. */
#define DEVF_FORCE_DETACH 0x0000001
@@ -156,7 +160,8 @@ void devctl_notify(const char *__system, const char *__subsystem,
const char *__type, const char *__data);
void devctl_queue_data_f(char *__data, int __flags);
void devctl_queue_data(char *__data);
-void devctl_safe_quote(char *__dst, const char *__src, size_t len);
+struct sbuf;
+void devctl_safe_quote_sb(struct sbuf *__sb, const char *__src);
/**
* Device name parsers. Hook to allow device enumerators to map
diff --git a/freebsd/sys/sys/cpu.h b/freebsd/sys/sys/cpu.h
index b3b745ab..8a74e470 100644
--- a/freebsd/sys/sys/cpu.h
+++ b/freebsd/sys/sys/cpu.h
@@ -87,7 +87,7 @@ struct cf_setting {
};
/* Maximum number of settings a given driver can have. */
-#define MAX_SETTINGS 24
+#define MAX_SETTINGS 256
/* A combination of settings is a level. */
struct cf_level {
diff --git a/freebsd/sys/sys/file.h b/freebsd/sys/sys/file.h
index a566e69c..20beac22 100644
--- a/freebsd/sys/sys/file.h
+++ b/freebsd/sys/sys/file.h
@@ -297,18 +297,23 @@ rtems_bsd_error_to_status_and_errno(int error)
* Userland version of struct file, for sysctl
*/
struct xfile {
- size_t xf_size; /* size of struct xfile */
+ ksize_t xf_size; /* size of struct xfile */
pid_t xf_pid; /* owning process */
uid_t xf_uid; /* effective uid of owning process */
int xf_fd; /* descriptor number */
- void *xf_file; /* address of struct file */
+ int _xf_int_pad1;
+ kvaddr_t xf_file; /* address of struct file */
short xf_type; /* descriptor type */
+ short _xf_short_pad1;
int xf_count; /* reference count */
int xf_msgcount; /* references from message queue */
+ int _xf_int_pad2;
off_t xf_offset; /* file offset */
- void *xf_data; /* file descriptor specific data */
- void *xf_vnode; /* vnode pointer */
+ kvaddr_t xf_data; /* file descriptor specific data */
+ kvaddr_t xf_vnode; /* vnode pointer */
u_int xf_flag; /* flags (see fcntl.h) */
+ int _xf_int_pad3;
+ int64_t _xf_int64_pad[6];
};
#ifdef _KERNEL
diff --git a/freebsd/sys/sys/interrupt.h b/freebsd/sys/sys/interrupt.h
index 7c9aad4d..105bb968 100644
--- a/freebsd/sys/sys/interrupt.h
+++ b/freebsd/sys/sys/interrupt.h
@@ -33,6 +33,7 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/ck.h>
struct intr_event;
struct intr_thread;
@@ -52,7 +53,7 @@ struct intr_handler {
char ih_name[MAXCOMLEN + 1]; /* Name of handler. */
struct intr_event *ih_event; /* Event we are connected to. */
int ih_need; /* Needs service. */
- TAILQ_ENTRY(intr_handler) ih_next; /* Next handler for this event. */
+ CK_SLIST_ENTRY(intr_handler) ih_next; /* Next handler for this event. */
u_char ih_pri; /* Priority of this handler. */
struct intr_thread *ih_thread; /* Ithread for filtered handler. */
};
@@ -105,7 +106,7 @@ struct intr_handler {
*/
struct intr_event {
TAILQ_ENTRY(intr_event) ie_list;
- TAILQ_HEAD(, intr_handler) ie_handlers; /* Interrupt handlers. */
+ CK_SLIST_HEAD(, intr_handler) ie_handlers; /* Interrupt handlers. */
char ie_name[MAXCOMLEN + 1]; /* Individual event name. */
char ie_fullname[MAXCOMLEN + 1];
struct mtx ie_lock;
@@ -121,6 +122,8 @@ struct intr_event {
struct timeval ie_warntm;
int ie_irq; /* Physical irq number if !SOFT. */
int ie_cpu; /* CPU this event is bound to. */
+ volatile int ie_phase; /* Switched to establish a barrier. */
+ volatile int ie_active[2]; /* Filters in ISR context. */
};
/* Interrupt event flags kept in ie_flags. */
@@ -151,8 +154,13 @@ extern struct intr_event *clk_intr_event;
extern void *vm_ih;
/* Counts and names for statistics (defined in MD code). */
+#if defined(__amd64__) || defined(__i386__)
+extern u_long *intrcnt; /* counts for for each device and stray */
+extern char *intrnames; /* string table containing device names */
+#else
extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
+#endif
extern size_t sintrcnt; /* size of intrcnt table */
extern size_t sintrnames; /* size of intrnames table */
@@ -174,7 +182,6 @@ int intr_event_create(struct intr_event **event, void *source,
int intr_event_describe_handler(struct intr_event *ie, void *cookie,
const char *descr);
int intr_event_destroy(struct intr_event *ie);
-void intr_event_execute_handlers(struct proc *p, struct intr_event *ie);
int intr_event_handle(struct intr_event *ie, struct trapframe *frame);
int intr_event_remove_handler(void *cookie);
int intr_getaffinity(int irq, int mode, void *mask);
diff --git a/freebsd/sys/sys/jail.h b/freebsd/sys/sys/jail.h
index c42964fe..6f8c9aa5 100644
--- a/freebsd/sys/sys/jail.h
+++ b/freebsd/sys/sys/jail.h
@@ -227,9 +227,10 @@ struct prison_racct {
#define PR_ALLOW_MOUNT 0x00000010
#define PR_ALLOW_QUOTAS 0x00000020
#define PR_ALLOW_SOCKET_AF 0x00000040
+#define PR_ALLOW_MLOCK 0x00000080
#define PR_ALLOW_RESERVED_PORTS 0x00008000
#define PR_ALLOW_KMEM_ACCESS 0x00010000 /* reserved, not used yet */
-#define PR_ALLOW_ALL_STATIC 0x0001807f
+#define PR_ALLOW_ALL_STATIC 0x000180ff
/*
* OSD methods
@@ -417,6 +418,8 @@ int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
+unsigned prison_add_allow(const char *prefix, const char *name,
+ const char *prefix_descr, const char *descr);
void prison_add_vfs(struct vfsconf *vfsp);
void prison_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
diff --git a/freebsd/sys/sys/libkern.h b/freebsd/sys/sys/libkern.h
index dc24036b..28da25ca 100644
--- a/freebsd/sys/sys/libkern.h
+++ b/freebsd/sys/sys/libkern.h
@@ -144,11 +144,6 @@ arc4rand(void *ptr, u_int len, int reseed)
arc4random_buf(ptr, len);
}
#endif /* __rtems__ */
-#ifndef __rtems__
-int bcmp(const void *, const void *, size_t);
-#else /* __rtems__ */
-#define bcmp(m1, m2, n) memcmp(m1, m2, n)
-#endif /* __rtems__ */
int timingsafe_bcmp(const void *, const void *, size_t);
void *bsearch(const void *, const void *, size_t,
size_t, int (*)(const void *, const void *));
@@ -210,7 +205,6 @@ int fnmatch(const char *, const char *, int);
int locc(int, char *, u_int);
void *memchr(const void *s, int c, size_t n);
void *memcchr(const void *s, int c, size_t n);
-int memcmp(const void *b1, const void *b2, size_t len);
void *memmem(const void *l, size_t l_len, const void *s, size_t s_len);
void qsort(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *));
@@ -290,23 +284,6 @@ uint32_t armv8_crc32c(uint32_t, const unsigned char *, unsigned int);
#endif
#endif
-
-LIBKERN_INLINE void *memset(void *, int, size_t);
-#ifdef LIBKERN_BODY
-LIBKERN_INLINE void *
-memset(void *b, int c, size_t len)
-{
- char *bb;
-
- if (c == 0)
- bzero(b, len);
- else
- for (bb = (char *)b; len--; )
- *bb++ = c;
- return (b);
-}
-#endif
-
#ifndef __rtems__
static __inline char *
index(const char *p, int ch)
diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h
index 21c5a41e..8aae31d9 100644
--- a/freebsd/sys/sys/linker.h
+++ b/freebsd/sys/sys/linker.h
@@ -273,10 +273,9 @@ extern int kld_debug;
typedef int elf_lookup_fn(linker_file_t, Elf_Size, int, Elf_Addr *);
/* Support functions */
+bool elf_is_ifunc_reloc(Elf_Size r_info);
int elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel,
int _type, elf_lookup_fn _lu);
-int elf_reloc_ifunc(linker_file_t _lf, Elf_Addr base, const void *_rel,
- int _type, elf_lookup_fn _lu);
int elf_reloc_local(linker_file_t _lf, Elf_Addr base, const void *_rel,
int _type, elf_lookup_fn _lu);
Elf_Addr elf_relocaddr(linker_file_t _lf, Elf_Addr addr);
diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h
index 8c66fe81..33e1aab9 100644
--- a/freebsd/sys/sys/malloc.h
+++ b/freebsd/sys/sys/malloc.h
@@ -38,6 +38,9 @@
#define _SYS_MALLOC_H_
#include <sys/param.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#endif
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -46,7 +49,7 @@
#define MINALLOCSIZE UMA_SMALLEST_UNIT
/*
- * flags to malloc.
+ * Flags to memory allocation functions.
*/
#define M_NOWAIT 0x0001 /* do not block */
#define M_WAITOK 0x0002 /* ok to block */
@@ -56,16 +59,10 @@
#define M_NODUMP 0x0800 /* don't dump pages in this allocation */
#define M_FIRSTFIT 0x1000 /* Only for vmem, fast fit. */
#define M_BESTFIT 0x2000 /* Only for vmem, low fragmentation. */
+#define M_EXEC 0x4000 /* allocate executable space. */
#define M_MAGIC 877983977 /* time when first defined :-) */
-#ifdef INVARIANTS
-#define M_ZERO_INVARIANTS M_ZERO
-#else
-#define M_ZERO_INVARIANTS 0
-#endif
-
-
/*
* Two malloc type structures are present: malloc_type, which is used by a
* type owner to declare the type, and malloc_type_internal, which holds
@@ -142,7 +139,6 @@ struct malloc_type_header {
#ifdef _KERNEL
#ifdef __rtems__
#include <stdlib.h>
-#define malloc _bsd_malloc
#define realloc _bsd_realloc
#define reallocf _bsd_reallocf
#define free _bsd_free
@@ -188,8 +184,70 @@ void *contigmalloc_domain(unsigned long size, struct malloc_type *type,
__malloc_like __result_use_check __alloc_size(1) __alloc_align(6);
void free(void *addr, struct malloc_type *type);
void free_domain(void *addr, struct malloc_type *type);
+#ifndef __rtems__
void *malloc(size_t size, struct malloc_type *type, int flags) __malloc_like
__result_use_check __alloc_size(1);
+#else /* __rtems__ */
+void *_bsd_malloc(size_t size, struct malloc_type *type, int flags)
+ __malloc_like __result_use_check __alloc_size(1);
+#endif /* __rtems__ */
+/*
+ * Try to optimize malloc(..., ..., M_ZERO) allocations by doing zeroing in
+ * place if the size is known at compilation time.
+ *
+ * Passing the flag down requires malloc to blindly zero the entire object.
+ * In practice a lot of the zeroing can be avoided if most of the object
+ * gets explicitly initialized after the allocation. Letting the compiler
+ * zero in place gives it the opportunity to take advantage of this state.
+ *
+ * Note that the operation is only applicable if both flags and size are
+ * known at compilation time. If M_ZERO is passed but M_WAITOK is not, the
+ * allocation can fail and a NULL check is needed. However, if M_WAITOK is
+ * passed we know the allocation must succeed and the check can be elided.
+ *
+ * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO);
+ * if (((flags) & M_WAITOK) != 0 || _malloc_item != NULL)
+ * bzero(_malloc_item, _size);
+ *
+ * If the flag is set, the compiler knows the left side is always true,
+ * therefore the entire statement is true and the callsite is:
+ *
+ * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO);
+ * bzero(_malloc_item, _size);
+ *
+ * If the flag is not set, the compiler knows the left side is always false
+ * and the NULL check is needed, therefore the callsite is:
+ *
+ * _malloc_item = malloc(_size, type, (flags) &~ M_ZERO);
+ * if (_malloc_item != NULL)
+ * bzero(_malloc_item, _size);
+ *
+ * The implementation is a macro because of what appears to be a clang 6 bug:
+ * an inline function variant ended up being compiled to a mere malloc call
+ * regardless of argument. gcc generates expected code (like the above).
+ */
+#ifdef __rtems__
+/*
+ * The macro below was modified without the __rtems__ guards. This macro looks
+ * quite brittle and it is better to provoke a merge conflict in case of a
+ * FreeBSD baseline update.
+ */
+#endif /* __rtems__ */
+#define malloc(size, type, flags) ({ \
+ void *_malloc_item; \
+ size_t _size = (size); \
+ if (__builtin_constant_p(size) && __builtin_constant_p(flags) &&\
+ ((flags) & M_ZERO) != 0) { \
+ _malloc_item = _bsd_malloc(_size, type, (flags) &~ M_ZERO); \
+ if (((flags) & M_WAITOK) != 0 || \
+ __predict_true(_malloc_item != NULL)) \
+ bzero(_malloc_item, _size); \
+ } else { \
+ _malloc_item = _bsd_malloc(_size, type, flags); \
+ } \
+ _malloc_item; \
+})
+
void *malloc_domain(size_t size, struct malloc_type *type, int domain,
int flags) __malloc_like __result_use_check __alloc_size(1);
void *mallocarray(size_t nmemb, size_t size, struct malloc_type *type,
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index 4d2a3223..0423b580 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -304,7 +304,7 @@ struct mbuf {
#define M_MCAST 0x00000020 /* send/received as link-level multicast */
#define M_PROMISC 0x00000040 /* packet was not for us */
#define M_VLANTAG 0x00000080 /* ether_vtag is valid */
-#define M_UNUSED_8 0x00000100 /* --available-- */
+#define M_NOMAP 0x00000100 /* mbuf data is unmapped (soon from Drew) */
#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */
#define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */
#define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically
@@ -570,8 +570,8 @@ struct mbuf {
#define MT_EXP4 12 /* for experimental use */
#define MT_CONTROL 14 /* extra-data protocol message */
-#define MT_OOBDATA 15 /* expedited data */
-#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
+#define MT_EXTCONTROL 15 /* control message with externalized contents */
+#define MT_OOBDATA 16 /* expedited data */
#define MT_NOINIT 255 /* Not a type but a flag to allocate
a non-initialized mbuf */
@@ -636,6 +636,7 @@ void m_demote_pkthdr(struct mbuf *);
void m_demote(struct mbuf *, int, int);
struct mbuf *m_devget(char *, int, int, struct ifnet *,
void (*)(char *, caddr_t, u_int));
+void m_dispose_extcontrolm(struct mbuf *m);
struct mbuf *m_dup(const struct mbuf *, int);
int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
void m_extadd(struct mbuf *, char *, u_int, m_ext_free_t,
diff --git a/freebsd/sys/sys/module.h b/freebsd/sys/sys/module.h
index 6799b179..b40870d3 100644
--- a/freebsd/sys/sys/module.h
+++ b/freebsd/sys/sys/module.h
@@ -146,8 +146,13 @@ struct mod_pnp_match_info
SYSINIT(name##module, sub, order, module_register_init, &data); \
struct __hack
+#ifdef KLD_TIED
#define DECLARE_MODULE(name, data, sub, order) \
+ DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, __FreeBSD_version)
+#else
+#define DECLARE_MODULE(name, data, sub, order) \
DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, MODULE_KERNEL_MAXVER)
+#endif
/*
* The module declared with DECLARE_MODULE_TIED can only be loaded
diff --git a/freebsd/sys/sys/mouse.h b/freebsd/sys/sys/mouse.h
index a1f950cf..882d59c9 100644
--- a/freebsd/sys/sys/mouse.h
+++ b/freebsd/sys/sys/mouse.h
@@ -38,8 +38,6 @@
#define MOUSE_SETMODE _IOW('M', 3, mousemode_t)
#define MOUSE_GETLEVEL _IOR('M', 4, int)
#define MOUSE_SETLEVEL _IOW('M', 5, int)
-#define MOUSE_GETVARS _IOR('M', 6, mousevar_t)
-#define MOUSE_SETVARS _IOW('M', 7, mousevar_t)
#define MOUSE_READSTATE _IOWR('M', 8, mousedata_t)
#define MOUSE_READDATA _IOWR('M', 9, mousedata_t)
@@ -228,19 +226,6 @@ typedef struct mousedata {
int buf[16]; /* data buffer */
} mousedata_t;
-#if (defined(MOUSE_GETVARS))
-
-typedef struct mousevar {
- int var[16];
-} mousevar_t;
-
-/* magic numbers in var[0] */
-#define MOUSE_VARS_PS2_SIG 0x00325350 /* 'PS2' */
-#define MOUSE_VARS_BUS_SIG 0x00535542 /* 'BUS' */
-#define MOUSE_VARS_INPORT_SIG 0x00504e49 /* 'INP' */
-
-#endif /* MOUSE_GETVARS */
-
/* Synaptics Touchpad */
#define MOUSE_SYNAPTICS_PACKETSIZE 6 /* '3' works better */
diff --git a/freebsd/sys/sys/mutex.h b/freebsd/sys/sys/mutex.h
index e15de1ae..e9c91f80 100644
--- a/freebsd/sys/sys/mutex.h
+++ b/freebsd/sys/sys/mutex.h
@@ -161,7 +161,7 @@ void _thread_lock(struct thread *td, int opts, const char *file, int line);
void _thread_lock(struct thread *);
#endif
-#if defined(LOCK_PROFILING) || defined(KLD_MODULE)
+#if defined(LOCK_PROFILING) || (defined(KLD_MODULE) && !defined(KLD_TIED))
#define thread_lock(tdp) \
thread_lock_flags_((tdp), 0, __FILE__, __LINE__)
#elif LOCK_DEBUG > 0
diff --git a/freebsd/sys/sys/nv.h b/freebsd/sys/sys/nv.h
index bf40f8f3..80fb8777 100644
--- a/freebsd/sys/sys/nv.h
+++ b/freebsd/sys/sys/nv.h
@@ -162,6 +162,14 @@ void nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value);
void nvlist_add_descriptor_array(nvlist_t *nvl, const char *name, const int *value, size_t nitems);
#endif
+void nvlist_append_bool_array(nvlist_t *nvl, const char *name, const bool value);
+void nvlist_append_number_array(nvlist_t *nvl, const char *name, const uint64_t value);
+void nvlist_append_string_array(nvlist_t *nvl, const char *name, const char * const value);
+void nvlist_append_nvlist_array(nvlist_t *nvl, const char *name, const nvlist_t * const value);
+#ifndef _KERNEL
+void nvlist_append_descriptor_array(nvlist_t *nvl, const char *name, int value);
+#endif
+
/*
* The nvlist_move functions add the given name/value pair.
* The functions consumes provided buffer.
diff --git a/freebsd/sys/sys/pciio.h b/freebsd/sys/sys/pciio.h
index 80d2019b..50e9116d 100644
--- a/freebsd/sys/sys/pciio.h
+++ b/freebsd/sys/sys/pciio.h
@@ -138,11 +138,30 @@ struct pci_list_vpd_io {
struct pci_vpd_element *plvi_data;
};
+struct pci_bar_mmap {
+ void *pbm_map_base; /* (sometimes IN)/OUT mmaped base */
+ size_t pbm_map_length; /* mapped length of the BAR, multiple
+ of pages */
+ uint64_t pbm_bar_length; /* actual length of the BAR */
+ int pbm_bar_off; /* offset from the mapped base to the
+ start of BAR */
+ struct pcisel pbm_sel; /* device to operate on */
+ int pbm_reg; /* starting address of BAR */
+ int pbm_flags;
+ int pbm_memattr;
+};
+
+#define PCIIO_BAR_MMAP_FIXED 0x01
+#define PCIIO_BAR_MMAP_EXCL 0x02
+#define PCIIO_BAR_MMAP_RW 0x04
+#define PCIIO_BAR_MMAP_ACTIVATE 0x08
+
#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io)
#define PCIOCREAD _IOWR('p', 2, struct pci_io)
#define PCIOCWRITE _IOWR('p', 3, struct pci_io)
#define PCIOCATTACHED _IOWR('p', 4, struct pci_io)
#define PCIOCGETBAR _IOWR('p', 6, struct pci_bar_io)
#define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io)
+#define PCIOCBARMMAP _IOWR('p', 8, struct pci_bar_mmap)
#endif /* !_SYS_PCIIO_H_ */
diff --git a/freebsd/sys/sys/pcpu.h b/freebsd/sys/sys/pcpu.h
index bfa7f34d..7aad9f2e 100644
--- a/freebsd/sys/sys/pcpu.h
+++ b/freebsd/sys/sys/pcpu.h
@@ -81,7 +81,31 @@ extern uintptr_t dpcpu_off[];
*/
#define DPCPU_NAME(n) pcpu_entry_##n
#define DPCPU_DECLARE(t, n) extern t DPCPU_NAME(n)
-#define DPCPU_DEFINE(t, n) t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
+/* struct _hack is to stop this from being used with the static keyword. */
+#define DPCPU_DEFINE(t, n) \
+ struct _hack; t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
+#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv))
+/*
+ * On some architectures the compiler will use PC-relative load to
+ * find the address of DPCPU data with the static keyword. We then
+ * use this to find the offset of the data in a per-CPU region.
+ * This works in the kernel as we can allocate the space ahead
+ * of time, however modules need to allocate a separate space and
+ * then use relocations to fix the address of the data. As
+ * PC-relative data doesn't have a relocation there is nothing for
+ * the kernel module linker to fix so data is accessed from the
+ * wrong location.
+ *
+ * This is a workaround until a better solution can be found.
+ *
+ * VNET_DEFINE_STATIC also has the same workaround.
+ */
+#define DPCPU_DEFINE_STATIC(t, n) \
+ t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
+#else
+#define DPCPU_DEFINE_STATIC(t, n) \
+ static t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
+#endif
/*
* Accessors with a given base.
@@ -189,14 +213,6 @@ struct pcpu {
#endif /* __rtems__ */
} __aligned(CACHE_LINE_SIZE);
-#ifdef CTASSERT
-/*
- * To minimize memory waste in per-cpu UMA zones, size of struct pcpu
- * should be denominator of PAGE_SIZE.
- */
-CTASSERT((PAGE_SIZE / sizeof(struct pcpu)) * sizeof(struct pcpu) == PAGE_SIZE);
-#endif
-
#ifdef _KERNEL
STAILQ_HEAD(cpuhead, pcpu);
@@ -211,15 +227,34 @@ extern struct pcpu *cpuid_to_pcpu[];
#endif
#define curvidata PCPU_GET(vidata)
+#ifndef __rtems__
+#define UMA_PCPU_ALLOC_SIZE PAGE_SIZE
+
+#ifdef CTASSERT
+#if defined(__i386__) || defined(__amd64__)
+/* Required for counters(9) to work on x86. */
+CTASSERT(sizeof(struct pcpu) == UMA_PCPU_ALLOC_SIZE);
+#else
+/*
+ * To minimize memory waste in per-cpu UMA zones, size of struct pcpu
+ * should be a divisor of PAGE_SIZE.
+ */
+CTASSERT((PAGE_SIZE / sizeof(struct pcpu)) * sizeof(struct pcpu) == PAGE_SIZE);
+#endif /* __i386__ || __amd64__ */
+#endif /* CTASSERT */
+#else /* __rtems__ */
+#define UMA_PCPU_ALLOC_SIZE sizeof(struct pcpu)
+#endif /* __rtems__ */
+
/* Accessor to elements allocated via UMA_ZONE_PCPU zone. */
static inline void *
zpcpu_get(void *base)
{
#ifndef __rtems__
- return ((char *)(base) + sizeof(struct pcpu) * curcpu);
+ return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * curcpu);
#else /* __rtems__ */
- return ((char *)(base) + sizeof(struct pcpu) * _SMP_Get_current_processor());
+ return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * _SMP_Get_current_processor());
#endif /* __rtems__ */
}
@@ -227,7 +262,7 @@ static inline void *
zpcpu_get_cpu(void *base, int cpu)
{
- return ((char *)(base) + sizeof(struct pcpu) * cpu);
+ return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * cpu);
}
/*
diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h
index 36ed69cc..9372b3a0 100644
--- a/freebsd/sys/sys/proc.h
+++ b/freebsd/sys/sys/proc.h
@@ -74,19 +74,6 @@
#include <machine/cpu.h>
#endif
-
-/*
- * A section object may be passed to every begin-end pair to allow for
- * forward progress guarantees with-in prolonged active sections.
- *
- * We can't include ck_epoch.h so we define our own variant here and
- * then CTASSERT that it's the same size in subr_epoch.c
- */
-struct epoch_section {
- unsigned int bucket;
-};
-typedef struct epoch_section epoch_section_t;
-
/*
* One structure allocated per session.
*
@@ -408,8 +395,7 @@ struct thread {
int td_lastcpu; /* (t) Last cpu we were on. */
int td_oncpu; /* (t) Which cpu we are on. */
void *td_lkpi_task; /* LinuxKPI task struct pointer */
- TAILQ_ENTRY(thread) td_epochq; /* (t) Epoch queue. */
- epoch_section_t td_epoch_section; /* (t) epoch section object */
+ int td_pmcpend;
#endif /* __rtems__ */
};
@@ -677,7 +663,7 @@ struct proc {
u_int p_stype; /* (c) Stop event type. */
char p_step; /* (c) Process is stopped. */
u_char p_pfsflags; /* (c) Procfs flags. */
- u_int p_ptevents; /* (c) ptrace() event mask. */
+ u_int p_ptevents; /* (c + e) ptrace() event mask. */
struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */
struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */
struct thread *p_singlethread;/* (c + j) If single threading this is it */
@@ -729,8 +715,6 @@ struct proc {
LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/
struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */
struct cv p_pwait; /* (*) wait cv for exit/exec. */
- struct cv p_dbgwait; /* (*) wait cv for debugger attach
- after fork. */
uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
struct racct *p_racct; /* (b) Resource accounting. */
int p_throttled; /* (c) Flag for racct pcpu throttling */
@@ -1127,6 +1111,7 @@ struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
void proc_set_traced(struct proc *p, bool stop);
+void proc_wkilled(struct proc *p);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
diff --git a/freebsd/sys/sys/random.h b/freebsd/sys/sys/random.h
index c717a686..f32d3f66 100644
--- a/freebsd/sys/sys/random.h
+++ b/freebsd/sys/sys/random.h
@@ -35,12 +35,6 @@
#ifdef _KERNEL
-#if !defined(KLD_MODULE)
-#if defined(RANDOM_LOADABLE) && defined(RANDOM_YARROW)
-#error "Cannot define both RANDOM_LOADABLE and RANDOM_YARROW"
-#endif
-#endif
-
struct uio;
#if defined(DEV_RANDOM)
@@ -105,6 +99,7 @@ enum random_entropy_source {
RANDOM_PURE_VIRTIO,
RANDOM_PURE_BROADCOM,
RANDOM_PURE_CCP,
+ RANDOM_PURE_DARN,
ENTROPYSOURCE
};
@@ -117,57 +112,54 @@ enum random_entropy_source {
#if defined(DEV_RANDOM)
extern u_int hc_source_mask;
-void random_harvest_queue_(const void *, u_int, u_int, enum random_entropy_source);
-void random_harvest_fast_(const void *, u_int, u_int);
-void random_harvest_direct_(const void *, u_int, u_int, enum random_entropy_source);
+void random_harvest_queue_(const void *, u_int, enum random_entropy_source);
+void random_harvest_fast_(const void *, u_int);
+void random_harvest_direct_(const void *, u_int, enum random_entropy_source);
static __inline void
-random_harvest_queue(const void *entropy, u_int size, u_int bits,
- enum random_entropy_source origin)
+random_harvest_queue(const void *entropy, u_int size, enum random_entropy_source origin)
{
if (hc_source_mask & (1 << origin))
- random_harvest_queue_(entropy, size, bits, origin);
+ random_harvest_queue_(entropy, size, origin);
}
static __inline void
-random_harvest_fast(const void *entropy, u_int size, u_int bits,
- enum random_entropy_source origin)
+random_harvest_fast(const void *entropy, u_int size, enum random_entropy_source origin)
{
if (hc_source_mask & (1 << origin))
- random_harvest_fast_(entropy, size, bits);
+ random_harvest_fast_(entropy, size);
}
static __inline void
-random_harvest_direct(const void *entropy, u_int size, u_int bits,
- enum random_entropy_source origin)
+random_harvest_direct(const void *entropy, u_int size, enum random_entropy_source origin)
{
if (hc_source_mask & (1 << origin))
- random_harvest_direct_(entropy, size, bits, origin);
+ random_harvest_direct_(entropy, size, origin);
}
void random_harvest_register_source(enum random_entropy_source);
void random_harvest_deregister_source(enum random_entropy_source);
#else
-#define random_harvest_queue(a, b, c, d) do {} while (0)
-#define random_harvest_fast(a, b, c, d) do {} while (0)
-#define random_harvest_direct(a, b, c, d) do {} while (0)
+#define random_harvest_queue(a, b, c) do {} while (0)
+#define random_harvest_fast(a, b, c) do {} while (0)
+#define random_harvest_direct(a, b, c) do {} while (0)
#define random_harvest_register_source(a) do {} while (0)
#define random_harvest_deregister_source(a) do {} while (0)
#endif
#if defined(RANDOM_ENABLE_UMA)
-#define random_harvest_fast_uma(a, b, c, d) random_harvest_fast(a, b, c, d)
+#define random_harvest_fast_uma(a, b, c) random_harvest_fast(a, b, c)
#else /* !defined(RANDOM_ENABLE_UMA) */
-#define random_harvest_fast_uma(a, b, c, d) do {} while (0)
+#define random_harvest_fast_uma(a, b, c) do {} while (0)
#endif /* defined(RANDOM_ENABLE_UMA) */
#if defined(RANDOM_ENABLE_ETHER)
-#define random_harvest_queue_ether(a, b, c) random_harvest_queue(a, b, c, RANDOM_NET_ETHER)
+#define random_harvest_queue_ether(a, b) random_harvest_queue(a, b, RANDOM_NET_ETHER)
#else /* !defined(RANDOM_ENABLE_ETHER) */
-#define random_harvest_queue_ether(a, b, c) do {} while (0)
+#define random_harvest_queue_ether(a, b) do {} while (0)
#endif /* defined(RANDOM_ENABLE_ETHER) */
diff --git a/freebsd/sys/sys/reboot.h b/freebsd/sys/sys/reboot.h
index d0dff609..20b91f8e 100644
--- a/freebsd/sys/sys/reboot.h
+++ b/freebsd/sys/sys/reboot.h
@@ -41,28 +41,29 @@
*/
#define RB_AUTOBOOT 0 /* flags for system auto-booting itself */
-#define RB_ASKNAME 0x001 /* ask for file name to reboot from */
+#define RB_ASKNAME 0x001 /* force prompt of device of root filesystem */
#define RB_SINGLE 0x002 /* reboot to single user only */
#define RB_NOSYNC 0x004 /* dont sync before reboot */
#define RB_HALT 0x008 /* don't reboot, just halt */
-#define RB_INITNAME 0x010 /* name given for /etc/init (unused) */
+#define RB_INITNAME 0x010 /* Unused placeholder to specify init path */
#define RB_DFLTROOT 0x020 /* use compiled-in rootdev */
#define RB_KDB 0x040 /* give control to kernel debugger */
#define RB_RDONLY 0x080 /* mount root fs read-only */
#define RB_DUMP 0x100 /* dump kernel memory before reboot */
-#define RB_MINIROOT 0x200 /* mini-root present in memory at boot time */
+#define RB_MINIROOT 0x200 /* Unused placeholder */
#define RB_VERBOSE 0x800 /* print all potentially useful info */
#define RB_SERIAL 0x1000 /* use serial port as console */
#define RB_CDROM 0x2000 /* use cdrom as root */
#define RB_POWEROFF 0x4000 /* turn the power off if possible */
#define RB_GDB 0x8000 /* use GDB remote debugger instead of DDB */
#define RB_MUTE 0x10000 /* start up with the console muted */
-#define RB_SELFTEST 0x20000 /* don't complete the boot; do selftest */
+#define RB_SELFTEST 0x20000 /* unused placeholder */
#define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */
#define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */
#define RB_PAUSE 0x100000 /* pause after each output line during probe */
#define RB_REROOT 0x200000 /* unmount the rootfs and mount it again */
#define RB_POWERCYCLE 0x400000 /* Power cycle if possible */
+#define RB_PROBE 0x10000000 /* Probe multiple consoles */
#define RB_MULTIPLE 0x20000000 /* use multiple consoles */
#define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */
diff --git a/freebsd/sys/sys/sglist.h b/freebsd/sys/sys/sglist.h
new file mode 100644
index 00000000..5674416c
--- /dev/null
+++ b/freebsd/sys/sys/sglist.h
@@ -0,0 +1,113 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2008 Yahoo!, Inc.
+ * All rights reserved.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * A scatter/gather list describes a group of physical address ranges.
+ * Each physical address range consists of a starting address and a
+ * length.
+ */
+
+#ifndef __SGLIST_H__
+#define __SGLIST_H__
+
+#include <sys/refcount.h>
+
+struct sglist_seg {
+ vm_paddr_t ss_paddr;
+ size_t ss_len;
+};
+
+struct sglist {
+ struct sglist_seg *sg_segs;
+ u_int sg_refs;
+ u_short sg_nseg;
+ u_short sg_maxseg;
+};
+
+struct bio;
+struct mbuf;
+struct uio;
+
+static __inline void
+sglist_init(struct sglist *sg, u_short maxsegs, struct sglist_seg *segs)
+{
+
+ sg->sg_segs = segs;
+ sg->sg_nseg = 0;
+ sg->sg_maxseg = maxsegs;
+ refcount_init(&sg->sg_refs, 1);
+}
+
+static __inline void
+sglist_reset(struct sglist *sg)
+{
+
+ sg->sg_nseg = 0;
+}
+
+static __inline struct sglist *
+sglist_hold(struct sglist *sg)
+{
+
+ refcount_acquire(&sg->sg_refs);
+ return (sg);
+}
+
+struct sglist *sglist_alloc(int nsegs, int mflags);
+int sglist_append(struct sglist *sg, void *buf, size_t len);
+int sglist_append_bio(struct sglist *sg, struct bio *bp);
+int sglist_append_mbuf(struct sglist *sg, struct mbuf *m0);
+int sglist_append_phys(struct sglist *sg, vm_paddr_t paddr,
+ size_t len);
+int sglist_append_sglist(struct sglist *sg, struct sglist *source,
+ size_t offset, size_t length);
+int sglist_append_uio(struct sglist *sg, struct uio *uio);
+int sglist_append_user(struct sglist *sg, void *buf, size_t len,
+ struct thread *td);
+int sglist_append_vmpages(struct sglist *sg, vm_page_t *m, size_t pgoff,
+ size_t len);
+struct sglist *sglist_build(void *buf, size_t len, int mflags);
+struct sglist *sglist_clone(struct sglist *sg, int mflags);
+int sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid);
+int sglist_count(void *buf, size_t len);
+int sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_t len);
+void sglist_free(struct sglist *sg);
+int sglist_join(struct sglist *first, struct sglist *second);
+size_t sglist_length(struct sglist *sg);
+int sglist_slice(struct sglist *original, struct sglist **slice,
+ size_t offset, size_t length, int mflags);
+int sglist_split(struct sglist *original, struct sglist **head,
+ size_t length, int mflags);
+
+#endif /* !__SGLIST_H__ */
diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h
index 91a30b6f..915fee0b 100644
--- a/freebsd/sys/sys/sockbuf.h
+++ b/freebsd/sys/sys/sockbuf.h
@@ -141,9 +141,9 @@ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
-int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
+void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
-int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
+void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
@@ -168,6 +168,10 @@ int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
struct mbuf *
+ sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
+void
+ sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
+struct mbuf *
sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
int sbwait(struct sockbuf *sb);
int sblock(struct sockbuf *sb, int flags);
diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h
index f877a0df..96ba4a01 100644
--- a/freebsd/sys/sys/socketvar.h
+++ b/freebsd/sys/sys/socketvar.h
@@ -63,6 +63,7 @@ struct vnet;
* private data and error information.
*/
typedef int so_upcall_t(struct socket *, void *, int);
+typedef void so_dtor_t(struct socket *);
struct socket;
@@ -84,7 +85,7 @@ struct socket {
struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* (b) from socket call, see socket.h */
+ int so_options; /* (b) from socket call, see socket.h */
short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
void *so_pcb; /* protocol control block */
@@ -99,6 +100,7 @@ struct socket {
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
+ so_dtor_t *so_dtor; /* (b) optional destructor */
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@@ -399,6 +401,7 @@ int soconnect2(struct socket *so1, struct socket *so2);
int socreate(int dom, struct socket **aso, int type, int proto,
struct ucred *cred, struct thread *td);
int sodisconnect(struct socket *so);
+void sodtor_set(struct socket *, so_dtor_t *);
struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
void sofree(struct socket *so);
void sohasoutofband(struct socket *so);
@@ -477,15 +480,9 @@ int accept_filt_generic_mod_event(module_t mod, int event, void *data);
* Structure to export socket from kernel to utilities, via sysctl(3).
*/
struct xsocket {
- size_t xso_len; /* length of this structure */
- union {
- void *xso_so; /* kernel address of struct socket */
- int64_t ph_so;
- };
- union {
- void *so_pcb; /* kernel address of struct inpcb */
- int64_t ph_pcb;
- };
+ ksize_t xso_len; /* length of this structure */
+ kvaddr_t xso_so; /* kernel address of struct socket */
+ kvaddr_t so_pcb; /* kernel address of struct inpcb */
uint64_t so_oobmark;
int64_t so_spare64[8];
int32_t xso_protocol;
diff --git a/freebsd/sys/sys/sockopt.h b/freebsd/sys/sys/sockopt.h
index 7f19ecf8..e7cc6cf0 100644
--- a/freebsd/sys/sys/sockopt.h
+++ b/freebsd/sys/sys/sockopt.h
@@ -62,7 +62,6 @@ int sosetopt(struct socket *so, struct sockopt *sopt);
int sogetopt(struct socket *so, struct sockopt *sopt);
int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);
int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len);
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int soopt_getm(struct sockopt *sopt, struct mbuf **mp);
int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m);
int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m);
diff --git a/freebsd/sys/sys/sx.h b/freebsd/sys/sys/sx.h
index 566137bd..10cfb10a 100644
--- a/freebsd/sys/sys/sx.h
+++ b/freebsd/sys/sys/sx.h
@@ -76,8 +76,8 @@
#define SX_LOCK_SHARED 0x01
#define SX_LOCK_SHARED_WAITERS 0x02
#define SX_LOCK_EXCLUSIVE_WAITERS 0x04
-#define SX_LOCK_RECURSED 0x08
-#define SX_LOCK_WRITE_SPINNER 0x10
+#define SX_LOCK_WRITE_SPINNER 0x08
+#define SX_LOCK_RECURSED 0x10
#define SX_LOCK_FLAGMASK \
(SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS | \
SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED | SX_LOCK_WRITE_SPINNER)
diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h
index b328cb51..89467f3b 100644
--- a/freebsd/sys/sys/sysproto.h
+++ b/freebsd/sys/sys/sysproto.h
@@ -90,7 +90,7 @@ struct chown_args {
char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)];
};
-struct obreak_args {
+struct break_args {
char nsize_l_[PADL_(char *)]; char * nsize; char nsize_r_[PADR_(char *)];
};
struct getpid_args {
@@ -1831,7 +1831,7 @@ int sys_chdir(struct thread *, struct chdir_args *);
int sys_fchdir(struct thread *, struct fchdir_args *);
int sys_chmod(struct thread *, struct chmod_args *);
int sys_chown(struct thread *, struct chown_args *);
-int sys_obreak(struct thread *, struct obreak_args *);
+int sys_break(struct thread *, struct break_args *);
int sys_getpid(struct thread *, struct getpid_args *);
int sys_mount(struct thread *, struct mount_args *);
int sys_unmount(struct thread *, struct unmount_args *);
diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h
index 9c21f589..4145be2c 100644
--- a/freebsd/sys/sys/systm.h
+++ b/freebsd/sys/sys/systm.h
@@ -48,6 +48,7 @@
#include <sys/stdint.h> /* for people using printf mainly */
#ifdef __rtems__
#include <string.h>
+#include <rtems/score/threaddispatch.h>
#endif /* __rtems__ */
__NULLABILITY_PRAGMA_PUSH
@@ -106,12 +107,21 @@ extern int vm_guest; /* Running as virtual machine guest? */
enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_LAST };
+/*
+ * These functions need to be declared before the KASSERT macro is invoked in
+ * !KASSERT_PANIC_OPTIONAL builds, so their declarations are sort of out of
+ * place compared to other function definitions in this header. On the other
+ * hand, this header is a bit disorganized anyway.
+ */
+void panic(const char *, ...) __dead2 __printflike(1, 2);
+void vpanic(const char *, __va_list) __dead2 __printflike(1, 0);
+
#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
-#ifndef __rtems__
+#ifdef KASSERT_PANIC_OPTIONAL
void kassert_panic(const char *fmt, ...) __printflike(1, 2);
-#else /* __rtems__ */
-#define kassert_panic panic
-#endif /* __rtems__ */
+#else
+#define kassert_panic panic
+#endif
#endif
#ifdef INVARIANTS /* The option is always available */
@@ -137,6 +147,12 @@ void kassert_panic(const char *fmt, ...) __printflike(1, 2);
#define CTASSERT(x) _Static_assert(x, "compile-time assertion failed")
#endif
+#if defined(_KERNEL)
+#include <sys/param.h> /* MAXCPU */
+#include <sys/pcpu.h> /* curthread */
+#include <sys/kpilite.h>
+#endif
+
/*
* Assert that a pointer can be loaded from memory atomically.
*
@@ -184,11 +200,10 @@ void kassert_panic(const char *fmt, ...) __printflike(1, 2);
* XXX most of these variables should be const.
*/
extern int osreldate;
-extern int envmode;
-extern int hintmode; /* 0 = off. 1 = config, 2 = fallback */
-extern int dynamic_kenv;
+extern bool dynamic_kenv;
extern struct mtx kenv_lock;
extern char *kern_envp;
+extern char *md_envp;
extern char static_env[];
extern char static_hints[]; /* by config for now */
@@ -244,34 +259,57 @@ void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries,
int flags);
void g_waitidle(void);
-void panic(const char *, ...) __dead2 __printflike(1, 2);
-void vpanic(const char *, __va_list) __dead2 __printflike(1, 0);
-
void cpu_boot(int);
void cpu_flush_dcache(void *, size_t);
void cpu_rootconf(void);
-#ifndef __rtems__
-void critical_enter(void);
-void critical_exit(void);
-#else /* __rtems__ */
-#include <rtems/score/threaddispatch.h>
+void critical_enter_KBI(void);
+void critical_exit_KBI(void);
+void critical_exit_preempt(void);
+void init_param1(void);
+void init_param2(long physpages);
+void init_static_kenv(char *, size_t);
+void tablefull(const char *);
+#if defined(KLD_MODULE) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET)
+#define critical_enter() critical_enter_KBI()
+#define critical_exit() critical_exit_KBI()
+#else
static __inline void
critical_enter(void)
{
+#ifndef __rtems__
+ struct thread_lite *td;
+
+ td = (struct thread_lite *)curthread;
+ td->td_critnest++;
+ __compiler_membar();
+#else /* __rtems__ */
_Thread_Dispatch_disable();
+#endif /* __rtems__ */
}
static __inline void
critical_exit(void)
{
+#ifndef __rtems__
+ struct thread_lite *td;
+
+ td = (struct thread_lite *)curthread;
+ KASSERT(td->td_critnest != 0,
+ ("critical_exit: td_critnest == 0"));
+ __compiler_membar();
+ td->td_critnest--;
+ __compiler_membar();
+ if (__predict_false(td->td_owepreempt))
+ critical_exit_preempt();
+#else /* __rtems__ */
_Thread_Dispatch_enable(_Per_CPU_Get());
-}
#endif /* __rtems__ */
-void init_param1(void);
-void init_param2(long physpages);
-void init_static_kenv(char *, size_t);
-void tablefull(const char *);
+
+}
+#endif
+
+
#ifdef EARLY_PRINTF
typedef void early_putc_t(int ch);
extern early_putc_t *early_putc;
@@ -329,30 +367,22 @@ void hexdump(const void *ptr, int length, const char *hdr, int flags);
#define HD_OMIT_CHARS (1 << 18)
#define ovbcopy(f, t, l) bcopy((f), (t), (l))
-#ifndef __rtems__
void bcopy(const void * _Nonnull from, void * _Nonnull to, size_t len);
-#define bcopy(from, to, len) ({ \
- if (__builtin_constant_p(len) && (len) <= 64) \
- __builtin_memmove((to), (from), (len)); \
- else \
- bcopy((from), (to), (len)); \
-})
+#define bcopy(from, to, len) __builtin_memmove((to), (from), (len))
void bzero(void * _Nonnull buf, size_t len);
-#define bzero(buf, len) ({ \
- if (__builtin_constant_p(len) && (len) <= 64) \
- __builtin_memset((buf), 0, (len)); \
- else \
- bzero((buf), (len)); \
-})
-#else /* __rtems__ */
-#define bcopy(src, dst, len) memmove((dst), (src), (len))
-#define bzero(buf, size) memset((buf), 0, (size))
-#endif /* __rtems__ */
+#define bzero(buf, len) __builtin_memset((buf), 0, (len))
void explicit_bzero(void * _Nonnull, size_t);
+int bcmp(const void *b1, const void *b2, size_t len);
+#define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len))
+void *memset(void * _Nonnull buf, int c, size_t len);
+#define memset(buf, c, len) __builtin_memset((buf), (c), (len))
void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len);
-#define memcpy(to, from, len) __builtin_memcpy(to, from, len)
+#define memcpy(to, from, len) __builtin_memcpy((to), (from), (len))
void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n);
+#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
+int memcmp(const void *b1, const void *b2, size_t len);
+#define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len))
#ifndef __rtems__
int copystr(const void * _Nonnull __restrict kfaddr,
@@ -451,17 +481,13 @@ void realitexpire(void *);
int sysbeep(int hertz, int period);
-void hardclock(int usermode, uintfptr_t pc);
-void hardclock_cnt(int cnt, int usermode);
-void hardclock_cpu(int usermode);
+void hardclock(int cnt, int usermode);
void hardclock_sync(int cpu);
#ifndef __rtems__
void softclock(void *);
+void statclock(int cnt, int usermode);
+void profclock(int cnt, int usermode, uintfptr_t pc);
#endif /* __rtems__ */
-void statclock(int usermode);
-void statclock_cnt(int cnt, int usermode);
-void profclock(int usermode, uintfptr_t pc);
-void profclock_cnt(int cnt, int usermode, uintfptr_t pc);
int hardclockintr(void);
@@ -492,6 +518,11 @@ int kern_setenv(const char *name, const char *value);
int kern_unsetenv(const char *name);
int testenv(const char *name);
+int getenv_array(const char *name, void *data, int size, int *psize,
+ int type_size, bool allow_signed);
+#define GETENV_UNSIGNED false /* negative numbers not allowed */
+#define GETENV_SIGNED true /* negative numbers allowed */
+
typedef uint64_t (cpu_tick_f)(void);
void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var);
extern cpu_tick_f *cpu_ticks;
diff --git a/freebsd/sys/sys/unpcb.h b/freebsd/sys/sys/unpcb.h
index d80b1384..7d7a20ac 100644
--- a/freebsd/sys/sys/unpcb.h
+++ b/freebsd/sys/sys/unpcb.h
@@ -142,12 +142,12 @@ struct unpcb {
*/
#ifdef _SYS_SOCKETVAR_H_
struct xunpcb {
- size_t xu_len; /* length of this structure */
- void *xu_unpp; /* to help netstat, fstat */
- void *unp_vnode; /* (s) */
- void *unp_conn; /* (s) */
- void *xu_firstref; /* (s) */
- void *xu_nextref; /* (s) */
+ ksize_t xu_len; /* length of this structure */
+ kvaddr_t xu_unpp; /* to help netstat, fstat */
+ kvaddr_t unp_vnode; /* (s) */
+ kvaddr_t unp_conn; /* (s) */
+ kvaddr_t xu_firstref; /* (s) */
+ kvaddr_t xu_nextref; /* (s) */
unp_gen_t unp_gencnt; /* (s) */
int64_t xu_spare64[8];
int32_t xu_spare32[8];
@@ -163,11 +163,20 @@ struct xunpcb {
} __aligned(8);
struct xunpgen {
- size_t xug_len;
+ ksize_t xug_len;
u_int xug_count;
unp_gen_t xug_gen;
so_gen_t xug_sogen;
} __aligned(8);;
#endif /* _SYS_SOCKETVAR_H_ */
+#if defined(_KERNEL)
+struct thread;
+
+/* In uipc_userreq.c */
+void
+unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
+ struct unpcb *server_unp, struct unpcb *listen_unp);
+#endif
+
#endif /* _SYS_UNPCB_H_ */
diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h
index 3c570b0a..c41b151f 100644
--- a/freebsd/sys/sys/vmmeter.h
+++ b/freebsd/sys/sys/vmmeter.h
@@ -187,6 +187,13 @@ vm_page_count_severe(void)
return (!DOMAINSET_EMPTY(&vm_severe_domains));
}
+static inline int
+vm_page_count_severe_set(domainset_t *mask)
+{
+
+ return (DOMAINSET_SUBSET(&vm_severe_domains, mask));
+}
+
/*
* Return TRUE if we are under our minimum low-free-pages threshold.
*
diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h
index d71f0ee3..3ab65563 100644
--- a/freebsd/sys/vm/uma.h
+++ b/freebsd/sys/vm/uma.h
@@ -279,8 +279,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
* mini-dumps.
*/
#define UMA_ZONE_PCPU 0x8000 /*
- * Allocates mp_maxid + 1 slabs sized to
- * sizeof(struct pcpu).
+ * Allocates mp_maxid + 1 slabs of PAGE_SIZE
*/
#define UMA_ZONE_NUMA 0x10000 /*
* NUMA aware Zone. Implements a best
@@ -333,6 +332,7 @@ void uma_zdestroy(uma_zone_t zone);
*/
void *uma_zalloc_arg(uma_zone_t zone, void *arg, int flags);
+void *uma_zalloc_pcpu_arg(uma_zone_t zone, void *arg, int flags);
/*
* Allocate an item from a specific NUMA domain. This uses a slow path in
@@ -354,6 +354,7 @@ void *uma_zalloc_domain(uma_zone_t zone, void *arg, int domain, int flags);
*
*/
static __inline void *uma_zalloc(uma_zone_t zone, int flags);
+static __inline void *uma_zalloc_pcpu(uma_zone_t zone, int flags);
static __inline void *
uma_zalloc(uma_zone_t zone, int flags)
@@ -361,6 +362,12 @@ uma_zalloc(uma_zone_t zone, int flags)
return uma_zalloc_arg(zone, NULL, flags);
}
+static __inline void *
+uma_zalloc_pcpu(uma_zone_t zone, int flags)
+{
+ return uma_zalloc_pcpu_arg(zone, NULL, flags);
+}
+
/*
* Frees an item back into the specified zone.
*
@@ -374,6 +381,7 @@ uma_zalloc(uma_zone_t zone, int flags)
*/
void uma_zfree_arg(uma_zone_t zone, void *item, void *arg);
+void uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *arg);
/*
* Frees an item back to the specified zone's domain specific pool.
@@ -392,6 +400,7 @@ void uma_zfree_domain(uma_zone_t zone, void *item, void *arg);
*
*/
static __inline void uma_zfree(uma_zone_t zone, void *item);
+static __inline void uma_zfree_pcpu(uma_zone_t zone, void *item);
static __inline void
uma_zfree(uma_zone_t zone, void *item)
@@ -399,6 +408,12 @@ uma_zfree(uma_zone_t zone, void *item)
uma_zfree_arg(zone, item, NULL);
}
+static __inline void
+uma_zfree_pcpu(uma_zone_t zone, void *item)
+{
+ uma_zfree_pcpu_arg(zone, item, NULL);
+}
+
/*
* Wait until the specified zone can allocate an item.
*/
@@ -603,12 +618,12 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
#ifndef __rtems__
#define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */
#endif /* __rtems__ */
-#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kernel_map */
+#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kmem */
#ifndef __rtems__
#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */
#define UMA_SLAB_OFFP 0x10 /* Slab is managed separately */
#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */
-/* 0x02, 0x40 and 0x80 are available */
+/* 0x02, 0x40, and 0x80 are available */
#endif /* __rtems__ */
/*
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index b8145c72..0f4bbb35 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -276,9 +276,13 @@ static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
#endif /* __rtems__ */
static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
#ifndef __rtems__
+static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
#endif /* __rtems__ */
static void page_free(void *, vm_size_t, uint8_t);
+#ifndef __rtems__
+static void pcpu_page_free(void *, vm_size_t, uint8_t);
+#endif /* __rtems__ */
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
@@ -323,8 +327,25 @@ static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
#ifdef INVARIANTS
+static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
+static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
+
+static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
+ "Memory allocation debugging");
+
+static u_int dbg_divisor = 1;
+SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
+ "Debug & thrash every this item in memory allocator");
+
+static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
+static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
+ &uma_dbg_cnt, "memory items debugged");
+SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
+ &uma_skip_cnt, "memory items skipped, not debugged");
#endif
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
@@ -910,6 +931,18 @@ keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
i = start;
if (keg->uk_fini != NULL) {
for (i--; i > -1; i--)
+#ifdef INVARIANTS
+ /*
+ * trash_fini implies that the dtor was trash_dtor. trash_fini
+ * checks that the memory has not been modified since it was
+ * freed, i.e. since trash_dtor ran on it.
+ * That is why the uma_dbg_kskip() check must be run here,
+ * even though no skip check is made for other init/fini
+ * invocations.
+ */
+ if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) ||
+ keg->uk_fini != trash_fini)
+#endif
keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
keg->uk_size);
}
@@ -1209,6 +1242,57 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
}
#ifndef __rtems__
+static void *
+pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
+ int wait)
+{
+ struct pglist alloctail;
+ vm_offset_t addr, zkva;
+ int cpu, flags;
+ vm_page_t p, p_next;
+#ifdef NUMA
+ struct pcpu *pc;
+#endif
+
+ MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
+
+ TAILQ_INIT(&alloctail);
+ flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
+ malloc2vm_flags(wait);
+ *pflag = UMA_SLAB_KERNEL;
+ for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ if (CPU_ABSENT(cpu)) {
+ p = vm_page_alloc(NULL, 0, flags);
+ } else {
+#ifndef NUMA
+ p = vm_page_alloc(NULL, 0, flags);
+#else
+ pc = pcpu_find(cpu);
+ p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
+ if (__predict_false(p == NULL))
+ p = vm_page_alloc(NULL, 0, flags);
+#endif
+ }
+ if (__predict_false(p == NULL))
+ goto fail;
+ TAILQ_INSERT_TAIL(&alloctail, p, listq);
+ }
+ if ((addr = kva_alloc(bytes)) == 0)
+ goto fail;
+ zkva = addr;
+ TAILQ_FOREACH(p, &alloctail, listq) {
+ pmap_qenter(zkva, &p, 1);
+ zkva += PAGE_SIZE;
+ }
+ return ((void*)addr);
+ fail:
+ TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
+ vm_page_unwire(p, PQ_NONE);
+ vm_page_free(p);
+ }
+ return (NULL);
+}
+
/*
* Allocates a number of pages from within an object
*
@@ -1286,14 +1370,11 @@ static void
page_free(void *mem, vm_size_t size, uint8_t flags)
{
#ifndef __rtems__
- struct vmem *vmem;
- if (flags & UMA_SLAB_KERNEL)
- vmem = kernel_arena;
- else
+ if ((flags & UMA_SLAB_KERNEL) == 0)
panic("UMA: page_free used with invalid flags %x", flags);
- kmem_free(vmem, (vm_offset_t)mem, size);
+ kmem_free((vm_offset_t)mem, size);
#else /* __rtems__ */
if (flags & UMA_SLAB_KERNEL)
free(mem, M_TEMP);
@@ -1302,6 +1383,39 @@ page_free(void *mem, vm_size_t size, uint8_t flags)
#endif /* __rtems__ */
}
+#ifndef __rtems__
+/*
+ * Frees pcpu zone allocations
+ *
+ * Arguments:
+ * mem A pointer to the memory to be freed
+ * size The size of the memory being freed
+ * flags The original p->us_flags field
+ *
+ * Returns:
+ * Nothing
+ */
+static void
+pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
+{
+ vm_offset_t sva, curva;
+ vm_paddr_t paddr;
+ vm_page_t m;
+
+ MPASS(size == (mp_maxid+1)*PAGE_SIZE);
+ sva = (vm_offset_t)mem;
+ for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
+ paddr = pmap_kextract(curva);
+ m = PHYS_TO_VM_PAGE(paddr);
+ vm_page_unwire(m, PQ_NONE);
+ vm_page_free(m);
+ }
+ pmap_qremove(sva, size >> PAGE_SHIFT);
+ kva_free(sva, size);
+}
+#endif /* __rtems__ */
+
+
/*
* Zero fill initializer
*
@@ -1335,9 +1449,8 @@ keg_small_init(uma_keg_t keg)
if (keg->uk_flags & UMA_ZONE_PCPU) {
u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
- slabsize = sizeof(struct pcpu);
- keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
- PAGE_SIZE);
+ slabsize = UMA_PCPU_ALLOC_SIZE;
+ keg->uk_ppera = ncpus;
} else {
slabsize = UMA_SLAB_SIZE;
keg->uk_ppera = 1;
@@ -1356,7 +1469,7 @@ keg_small_init(uma_keg_t keg)
keg->uk_rsize = rsize;
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
- keg->uk_rsize < sizeof(struct pcpu),
+ keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
("%s: size %u too large", __func__, keg->uk_rsize));
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
@@ -1575,6 +1688,8 @@ keg_ctor(void *mem, int size, void *udata, int flags)
else if (keg->uk_ppera == 1)
keg->uk_allocf = uma_small_alloc;
#endif
+ else if (keg->uk_flags & UMA_ZONE_PCPU)
+ keg->uk_allocf = pcpu_page_alloc;
else
#endif /* __rtems__ */
keg->uk_allocf = page_alloc;
@@ -1584,6 +1699,9 @@ keg_ctor(void *mem, int size, void *udata, int flags)
keg->uk_freef = uma_small_free;
else
#endif
+ if (keg->uk_flags & UMA_ZONE_PCPU)
+ keg->uk_freef = pcpu_page_free;
+ else
#endif /* __rtems__ */
keg->uk_freef = page_free;
@@ -2066,11 +2184,16 @@ static void
uma_startup3(void)
{
+#ifdef INVARIANTS
+ TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
+ uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
+ uma_skip_cnt = counter_u64_alloc(M_WAITOK);
+#endif
+ callout_init(&uma_callout, 1);
+ callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifndef __rtems__
booted = BOOT_RUNNING;
#endif /* __rtems__ */
- callout_init(&uma_callout, 1);
- callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}
static uma_keg_t
@@ -2324,6 +2447,40 @@ uma_zwait(uma_zone_t zone)
uma_zfree(zone, item);
}
+void *
+uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
+{
+ void *item;
+#ifdef SMP
+ int i;
+
+ MPASS(zone->uz_flags & UMA_ZONE_PCPU);
+#endif
+ item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
+ if (item != NULL && (flags & M_ZERO)) {
+#ifdef SMP
+ for (i = 0; i <= mp_maxid; i++)
+ bzero(zpcpu_get_cpu(item, i), zone->uz_size);
+#else
+ bzero(item, zone->uz_size);
+#endif
+ }
+ return (item);
+}
+
+/*
+ * A thin wrapper for now, since the regular and pcpu free paths are identical.
+ */
+void
+uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
+{
+
+#ifdef SMP
+ MPASS(zone->uz_flags & UMA_ZONE_PCPU);
+#endif
+ uma_zfree_arg(zone, item, udata);
+}
+
/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
@@ -2333,9 +2490,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
uma_cache_t cache;
void *item;
int cpu, domain, lockfail;
+#ifdef INVARIANTS
+ bool skipdbg;
+#endif
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
- random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
/* This is the fast path allocation */
CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
@@ -2346,8 +2506,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
}
#ifndef __rtems__
+ KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
("uma_zalloc_arg: called with spinlock or critical section held"));
+ if (zone->uz_flags & UMA_ZONE_PCPU)
+ KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
+ "with M_ZERO passed"));
#endif /* __rtems__ */
#ifdef DEBUG_MEMGUARD
@@ -2394,14 +2558,22 @@ zalloc_start:
KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
cache->uc_allocs++;
critical_exit();
+#ifdef INVARIANTS
+ skipdbg = uma_dbg_zskip(zone, item);
+#endif
if (zone->uz_ctor != NULL &&
+#ifdef INVARIANTS
+ (!skipdbg || zone->uz_ctor != trash_ctor ||
+ zone->uz_dtor != trash_dtor) &&
+#endif
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
atomic_add_long(&zone->uz_fails, 1);
zone_free_item(zone, item, udata, SKIP_DTOR);
return (NULL);
}
#ifdef INVARIANTS
- uma_dbg_alloc(zone, NULL, item);
+ if (!skipdbg)
+ uma_dbg_alloc(zone, NULL, item);
#endif
if (flags & M_ZERO)
uma_zero_item(item, zone);
@@ -2534,7 +2706,7 @@ uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
{
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
- random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
/* This is the fast path allocation */
CTR5(KTR_UMA,
@@ -2820,9 +2992,9 @@ zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
{
uma_slab_t slab;
uma_keg_t keg;
-#ifndef __rtems__
+#ifdef NUMA
int stripe;
-#endif /* __rtems__ */
+#endif
int i;
slab = NULL;
@@ -2832,9 +3004,9 @@ zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL)
break;
keg = slab->us_keg;
-#ifndef __rtems__
+#ifdef NUMA
stripe = howmany(max, vm_ndomains);
-#endif /* __rtems__ */
+#endif
while (slab->us_freecount && i < max) {
bucket[i++] = slab_alloc_item(keg, slab);
if (keg->uk_free <= keg->uk_reserve)
@@ -2930,6 +3102,9 @@ static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
{
void *item;
+#ifdef INVARIANTS
+ bool skipdbg;
+#endif
item = NULL;
@@ -2937,6 +3112,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
goto fail;
atomic_add_long(&zone->uz_allocs, 1);
+#ifdef INVARIANTS
+ skipdbg = uma_dbg_zskip(zone, item);
+#endif
/*
* We have to call both the zone's init (not the keg's init)
* and the zone's ctor. This is because the item is going from
@@ -2949,14 +3127,18 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
goto fail;
}
}
- if (zone->uz_ctor != NULL) {
- if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
- zone_free_item(zone, item, udata, SKIP_DTOR);
- goto fail;
- }
+ if (zone->uz_ctor != NULL &&
+#ifdef INVARIANTS
+ (!skipdbg || zone->uz_ctor != trash_ctor ||
+ zone->uz_dtor != trash_dtor) &&
+#endif
+ zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
+ zone_free_item(zone, item, udata, SKIP_DTOR);
+ goto fail;
}
#ifdef INVARIANTS
- uma_dbg_alloc(zone, NULL, item);
+ if (!skipdbg)
+ uma_dbg_alloc(zone, NULL, item);
#endif
if (flags & M_ZERO)
uma_zero_item(item, zone);
@@ -2981,9 +3163,12 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
uma_bucket_t bucket;
uma_zone_domain_t zdom;
int cpu, domain, lockfail;
+#ifdef INVARIANTS
+ bool skipdbg;
+#endif
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
- random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
zone->uz_name);
@@ -3007,12 +3192,18 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
}
#endif
#ifdef INVARIANTS
- if (zone->uz_flags & UMA_ZONE_MALLOC)
- uma_dbg_free(zone, udata, item);
- else
- uma_dbg_free(zone, NULL, item);
-#endif
+ skipdbg = uma_dbg_zskip(zone, item);
+ if (skipdbg == false) {
+ if (zone->uz_flags & UMA_ZONE_MALLOC)
+ uma_dbg_free(zone, udata, item);
+ else
+ uma_dbg_free(zone, NULL, item);
+ }
+ if (zone->uz_dtor != NULL && (!skipdbg ||
+ zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor))
+#else
if (zone->uz_dtor != NULL)
+#endif
zone->uz_dtor(item, zone->uz_size, udata);
/*
@@ -3079,14 +3270,6 @@ zfree_start:
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
- /*
- * Since we have locked the zone we may as well send back our stats.
- */
- atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
- atomic_add_long(&zone->uz_frees, cache->uc_frees);
- cache->uc_allocs = 0;
- cache->uc_frees = 0;
-
bucket = cache->uc_freebucket;
if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
ZONE_UNLOCK(zone);
@@ -3163,7 +3346,7 @@ uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
{
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
- random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
zone->uz_name);
@@ -3276,16 +3459,23 @@ zone_release(uma_zone_t zone, void **bucket, int cnt)
static void
zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
{
-
#ifdef INVARIANTS
- if (skip == SKIP_NONE) {
+ bool skipdbg;
+
+ skipdbg = uma_dbg_zskip(zone, item);
+ if (skip == SKIP_NONE && !skipdbg) {
if (zone->uz_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
}
+
+ if (skip < SKIP_DTOR && zone->uz_dtor != NULL &&
+ (!skipdbg || zone->uz_dtor != trash_dtor ||
+ zone->uz_ctor != trash_ctor))
+#else
+ if (skip < SKIP_DTOR && zone->uz_dtor != NULL)
#endif
- if (skip < SKIP_DTOR && zone->uz_dtor)
zone->uz_dtor(item, zone->uz_size, udata);
if (skip < SKIP_FINI && zone->uz_fini)
@@ -3648,7 +3838,7 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
if (slab == NULL)
return (NULL);
if (domain == UMA_ANYDOMAIN)
- addr = kmem_malloc(kernel_arena, size, wait);
+ addr = kmem_malloc(size, wait);
else
addr = kmem_malloc_domain(domain, size, wait);
if (addr != 0) {
@@ -3679,7 +3869,7 @@ uma_large_free(uma_slab_t slab)
KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
("uma_large_free: Memory not allocated with uma_large_malloc."));
- kmem_free(kernel_arena, (vm_offset_t)slab->us_data, slab->us_size);
+ kmem_free((vm_offset_t)slab->us_data, slab->us_size);
uma_total_dec(slab->us_size);
zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
@@ -3688,13 +3878,8 @@ uma_large_free(uma_slab_t slab)
static void
uma_zero_item(void *item, uma_zone_t zone)
{
- int i;
- if (zone->uz_flags & UMA_ZONE_PCPU) {
- CPU_FOREACH(i)
- bzero(zpcpu_get_cpu(item, i), zone->uz_size);
- } else
- bzero(item, zone->uz_size);
+ bzero(item, zone->uz_size);
}
unsigned long
@@ -4022,6 +4207,43 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
return (slab);
}
+static bool
+uma_dbg_zskip(uma_zone_t zone, void *mem)
+{
+ uma_keg_t keg;
+
+ if ((keg = zone_first_keg(zone)) == NULL)
+ return (true);
+
+ return (uma_dbg_kskip(keg, mem));
+}
+
+static bool
+uma_dbg_kskip(uma_keg_t keg, void *mem)
+{
+ uintptr_t idx;
+
+ if (dbg_divisor == 0)
+ return (true);
+
+ if (dbg_divisor == 1)
+ return (false);
+
+ idx = (uintptr_t)mem >> PAGE_SHIFT;
+ if (keg->uk_ipers > 1) {
+ idx *= keg->uk_ipers;
+ idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
+ }
+
+ if ((idx / dbg_divisor) * dbg_divisor != idx) {
+ counter_u64_add(uma_skip_cnt, 1);
+ return (true);
+ }
+ counter_u64_add(uma_dbg_cnt, 1);
+
+ return (false);
+}
+
/*
* Set up the slab's freei data such that uma_dbg_free can function.
*
@@ -4032,8 +4254,6 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
uma_keg_t keg;
int freei;
- if (zone_first_keg(zone) == NULL)
- return;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)
@@ -4062,8 +4282,6 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
uma_keg_t keg;
int freei;
- if (zone_first_keg(zone) == NULL)
- return;
if (slab == NULL) {
slab = uma_dbg_getslab(zone, item);
if (slab == NULL)
diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h
index 8d58fa33..5f787dfa 100644
--- a/freebsd/sys/vm/uma_int.h
+++ b/freebsd/sys/vm/uma_int.h
@@ -176,7 +176,7 @@ struct uma_hash {
/*
* align field or structure to cache line
*/
-#if defined(__amd64__)
+#if defined(__amd64__) || defined(__powerpc64__)
#define UMA_ALIGN __aligned(128)
#else
#define UMA_ALIGN
@@ -188,7 +188,7 @@ struct uma_hash {
struct uma_bucket {
LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */
- int16_t ub_cnt; /* Count of free items. */
+ int16_t ub_cnt; /* Count of items in bucket. */
int16_t ub_entries; /* Max items. */
void *ub_bucket[]; /* actual allocation storage */
};
@@ -222,9 +222,8 @@ typedef struct uma_domain * uma_domain_t;
*
*/
struct uma_keg {
- struct mtx_padalign uk_lock; /* Lock for the keg */
+ struct mtx uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
-
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
uint32_t uk_cursor; /* Domain alloc cursor. */
@@ -319,41 +318,49 @@ typedef struct uma_zone_domain * uma_zone_domain_t;
*
*/
struct uma_zone {
- struct mtx_padalign uz_lock; /* Lock for the zone */
- struct mtx_padalign *uz_lockptr;
- const char *uz_name; /* Text name of the zone */
-
- LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
+ /* Offset 0, used in alloc/free fast/medium fast path and const. */
+ struct mtx *uz_lockptr;
+ const char *uz_name; /* Text name of the zone */
struct uma_zone_domain *uz_domain; /* per-domain buckets */
-
- LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
- struct uma_klink uz_klink; /* klink for first keg. */
-
- uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
+ uint32_t uz_flags; /* Flags inherited from kegs */
+ uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Finalizer for each item. */
+
+ /* Offset 64, used in bucket replenish. */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
-
- uint32_t uz_flags; /* Flags inherited from kegs */
- uint32_t uz_size; /* Size inherited from kegs */
-
- volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
- volatile u_long uz_fails; /* Total number of alloc failures */
- volatile u_long uz_frees; /* Total number of frees */
- uint64_t uz_sleeps; /* Total number of alloc sleeps */
+ uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
uint16_t uz_count; /* Amount of items in full bucket */
uint16_t uz_count_min; /* Minimal amount of items there */
+ /* 32bit pad on 64bit. */
+ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
+ LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
+ /* Offset 128 Rare. */
+ /*
+ * The lock is placed here to avoid adjacent line prefetcher
+ * in fast paths and to take up space near infrequently accessed
+ * members to reduce alignment overhead.
+ */
+ struct mtx uz_lock; /* Lock for the zone */
+ struct uma_klink uz_klink; /* klink for first keg. */
/* The next two fields are used to print a rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
-
struct task uz_maxaction; /* Task to run when at limit */
+ /* 16 bytes of pad. */
+
+ /* Offset 256, atomic stats. */
+ volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
+ volatile u_long uz_fails; /* Total number of alloc failures */
+ volatile u_long uz_frees; /* Total number of frees */
+ uint64_t uz_sleeps; /* Total number of alloc sleeps */
+
/*
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h
index 47e35b2d..b2f1d726 100644
--- a/freebsd/sys/vm/vm_extern.h
+++ b/freebsd/sys/vm/vm_extern.h
@@ -54,19 +54,19 @@ vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t);
void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
/* These operate on virtual addresses backed by memory. */
-vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
+vm_offset_t kmem_alloc_attr(vm_size_t size, int flags,
vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
-vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
+vm_offset_t kmem_alloc_contig(vm_size_t size, int flags,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr);
vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr);
-vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+vm_offset_t kmem_malloc(vm_size_t size, int flags);
vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
-void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
+void kmem_free(vm_offset_t addr, vm_size_t size);
/* This provides memory for previously allocated address space. */
int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
@@ -74,6 +74,7 @@ int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);
/* Bootstrapping. */
+void kmem_bootstrap_free(vm_offset_t, vm_size_t);
vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t,
boolean_t);
void kmem_init(vm_offset_t, vm_offset_t);