summary refs log tree commit diff stats
path: root/freebsd/sys
diff options
context:
space:
mode:
author Sebastian Huber <sebastian.huber@embedded-brains.de> 2019-09-24 11:05:03 +0200
committer Sebastian Huber <sebastian.huber@embedded-brains.de> 2019-11-13 10:47:04 +0100
commit a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf (patch)
tree db091fb0f7d091804482156c9f3f55879ac93d5b /freebsd/sys
parent test/syscalls01: Fix sporadic test failures (diff)
download rtems-libbsd-a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf.tar.bz2
Update to FreeBSD head 2019-09-24
Git mirror commit 6b0307a0a5184339393f555d5d424190d8a8277a.
Diffstat (limited to 'freebsd/sys')
-rw-r--r--freebsd/sys/arm/include/machine/cpufunc.h58
-rw-r--r--freebsd/sys/arm/ti/am335x/tda19988.c25
-rw-r--r--freebsd/sys/arm/ti/cpsw/if_cpsw.c22
-rw-r--r--freebsd/sys/arm/ti/ti_hwmods.c16
-rw-r--r--freebsd/sys/arm/ti/ti_sdhci.c24
-rw-r--r--freebsd/sys/cam/ata/ata_all.h1
-rw-r--r--freebsd/sys/cam/cam.c10
-rw-r--r--freebsd/sys/cam/cam_ccb.h1
-rw-r--r--freebsd/sys/cam/cam_periph.h3
-rw-r--r--freebsd/sys/cam/cam_sim.h10
-rw-r--r--freebsd/sys/cam/nvme/nvme_all.h6
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.c135
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.h60
-rw-r--r--freebsd/sys/crypto/blowfish/bf_skey.c4
-rw-r--r--freebsd/sys/crypto/blowfish/blowfish.h2
-rw-r--r--freebsd/sys/crypto/chacha20/chacha-sw.c4
-rw-r--r--freebsd/sys/crypto/chacha20/chacha.c33
-rw-r--r--freebsd/sys/crypto/chacha20/chacha.h11
-rw-r--r--freebsd/sys/crypto/des/des.h27
-rw-r--r--freebsd/sys/crypto/des/des_ecb.c12
-rw-r--r--freebsd/sys/crypto/des/des_setkey.c22
-rw-r--r--freebsd/sys/dev/bge/if_bge.c41
-rw-r--r--freebsd/sys/dev/bge/if_bgereg.h6
-rw-r--r--freebsd/sys/dev/cadence/if_cgem.c10
-rw-r--r--freebsd/sys/dev/e1000/em_txrx.c9
-rw-r--r--freebsd/sys/dev/e1000/if_em.c347
-rw-r--r--freebsd/sys/dev/e1000/if_em.h9
-rw-r--r--freebsd/sys/dev/e1000/igb_txrx.c9
-rw-r--r--freebsd/sys/dev/evdev/evdev.c93
-rw-r--r--freebsd/sys/dev/evdev/evdev_private.h6
-rw-r--r--freebsd/sys/dev/extres/clk/clk.h1
-rw-r--r--freebsd/sys/dev/fb/fbd.c1
-rw-r--r--freebsd/sys/dev/fdt/fdt_common.c3
-rw-r--r--freebsd/sys/dev/gpio/gpiobus.c185
-rw-r--r--freebsd/sys/dev/gpio/gpiobusvar.h14
-rw-r--r--freebsd/sys/dev/gpio/ofw_gpiobus.c4
-rw-r--r--freebsd/sys/dev/iicbus/iicbus.c6
-rw-r--r--freebsd/sys/dev/iicbus/iicbus.h12
-rw-r--r--freebsd/sys/dev/iicbus/iiconf.c55
-rw-r--r--freebsd/sys/dev/iicbus/iiconf.h2
-rw-r--r--freebsd/sys/dev/led/led.c15
-rw-r--r--freebsd/sys/dev/mii/micphy.c12
-rw-r--r--freebsd/sys/dev/mmc/bridge.h2
-rw-r--r--freebsd/sys/dev/mmc/mmc.c2
-rw-r--r--freebsd/sys/dev/mmc/mmc_private.h2
-rw-r--r--freebsd/sys/dev/mmc/mmc_subr.c2
-rw-r--r--freebsd/sys/dev/mmc/mmc_subr.h2
-rw-r--r--freebsd/sys/dev/mmc/mmcbrvar.h2
-rw-r--r--freebsd/sys/dev/mmc/mmcreg.h46
-rw-r--r--freebsd/sys/dev/mmc/mmcsd.c8
-rw-r--r--freebsd/sys/dev/mmc/mmcvar.h2
-rw-r--r--freebsd/sys/dev/nvme/nvme.h524
-rw-r--r--freebsd/sys/dev/ofw/ofw_bus_subr.h8
-rw-r--r--freebsd/sys/dev/ofw/ofw_subr.c3
-rw-r--r--freebsd/sys/dev/pci/pci.c177
-rw-r--r--freebsd/sys/dev/pci/pci_pci.c45
-rw-r--r--freebsd/sys/dev/pci/pci_user.c8
-rw-r--r--freebsd/sys/dev/pci/pcivar.h19
-rw-r--r--freebsd/sys/dev/rtwn/if_rtwn.c12
-rw-r--r--freebsd/sys/dev/rtwn/if_rtwnvar.h3
-rw-r--r--freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.h7
-rw-r--r--freebsd/sys/dev/rtwn/pci/rtwn_pci_reg.c2
-rw-r--r--freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c123
-rw-r--r--freebsd/sys/dev/rtwn/pci/rtwn_pci_tx.c4
-rw-r--r--freebsd/sys/dev/rtwn/pci/rtwn_pci_var.h12
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e.h6
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_calib.c331
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c6
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_init.c77
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_priv.h34
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_reg.h54
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_rom.c3
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_rom_image.h23
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c19
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu.h12
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c37
-rw-r--r--freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c81
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce.h2
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c7
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_calib.c55
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_rx.c4
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c.h1
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_calib.c352
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c1
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h22
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_rom.c2
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c7
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_tx.c8
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/r92c_var.h6
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu.h1
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c8
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_init.c2
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_rx.c7
-rw-r--r--freebsd/sys/dev/rtwn/rtl8192e/usb/r92eu_attach.c2
-rw-r--r--freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c2
-rw-r--r--freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h6
-rw-r--r--freebsd/sys/dev/rtwn/usb/rtwn_usb_reg.c6
-rw-r--r--freebsd/sys/dev/sdhci/fsl_sdhci.c2
-rw-r--r--freebsd/sys/dev/sdhci/sdhci.c410
-rw-r--r--freebsd/sys/dev/sdhci/sdhci.h29
-rw-r--r--freebsd/sys/dev/usb/net/if_aue.c6
-rw-r--r--freebsd/sys/dev/usb/net/if_axe.c4
-rw-r--r--freebsd/sys/dev/usb/net/if_axge.c8
-rw-r--r--freebsd/sys/dev/usb/net/if_cdce.c5
-rw-r--r--freebsd/sys/dev/usb/net/if_mos.c6
-rw-r--r--freebsd/sys/dev/usb/net/if_rue.c6
-rw-r--r--freebsd/sys/dev/usb/net/if_smsc.c150
-rw-r--r--freebsd/sys/dev/usb/net/if_udav.c6
-rw-r--r--freebsd/sys/dev/usb/net/if_ure.c22
-rw-r--r--freebsd/sys/dev/usb/net/if_urereg.h2
-rw-r--r--freebsd/sys/dev/usb/net/usb_ethernet.c15
-rw-r--r--freebsd/sys/dev/usb/net/usb_ethernet.h8
-rw-r--r--freebsd/sys/dev/usb/quirk/usb_quirk.c44
-rw-r--r--freebsd/sys/dev/usb/serial/u3g.c4
-rw-r--r--freebsd/sys/dev/usb/serial/ugensa.c109
-rw-r--r--freebsd/sys/dev/usb/serial/umcs.c4
-rw-r--r--freebsd/sys/dev/usb/serial/usb_serial.c3
-rw-r--r--freebsd/sys/dev/usb/usb.h1
-rw-r--r--freebsd/sys/dev/usb/usb_bus.h1
-rw-r--r--freebsd/sys/dev/usb/usb_device.c2
-rw-r--r--freebsd/sys/dev/usb/usb_fdt_support.h (renamed from freebsd/sys/sys/capability.h)33
-rw-r--r--freebsd/sys/dev/usb/usb_generic.c58
-rw-r--r--freebsd/sys/dev/usb/usb_hid.c2
-rw-r--r--freebsd/sys/dev/usb/usb_hub.c76
-rw-r--r--freebsd/sys/dev/usb/usb_hub_private.h86
-rw-r--r--freebsd/sys/dev/usb/usb_ioctl.h3
-rw-r--r--freebsd/sys/dev/usb/usb_request.c5
-rw-r--r--freebsd/sys/dev/usb/usb_transfer.c33
-rw-r--r--freebsd/sys/dev/usb/usbdi.h2
-rw-r--r--freebsd/sys/dev/usb/wlan/if_rsu.c56
-rw-r--r--freebsd/sys/dev/usb/wlan/if_rsureg.h13
-rw-r--r--freebsd/sys/dev/usb/wlan/if_rum.c10
-rw-r--r--freebsd/sys/dev/usb/wlan/if_rumvar.h2
-rw-r--r--freebsd/sys/dev/usb/wlan/if_run.c145
-rw-r--r--freebsd/sys/dev/usb/wlan/if_runreg.h3
-rw-r--r--freebsd/sys/dev/usb/wlan/if_runvar.h2
-rw-r--r--freebsd/sys/dev/usb/wlan/if_uath.c6
-rw-r--r--freebsd/sys/dev/usb/wlan/if_uathvar.h3
-rw-r--r--freebsd/sys/dev/usb/wlan/if_upgt.c5
-rw-r--r--freebsd/sys/dev/usb/wlan/if_upgtvar.h2
-rw-r--r--freebsd/sys/dev/usb/wlan/if_ural.c10
-rw-r--r--freebsd/sys/dev/usb/wlan/if_uralvar.h2
-rw-r--r--freebsd/sys/dev/usb/wlan/if_urtw.c142
-rw-r--r--freebsd/sys/dev/usb/wlan/if_urtwvar.h12
-rw-r--r--freebsd/sys/dev/usb/wlan/if_zyd.c7
-rw-r--r--freebsd/sys/dev/usb/wlan/if_zydreg.h6
-rw-r--r--freebsd/sys/fs/devfs/devfs_vnops.c3
-rw-r--r--freebsd/sys/i386/include/machine/cpufunc.h50
-rw-r--r--freebsd/sys/i386/include/machine/md_var.h2
-rw-r--r--freebsd/sys/kern/init_main.c25
-rw-r--r--freebsd/sys/kern/kern_conf.c2
-rw-r--r--freebsd/sys/kern/kern_event.c1
-rw-r--r--freebsd/sys/kern/kern_intr.c37
-rw-r--r--freebsd/sys/kern/kern_mbuf.c470
-rw-r--r--freebsd/sys/kern/kern_mib.c101
-rw-r--r--freebsd/sys/kern/kern_mtxpool.c4
-rw-r--r--freebsd/sys/kern/kern_synch.c83
-rw-r--r--freebsd/sys/kern/kern_sysctl.c683
-rw-r--r--freebsd/sys/kern/kern_time.c12
-rw-r--r--freebsd/sys/kern/kern_timeout.c57
-rw-r--r--freebsd/sys/kern/kern_uuid.c2
-rw-r--r--freebsd/sys/kern/subr_blist.c344
-rw-r--r--freebsd/sys/kern/subr_bus.c135
-rw-r--r--freebsd/sys/kern/subr_eventhandler.c1
-rw-r--r--freebsd/sys/kern/subr_gtaskqueue.c101
-rw-r--r--freebsd/sys/kern/subr_kobj.c95
-rw-r--r--freebsd/sys/kern/subr_lock.c42
-rw-r--r--freebsd/sys/kern/subr_pcpu.c12
-rw-r--r--freebsd/sys/kern/subr_prf.c45
-rw-r--r--freebsd/sys/kern/subr_sbuf.c159
-rw-r--r--freebsd/sys/kern/subr_sleepqueue.c55
-rw-r--r--freebsd/sys/kern/subr_taskqueue.c2
-rw-r--r--freebsd/sys/kern/sys_generic.c6
-rwxr-xr-xfreebsd/sys/kern/sys_pipe.c95
-rw-r--r--freebsd/sys/kern/tty.c8
-rw-r--r--freebsd/sys/kern/uipc_mbuf.c292
-rw-r--r--freebsd/sys/kern/uipc_mbuf2.c2
-rw-r--r--freebsd/sys/kern/uipc_sockbuf.c139
-rw-r--r--freebsd/sys/kern/uipc_socket.c136
-rw-r--r--freebsd/sys/kern/uipc_syscalls.c45
-rw-r--r--freebsd/sys/kern/uipc_usrreq.c67
-rw-r--r--freebsd/sys/libkern/gsb_crc32.c (renamed from freebsd/sys/libkern/crc32.c)1
-rw-r--r--freebsd/sys/mips/include/machine/cpufunc.h12
-rw-r--r--freebsd/sys/net/altq/altq_cbq.c5
-rw-r--r--freebsd/sys/net/altq/altq_codel.c5
-rw-r--r--freebsd/sys/net/altq/altq_fairq.c5
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.c15
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.h1
-rw-r--r--freebsd/sys/net/altq/altq_priq.c5
-rw-r--r--freebsd/sys/net/altq/altq_subr.c21
-rw-r--r--freebsd/sys/net/altq/altq_var.h12
-rw-r--r--freebsd/sys/net/bpf.c778
-rw-r--r--freebsd/sys/net/bpf.h16
-rw-r--r--freebsd/sys/net/bpf_buffer.c13
-rw-r--r--freebsd/sys/net/bpfdesc.h8
-rw-r--r--freebsd/sys/net/bridgestp.c8
-rw-r--r--freebsd/sys/net/ethernet.h5
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.c49
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.h9
-rw-r--r--freebsd/sys/net/ieee_oui.h85
-rw-r--r--freebsd/sys/net/if.c193
-rw-r--r--freebsd/sys/net/if_arp.h3
-rw-r--r--freebsd/sys/net/if_bridge.c208
-rw-r--r--freebsd/sys/net/if_clone.h4
-rw-r--r--freebsd/sys/net/if_dead.c18
-rw-r--r--freebsd/sys/net/if_enc.c10
-rw-r--r--freebsd/sys/net/if_ethersubr.c104
-rw-r--r--freebsd/sys/net/if_gre.c153
-rw-r--r--freebsd/sys/net/if_gre.h66
-rw-r--r--freebsd/sys/net/if_lagg.c242
-rw-r--r--freebsd/sys/net/if_lagg.h6
-rw-r--r--freebsd/sys/net/if_llatbl.c11
-rw-r--r--freebsd/sys/net/if_llatbl.h2
-rw-r--r--freebsd/sys/net/if_spppsubr.c20
-rw-r--r--freebsd/sys/net/if_stf.c1
-rw-r--r--freebsd/sys/net/if_tap.c1133
-rw-r--r--freebsd/sys/net/if_tap.h24
-rw-r--r--freebsd/sys/net/if_tapvar.h71
-rw-r--r--freebsd/sys/net/if_tun.c1055
-rw-r--r--freebsd/sys/net/if_tun.h1
-rw-r--r--freebsd/sys/net/if_tuntap.c1734
-rw-r--r--freebsd/sys/net/if_var.h75
-rw-r--r--freebsd/sys/net/if_vlan.c282
-rw-r--r--freebsd/sys/net/if_vlan_var.h4
-rw-r--r--freebsd/sys/net/iflib.h65
-rw-r--r--freebsd/sys/net/netisr.c1
-rw-r--r--freebsd/sys/net/pfil.c882
-rw-r--r--freebsd/sys/net/pfil.h233
-rw-r--r--freebsd/sys/net/pfvar.h47
-rw-r--r--freebsd/sys/net/route.c86
-rw-r--r--freebsd/sys/net/route.h1
-rw-r--r--freebsd/sys/net/route_var.h1
-rw-r--r--freebsd/sys/net/rtsock.c448
-rw-r--r--freebsd/sys/net/sff8472.h79
-rw-r--r--freebsd/sys/net/vnet.h3
-rw-r--r--freebsd/sys/net80211/ieee80211.c17
-rw-r--r--freebsd/sys/net80211/ieee80211.h2
-rw-r--r--freebsd/sys/net80211/ieee80211_adhoc.c8
-rw-r--r--freebsd/sys/net80211/ieee80211_amrr.c43
-rw-r--r--freebsd/sys/net80211/ieee80211_crypto.c11
-rw-r--r--freebsd/sys/net80211/ieee80211_dfs.c3
-rw-r--r--freebsd/sys/net80211/ieee80211_freebsd.c53
-rw-r--r--freebsd/sys/net80211/ieee80211_freebsd.h12
-rw-r--r--freebsd/sys/net80211/ieee80211_hostap.c8
-rw-r--r--freebsd/sys/net80211/ieee80211_ht.c9
-rw-r--r--freebsd/sys/net80211/ieee80211_hwmp.c1
-rw-r--r--freebsd/sys/net80211/ieee80211_ioctl.c107
-rw-r--r--freebsd/sys/net80211/ieee80211_mesh.c8
-rw-r--r--freebsd/sys/net80211/ieee80211_output.c108
-rw-r--r--freebsd/sys/net80211/ieee80211_proto.c3
-rw-r--r--freebsd/sys/net80211/ieee80211_proto.h16
-rw-r--r--freebsd/sys/net80211/ieee80211_rssadapt.c33
-rw-r--r--freebsd/sys/net80211/ieee80211_scan.c12
-rw-r--r--freebsd/sys/net80211/ieee80211_scan_sta.c69
-rw-r--r--freebsd/sys/net80211/ieee80211_sta.c8
-rw-r--r--freebsd/sys/net80211/ieee80211_tdma.c3
-rw-r--r--freebsd/sys/net80211/ieee80211_var.h9
-rw-r--r--freebsd/sys/net80211/ieee80211_wds.c10
-rw-r--r--freebsd/sys/net80211/ieee80211_wps.h149
-rw-r--r--freebsd/sys/netinet/cc/cc_newreno.c15
-rw-r--r--freebsd/sys/netinet/if_ether.c82
-rw-r--r--freebsd/sys/netinet/igmp.c42
-rw-r--r--freebsd/sys/netinet/in.c58
-rw-r--r--freebsd/sys/netinet/in_fib.c7
-rw-r--r--freebsd/sys/netinet/in_fib.h3
-rw-r--r--freebsd/sys/netinet/in_mcast.c533
-rw-r--r--freebsd/sys/netinet/in_pcb.c204
-rw-r--r--freebsd/sys/netinet/in_pcb.h28
-rw-r--r--freebsd/sys/netinet/in_var.h52
-rw-r--r--freebsd/sys/netinet/ip_carp.c137
-rw-r--r--freebsd/sys/netinet/ip_divert.c9
-rw-r--r--freebsd/sys/netinet/ip_fastfwd.c16
-rw-r--r--freebsd/sys/netinet/ip_fw.h8
-rw-r--r--freebsd/sys/netinet/ip_gre.c271
-rw-r--r--freebsd/sys/netinet/ip_icmp.c34
-rw-r--r--freebsd/sys/netinet/ip_input.c46
-rw-r--r--freebsd/sys/netinet/ip_mroute.c13
-rw-r--r--freebsd/sys/netinet/ip_options.c9
-rw-r--r--freebsd/sys/netinet/ip_output.c372
-rw-r--r--freebsd/sys/netinet/ip_reass.c1
-rw-r--r--freebsd/sys/netinet/ip_var.h23
-rw-r--r--freebsd/sys/netinet/libalias/alias_sctp.c1
-rw-r--r--freebsd/sys/netinet/netdump/netdump.h18
-rw-r--r--freebsd/sys/netinet/raw_ip.c62
-rw-r--r--freebsd/sys/netinet/sctp.h2
-rw-r--r--freebsd/sys/netinet/sctp_asconf.c18
-rw-r--r--freebsd/sys/netinet/sctp_auth.c2
-rw-r--r--freebsd/sys/netinet/sctp_bsd_addr.c6
-rw-r--r--freebsd/sys/netinet/sctp_constants.h3
-rw-r--r--freebsd/sys/netinet/sctp_crc32.c13
-rw-r--r--freebsd/sys/netinet/sctp_indata.c73
-rw-r--r--freebsd/sys/netinet/sctp_indata.h1
-rw-r--r--freebsd/sys/netinet/sctp_input.c104
-rw-r--r--freebsd/sys/netinet/sctp_os_bsd.h9
-rw-r--r--freebsd/sys/netinet/sctp_output.c370
-rw-r--r--freebsd/sys/netinet/sctp_output.h4
-rw-r--r--freebsd/sys/netinet/sctp_pcb.c40
-rw-r--r--freebsd/sys/netinet/sctp_pcb.h8
-rw-r--r--freebsd/sys/netinet/sctp_structs.h2
-rw-r--r--freebsd/sys/netinet/sctp_usrreq.c285
-rw-r--r--freebsd/sys/netinet/sctputil.c140
-rw-r--r--freebsd/sys/netinet/sctputil.h11
-rw-r--r--freebsd/sys/netinet/tcp_hpts.h132
-rw-r--r--freebsd/sys/netinet/tcp_input.c134
-rw-r--r--freebsd/sys/netinet/tcp_log_buf.h32
-rw-r--r--freebsd/sys/netinet/tcp_lro.c915
-rw-r--r--freebsd/sys/netinet/tcp_lro.h16
-rw-r--r--freebsd/sys/netinet/tcp_offload.c2
-rw-r--r--freebsd/sys/netinet/tcp_output.c83
-rw-r--r--freebsd/sys/netinet/tcp_reass.c58
-rw-r--r--freebsd/sys/netinet/tcp_sack.c210
-rw-r--r--freebsd/sys/netinet/tcp_subr.c198
-rw-r--r--freebsd/sys/netinet/tcp_syncache.c40
-rw-r--r--freebsd/sys/netinet/tcp_timer.c10
-rw-r--r--freebsd/sys/netinet/tcp_timer.h4
-rw-r--r--freebsd/sys/netinet/tcp_timewait.c2
-rw-r--r--freebsd/sys/netinet/tcp_usrreq.c88
-rw-r--r--freebsd/sys/netinet/tcp_var.h23
-rw-r--r--freebsd/sys/netinet/toecore.h3
-rw-r--r--freebsd/sys/netinet/udp_usrreq.c83
-rw-r--r--freebsd/sys/netinet6/frag6.c815
-rw-r--r--freebsd/sys/netinet6/icmp6.c101
-rw-r--r--freebsd/sys/netinet6/in6.c101
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.c75
-rw-r--r--freebsd/sys/netinet6/in6_mcast.c525
-rw-r--r--freebsd/sys/netinet6/in6_pcb.c50
-rw-r--r--freebsd/sys/netinet6/in6_pcb.h2
-rw-r--r--freebsd/sys/netinet6/in6_proto.c42
-rw-r--r--freebsd/sys/netinet6/in6_src.c16
-rw-r--r--freebsd/sys/netinet6/in6_var.h98
-rw-r--r--freebsd/sys/netinet6/ip6_fastfwd.c12
-rw-r--r--freebsd/sys/netinet6/ip6_forward.c9
-rw-r--r--freebsd/sys/netinet6/ip6_id.c11
-rw-r--r--freebsd/sys/netinet6/ip6_input.c57
-rw-r--r--freebsd/sys/netinet6/ip6_output.c488
-rw-r--r--freebsd/sys/netinet6/ip6_var.h49
-rw-r--r--freebsd/sys/netinet6/mld6.c229
-rw-r--r--freebsd/sys/netinet6/mld6_var.h3
-rw-r--r--freebsd/sys/netinet6/nd6.c81
-rw-r--r--freebsd/sys/netinet6/nd6.h4
-rw-r--r--freebsd/sys/netinet6/nd6_nbr.c6
-rw-r--r--freebsd/sys/netinet6/nd6_rtr.c136
-rw-r--r--freebsd/sys/netinet6/raw_ip6.c82
-rw-r--r--freebsd/sys/netinet6/scope6.c13
-rw-r--r--freebsd/sys/netinet6/sctp6_usrreq.c53
-rw-r--r--freebsd/sys/netinet6/udp6_usrreq.c15
-rw-r--r--freebsd/sys/netipsec/ipsec.c7
-rw-r--r--freebsd/sys/netipsec/ipsec.h2
-rw-r--r--freebsd/sys/netipsec/key.c40
-rw-r--r--freebsd/sys/netipsec/key.h1
-rw-r--r--freebsd/sys/netipsec/xform_ah.c21
-rw-r--r--freebsd/sys/netipsec/xform_esp.c24
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_private.h109
-rw-r--r--freebsd/sys/netpfil/pf/if_pfsync.c48
-rw-r--r--freebsd/sys/netpfil/pf/pf.c54
-rw-r--r--freebsd/sys/netpfil/pf/pf_if.c33
-rw-r--r--freebsd/sys/netpfil/pf/pf_ioctl.c676
-rw-r--r--freebsd/sys/netpfil/pf/pf_norm.c45
-rw-r--r--freebsd/sys/netpfil/pf/pf_table.c231
-rw-r--r--freebsd/sys/opencrypto/cast.c2
-rw-r--r--freebsd/sys/opencrypto/cast.h2
-rw-r--r--freebsd/sys/opencrypto/cbc_mac.c267
-rw-r--r--freebsd/sys/opencrypto/cbc_mac.h67
-rw-r--r--freebsd/sys/opencrypto/cryptodeflate.c97
-rw-r--r--freebsd/sys/opencrypto/cryptodev.c75
-rw-r--r--freebsd/sys/opencrypto/cryptodev.h11
-rw-r--r--freebsd/sys/opencrypto/cryptosoft.c86
-rw-r--r--freebsd/sys/opencrypto/deflate.h4
-rw-r--r--freebsd/sys/opencrypto/skipjack.c2
-rw-r--r--freebsd/sys/opencrypto/skipjack.h2
-rw-r--r--freebsd/sys/opencrypto/xform_aes_icm.c42
-rw-r--r--freebsd/sys/opencrypto/xform_aes_xts.c8
-rw-r--r--freebsd/sys/opencrypto/xform_auth.h5
-rw-r--r--freebsd/sys/opencrypto/xform_blf.c4
-rw-r--r--freebsd/sys/opencrypto/xform_cast5.c4
-rw-r--r--freebsd/sys/opencrypto/xform_cbc_mac.c57
-rw-r--r--freebsd/sys/opencrypto/xform_cml.c6
-rw-r--r--freebsd/sys/opencrypto/xform_des1.c12
-rw-r--r--freebsd/sys/opencrypto/xform_des3.c16
-rw-r--r--freebsd/sys/opencrypto/xform_enc.h5
-rw-r--r--freebsd/sys/opencrypto/xform_null.c4
-rw-r--r--freebsd/sys/opencrypto/xform_rijndael.c6
-rw-r--r--freebsd/sys/opencrypto/xform_skipjack.c4
-rw-r--r--freebsd/sys/powerpc/include/machine/cpufunc.h37
-rw-r--r--freebsd/sys/powerpc/include/machine/intr_machdep.h2
-rw-r--r--freebsd/sys/powerpc/include/machine/spr.h62
-rw-r--r--freebsd/sys/sys/_eventhandler.h144
-rw-r--r--freebsd/sys/sys/_lock.h34
-rw-r--r--freebsd/sys/sys/_rwlock.h1
-rw-r--r--freebsd/sys/sys/_task.h20
-rw-r--r--freebsd/sys/sys/ata.h50
-rw-r--r--freebsd/sys/sys/blist.h10
-rw-r--r--freebsd/sys/sys/buf.h31
-rw-r--r--freebsd/sys/sys/buf_ring.h21
-rw-r--r--freebsd/sys/sys/bufobj.h2
-rw-r--r--freebsd/sys/sys/bus.h26
-rw-r--r--freebsd/sys/sys/bus_dma.h4
-rw-r--r--freebsd/sys/sys/capsicum.h7
-rw-r--r--freebsd/sys/sys/conf.h17
-rw-r--r--freebsd/sys/sys/counter.h16
-rw-r--r--freebsd/sys/sys/cpu.h2
-rw-r--r--freebsd/sys/sys/ctype.h72
-rw-r--r--freebsd/sys/sys/disk.h54
-rw-r--r--freebsd/sys/sys/eventhandler.h25
-rw-r--r--freebsd/sys/sys/fail.h13
-rw-r--r--freebsd/sys/sys/file.h15
-rw-r--r--freebsd/sys/sys/filedesc.h16
-rw-r--r--freebsd/sys/sys/gsb_crc32.h47
-rw-r--r--freebsd/sys/sys/gtaskqueue.h44
-rw-r--r--freebsd/sys/sys/interrupt.h5
-rw-r--r--freebsd/sys/sys/ktls.h194
-rw-r--r--freebsd/sys/sys/libkern.h35
-rw-r--r--freebsd/sys/sys/lockmgr.h4
-rw-r--r--freebsd/sys/sys/lockstat.h7
-rw-r--r--freebsd/sys/sys/malloc.h9
-rw-r--r--freebsd/sys/sys/mbuf.h257
-rw-r--r--freebsd/sys/sys/mount.h121
-rw-r--r--freebsd/sys/sys/mouse.h1
-rw-r--r--freebsd/sys/sys/pcpu.h41
-rw-r--r--freebsd/sys/sys/proc.h43
-rw-r--r--freebsd/sys/sys/random.h35
-rw-r--r--freebsd/sys/sys/refcount.h131
-rw-r--r--freebsd/sys/sys/rmlock.h1
-rw-r--r--freebsd/sys/sys/rwlock.h2
-rw-r--r--freebsd/sys/sys/sbuf.h5
-rw-r--r--freebsd/sys/sys/seq.h156
-rw-r--r--freebsd/sys/sys/seqc.h107
-rw-r--r--freebsd/sys/sys/sglist.h7
-rw-r--r--freebsd/sys/sys/sleepqueue.h2
-rw-r--r--freebsd/sys/sys/slicer.h2
-rw-r--r--freebsd/sys/sys/smp.h2
-rw-r--r--freebsd/sys/sys/sockbuf.h7
-rw-r--r--freebsd/sys/sys/socketvar.h14
-rw-r--r--freebsd/sys/sys/sysctl.h54
-rw-r--r--freebsd/sys/sys/sysproto.h32
-rw-r--r--freebsd/sys/sys/systm.h19
-rw-r--r--freebsd/sys/sys/tree.h6
-rw-r--r--freebsd/sys/sys/ucred.h7
-rw-r--r--freebsd/sys/sys/user.h2
-rw-r--r--freebsd/sys/sys/vmmeter.h2
-rw-r--r--freebsd/sys/sys/vnode.h71
-rw-r--r--freebsd/sys/sys/watchdog.h2
-rw-r--r--freebsd/sys/vm/uma.h56
-rw-r--r--freebsd/sys/vm/uma_core.c1274
-rw-r--r--freebsd/sys/vm/uma_int.h84
-rw-r--r--freebsd/sys/x86/include/machine/bus.h4
-rw-r--r--freebsd/sys/x86/include/machine/pci_cfgreg.h9
447 files changed, 20648 insertions, 10684 deletions
diff --git a/freebsd/sys/arm/include/machine/cpufunc.h b/freebsd/sys/arm/include/machine/cpufunc.h
index 9dba8043..f34cfbff 100644
--- a/freebsd/sys/arm/include/machine/cpufunc.h
+++ b/freebsd/sys/arm/include/machine/cpufunc.h
@@ -360,6 +360,64 @@ extern u_int arm_cache_level;
extern u_int arm_cache_loc;
extern u_int arm_cache_type[14];
+#if __ARM_ARCH >= 6
+#define HAVE_INLINE_FFS
+
+static __inline __pure2 int
+ffs(int mask)
+{
+
+ return (__builtin_ffs(mask));
+}
+
+#define HAVE_INLINE_FFSL
+
+static __inline __pure2 int
+ffsl(long mask)
+{
+
+ return (__builtin_ffsl(mask));
+}
+
+#define HAVE_INLINE_FFSLL
+
+static __inline __pure2 int
+ffsll(long long mask)
+{
+
+ return (__builtin_ffsll(mask));
+}
+
+#define HAVE_INLINE_FLS
+
+static __inline __pure2 int
+fls(int mask)
+{
+
+ return (mask == 0 ? 0 :
+ 8 * sizeof(mask) - __builtin_clz((u_int)mask));
+}
+
+#define HAVE_INLINE_FLSL
+
+static __inline __pure2 int
+flsl(long mask)
+{
+
+ return (mask == 0 ? 0 :
+ 8 * sizeof(mask) - __builtin_clzl((u_long)mask));
+}
+
+#define HAVE_INLINE_FLSLL
+
+static __inline __pure2 int
+flsll(long long mask)
+{
+
+ return (mask == 0 ? 0 :
+ 8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
+}
+#endif
#else /* !_KERNEL */
static __inline void
diff --git a/freebsd/sys/arm/ti/am335x/tda19988.c b/freebsd/sys/arm/ti/am335x/tda19988.c
index 282353ab..7ff4cf5b 100644
--- a/freebsd/sys/arm/ti/am335x/tda19988.c
+++ b/freebsd/sys/arm/ti/am335x/tda19988.c
@@ -245,7 +245,6 @@ struct tda19988_softc {
uint32_t sc_addr;
uint32_t sc_cec_addr;
uint16_t sc_version;
- struct intr_config_hook enum_hook;
int sc_current_page;
uint8_t *sc_edid;
uint32_t sc_edid_len;
@@ -647,15 +646,14 @@ done:
}
static void
-tda19988_start(void *xdev)
+tda19988_start(struct tda19988_softc *sc)
{
- struct tda19988_softc *sc;
- device_t dev = (device_t)xdev;
+ device_t dev;
uint8_t data;
uint16_t version;
- sc = device_get_softc(dev);
-
+ dev = sc->sc_dev;
+
tda19988_cec_write(sc, TDA_CEC_ENAMODS, ENAMODS_RXSENS | ENAMODS_HDMI);
DELAY(1000);
tda19988_cec_read(sc, 0xfe, &data);
@@ -701,7 +699,7 @@ tda19988_start(void *xdev)
break;
default:
device_printf(dev, "Unknown device: %04x\n", sc->sc_version);
- goto done;
+ return;
}
tda19988_reg_write(sc, TDA_DDC_CTRL, DDC_ENABLE);
@@ -712,16 +710,13 @@ tda19988_start(void *xdev)
if (tda19988_read_edid(sc) < 0) {
device_printf(dev, "failed to read EDID\n");
- goto done;
+ return;
}
/* Default values for RGB 4:4:4 mapping */
tda19988_reg_write(sc, TDA_VIP_CNTRL_0, 0x23);
tda19988_reg_write(sc, TDA_VIP_CNTRL_1, 0x01);
tda19988_reg_write(sc, TDA_VIP_CNTRL_2, 0x45);
-
-done:
- config_intrhook_disestablish(&sc->enum_hook);
}
static int
@@ -740,15 +735,11 @@ tda19988_attach(device_t dev)
device_set_desc(dev, "NXP TDA19988 HDMI transmitter");
- sc->enum_hook.ich_func = tda19988_start;
- sc->enum_hook.ich_arg = dev;
-
- if (config_intrhook_establish(&sc->enum_hook) != 0)
- return (ENOMEM);
-
node = ofw_bus_get_node(dev);
OF_device_register_xref(OF_xref_from_node(node), dev);
+ tda19988_start(sc);
+
return (0);
}
diff --git a/freebsd/sys/arm/ti/cpsw/if_cpsw.c b/freebsd/sys/arm/ti/cpsw/if_cpsw.c
index 1fbda688..be9ad62b 100644
--- a/freebsd/sys/arm/ti/cpsw/if_cpsw.c
+++ b/freebsd/sys/arm/ti/cpsw/if_cpsw.c
@@ -84,6 +84,8 @@ __FBSDID("$FreeBSD$");
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
+
+#include <dev/fdt/fdt_common.h>
#ifdef CPSW_ETHERSWITCH
#include <dev/etherswitch/etherswitch.h>
@@ -749,7 +751,7 @@ cpsw_get_fdt_data(struct cpsw_softc *sc, int port)
phandle_t child;
unsigned long mdio_child_addr;
- /* Find any slave with phy_id */
+ /* Find any slave with phy-handle/phy_id */
phy = -1;
vlan = -1;
for (child = OF_child(sc->node); child != 0; child = OF_peer(child)) {
@@ -760,14 +762,20 @@ cpsw_get_fdt_data(struct cpsw_softc *sc, int port)
continue;
}
OF_prop_free(name);
- if (mdio_child_addr != slave_mdio_addr[port])
+
+ if (mdio_child_addr != slave_mdio_addr[port] &&
+ mdio_child_addr != (slave_mdio_addr[port] & 0xFFF))
continue;
- len = OF_getproplen(child, "phy_id");
- if (len / sizeof(pcell_t) == 2) {
- /* Get phy address from fdt */
- if (OF_getencprop(child, "phy_id", phy_id, len) > 0)
- phy = phy_id[1];
+ if (fdt_get_phyaddr(child, NULL, &phy, NULL) != 0){
+ /* Users with old DTB will have phy_id instead */
+ phy = -1;
+ len = OF_getproplen(child, "phy_id");
+ if (len / sizeof(pcell_t) == 2) {
+ /* Get phy address from fdt */
+ if (OF_getencprop(child, "phy_id", phy_id, len) > 0)
+ phy = phy_id[1];
+ }
}
len = OF_getproplen(child, "dual_emac_res_vlan");
diff --git a/freebsd/sys/arm/ti/ti_hwmods.c b/freebsd/sys/arm/ti/ti_hwmods.c
index 450679a7..a546d762 100644
--- a/freebsd/sys/arm/ti/ti_hwmods.c
+++ b/freebsd/sys/arm/ti/ti_hwmods.c
@@ -99,6 +99,16 @@ struct hwmod ti_hwmods[] = {
{NULL, 0}
};
+static inline int
+ti_get_hwmods_prop(phandle_t node, void **name)
+{
+ int len;
+
+ if ((len = OF_getprop_alloc(node, "ti,hwmods", name)) > 0)
+ return (len);
+ return (OF_getprop_alloc(OF_parent(node), "ti,hwmods", name));
+}
+
clk_ident_t
ti_hwmods_get_clock(device_t dev)
{
@@ -112,7 +122,7 @@ ti_hwmods_get_clock(device_t dev)
if ((node = ofw_bus_get_node(dev)) == 0)
return (INVALID_CLK_IDENT);
- if ((len = OF_getprop_alloc(node, "ti,hwmods", (void**)&name)) <= 0)
+ if ((len = ti_get_hwmods_prop(node, (void **)&name)) <= 0)
return (INVALID_CLK_IDENT);
buf = name;
@@ -150,7 +160,7 @@ int ti_hwmods_contains(device_t dev, const char *hwmod)
if ((node = ofw_bus_get_node(dev)) == 0)
return (0);
- if ((len = OF_getprop_alloc(node, "ti,hwmods", (void**)&name)) <= 0)
+ if ((len = ti_get_hwmods_prop(node, (void **)&name)) <= 0)
return (0);
buf = name;
@@ -184,7 +194,7 @@ ti_hwmods_get_unit(device_t dev, const char *hwmod)
if ((node = ofw_bus_get_node(dev)) == 0)
return (0);
- if ((len = OF_getprop_alloc(node, "ti,hwmods", (void**)&name)) <= 0)
+ if ((len = ti_get_hwmods_prop(node, (void **)&name)) <= 0)
return (0);
buf = name;
diff --git a/freebsd/sys/arm/ti/ti_sdhci.c b/freebsd/sys/arm/ti/ti_sdhci.c
index c5d29cb6..a2be1f19 100644
--- a/freebsd/sys/arm/ti/ti_sdhci.c
+++ b/freebsd/sys/arm/ti/ti_sdhci.c
@@ -484,15 +484,14 @@ ti_sdhci_hw_init(device_t dev)
* The attach() routine has examined fdt data and set flags in
* slot.host.caps to reflect what voltages we can handle. Set those
* values in the CAPA register. The manual says that these values can
- * only be set once, "before initialization" whatever that means, and
- * that they survive a reset. So maybe doing this will be a no-op if
- * u-boot has already initialized the hardware.
+ * only be set once, and that they survive a reset so unless u-boot didn't
+ * set this register this code is a no-op.
*/
regval = ti_mmchs_read_4(sc, MMCHS_SD_CAPA);
if (sc->slot.host.caps & MMC_OCR_LOW_VOLTAGE)
regval |= MMCHS_SD_CAPA_VS18;
- if (sc->slot.host.caps & (MMC_OCR_290_300 | MMC_OCR_300_310))
- regval |= MMCHS_SD_CAPA_VS30;
+ if (sc->slot.host.caps & (MMC_OCR_320_330 | MMC_OCR_330_340))
+ regval |= MMCHS_SD_CAPA_VS33;
ti_mmchs_write_4(sc, MMCHS_SD_CAPA, regval);
/* Set initial host configuration (1-bit, std speed, pwr off). */
@@ -526,17 +525,20 @@ ti_sdhci_attach(device_t dev)
}
/*
- * The hardware can inherently do dual-voltage (1p8v, 3p0v) on the first
+ * The hardware can inherently do dual-voltage (1p8v, 3p3v) on the first
* device, and only 1p8v on other devices unless an external transceiver
* is used. The only way we could know about a transceiver is fdt data.
* Note that we have to do this before calling ti_sdhci_hw_init() so
* that it can set the right values in the CAPA register, which can only
* be done once and never reset.
*/
- sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE;
- if (sc->mmchs_clk_id == MMC1_CLK || OF_hasprop(node, "ti,dual-volt")) {
- sc->slot.host.caps |= MMC_OCR_290_300 | MMC_OCR_300_310;
- }
+ if (OF_hasprop(node, "ti,dual-volt")) {
+ sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE | MMC_OCR_320_330 | MMC_OCR_330_340;
+ } else if (OF_hasprop(node, "no-1-8-v")) {
+ sc->slot.host.caps |= MMC_OCR_320_330 | MMC_OCR_330_340;
+ } else
+ sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE;
+
/*
* Set the offset from the device's memory start to the MMCHS registers.
@@ -757,7 +759,7 @@ static driver_t ti_sdhci_driver = {
DRIVER_MODULE(sdhci_ti, simplebus, ti_sdhci_driver, ti_sdhci_devclass, NULL,
NULL);
-MODULE_DEPEND(sdhci_ti, sdhci, 1, 1, 1);
+SDHCI_DEPEND(sdhci_ti);
#ifndef MMCCAM
MMC_DECLARE_BRIDGE(sdhci_ti);
diff --git a/freebsd/sys/cam/ata/ata_all.h b/freebsd/sys/cam/ata/ata_all.h
index 087d6820..ca635253 100644
--- a/freebsd/sys/cam/ata/ata_all.h
+++ b/freebsd/sys/cam/ata/ata_all.h
@@ -135,6 +135,7 @@ void ata_read_log(struct ccb_ataio *ataio, uint32_t retries,
uint16_t block_count, uint32_t protocol,
uint8_t *data_ptr, uint32_t dxfer_len, uint32_t timeout);
+void ata_param_fixup(struct ata_params *ident_buf);
void ata_bswap(int8_t *buf, int len);
void ata_btrim(int8_t *buf, int len);
void ata_bpack(int8_t *src, int8_t *dst, int len);
diff --git a/freebsd/sys/cam/cam.c b/freebsd/sys/cam/cam.c
index 5d07bebf..25f99ae7 100644
--- a/freebsd/sys/cam/cam.c
+++ b/freebsd/sys/cam/cam.c
@@ -418,7 +418,6 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
switch (ccb->ccb_h.func_code) {
case XPT_ATA_IO:
ata_command_sbuf(&ccb->ataio, &sb);
- sbuf_printf(&sb, "\n");
break;
case XPT_SCSI_IO:
#ifdef _KERNEL
@@ -426,17 +425,22 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
#else /* !_KERNEL */
scsi_command_string(device, &ccb->csio, &sb);
#endif /* _KERNEL/!_KERNEL */
- sbuf_printf(&sb, "\n");
break;
case XPT_SMP_IO:
smp_command_sbuf(&ccb->smpio, &sb, path_str, 79 -
strlen(path_str), (proto_flags &
CAM_ESMF_PRINT_FULL_CMD) ? 79 : 0);
- sbuf_printf(&sb, "\n");
+ break;
+ case XPT_NVME_IO:
+ case XPT_NVME_ADMIN:
+ nvme_command_sbuf(&ccb->nvmeio, &sb);
break;
default:
+ sbuf_printf(&sb, "CAM func %#x",
+ ccb->ccb_h.func_code);
break;
}
+ sbuf_printf(&sb, "\n");
}
if (flags & CAM_ESF_CAM_STATUS) {
diff --git a/freebsd/sys/cam/cam_ccb.h b/freebsd/sys/cam/cam_ccb.h
index 9119468d..7deeb523 100644
--- a/freebsd/sys/cam/cam_ccb.h
+++ b/freebsd/sys/cam/cam_ccb.h
@@ -1074,6 +1074,7 @@ struct ccb_trans_settings_mmc {
#define MMC_CAP_8_BIT_DATA (1 << 1) /* Can do 8-bit data transfers */
#define MMC_CAP_HSPEED (1 << 2) /* Can do High Speed transfers */
uint32_t host_caps;
+ uint32_t host_max_data;
};
/* Get/Set transfer rate/width/disconnection/tag queueing settings */
diff --git a/freebsd/sys/cam/cam_periph.h b/freebsd/sys/cam/cam_periph.h
index 6eb0084a..8cd9f800 100644
--- a/freebsd/sys/cam/cam_periph.h
+++ b/freebsd/sys/cam/cam_periph.h
@@ -37,6 +37,8 @@
#include <cam/cam_sim.h>
#ifdef _KERNEL
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
@@ -149,6 +151,7 @@ struct cam_periph {
struct cam_periph_map_info {
int num_bufs_used;
+ void *orig[CAM_PERIPH_MAXMAPS];
struct buf *bp[CAM_PERIPH_MAXMAPS];
};
diff --git a/freebsd/sys/cam/cam_sim.h b/freebsd/sys/cam/cam_sim.h
index 95dedb08..55392e48 100644
--- a/freebsd/sys/cam/cam_sim.h
+++ b/freebsd/sys/cam/cam_sim.h
@@ -67,6 +67,15 @@ struct cam_sim * cam_sim_alloc(sim_action_func sim_action,
int max_dev_transactions,
int max_tagged_dev_transactions,
struct cam_devq *queue);
+struct cam_sim * cam_sim_alloc_dev(sim_action_func sim_action,
+ sim_poll_func sim_poll,
+ const char *sim_name,
+ void *softc,
+ device_t dev,
+ struct mtx *mtx,
+ int max_dev_transactions,
+ int max_tagged_dev_transactions,
+ struct cam_devq *queue);
void cam_sim_free(struct cam_sim *sim, int free_devq);
void cam_sim_hold(struct cam_sim *sim);
void cam_sim_release(struct cam_sim *sim);
@@ -150,6 +159,7 @@ struct cam_sim {
struct callout callout;
struct cam_devq *devq; /* Device Queue to use for this SIM */
int refcount; /* References to the SIM. */
+ device_t sim_dev; /* For attached peripherals. */
#endif /* __rtems__ */
};
diff --git a/freebsd/sys/cam/nvme/nvme_all.h b/freebsd/sys/cam/nvme/nvme_all.h
index e31c1e5e..da40dea1 100644
--- a/freebsd/sys/cam/nvme/nvme_all.h
+++ b/freebsd/sys/cam/nvme/nvme_all.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2015 Netflix, Inc
+ * Copyright (c) 2015 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -42,8 +42,10 @@ int nvme_identify_match(caddr_t identbuffer, caddr_t table_entry);
struct sbuf;
void nvme_print_ident(const struct nvme_controller_data *, const struct nvme_namespace_data *, struct sbuf *);
-const char *nvme_op_string(const struct nvme_command *);
+const char *nvme_op_string(const struct nvme_command *, int admin);
const char *nvme_cmd_string(const struct nvme_command *, char *, size_t);
+void nvme_cmd_sbuf(const struct nvme_command *, struct sbuf *sb);
+int nvme_command_sbuf(struct ccb_nvmeio *nvmeio, struct sbuf *sb);
const void *nvme_get_identify_cntrl(struct cam_periph *);
const void *nvme_get_identify_ns(struct cam_periph *);
diff --git a/freebsd/sys/cam/scsi/scsi_all.c b/freebsd/sys/cam/scsi/scsi_all.c
index 0be7e692..99d82fee 100644
--- a/freebsd/sys/cam/scsi/scsi_all.c
+++ b/freebsd/sys/cam/scsi/scsi_all.c
@@ -382,7 +382,7 @@ static struct op_table_entry scsi_op_codes[] = {
{ 0x40, D | T | L | P | W | R | O | M | S | C, "CHANGE DEFINITION" },
/* 41 O WRITE SAME(10) */
{ 0x41, D, "WRITE SAME(10)" },
- /* 42 O UNMAP */
+ /* 42 O UNMAP */
{ 0x42, D, "UNMAP" },
/* 42 O READ SUB-CHANNEL */
{ 0x42, R, "READ SUB-CHANNEL" },
@@ -397,7 +397,8 @@ static struct op_table_entry scsi_op_codes[] = {
{ 0x46, R, "GET CONFIGURATION" },
/* 47 O PLAY AUDIO MSF */
{ 0x47, R, "PLAY AUDIO MSF" },
- /* 48 */
+ /* 48 O SANITIZE */
+ { 0x48, D, "SANITIZE" },
/* 49 */
/* 4A M GET EVENT STATUS NOTIFICATION */
{ 0x4A, R, "GET EVENT STATUS NOTIFICATION" },
@@ -1165,7 +1166,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x1A, SS_RDEF, /* XXX TBD */
"Logical unit not ready, START/STOP UNIT command in progress") },
/* D B */
- { SST(0x04, 0x1B, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x1B, SS_WAIT | EBUSY,
"Logical unit not ready, sanitize in progress") },
/* DT MAEB */
{ SST(0x04, 0x1C, SS_START | SSQ_DECREMENT_COUNT | ENXIO,
@@ -1456,7 +1457,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x11, 0x14, SS_RDEF, /* XXX TBD */
"Read error - LBA marked bad by application client") },
/* D */
- { SST(0x11, 0x15, SS_RDEF, /* XXX TBD */
+ { SST(0x11, 0x15, SS_FATAL | EIO,
"Write after sanitize required") },
/* D W O BK */
{ SST(0x12, 0x00, SS_RDEF,
@@ -2058,7 +2059,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x30, 0x13, SS_RDEF, /* XXX TBD */
"Cleaning volume expired") },
/* DT WRO BK */
- { SST(0x31, 0x00, SS_RDEF,
+ { SST(0x31, 0x00, SS_FATAL | ENXIO,
"Medium format corrupted") },
/* D L RO B */
{ SST(0x31, 0x01, SS_RDEF,
@@ -2067,7 +2068,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x31, 0x02, SS_RDEF, /* XXX TBD */
"Zoned formatting failed due to spare linking") },
/* D B */
- { SST(0x31, 0x03, SS_RDEF, /* XXX TBD */
+ { SST(0x31, 0x03, SS_FATAL | EIO,
"SANITIZE command failed") },
/* D W O BK */
{ SST(0x32, 0x00, SS_RDEF,
@@ -3947,7 +3948,7 @@ scsi_set_sense_data_fixed_va(struct scsi_sense_data *sense_data,
}
if (len > sizeof(sense->cmd_spec_info)) {
data += len - sizeof(sense->cmd_spec_info);
- len -= len - sizeof(sense->cmd_spec_info);
+ len = sizeof(sense->cmd_spec_info);
}
bcopy(data, &sense->cmd_spec_info[
sizeof(sense->cmd_spec_info) - len], len);
@@ -4066,6 +4067,10 @@ scsi_get_sense_info(struct scsi_sense_data *sense_data, u_int sense_len,
struct scsi_sense_info *info_desc;
info_desc = (struct scsi_sense_info *)desc;
+
+ if ((info_desc->byte2 & SSD_INFO_VALID) == 0)
+ goto bailout;
+
*info = scsi_8btou64(info_desc->info);
if (signed_info != NULL)
*signed_info = *info;
@@ -4086,6 +4091,9 @@ scsi_get_sense_info(struct scsi_sense_data *sense_data, u_int sense_len,
fru_desc = (struct scsi_sense_fru *)desc;
+ if (fru_desc->fru == 0)
+ goto bailout;
+
*info = fru_desc->fru;
if (signed_info != NULL)
*signed_info = (int8_t)fru_desc->fru;
@@ -4186,10 +4194,9 @@ scsi_get_sks(struct scsi_sense_data *sense_data, u_int sense_len, uint8_t *sks)
if (desc == NULL)
goto bailout;
- /*
- * No need to check the SKS valid bit for descriptor sense.
- * If the descriptor is present, it is valid.
- */
+ if ((desc->sense_key_spec[0] & SSD_SKS_VALID) == 0)
+ goto bailout;
+
bcopy(desc->sense_key_spec, sks, sizeof(desc->sense_key_spec));
break;
}
@@ -4266,9 +4273,6 @@ scsi_get_block_info(struct scsi_sense_data *sense_data, u_int sense_len,
if (SSD_FIXED_IS_PRESENT(sense, sense_len, flags) == 0)
goto bailout;
- if ((sense->flags & SSD_ILI) == 0)
- goto bailout;
-
*block_bits = sense->flags & SSD_ILI;
break;
}
@@ -4322,9 +4326,6 @@ scsi_get_stream_info(struct scsi_sense_data *sense_data, u_int sense_len,
if (SSD_FIXED_IS_PRESENT(sense, sense_len, flags) == 0)
goto bailout;
- if ((sense->flags & (SSD_ILI|SSD_EOM|SSD_FILEMARK)) == 0)
- goto bailout;
-
*stream_bits = sense->flags & (SSD_ILI|SSD_EOM|SSD_FILEMARK);
break;
}
@@ -4366,8 +4367,6 @@ scsi_progress_sbuf(struct sbuf *sb, uint16_t progress)
int
scsi_sks_sbuf(struct sbuf *sb, int sense_key, uint8_t *sks)
{
- if ((sks[0] & SSD_SKS_VALID) == 0)
- return (1);
switch (sense_key) {
case SSD_KEY_ILLEGAL_REQUEST: {
@@ -4464,7 +4463,7 @@ scsi_fru_sbuf(struct sbuf *sb, uint64_t fru)
}
void
-scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info)
+scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits)
{
int need_comma;
@@ -4472,6 +4471,7 @@ scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info)
/*
* XXX KDM this needs more descriptive decoding.
*/
+ sbuf_printf(sb, "Stream Command Sense Data: ");
if (stream_bits & SSD_DESC_STREAM_FM) {
sbuf_printf(sb, "Filemark");
need_comma = 1;
@@ -4484,15 +4484,15 @@ scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info)
if (stream_bits & SSD_DESC_STREAM_ILI)
sbuf_printf(sb, "%sILI", (need_comma) ? "," : "");
-
- sbuf_printf(sb, ": Info: %#jx", (uintmax_t) info);
}
void
-scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits, uint64_t info)
+scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits)
{
+
+ sbuf_printf(sb, "Block Command Sense Data: ");
if (block_bits & SSD_DESC_BLOCK_ILI)
- sbuf_printf(sb, "ILI: residue %#jx", (uintmax_t) info);
+ sbuf_printf(sb, "ILI");
}
void
@@ -4505,6 +4505,9 @@ scsi_sense_info_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
info = (struct scsi_sense_info *)header;
+ if ((info->byte2 & SSD_INFO_VALID) == 0)
+ return;
+
scsi_info_sbuf(sb, cdb, cdb_len, inq_data, scsi_8btou64(info->info));
}
@@ -4533,6 +4536,9 @@ scsi_sense_sks_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
sks = (struct scsi_sense_sks *)header;
+ if ((sks->sense_key_spec[0] & SSD_SKS_VALID) == 0)
+ return;
+
scsi_extract_sense_len(sense, sense_len, &error_code, &sense_key,
&asc, &ascq, /*show_errors*/ 1);
@@ -4549,6 +4555,9 @@ scsi_sense_fru_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
fru = (struct scsi_sense_fru *)header;
+ if (fru->fru == 0)
+ return;
+
scsi_fru_sbuf(sb, (uint64_t)fru->fru);
}
@@ -4559,14 +4568,9 @@ scsi_sense_stream_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
struct scsi_sense_desc_header *header)
{
struct scsi_sense_stream *stream;
- uint64_t info;
stream = (struct scsi_sense_stream *)header;
- info = 0;
-
- scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO, &info, NULL);
-
- scsi_stream_sbuf(sb, stream->byte3, info);
+ scsi_stream_sbuf(sb, stream->byte3);
}
void
@@ -4576,14 +4580,9 @@ scsi_sense_block_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
struct scsi_sense_desc_header *header)
{
struct scsi_sense_block *block;
- uint64_t info;
block = (struct scsi_sense_block *)header;
- info = 0;
-
- scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO, &info, NULL);
-
- scsi_block_sbuf(sb, block->byte3, info);
+ scsi_block_sbuf(sb, block->byte3);
}
void
@@ -4868,7 +4867,7 @@ scsi_sense_only_sbuf(struct scsi_sense_data *sense, u_int sense_len,
const char *asc_desc;
uint8_t sks[3];
uint64_t val;
- int info_valid;
+ uint8_t bits;
/*
* Get descriptions for the sense key, ASC, and ASCQ. If
@@ -4887,42 +4886,28 @@ scsi_sense_only_sbuf(struct scsi_sense_data *sense, u_int sense_len,
sbuf_printf(sb, " asc:%x,%x (%s)\n", asc, ascq, asc_desc);
/*
- * Get the info field if it is valid.
+ * Print any block or stream device-specific information.
*/
- if (scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO,
- &val, NULL) == 0)
- info_valid = 1;
- else
- info_valid = 0;
-
- if (info_valid != 0) {
- uint8_t bits;
+ if (scsi_get_block_info(sense, sense_len, inq_data,
+ &bits) == 0 && bits != 0) {
+ sbuf_cat(sb, path_str);
+ scsi_block_sbuf(sb, bits);
+ sbuf_printf(sb, "\n");
+ } else if (scsi_get_stream_info(sense, sense_len, inq_data,
+ &bits) == 0 && bits != 0) {
+ sbuf_cat(sb, path_str);
+ scsi_stream_sbuf(sb, bits);
+ sbuf_printf(sb, "\n");
+ }
- /*
- * Determine whether we have any block or stream
- * device-specific information.
- */
- if (scsi_get_block_info(sense, sense_len, inq_data,
- &bits) == 0) {
- sbuf_cat(sb, path_str);
- scsi_block_sbuf(sb, bits, val);
- sbuf_printf(sb, "\n");
- } else if (scsi_get_stream_info(sense, sense_len,
- inq_data, &bits) == 0) {
- sbuf_cat(sb, path_str);
- scsi_stream_sbuf(sb, bits, val);
- sbuf_printf(sb, "\n");
- } else if (val != 0) {
- /*
- * The information field can be valid but 0.
- * If the block or stream bits aren't set,
- * and this is 0, it isn't terribly useful
- * to print it out.
- */
- sbuf_cat(sb, path_str);
- scsi_info_sbuf(sb, cdb, cdb_len, inq_data, val);
- sbuf_printf(sb, "\n");
- }
+ /*
+ * Print the info field.
+ */
+ if (scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO,
+ &val, NULL) == 0) {
+ sbuf_cat(sb, path_str);
+ scsi_info_sbuf(sb, cdb, cdb_len, inq_data, val);
+ sbuf_printf(sb, "\n");
}
/*
@@ -5600,6 +5585,7 @@ scsi_devid_is_naa_ieee_reg(uint8_t *bufp)
{
struct scsi_vpd_id_descriptor *descr;
struct scsi_vpd_id_naa_basic *naa;
+ int n;
descr = (struct scsi_vpd_id_descriptor *)bufp;
naa = (struct scsi_vpd_id_naa_basic *)descr->identifier;
@@ -5607,7 +5593,8 @@ scsi_devid_is_naa_ieee_reg(uint8_t *bufp)
return 0;
if (descr->length < sizeof(struct scsi_vpd_id_naa_ieee_reg))
return 0;
- if ((naa->naa >> SVPD_ID_NAA_NAA_SHIFT) != SVPD_ID_NAA_IEEE_REG)
+ n = naa->naa >> SVPD_ID_NAA_NAA_SHIFT;
+ if (n != SVPD_ID_NAA_LOCAL_REG && n != SVPD_ID_NAA_IEEE_REG)
return 0;
return 1;
}
@@ -8307,10 +8294,10 @@ scsi_ata_identify(struct ccb_scsiio *csio, u_int32_t retries,
tag_action,
/*protocol*/AP_PROTO_PIO_IN,
/*ata_flags*/AP_FLAG_TDIR_FROM_DEV |
- AP_FLAG_BYT_BLOK_BYTES |
+ AP_FLAG_BYT_BLOK_BLOCKS |
AP_FLAG_TLEN_SECT_CNT,
/*features*/0,
- /*sector_count*/dxfer_len,
+ /*sector_count*/dxfer_len / 512,
/*lba*/0,
/*command*/ATA_ATA_IDENTIFY,
/*device*/ 0,
diff --git a/freebsd/sys/cam/scsi/scsi_all.h b/freebsd/sys/cam/scsi/scsi_all.h
index b5c45bc0..1e0c75bb 100644
--- a/freebsd/sys/cam/scsi/scsi_all.h
+++ b/freebsd/sys/cam/scsi/scsi_all.h
@@ -264,7 +264,9 @@ struct scsi_mode_hdr_10
u_int8_t datalen[2];
u_int8_t medium_type;
u_int8_t dev_specific;
- u_int8_t reserved[2];
+ u_int8_t flags;
+#define SMH_LONGLBA 0x01
+ u_int8_t reserved;
u_int8_t block_descr_len[2];
};
@@ -276,6 +278,20 @@ struct scsi_mode_block_descr
u_int8_t block_len[3];
};
+struct scsi_mode_block_descr_dshort
+{
+ u_int8_t num_blocks[4];
+ u_int8_t reserved;
+ u_int8_t block_len[3];
+};
+
+struct scsi_mode_block_descr_dlong
+{
+ u_int8_t num_blocks[8];
+ u_int8_t reserved[4];
+ u_int8_t block_len[4];
+};
+
struct scsi_per_res_in
{
u_int8_t opcode;
@@ -568,6 +584,7 @@ struct scsi_log_sense
#define SLS_ERROR_NONMEDIUM_PAGE 0x06
#define SLS_ERROR_LASTN_PAGE 0x07
#define SLS_LOGICAL_BLOCK_PROVISIONING 0x0c
+#define SLS_TEMPERATURE 0x0d
#define SLS_SELF_TEST_PAGE 0x10
#define SLS_SOLID_STATE_MEDIA 0x11
#define SLS_STAT_AND_PERF 0x19
@@ -683,6 +700,14 @@ struct scsi_log_informational_exceptions {
uint8_t temperature;
};
+struct scsi_log_temperature {
+ struct scsi_log_param_header hdr;
+#define SLP_TEMPERATURE 0x0000
+#define SLP_REFTEMPERATURE 0x0001
+ uint8_t reserved;
+ uint8_t temperature;
+};
+
struct scsi_control_page {
u_int8_t page_code;
u_int8_t page_length;
@@ -2763,6 +2788,19 @@ struct scsi_vpd_tpc
};
/*
+ * SCSI Feature Sets VPD Page
+ */
+struct scsi_vpd_sfs
+{
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_SCSI_SFS 0x92
+ uint8_t page_length[2];
+ uint8_t reserved[4];
+ uint8_t codes[];
+};
+
+/*
* Block Device Characteristics VPD Page based on
* T10/1799-D Revision 31
*/
@@ -2803,11 +2841,15 @@ struct scsi_vpd_block_device_characteristics
uint8_t flags;
#define SVPD_VBULS 0x01
#define SVPD_FUAB 0x02
+#define SVPD_BOCS 0x04
+#define SVPD_RBWZ 0x08
#define SVPD_ZBC_NR 0x00 /* Not Reported */
#define SVPD_HAW_ZBC 0x10 /* Host Aware */
#define SVPD_DM_ZBC 0x20 /* Drive Managed */
#define SVPD_ZBC_MASK 0x30 /* Zoned mask */
- uint8_t reserved[55];
+ uint8_t reserved[3];
+ uint8_t depopulation_time[4];
+ uint8_t reserved2[48];
};
#define SBDC_IS_PRESENT(bdc, length, field) \
@@ -2844,7 +2886,7 @@ struct scsi_vpd_logical_block_prov
};
/*
- * Block Limits VDP Page based on SBC-4 Revision 2
+ * Block Limits VDP Page based on SBC-4 Revision 17
*/
struct scsi_vpd_block_limits
{
@@ -2854,7 +2896,8 @@ struct scsi_vpd_block_limits
u_int8_t page_length[2];
#define SVPD_BL_PL_BASIC 0x10
#define SVPD_BL_PL_TP 0x3C
- u_int8_t reserved1;
+ u_int8_t flags;
+#define SVPD_BL_WSNZ 0x01
u_int8_t max_cmp_write_len;
u_int8_t opt_txfer_len_grain[2];
u_int8_t max_txfer_len[4];
@@ -2931,6 +2974,7 @@ struct scsi_read_capacity_data_long
uint8_t length[4];
#define SRC16_PROT_EN 0x01
#define SRC16_P_TYPE 0x0e
+#define SRC16_P_TYPE_SHIFT 1
#define SRC16_PTYPE_1 0x00
#define SRC16_PTYPE_2 0x02
#define SRC16_PTYPE_3 0x04
@@ -3578,7 +3622,9 @@ struct scsi_mode_header_10
u_int8_t data_length[2];/* Sense data length */
u_int8_t medium_type;
u_int8_t dev_spec;
- u_int8_t unused[2];
+ u_int8_t flags;
+#define SMH_LONGLBA 0x01
+ u_int8_t unused;
u_int8_t blk_desc_len[2];
};
@@ -3749,8 +3795,8 @@ void scsi_command_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len,
void scsi_progress_sbuf(struct sbuf *sb, uint16_t progress);
int scsi_sks_sbuf(struct sbuf *sb, int sense_key, uint8_t *sks);
void scsi_fru_sbuf(struct sbuf *sb, uint64_t fru);
-void scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info);
-void scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits, uint64_t info);
+void scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits);
+void scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits);
void scsi_sense_info_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
u_int sense_len, uint8_t *cdb, int cdb_len,
struct scsi_inquiry_data *inq_data,
diff --git a/freebsd/sys/crypto/blowfish/bf_skey.c b/freebsd/sys/crypto/blowfish/bf_skey.c
index f793d689..9bccaaf2 100644
--- a/freebsd/sys/crypto/blowfish/bf_skey.c
+++ b/freebsd/sys/crypto/blowfish/bf_skey.c
@@ -75,11 +75,11 @@ void
BF_set_key(key, len, data)
BF_KEY *key;
int len;
- unsigned char *data;
+ const unsigned char *data;
{
int i;
BF_LONG *p, ri, in[2];
- unsigned char *d, *end;
+ const unsigned char *d, *end;
memcpy((char *)key, (const char *)&bf_init, sizeof(BF_KEY));
p = key->P;
diff --git a/freebsd/sys/crypto/blowfish/blowfish.h b/freebsd/sys/crypto/blowfish/blowfish.h
index ecc14075..d09f83cf 100644
--- a/freebsd/sys/crypto/blowfish/blowfish.h
+++ b/freebsd/sys/crypto/blowfish/blowfish.h
@@ -80,7 +80,7 @@ typedef struct bf_key_st {
BF_LONG S[4*256];
} BF_KEY;
-void BF_set_key(BF_KEY *, int, unsigned char *);
+void BF_set_key(BF_KEY *, int, const unsigned char *);
void BF_encrypt(BF_LONG *, BF_KEY *);
void BF_decrypt(BF_LONG *, BF_KEY *);
void BF_ecb_encrypt(const unsigned char *, unsigned char *,
diff --git a/freebsd/sys/crypto/chacha20/chacha-sw.c b/freebsd/sys/crypto/chacha20/chacha-sw.c
index 0a03d91b..f610dac0 100644
--- a/freebsd/sys/crypto/chacha20/chacha-sw.c
+++ b/freebsd/sys/crypto/chacha20/chacha-sw.c
@@ -9,7 +9,7 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/xform_enc.h>
static int
-chacha20_xform_setkey(u_int8_t **sched, u_int8_t *key, int len)
+chacha20_xform_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
struct chacha_ctx *ctx;
@@ -26,7 +26,7 @@ chacha20_xform_setkey(u_int8_t **sched, u_int8_t *key, int len)
}
static void
-chacha20_xform_reinit(caddr_t key, u_int8_t *iv)
+chacha20_xform_reinit(caddr_t key, const u_int8_t *iv)
{
struct chacha_ctx *ctx;
diff --git a/freebsd/sys/crypto/chacha20/chacha.c b/freebsd/sys/crypto/chacha20/chacha.c
index 154726c2..ff7c4f81 100644
--- a/freebsd/sys/crypto/chacha20/chacha.c
+++ b/freebsd/sys/crypto/chacha20/chacha.c
@@ -90,12 +90,32 @@ chacha_keysetup(chacha_ctx *x,const u8 *k,u_int kbits)
LOCAL void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
+#ifndef CHACHA_NONCE0_CTR128
x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
x->input[14] = U8TO32_LITTLE(iv + 0);
x->input[15] = U8TO32_LITTLE(iv + 4);
+#else
+ // CHACHA_STATELEN
+ (void)iv;
+ x->input[12] = U8TO32_LITTLE(counter + 0);
+ x->input[13] = U8TO32_LITTLE(counter + 4);
+ x->input[14] = U8TO32_LITTLE(counter + 8);
+ x->input[15] = U8TO32_LITTLE(counter + 12);
+#endif
}
+#ifdef CHACHA_NONCE0_CTR128
+LOCAL void
+chacha_ctrsave(const chacha_ctx *x, u8 *counter)
+{
+ U32TO8_LITTLE(counter + 0, x->input[12]);
+ U32TO8_LITTLE(counter + 4, x->input[13]);
+ U32TO8_LITTLE(counter + 8, x->input[14]);
+ U32TO8_LITTLE(counter + 12, x->input[15]);
+}
+#endif
+
LOCAL void
#ifndef __rtems__
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
@@ -202,7 +222,16 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes)
j12 = PLUSONE(j12);
if (!j12) {
j13 = PLUSONE(j13);
+#ifndef CHACHA_NONCE0_CTR128
/* stopping at 2^70 bytes per nonce is user's responsibility */
+#else
+ if (!j13) {
+ j14 = PLUSONE(j14);
+ if (!j14) {
+ j15 = PLUSONE(j15);
+ }
+ }
+#endif
}
U32TO8_LITTLE(c + 0,x0);
@@ -228,6 +257,10 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes)
}
x->input[12] = j12;
x->input[13] = j13;
+#ifdef CHACHA_NONCE0_CTR128
+ x->input[14] = j14;
+ x->input[15] = j15;
+#endif
return;
}
bytes -= 64;
diff --git a/freebsd/sys/crypto/chacha20/chacha.h b/freebsd/sys/crypto/chacha20/chacha.h
index 73548331..32262b04 100644
--- a/freebsd/sys/crypto/chacha20/chacha.h
+++ b/freebsd/sys/crypto/chacha20/chacha.h
@@ -26,10 +26,19 @@ Public domain.
#define LOCAL
#endif
+#ifdef CHACHA_NONCE0_CTR128
+#define CHACHA_UNUSED __unused
+#else
+#define CHACHA_UNUSED
+#endif
+
LOCAL void chacha_keysetup(struct chacha_ctx *x, const u_char *k, u_int kbits);
-LOCAL void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv, const u_char *ctr);
+LOCAL void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv CHACHA_UNUSED,
+ const u_char *ctr);
LOCAL void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
u_char *c, u_int bytes);
+#undef CHACHA_UNUSED
+
#endif /* CHACHA_H */
diff --git a/freebsd/sys/crypto/des/des.h b/freebsd/sys/crypto/des/des.h
index 81c7bfbe..339921a7 100644
--- a/freebsd/sys/crypto/des/des.h
+++ b/freebsd/sys/crypto/des/des.h
@@ -82,7 +82,7 @@ typedef struct des_ks_struct
extern int des_check_key; /* defaults to false */
char *des_options(void);
-void des_ecb_encrypt(des_cblock *, des_cblock *, des_key_schedule, int);
+void des_ecb_encrypt(unsigned char *, unsigned char *, des_key_schedule, int);
void des_encrypt1(DES_LONG *, des_key_schedule, int);
void des_encrypt2(DES_LONG *, des_key_schedule, int);
@@ -91,24 +91,17 @@ void des_encrypt3(DES_LONG *, des_key_schedule, des_key_schedule,
void des_decrypt3(DES_LONG *, des_key_schedule, des_key_schedule,
des_key_schedule);
-void des_ecb3_encrypt(des_cblock *, des_cblock *, des_key_schedule,
+void des_ecb3_encrypt(unsigned char *, unsigned char *, des_key_schedule,
des_key_schedule, des_key_schedule, int);
-void des_ncbc_encrypt(const unsigned char *, unsigned char *, long,
- des_key_schedule, des_cblock *, int);
-
-void des_ede3_cbc_encrypt(const unsigned char *, unsigned char *, long,
- des_key_schedule, des_key_schedule,
- des_key_schedule, des_cblock *, int);
-
-void des_set_odd_parity(des_cblock *);
-void des_fixup_key_parity(des_cblock *);
-int des_is_weak_key(des_cblock *);
-int des_set_key(des_cblock *, des_key_schedule);
-int des_key_sched(des_cblock *, des_key_schedule);
-int des_set_key_checked(des_cblock *, des_key_schedule);
-void des_set_key_unchecked(des_cblock *, des_key_schedule);
-int des_check_key_parity(des_cblock *);
+void des_set_odd_parity(unsigned char *);
+void des_fixup_key_parity(unsigned char *);
+int des_is_weak_key(const unsigned char *);
+int des_set_key(const unsigned char *, des_key_schedule);
+int des_key_sched(const unsigned char *, des_key_schedule);
+int des_set_key_checked(const unsigned char *, des_key_schedule);
+void des_set_key_unchecked(const unsigned char *, des_key_schedule);
+int des_check_key_parity(const unsigned char *);
#ifdef __cplusplus
}
diff --git a/freebsd/sys/crypto/des/des_ecb.c b/freebsd/sys/crypto/des/des_ecb.c
index 4c383f1d..3819d91b 100644
--- a/freebsd/sys/crypto/des/des_ecb.c
+++ b/freebsd/sys/crypto/des/des_ecb.c
@@ -99,13 +99,13 @@ char *des_options(void)
}
return(buf);
}
-void des_ecb_encrypt(des_cblock *input, des_cblock *output,
+void des_ecb_encrypt(unsigned char *input, unsigned char *output,
des_key_schedule ks, int enc)
{
register DES_LONG l;
DES_LONG ll[2];
- const unsigned char *in=&(*input)[0];
- unsigned char *out = &(*output)[0];
+ const unsigned char *in = input;
+ unsigned char *out = output;
c2l(in,l); ll[0]=l;
c2l(in,l); ll[1]=l;
@@ -115,14 +115,14 @@ void des_ecb_encrypt(des_cblock *input, des_cblock *output,
l=ll[0]=ll[1]=0;
}
-void des_ecb3_encrypt(des_cblock *input, des_cblock *output,
+void des_ecb3_encrypt(unsigned char *input, unsigned char *output,
des_key_schedule ks1, des_key_schedule ks2, des_key_schedule ks3,
int enc)
{
register DES_LONG l0,l1;
DES_LONG ll[2];
- const unsigned char *in = &(*input)[0];
- unsigned char *out = &(*output)[0];
+ const unsigned char *in = input;
+ unsigned char *out = output;
c2l(in,l0);
c2l(in,l1);
diff --git a/freebsd/sys/crypto/des/des_setkey.c b/freebsd/sys/crypto/des/des_setkey.c
index 966b17d0..32a55ab1 100644
--- a/freebsd/sys/crypto/des/des_setkey.c
+++ b/freebsd/sys/crypto/des/des_setkey.c
@@ -69,21 +69,21 @@ __FBSDID("$FreeBSD$");
int des_check_key=0;
-void des_set_odd_parity(des_cblock *key)
+void des_set_odd_parity(unsigned char *key)
{
int i;
for (i=0; i<DES_KEY_SZ; i++)
- (*key)[i]=odd_parity[(*key)[i]];
+ key[i]=odd_parity[key[i]];
}
-int des_check_key_parity(des_cblock *key)
+int des_check_key_parity(const unsigned char *key)
{
int i;
for (i=0; i<DES_KEY_SZ; i++)
{
- if ((*key)[i] != odd_parity[(*key)[i]])
+ if (key[i] != odd_parity[key[i]])
return(0);
}
return(1);
@@ -119,7 +119,7 @@ static des_cblock weak_keys[NUM_WEAK_KEY]={
{0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1,0xFE},
{0xFE,0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1}};
-int des_is_weak_key(des_cblock *key)
+int des_is_weak_key(const unsigned char *key)
{
int i;
@@ -144,7 +144,7 @@ int des_is_weak_key(des_cblock *key)
#define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\
(a)=(a)^(t)^(t>>(16-(n))))
-int des_set_key(des_cblock *key, des_key_schedule schedule)
+int des_set_key(const unsigned char *key, des_key_schedule schedule)
{
if (des_check_key)
{
@@ -161,7 +161,7 @@ int des_set_key(des_cblock *key, des_key_schedule schedule)
* return -1 if key parity error,
* return -2 if illegal weak key.
*/
-int des_set_key_checked(des_cblock *key, des_key_schedule schedule)
+int des_set_key_checked(const unsigned char *key, des_key_schedule schedule)
{
if (!des_check_key_parity(key))
return(-1);
@@ -171,7 +171,7 @@ int des_set_key_checked(des_cblock *key, des_key_schedule schedule)
return 0;
}
-void des_set_key_unchecked(des_cblock *key, des_key_schedule schedule)
+void des_set_key_unchecked(const unsigned char *key, des_key_schedule schedule)
{
static int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0};
DES_LONG c,d,t,s,t2;
@@ -180,7 +180,7 @@ void des_set_key_unchecked(des_cblock *key, des_key_schedule schedule)
int i;
k = &schedule->ks.deslong[0];
- in = &(*key)[0];
+ in = key;
c2l(in,c);
c2l(in,d);
@@ -227,12 +227,12 @@ void des_set_key_unchecked(des_cblock *key, des_key_schedule schedule)
}
}
-int des_key_sched(des_cblock *key, des_key_schedule schedule)
+int des_key_sched(const unsigned char *key, des_key_schedule schedule)
{
return(des_set_key(key,schedule));
}
-void des_fixup_key_parity(des_cblock *key)
+void des_fixup_key_parity(unsigned char *key)
{
des_set_odd_parity(key);
}
diff --git a/freebsd/sys/dev/bge/if_bge.c b/freebsd/sys/dev/bge/if_bge.c
index a4a937b2..79e930ca 100644
--- a/freebsd/sys/dev/bge/if_bge.c
+++ b/freebsd/sys/dev/bge/if_bge.c
@@ -2929,10 +2929,14 @@ bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment,
bus_addr_t *paddr, const char *msg)
{
struct bge_dmamap_arg ctx;
+ bus_addr_t lowaddr;
+ bus_size_t ring_end;
int error;
+ lowaddr = BUS_SPACE_MAXADDR;
+again:
error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag,
- alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
+ alignment, 0, lowaddr, BUS_SPACE_MAXADDR, NULL,
NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag);
if (error != 0) {
device_printf(sc->bge_dev,
@@ -2957,6 +2961,25 @@ bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment,
return (ENOMEM);
}
*paddr = ctx.bge_busaddr;
+ ring_end = *paddr + maxsize;
+ if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0 &&
+ BGE_ADDR_HI(*paddr) != BGE_ADDR_HI(ring_end)) {
+ /*
+ * 4GB boundary crossed. Limit maximum allowable DMA
+ * address space to 32bit and try again.
+ */
+ bus_dmamap_unload(*tag, *map);
+ bus_dmamem_free(*tag, *ring, *map);
+ bus_dma_tag_destroy(*tag);
+ if (bootverbose)
+ device_printf(sc->bge_dev, "4GB boundary crossed, "
+ "limit DMA address space to 32bit for %s\n", msg);
+ *ring = NULL;
+ *tag = NULL;
+ *map = NULL;
+ lowaddr = BUS_SPACE_MAXADDR_32BIT;
+ goto again;
+ }
return (0);
}
@@ -2964,7 +2987,7 @@ static int
bge_dma_alloc(struct bge_softc *sc)
{
bus_addr_t lowaddr;
- bus_size_t rxmaxsegsz, sbsz, txsegsz, txmaxsegsz;
+ bus_size_t boundary, sbsz, rxmaxsegsz, txsegsz, txmaxsegsz;
int i, error;
lowaddr = BUS_SPACE_MAXADDR;
@@ -3051,7 +3074,9 @@ bge_dma_alloc(struct bge_softc *sc)
}
/* Create parent tag for buffers. */
+ boundary = 0;
if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0) {
+ boundary = BGE_DMA_BNDRY;
/*
* XXX
* watchdog timeout issue was observed on BCM5704 which
@@ -3062,10 +3087,10 @@ bge_dma_alloc(struct bge_softc *sc)
if (sc->bge_pcixcap != 0)
lowaddr = BUS_SPACE_MAXADDR_32BIT;
}
- error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr,
- BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0,
- BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL,
- &sc->bge_cdata.bge_buffer_tag);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev),
+ 1, boundary, lowaddr, BUS_SPACE_MAXADDR, NULL,
+ NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
+ 0, NULL, NULL, &sc->bge_cdata.bge_buffer_tag);
if (error != 0) {
device_printf(sc->bge_dev,
"could not allocate buffer dma tag\n");
@@ -3253,6 +3278,8 @@ bge_mbox_reorder(struct bge_softc *sc)
bus = device_get_parent(dev);
if (device_get_devclass(dev) != pcib)
break;
+ if (device_get_devclass(bus) != pci)
+ break;
for (i = 0; i < nitems(mbox_reorder_lists); i++) {
if (pci_get_vendor(dev) ==
mbox_reorder_lists[i].vendor &&
@@ -3264,8 +3291,6 @@ bge_mbox_reorder(struct bge_softc *sc)
return (1);
}
}
- if (device_get_devclass(bus) != pci)
- break;
}
return (0);
}
diff --git a/freebsd/sys/dev/bge/if_bgereg.h b/freebsd/sys/dev/bge/if_bgereg.h
index eb7686e7..58fe8040 100644
--- a/freebsd/sys/dev/bge/if_bgereg.h
+++ b/freebsd/sys/dev/bge/if_bgereg.h
@@ -2866,6 +2866,12 @@ struct bge_gib {
#define BGE_DMA_MAXADDR 0xFFFFFFFFFF
#endif
+#if (BUS_SPACE_MAXSIZE > 0xFFFFFFFF)
+#define BGE_DMA_BNDRY 0x100000000
+#else
+#define BGE_DMA_BNDRY 0
+#endif
+
/*
* Ring structures. Most of these reside in host memory and we tell
* the NIC where they are via the ring control blocks. The exceptions
diff --git a/freebsd/sys/dev/cadence/if_cgem.c b/freebsd/sys/dev/cadence/if_cgem.c
index 191362c4..34340f22 100644
--- a/freebsd/sys/dev/cadence/if_cgem.c
+++ b/freebsd/sys/dev/cadence/if_cgem.c
@@ -107,6 +107,14 @@ __FBSDID("$FreeBSD$");
#define CGEM_CKSUM_ASSIST (CSUM_IP | CSUM_TCP | CSUM_UDP | \
CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
+#ifndef __rtems__
+static struct ofw_compat_data compat_data[] = {
+ { "cadence,gem", 1 },
+ { "cdns,macb", 1 },
+ { NULL, 0 },
+};
+#endif /* __rtems__ */
+
struct cgem_softc {
if_t ifp;
struct mtx sc_mtx;
@@ -1724,7 +1732,7 @@ cgem_probe(device_t dev)
if (!ofw_bus_status_okay(dev))
return (ENXIO);
- if (!ofw_bus_is_compatible(dev, "cadence,gem"))
+ if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
return (ENXIO);
#endif /* __rtems__ */
diff --git a/freebsd/sys/dev/e1000/em_txrx.c b/freebsd/sys/dev/e1000/em_txrx.c
index c2b60743..4faf806e 100644
--- a/freebsd/sys/dev/e1000/em_txrx.c
+++ b/freebsd/sys/dev/e1000/em_txrx.c
@@ -459,16 +459,11 @@ em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
prev = txr->tx_cidx_processed;
ntxd = scctx->isc_ntxd[0];
do {
+ MPASS(prev != cur);
delta = (int32_t)cur - (int32_t)prev;
- /*
- * XXX This appears to be a hack for first-packet.
- * A correct fix would prevent prev == cur in the first place.
- */
- MPASS(prev == 0 || delta != 0);
- if (prev == 0 && cur == 0)
- delta += 1;
if (delta < 0)
delta += ntxd;
+ MPASS(delta > 0);
DPRINTF(iflib_get_dev(adapter->ctx),
"%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
__FUNCTION__, prev, cur, clear, delta);
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 803b68ec..32eb4afe 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -251,6 +251,7 @@ static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
static void em_if_timer(if_ctx_t ctx, uint16_t qid);
static void em_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
+static void em_if_watchdog_reset(if_ctx_t ctx);
static void em_identify_hardware(if_ctx_t ctx);
static int em_allocate_pci_resources(if_ctx_t ctx);
@@ -262,10 +263,14 @@ static int em_setup_msix(if_ctx_t ctx);
static void em_initialize_transmit_unit(if_ctx_t ctx);
static void em_initialize_receive_unit(if_ctx_t ctx);
-static void em_if_enable_intr(if_ctx_t ctx);
-static void em_if_disable_intr(if_ctx_t ctx);
+static void em_if_intr_enable(if_ctx_t ctx);
+static void em_if_intr_disable(if_ctx_t ctx);
+static void igb_if_intr_enable(if_ctx_t ctx);
+static void igb_if_intr_disable(if_ctx_t ctx);
static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
+static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
+static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static void em_if_multi_set(if_ctx_t ctx);
static void em_if_update_admin_status(if_ctx_t ctx);
static void em_if_debug(if_ctx_t ctx);
@@ -295,7 +300,7 @@ static void em_disable_aspm(struct adapter *);
int em_intr(void *arg);
static void em_disable_promisc(if_ctx_t ctx);
-/* MSIX handlers */
+/* MSI-X handlers */
static int em_if_msix_intr_assign(if_ctx_t, int);
static int em_msix_link(void *);
static void em_handle_link(void *context);
@@ -376,8 +381,8 @@ static device_method_t em_if_methods[] = {
DEVMETHOD(ifdi_init, em_if_init),
DEVMETHOD(ifdi_stop, em_if_stop),
DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign),
- DEVMETHOD(ifdi_intr_enable, em_if_enable_intr),
- DEVMETHOD(ifdi_intr_disable, em_if_disable_intr),
+ DEVMETHOD(ifdi_intr_enable, em_if_intr_enable),
+ DEVMETHOD(ifdi_intr_disable, em_if_intr_disable),
DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc),
DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc),
DEVMETHOD(ifdi_queues_free, em_if_queues_free),
@@ -388,6 +393,7 @@ static device_method_t em_if_methods[] = {
DEVMETHOD(ifdi_mtu_set, em_if_mtu_set),
DEVMETHOD(ifdi_promisc_set, em_if_set_promisc),
DEVMETHOD(ifdi_timer, em_if_timer),
+ DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset),
DEVMETHOD(ifdi_vlan_register, em_if_vlan_register),
DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister),
DEVMETHOD(ifdi_get_counter, em_if_get_counter),
@@ -398,14 +404,47 @@ static device_method_t em_if_methods[] = {
DEVMETHOD_END
};
-/*
- * note that if (adapter->msix_mem) is replaced by:
- * if (adapter->intr_type == IFLIB_INTR_MSIX)
- */
static driver_t em_if_driver = {
"em_if", em_if_methods, sizeof(struct adapter)
};
+static device_method_t igb_if_methods[] = {
+ DEVMETHOD(ifdi_attach_pre, em_if_attach_pre),
+ DEVMETHOD(ifdi_attach_post, em_if_attach_post),
+ DEVMETHOD(ifdi_detach, em_if_detach),
+ DEVMETHOD(ifdi_shutdown, em_if_shutdown),
+ DEVMETHOD(ifdi_suspend, em_if_suspend),
+ DEVMETHOD(ifdi_resume, em_if_resume),
+ DEVMETHOD(ifdi_init, em_if_init),
+ DEVMETHOD(ifdi_stop, em_if_stop),
+ DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign),
+ DEVMETHOD(ifdi_intr_enable, igb_if_intr_enable),
+ DEVMETHOD(ifdi_intr_disable, igb_if_intr_disable),
+ DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc),
+ DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc),
+ DEVMETHOD(ifdi_queues_free, em_if_queues_free),
+ DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status),
+ DEVMETHOD(ifdi_multi_set, em_if_multi_set),
+ DEVMETHOD(ifdi_media_status, em_if_media_status),
+ DEVMETHOD(ifdi_media_change, em_if_media_change),
+ DEVMETHOD(ifdi_mtu_set, em_if_mtu_set),
+ DEVMETHOD(ifdi_promisc_set, em_if_set_promisc),
+ DEVMETHOD(ifdi_timer, em_if_timer),
+ DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset),
+ DEVMETHOD(ifdi_vlan_register, em_if_vlan_register),
+ DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister),
+ DEVMETHOD(ifdi_get_counter, em_if_get_counter),
+ DEVMETHOD(ifdi_led_func, em_if_led_func),
+ DEVMETHOD(ifdi_rx_queue_intr_enable, igb_if_rx_queue_intr_enable),
+ DEVMETHOD(ifdi_tx_queue_intr_enable, igb_if_tx_queue_intr_enable),
+ DEVMETHOD(ifdi_debug, em_if_debug),
+ DEVMETHOD_END
+};
+
+static driver_t igb_if_driver = {
+ "igb_if", igb_if_methods, sizeof(struct adapter)
+};
+
/*********************************************************************
* Tunable default values.
*********************************************************************/
@@ -525,7 +564,7 @@ static struct if_shared_ctx igb_sctx_init = {
.isc_admin_intrcnt = 1,
.isc_vendor_info = igb_vendor_info_array,
.isc_driver_version = em_driver_version,
- .isc_driver = &em_if_driver,
+ .isc_driver = &igb_if_driver,
.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM,
.isc_nrxd_min = {EM_MIN_RXD},
@@ -723,7 +762,6 @@ em_set_num_queues(if_ctx_t ctx)
*
* return 0 on success, positive on failure
*********************************************************************/
-
static int
em_if_attach_pre(if_ctx_t ctx)
{
@@ -733,15 +771,10 @@ em_if_attach_pre(if_ctx_t ctx)
struct e1000_hw *hw;
int error = 0;
- INIT_DEBUGOUT("em_if_attach_pre begin");
+ INIT_DEBUGOUT("em_if_attach_pre: begin");
dev = iflib_get_dev(ctx);
adapter = iflib_get_softc(ctx);
- if (resource_disabled("em", device_get_unit(dev))) {
- device_printf(dev, "Disabled by device hint\n");
- return (ENXIO);
- }
-
adapter->ctx = adapter->osdep.ctx = ctx;
adapter->dev = adapter->osdep.dev = dev;
scctx = adapter->shared = iflib_get_softc_ctx(ctx);
@@ -779,14 +812,13 @@ em_if_attach_pre(if_ctx_t ctx)
/* Determine hardware and mac info */
em_identify_hardware(ctx);
- scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR);
scctx->isc_tx_nsegments = EM_MAX_SCATTER;
scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx);
- device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max);
+ if (bootverbose)
+ device_printf(dev, "attach_pre capping queues at %d\n",
+ scctx->isc_ntxqsets_max);
if (adapter->hw.mac.type >= igb_mac_min) {
- int try_second_bar;
-
scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN);
scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN);
scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc);
@@ -800,14 +832,13 @@ em_if_attach_pre(if_ctx_t ctx)
CSUM_IP6_TCP | CSUM_IP6_UDP;
if (adapter->hw.mac.type != e1000_82575)
scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP;
-
/*
** Some new devices, as with ixgbe, now may
** use a different BAR, so we need to keep
** track of which is used.
*/
- try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4);
- if (try_second_bar == 0)
+ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR);
+ if (pci_read_config(dev, scctx->isc_msix_bar, 4) == 0)
scctx->isc_msix_bar += 4;
} else if (adapter->hw.mac.type >= em_mac_min) {
scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
@@ -837,6 +868,16 @@ em_if_attach_pre(if_ctx_t ctx)
*/
scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO;
+ /*
+ * We support MSI-X with 82574 only, but indicate to iflib(4)
+ * that it shall give MSI at least a try with other devices.
+ */
+ if (adapter->hw.mac.type == e1000_82574) {
+ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR);
+ } else {
+ scctx->isc_msix_bar = -1;
+ scctx->isc_disable_msix = 1;
+ }
} else {
scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
@@ -847,6 +888,7 @@ em_if_attach_pre(if_ctx_t ctx)
scctx->isc_capabilities = scctx->isc_capenable = LEM_CAPS;
if (adapter->hw.mac.type < e1000_82543)
scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM);
+ /* INTx only */
scctx->isc_msix_bar = 0;
}
@@ -1092,13 +1134,12 @@ err_late:
*
* return 0 on success, positive on failure
*********************************************************************/
-
static int
em_if_detach(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
- INIT_DEBUGOUT("em_detach: begin");
+ INIT_DEBUGOUT("em_if_detach: begin");
e1000_phy_hw_reset(&adapter->hw);
@@ -1203,16 +1244,16 @@ em_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
* by the driver as a hw/sw initialization routine to get to a
* consistent state.
*
- * return 0 on success, positive on failure
**********************************************************************/
-
static void
em_if_init(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
+ if_softc_ctx_t scctx = adapter->shared;
struct ifnet *ifp = iflib_get_ifp(ctx);
struct em_tx_queue *tx_que;
int i;
+
INIT_DEBUGOUT("em_if_init: begin");
/* Get the latest mac address, User can use a LAA */
@@ -1242,7 +1283,14 @@ em_if_init(if_ctx_t ctx)
for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) {
struct tx_ring *txr = &tx_que->txr;
- txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0;
+ txr->tx_rs_cidx = txr->tx_rs_pidx;
+
+ /* Initialize the last processed descriptor to be the end of
+ * the ring, rather than the start, so that we avoid an
+ * off-by-one error when calculating how many descriptors are
+ * done in the credits_update function.
+ */
+ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1;
}
/* Setup VLAN support, basic and offload if available */
@@ -1261,21 +1309,7 @@ em_if_init(if_ctx_t ctx)
/* Setup Multicast table */
em_if_multi_set(ctx);
- /*
- * Figure out the desired mbuf
- * pool for doing jumbos
- */
- if (adapter->hw.mac.max_frame_size <= 2048)
- adapter->rx_mbuf_sz = MCLBYTES;
-#ifndef CONTIGMALLOC_WORKS
- else
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
-#else
- else if (adapter->hw.mac.max_frame_size <= 4096)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else
- adapter->rx_mbuf_sz = MJUM9BYTES;
-#endif
+ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
em_initialize_receive_unit(ctx);
/* Use real VLAN Filter support? */
@@ -1295,7 +1329,7 @@ em_if_init(if_ctx_t ctx)
em_if_set_promisc(ctx, IFF_PROMISC);
e1000_clear_hw_cntrs_base_generic(&adapter->hw);
- /* MSI/X configuration for 82574 */
+ /* MSI-X configuration for 82574 */
if (adapter->hw.mac.type == e1000_82574) {
int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
@@ -1338,8 +1372,6 @@ em_intr(void *arg)
reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
- if (adapter->intr_type != IFLIB_INTR_LEGACY)
- goto skip_stray;
/* Hot eject? */
if (reg_icr == 0xffffffff)
return FILTER_STRAY;
@@ -1356,7 +1388,14 @@ em_intr(void *arg)
(reg_icr & E1000_ICR_INT_ASSERTED) == 0)
return FILTER_STRAY;
-skip_stray:
+ /*
+ * Only MSI-X interrupts have one-shot behavior by taking advantage
+ * of the EIAC register. Thus, explicitly disable interrupts. This
+ * also works around the MSI message reordering errata on certain
+ * systems.
+ */
+ IFDI_INTR_DISABLE(ctx);
+
/* Link status change */
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
adapter->hw.mac.get_link_status = 1;
@@ -1369,59 +1408,49 @@ skip_stray:
return (FILTER_SCHEDULE_THREAD);
}
-static void
-igb_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq)
+static int
+em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
{
- E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims);
-}
+ struct adapter *adapter = iflib_get_softc(ctx);
+ struct em_rx_queue *rxq = &adapter->rx_queues[rxqid];
-static void
-em_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq)
-{
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims);
+ return (0);
}
-static void
-igb_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq)
+static int
+em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
{
- E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims);
-}
+ struct adapter *adapter = iflib_get_softc(ctx);
+ struct em_tx_queue *txq = &adapter->tx_queues[txqid];
-static void
-em_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq)
-{
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims);
+ return (0);
}
static int
-em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
+igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
{
struct adapter *adapter = iflib_get_softc(ctx);
struct em_rx_queue *rxq = &adapter->rx_queues[rxqid];
- if (adapter->hw.mac.type >= igb_mac_min)
- igb_rx_enable_queue(adapter, rxq);
- else
- em_rx_enable_queue(adapter, rxq);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims);
return (0);
}
static int
-em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
+igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
{
struct adapter *adapter = iflib_get_softc(ctx);
struct em_tx_queue *txq = &adapter->tx_queues[txqid];
- if (adapter->hw.mac.type >= igb_mac_min)
- igb_tx_enable_queue(adapter, txq);
- else
- em_tx_enable_queue(adapter, txq);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims);
return (0);
}
/*********************************************************************
*
- * MSIX RX Interrupt Service routine
+ * MSI-X RX Interrupt Service routine
*
**********************************************************************/
static int
@@ -1436,7 +1465,7 @@ em_msix_que(void *arg)
/*********************************************************************
*
- * MSIX Link Fast Interrupt Service routine
+ * MSI-X Link Fast Interrupt Service routine
*
**********************************************************************/
static int
@@ -1689,37 +1718,24 @@ em_if_multi_set(if_ctx_t ctx)
}
}
-
/*********************************************************************
* Timer routine
*
- * This routine checks for link status and updates statistics.
+ * This routine schedules em_if_update_admin_status() to check for
+ * link status and to gather statistics as well as to perform some
+ * controller-specific hardware patting.
*
**********************************************************************/
-
static void
em_if_timer(if_ctx_t ctx, uint16_t qid)
{
- struct adapter *adapter = iflib_get_softc(ctx);
- struct em_rx_queue *que;
- int i;
- int trigger = 0;
if (qid != 0)
return;
iflib_admin_intr_deferred(ctx);
-
- /* Mask to use in the irq trigger */
- if (adapter->intr_type == IFLIB_INTR_MSIX) {
- for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++)
- trigger |= que->eims;
- } else {
- trigger = E1000_ICS_RXDMT0;
- }
}
-
static void
em_if_update_admin_status(if_ctx_t ctx)
{
@@ -1825,21 +1841,30 @@ em_if_update_admin_status(if_ctx_t ctx)
E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
}
+static void
+em_if_watchdog_reset(if_ctx_t ctx)
+{
+ struct adapter *adapter = iflib_get_softc(ctx);
+
+ /*
+ * Just count the event; iflib(4) will already trigger a
+ * sufficient reset of the controller.
+ */
+ adapter->watchdog_events++;
+}
+
/*********************************************************************
*
* This routine disables all traffic on the adapter by issuing a
- * global reset on the MAC and deallocates TX/RX buffers.
+ * global reset on the MAC.
*
- * This routine should always be called with BOTH the CORE
- * and TX locks.
**********************************************************************/
-
static void
em_if_stop(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
- INIT_DEBUGOUT("em_stop: begin");
+ INIT_DEBUGOUT("em_if_stop: begin");
e1000_reset_hw(&adapter->hw);
if (adapter->hw.mac.type >= e1000_82544)
@@ -1849,7 +1874,6 @@ em_if_stop(if_ctx_t ctx)
e1000_cleanup_led(&adapter->hw);
}
-
/*********************************************************************
*
* Determine hardware revision.
@@ -1906,7 +1930,6 @@ em_allocate_pci_resources(if_ctx_t ctx)
for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
val = pci_read_config(dev, rid, 4);
if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
- adapter->io_rid = rid;
break;
}
rid += 4;
@@ -1918,8 +1941,8 @@ em_allocate_pci_resources(if_ctx_t ctx)
device_printf(dev, "Unable to locate IO BAR\n");
return (ENXIO);
}
- adapter->ioport = bus_alloc_resource_any(dev,
- SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
+ adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
+ &rid, RF_ACTIVE);
if (adapter->ioport == NULL) {
device_printf(dev, "Unable to allocate bus resource: "
"ioport\n");
@@ -1939,7 +1962,7 @@ em_allocate_pci_resources(if_ctx_t ctx)
/*********************************************************************
*
- * Setup the MSIX Interrupt handlers
+ * Set up the MSI-X Interrupt handlers
*
**********************************************************************/
static int
@@ -1968,7 +1991,7 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
* Set the bit to enable interrupt
* in E1000_IMS -- bits 20 and 21
* are for RX0 and RX1, note this has
- * NOTHING to do with the MSIX vector
+ * NOTHING to do with the MSI-X vector
*/
if (adapter->hw.mac.type == e1000_82574) {
rx_que->eims = 1 << (20 + i);
@@ -1989,22 +2012,22 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
&adapter->rx_queues[i % adapter->rx_num_queues].que_irq,
IFLIB_INTR_TX, tx_que, tx_que->me, buf);
- tx_que->msix = (vector % adapter->tx_num_queues);
+ tx_que->msix = (vector % adapter->rx_num_queues);
/*
* Set the bit to enable interrupt
* in E1000_IMS -- bits 22 and 23
* are for TX0 and TX1, note this has
- * NOTHING to do with the MSIX vector
+ * NOTHING to do with the MSI-X vector
*/
if (adapter->hw.mac.type == e1000_82574) {
tx_que->eims = 1 << (22 + i);
adapter->ims |= tx_que->eims;
adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4));
} else if (adapter->hw.mac.type == e1000_82575) {
- tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues);
+ tx_que->eims = E1000_EICR_TX_QUEUE0 << i;
} else {
- tx_que->eims = 1 << (i % adapter->tx_num_queues);
+ tx_que->eims = 1 << i;
}
}
@@ -2044,7 +2067,7 @@ igb_configure_queues(struct adapter *adapter)
E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
E1000_GPIE_PBA | E1000_GPIE_NSICR);
- /* Turn on MSIX */
+ /* Turn on MSI-X */
switch (adapter->hw.mac.type) {
case e1000_82580:
case e1000_i350:
@@ -2178,7 +2201,7 @@ em_free_pci_resources(if_ctx_t ctx)
struct em_rx_queue *que = adapter->rx_queues;
device_t dev = iflib_get_dev(ctx);
- /* Release all msix queue resources */
+ /* Release all MSI-X queue resources */
if (adapter->intr_type == IFLIB_INTR_MSIX)
iflib_irq_free(ctx, &adapter->irq);
@@ -2186,24 +2209,26 @@ em_free_pci_resources(if_ctx_t ctx)
iflib_irq_free(ctx, &que->que_irq);
}
- /* First release all the interrupt resources */
if (adapter->memory != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
- PCIR_BAR(0), adapter->memory);
+ rman_get_rid(adapter->memory), adapter->memory);
adapter->memory = NULL;
}
if (adapter->flash != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
- EM_FLASH, adapter->flash);
+ rman_get_rid(adapter->flash), adapter->flash);
adapter->flash = NULL;
}
- if (adapter->ioport != NULL)
+
+ if (adapter->ioport != NULL) {
bus_release_resource(dev, SYS_RES_IOPORT,
- adapter->io_rid, adapter->ioport);
+ rman_get_rid(adapter->ioport), adapter->ioport);
+ adapter->ioport = NULL;
+ }
}
-/* Setup MSI or MSI/X */
+/* Set up MSI or MSI-X */
static int
em_setup_msix(if_ctx_t ctx)
{
@@ -2217,11 +2242,9 @@ em_setup_msix(if_ctx_t ctx)
/*********************************************************************
*
- * Initialize the hardware to a configuration
- * as specified by the adapter structure.
+ * Workaround for SmartSpeed on 82541 and 82547 controllers
*
**********************************************************************/
-
static void
lem_smartspeed(struct adapter *adapter)
{
@@ -2386,6 +2409,12 @@ igb_init_dmac(struct adapter *adapter, u32 pba)
}
}
+/*********************************************************************
+ *
+ * Initialize the hardware to a configuration as specified by the
+ * adapter structure.
+ *
+ **********************************************************************/
static void
em_reset(if_ctx_t ctx)
{
@@ -2620,6 +2649,11 @@ em_reset(if_ctx_t ctx)
e1000_check_for_link(hw);
}
+/*
+ * Initialise the RSS mapping for NICs that support multiple transmit/
+ * receive rings.
+ */
+
#define RSSKEYLEN 10
static void
em_initialize_rss_mapping(struct adapter *adapter)
@@ -2660,7 +2694,6 @@ em_initialize_rss_mapping(struct adapter *adapter)
E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
E1000_MRQC_RSS_FIELD_IPV6_EX |
E1000_MRQC_RSS_FIELD_IPV6);
-
}
static void
@@ -2760,7 +2793,7 @@ igb_initialize_rss_mapping(struct adapter *adapter)
/*********************************************************************
*
- * Setup networking device structure and register an interface.
+ * Setup networking device structure and register interface media.
*
**********************************************************************/
static int
@@ -2845,7 +2878,9 @@ em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs
txr->tx_paddr = paddrs[i*ntxqs];
}
- device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues);
+ if (bootverbose)
+ device_printf(iflib_get_dev(ctx),
+ "allocated for %d tx_queues\n", adapter->tx_num_queues);
return (0);
fail:
em_if_queues_free(ctx);
@@ -2883,8 +2918,10 @@ em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs
rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs];
rxr->rx_paddr = paddrs[i*nrxqs];
}
-
- device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues);
+
+ if (bootverbose)
+ device_printf(iflib_get_dev(ctx),
+ "allocated for %d rx_queues\n", adapter->rx_num_queues);
return (0);
fail:
@@ -3127,7 +3164,7 @@ em_initialize_receive_unit(if_ctx_t ctx)
rfctl = E1000_READ_REG(hw, E1000_RFCTL);
rfctl |= E1000_RFCTL_EXTEN;
/*
- * When using MSIX interrupts we need to throttle
+ * When using MSI-X interrupts we need to throttle
* using the EITR register (82574 only)
*/
if (hw->mac.type == e1000_82574) {
@@ -3371,7 +3408,7 @@ em_setup_vlan_hw_support(struct adapter *adapter)
}
static void
-em_if_enable_intr(if_ctx_t ctx)
+em_if_intr_enable(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
struct e1000_hw *hw = &adapter->hw;
@@ -3380,30 +3417,51 @@ em_if_enable_intr(if_ctx_t ctx)
if (hw->mac.type == e1000_82574) {
E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
ims_mask |= adapter->ims;
- } else if (adapter->intr_type == IFLIB_INTR_MSIX && hw->mac.type >= igb_mac_min) {
- u32 mask = (adapter->que_mask | adapter->link_mask);
-
- E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
- E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
- E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
- ims_mask = E1000_IMS_LSC;
}
-
E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
}
static void
-em_if_disable_intr(if_ctx_t ctx)
+em_if_intr_disable(if_ctx_t ctx)
+{
+ struct adapter *adapter = iflib_get_softc(ctx);
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (hw->mac.type == e1000_82574)
+ E1000_WRITE_REG(hw, EM_EIAC, 0);
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+}
+
+static void
+igb_if_intr_enable(if_ctx_t ctx)
+{
+ struct adapter *adapter = iflib_get_softc(ctx);
+ struct e1000_hw *hw = &adapter->hw;
+ u32 mask;
+
+ if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) {
+ mask = (adapter->que_mask | adapter->link_mask);
+ E1000_WRITE_REG(hw, E1000_EIAC, mask);
+ E1000_WRITE_REG(hw, E1000_EIAM, mask);
+ E1000_WRITE_REG(hw, E1000_EIMS, mask);
+ E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
+ } else
+ E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
+ E1000_WRITE_FLUSH(hw);
+}
+
+static void
+igb_if_intr_disable(if_ctx_t ctx)
{
struct adapter *adapter = iflib_get_softc(ctx);
struct e1000_hw *hw = &adapter->hw;
- if (adapter->intr_type == IFLIB_INTR_MSIX) {
- if (hw->mac.type >= igb_mac_min)
- E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
- E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
+ if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) {
+ E1000_WRITE_REG(hw, E1000_EIMC, 0xffffffff);
+ E1000_WRITE_REG(hw, E1000_EIAC, 0);
}
- E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
+ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+ E1000_WRITE_FLUSH(hw);
}
/*
@@ -4001,13 +4059,7 @@ em_add_hw_stats(struct adapter *adapter)
"Driver dropped packets");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
CTLFLAG_RD, &adapter->link_irq,
- "Link MSIX IRQ Handled");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
- CTLFLAG_RD, &adapter->mbuf_defrag_failed,
- "Defragmenting mbuf chain failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
- CTLFLAG_RD, &adapter->no_tx_dma_setup,
- "Driver tx dma failure in xmit");
+ "Link MSI-X IRQ Handled");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
CTLFLAG_RD, &adapter->rx_overruns,
"RX overruns");
@@ -4518,7 +4570,7 @@ em_print_debug_info(struct adapter *adapter)
/*
* 82574 only:
- * Write a new value to the EEPROM increasing the number of MSIX
+ * Write a new value to the EEPROM increasing the number of MSI-X
* vectors from 3 to 5, for proper multiqueue support.
*/
static void
@@ -4530,10 +4582,11 @@ em_enable_vectors_82574(if_ctx_t ctx)
u16 edata;
e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
- printf("Current cap: %#06x\n", edata);
+ if (bootverbose)
+ device_printf(dev, "EM_NVM_PCIE_CTRL = %#06x\n", edata);
if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
device_printf(dev, "Writing to eeprom: increasing "
- "reported MSIX vectors from 3 to 5...\n");
+ "reported MSI-X vectors from 3 to 5...\n");
edata &= ~(EM_NVM_MSIX_N_MASK);
edata |= 4 << EM_NVM_MSIX_N_SHIFT;
e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h
index d573107b..392f6b3a 100644
--- a/freebsd/sys/dev/e1000/if_em.h
+++ b/freebsd/sys/dev/e1000/if_em.h
@@ -352,8 +352,8 @@
/*
* 82574 has a nonstandard address for EIAC
- * and since its only used in MSIX, and in
- * the em driver only 82574 uses MSIX we can
+ * and since its only used in MSI-X, and in
+ * the em driver only 82574 uses MSI-X we can
* solve it just using this define.
*/
#define EM_EIAC 0x000DC
@@ -468,7 +468,6 @@ struct adapter {
struct resource *memory;
struct resource *flash;
struct resource *ioport;
- int io_rid;
struct resource *res;
void *tag;
@@ -520,7 +519,6 @@ struct adapter {
u64 que_mask;
-
struct em_int_delay_info tx_int_delay;
struct em_int_delay_info tx_abs_int_delay;
struct em_int_delay_info rx_int_delay;
@@ -530,9 +528,6 @@ struct adapter {
/* Misc stats maintained by the driver */
unsigned long dropped_pkts;
unsigned long link_irq;
- unsigned long mbuf_defrag_failed;
- unsigned long no_tx_dma_setup;
- unsigned long no_tx_map_avail;
unsigned long rx_overruns;
unsigned long watchdog_events;
diff --git a/freebsd/sys/dev/e1000/igb_txrx.c b/freebsd/sys/dev/e1000/igb_txrx.c
index c54315f0..6da52b7e 100644
--- a/freebsd/sys/dev/e1000/igb_txrx.c
+++ b/freebsd/sys/dev/e1000/igb_txrx.c
@@ -334,16 +334,11 @@ igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
prev = txr->tx_cidx_processed;
ntxd = scctx->isc_ntxd[0];
do {
+ MPASS(prev != cur);
delta = (int32_t)cur - (int32_t)prev;
- /*
- * XXX This appears to be a hack for first-packet.
- * A correct fix would prevent prev == cur in the first place.
- */
- MPASS(prev == 0 || delta != 0);
- if (prev == 0 && cur == 0)
- delta += 1;
if (delta < 0)
delta += ntxd;
+ MPASS(delta > 0);
processed += delta;
prev = cur;
diff --git a/freebsd/sys/dev/evdev/evdev.c b/freebsd/sys/dev/evdev/evdev.c
index 63e651a2..90d6423d 100644
--- a/freebsd/sys/dev/evdev/evdev.c
+++ b/freebsd/sys/dev/evdev/evdev.c
@@ -75,14 +75,16 @@ int evdev_rcpt_mask = EVDEV_RCPT_HW_MOUSE | EVDEV_RCPT_HW_KBD;
#endif /* __rtems__ */
int evdev_sysmouse_t_axis = 0;
-#ifdef EVDEV_SUPPORT
SYSCTL_NODE(_kern, OID_AUTO, evdev, CTLFLAG_RW, 0, "Evdev args");
+#ifdef EVDEV_SUPPORT
SYSCTL_INT(_kern_evdev, OID_AUTO, rcpt_mask, CTLFLAG_RW, &evdev_rcpt_mask, 0,
"Who is receiving events: bit0 - sysmouse, bit1 - kbdmux, "
"bit2 - mouse hardware, bit3 - keyboard hardware");
SYSCTL_INT(_kern_evdev, OID_AUTO, sysmouse_t_axis, CTLFLAG_RW,
&evdev_sysmouse_t_axis, 0, "Extract T-axis from 0-none, 1-ums, 2-psm");
#endif
+SYSCTL_NODE(_kern_evdev, OID_AUTO, input, CTLFLAG_RD, 0,
+ "Evdev input devices");
static void evdev_start_repeat(struct evdev_dev *, uint16_t);
static void evdev_stop_repeat(struct evdev_dev *);
@@ -202,6 +204,87 @@ evdev_estimate_report_size(struct evdev_dev *evdev)
return (size);
}
+static void
+evdev_sysctl_create(struct evdev_dev *evdev)
+{
+ struct sysctl_oid *ev_sysctl_tree;
+ char ev_unit_str[8];
+
+ snprintf(ev_unit_str, sizeof(ev_unit_str), "%d", evdev->ev_unit);
+ sysctl_ctx_init(&evdev->ev_sysctl_ctx);
+
+ ev_sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&evdev->ev_sysctl_ctx,
+ SYSCTL_STATIC_CHILDREN(_kern_evdev_input), OID_AUTO,
+ ev_unit_str, CTLFLAG_RD, NULL, "", "device index");
+
+ SYSCTL_ADD_STRING(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "name", CTLFLAG_RD,
+ evdev->ev_name, 0,
+ "Input device name");
+
+ SYSCTL_ADD_STRUCT(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "id", CTLFLAG_RD,
+ &evdev->ev_id, input_id,
+ "Input device identification");
+
+ /* ioctl returns ENOENT if phys is not set. sysctl returns "" here */
+ SYSCTL_ADD_STRING(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "phys", CTLFLAG_RD,
+ evdev->ev_shortname, 0,
+ "Input device short name");
+
+ /* ioctl returns ENOENT if uniq is not set. sysctl returns "" here */
+ SYSCTL_ADD_STRING(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "uniq", CTLFLAG_RD,
+ evdev->ev_serial, 0,
+ "Input device unique number");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "props", CTLFLAG_RD,
+ evdev->ev_prop_flags, sizeof(evdev->ev_prop_flags), "",
+ "Input device properties");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "type_bits", CTLFLAG_RD,
+ evdev->ev_type_flags, sizeof(evdev->ev_type_flags), "",
+ "Input device supported events types");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "key_bits", CTLFLAG_RD,
+ evdev->ev_key_flags, sizeof(evdev->ev_key_flags),
+ "", "Input device supported keys");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "rel_bits", CTLFLAG_RD,
+ evdev->ev_rel_flags, sizeof(evdev->ev_rel_flags), "",
+ "Input device supported relative events");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "abs_bits", CTLFLAG_RD,
+ evdev->ev_abs_flags, sizeof(evdev->ev_abs_flags), "",
+ "Input device supported absolute events");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "msc_bits", CTLFLAG_RD,
+ evdev->ev_msc_flags, sizeof(evdev->ev_msc_flags), "",
+ "Input device supported miscellaneous events");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "led_bits", CTLFLAG_RD,
+ evdev->ev_led_flags, sizeof(evdev->ev_led_flags), "",
+ "Input device supported LED events");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "snd_bits", CTLFLAG_RD,
+ evdev->ev_snd_flags, sizeof(evdev->ev_snd_flags), "",
+ "Input device supported sound events");
+
+ SYSCTL_ADD_OPAQUE(&evdev->ev_sysctl_ctx,
+ SYSCTL_CHILDREN(ev_sysctl_tree), OID_AUTO, "sw_bits", CTLFLAG_RD,
+ evdev->ev_sw_flags, sizeof(evdev->ev_sw_flags), "",
+ "Input device supported switch events");
+}
+
static int
evdev_register_common(struct evdev_dev *evdev)
{
@@ -241,6 +324,12 @@ evdev_register_common(struct evdev_dev *evdev)
/* Create char device node */
ret = evdev_cdev_create(evdev);
+ if (ret != 0)
+ goto bail_out;
+
+ /* Create sysctls (for device enumeration without /dev/input access rights) */
+ evdev_sysctl_create(evdev);
+
bail_out:
return (ret);
}
@@ -278,6 +367,8 @@ evdev_unregister(struct evdev_dev *evdev)
debugf(evdev, "%s: unregistered evdev provider: %s\n",
evdev->ev_shortname, evdev->ev_name);
+ sysctl_ctx_free(&evdev->ev_sysctl_ctx);
+
EVDEV_LOCK(evdev);
evdev->ev_cdev->si_drv1 = NULL;
/* Wake up sleepers */
diff --git a/freebsd/sys/dev/evdev/evdev_private.h b/freebsd/sys/dev/evdev/evdev_private.h
index 71bdecaa..d7f0b4ea 100644
--- a/freebsd/sys/dev/evdev/evdev_private.h
+++ b/freebsd/sys/dev/evdev/evdev_private.h
@@ -32,9 +32,12 @@
#include <sys/bitstring.h>
#include <sys/kbio.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/selinfo.h>
+#include <sys/sysctl.h>
#include <dev/evdev/evdev.h>
#include <dev/evdev/input.h>
@@ -132,6 +135,9 @@ struct evdev_dev
const struct evdev_methods * ev_methods;
void * ev_softc;
+ /* Sysctl: */
+ struct sysctl_ctx_list ev_sysctl_ctx;
+
LIST_ENTRY(evdev_dev) ev_link;
LIST_HEAD(, evdev_client) ev_clients;
};
diff --git a/freebsd/sys/dev/extres/clk/clk.h b/freebsd/sys/dev/extres/clk/clk.h
index 617cd5e7..7e9fe4eb 100644
--- a/freebsd/sys/dev/extres/clk/clk.h
+++ b/freebsd/sys/dev/extres/clk/clk.h
@@ -48,6 +48,7 @@
#define CLK_SET_ROUND_EXACT 0
#define CLK_SET_ROUND_UP 0x00000001
#define CLK_SET_ROUND_DOWN 0x00000002
+#define CLK_SET_ROUND_MULTIPLE 0x00000004
#define CLK_SET_ROUND_ANY (CLK_SET_ROUND_UP | CLK_SET_ROUND_DOWN)
#define CLK_SET_USER_MASK 0x0000FFFF
diff --git a/freebsd/sys/dev/fb/fbd.c b/freebsd/sys/dev/fb/fbd.c
index 871e193c..56f3605d 100644
--- a/freebsd/sys/dev/fb/fbd.c
+++ b/freebsd/sys/dev/fb/fbd.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
diff --git a/freebsd/sys/dev/fdt/fdt_common.c b/freebsd/sys/dev/fdt/fdt_common.c
index ff32dc0a..12979fda 100644
--- a/freebsd/sys/dev/fdt/fdt_common.c
+++ b/freebsd/sys/dev/fdt/fdt_common.c
@@ -401,6 +401,9 @@ fdt_get_phyaddr(phandle_t node, device_t dev, int *phy_addr, void **phy_sc)
*phy_addr = phy_reg;
+ if (phy_sc == NULL)
+ return (0);
+
/*
* Search for softc used to communicate with phy.
*/
diff --git a/freebsd/sys/dev/gpio/gpiobus.c b/freebsd/sys/dev/gpio/gpiobus.c
index 2b1899e6..d256ee4a 100644
--- a/freebsd/sys/dev/gpio/gpiobus.c
+++ b/freebsd/sys/dev/gpio/gpiobus.c
@@ -257,13 +257,6 @@ gpiobus_alloc_ivars(struct gpiobus_ivar *devi)
M_NOWAIT | M_ZERO);
if (devi->pins == NULL)
return (ENOMEM);
- devi->flags = malloc(sizeof(uint32_t) * devi->npins, M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (devi->flags == NULL) {
- free(devi->pins, M_DEVBUF);
- return (ENOMEM);
- }
-
return (0);
}
@@ -271,14 +264,11 @@ void
gpiobus_free_ivars(struct gpiobus_ivar *devi)
{
- if (devi->flags) {
- free(devi->flags, M_DEVBUF);
- devi->flags = NULL;
- }
if (devi->pins) {
free(devi->pins, M_DEVBUF);
devi->pins = NULL;
}
+ devi->npins = 0;
}
int
@@ -328,6 +318,34 @@ gpiobus_release_pin(device_t bus, uint32_t pin)
}
static int
+gpiobus_acquire_child_pins(device_t dev, device_t child)
+{
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+ int i;
+
+ for (i = 0; i < devi->npins; i++) {
+ /* Reserve the GPIO pin. */
+ if (gpiobus_acquire_pin(dev, devi->pins[i]) != 0) {
+ device_printf(child, "cannot acquire pin %d\n",
+ devi->pins[i]);
+ while (--i >= 0) {
+ (void)gpiobus_release_pin(dev,
+ devi->pins[i]);
+ }
+ gpiobus_free_ivars(devi);
+ return (EBUSY);
+ }
+ }
+ for (i = 0; i < devi->npins; i++) {
+ /* Use the child name as pin name. */
+ GPIOBUS_PIN_SETNAME(dev, devi->pins[i],
+ device_get_nameunit(child));
+
+ }
+ return (0);
+}
+
+static int
gpiobus_parse_pins(struct gpiobus_softc *sc, device_t child, int mask)
{
struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
@@ -351,17 +369,66 @@ gpiobus_parse_pins(struct gpiobus_softc *sc, device_t child, int mask)
for (i = 0; i < 32; i++) {
if ((mask & (1 << i)) == 0)
continue;
- /* Reserve the GPIO pin. */
- if (gpiobus_acquire_pin(sc->sc_busdev, i) != 0) {
- gpiobus_free_ivars(devi);
- return (EINVAL);
- }
devi->pins[npins++] = i;
- /* Use the child name as pin name. */
- GPIOBUS_PIN_SETNAME(sc->sc_busdev, i,
- device_get_nameunit(child));
}
+ if (gpiobus_acquire_child_pins(sc->sc_busdev, child) != 0)
+ return (EINVAL);
+ return (0);
+}
+
+static int
+gpiobus_parse_pin_list(struct gpiobus_softc *sc, device_t child,
+ const char *pins)
+{
+ struct gpiobus_ivar *devi = GPIOBUS_IVAR(child);
+ const char *p;
+ char *endp;
+ unsigned long pin;
+ int i, npins;
+
+ npins = 0;
+ p = pins;
+ for (;;) {
+ pin = strtoul(p, &endp, 0);
+ if (endp == p)
+ break;
+ npins++;
+ if (*endp == '\0')
+ break;
+ p = endp + 1;
+ }
+
+ if (*endp != '\0') {
+ device_printf(child, "garbage in the pin list: %s\n", endp);
+ return (EINVAL);
+ }
+ if (npins == 0) {
+ device_printf(child, "empty pin list\n");
+ return (EINVAL);
+ }
+
+ devi->npins = npins;
+ if (gpiobus_alloc_ivars(devi) != 0) {
+ device_printf(child, "cannot allocate device ivars\n");
+ return (EINVAL);
+ }
+
+ i = 0;
+ p = pins;
+ for (;;) {
+ pin = strtoul(p, &endp, 0);
+
+ devi->pins[i] = pin;
+
+ if (*endp == '\0')
+ break;
+ i++;
+ p = endp + 1;
+ }
+
+ if (gpiobus_acquire_child_pins(sc->sc_busdev, child) != 0)
+ return (EINVAL);
return (0);
}
@@ -541,15 +608,26 @@ gpiobus_hinted_child(device_t bus, const char *dname, int dunit)
struct gpiobus_softc *sc = GPIOBUS_SOFTC(bus);
struct gpiobus_ivar *devi;
device_t child;
- int irq, pins;
+ const char *pins;
+ int irq, pinmask;
child = BUS_ADD_CHILD(bus, 0, dname, dunit);
devi = GPIOBUS_IVAR(child);
- resource_int_value(dname, dunit, "pins", &pins);
- if (gpiobus_parse_pins(sc, child, pins)) {
- resource_list_free(&devi->rl);
- free(devi, M_DEVBUF);
- device_delete_child(bus, child);
+ if (resource_int_value(dname, dunit, "pins", &pinmask) == 0) {
+ if (gpiobus_parse_pins(sc, child, pinmask)) {
+ resource_list_free(&devi->rl);
+ free(devi, M_DEVBUF);
+ device_delete_child(bus, child);
+ return;
+ }
+ }
+ else if (resource_string_value(dname, dunit, "pin_list", &pins) == 0) {
+ if (gpiobus_parse_pin_list(sc, child, pins)) {
+ resource_list_free(&devi->rl);
+ free(devi, M_DEVBUF);
+ device_delete_child(bus, child);
+ return;
+ }
}
if (resource_int_value(dname, dunit, "irq", &irq) == 0) {
if (bus_set_resource(child, SYS_RES_IRQ, 0, irq, 1) != 0)
@@ -576,6 +654,61 @@ gpiobus_set_resource(device_t dev, device_t child, int type, int rid,
return (0);
}
+static int
+gpiobus_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
+{
+ struct gpiobus_ivar *devi;
+
+ devi = GPIOBUS_IVAR(child);
+ switch (which) {
+ case GPIOBUS_IVAR_NPINS:
+ *result = devi->npins;
+ break;
+ case GPIOBUS_IVAR_PINS:
+ /* Children do not ever need to directly examine this. */
+ return (ENOTSUP);
+ default:
+ return (ENOENT);
+ }
+
+ return (0);
+}
+
+static int
+gpiobus_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
+{
+ struct gpiobus_ivar *devi;
+ const uint32_t *ptr;
+ int i;
+
+ devi = GPIOBUS_IVAR(child);
+ switch (which) {
+ case GPIOBUS_IVAR_NPINS:
+ /* GPIO ivars are set once. */
+ if (devi->npins != 0) {
+ return (EBUSY);
+ }
+ devi->npins = value;
+ if (gpiobus_alloc_ivars(devi) != 0) {
+ device_printf(child, "cannot allocate device ivars\n");
+ devi->npins = 0;
+ return (ENOMEM);
+ }
+ break;
+ case GPIOBUS_IVAR_PINS:
+ ptr = (const uint32_t *)value;
+ for (i = 0; i < devi->npins; i++)
+ devi->pins[i] = ptr[i];
+ if (gpiobus_acquire_child_pins(dev, child) != 0)
+ return (EBUSY);
+ break;
+ default:
+ return (ENOENT);
+ }
+
+ return (0);
+}
+
static struct resource *
gpiobus_alloc_resource(device_t bus, device_t child, int type, int *rid,
rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
@@ -835,6 +968,8 @@ static device_method_t gpiobus_methods[] = {
DEVMETHOD(bus_child_pnpinfo_str, gpiobus_child_pnpinfo_str),
DEVMETHOD(bus_child_location_str, gpiobus_child_location_str),
DEVMETHOD(bus_hinted_child, gpiobus_hinted_child),
+ DEVMETHOD(bus_read_ivar, gpiobus_read_ivar),
+ DEVMETHOD(bus_write_ivar, gpiobus_write_ivar),
/* GPIO protocol */
DEVMETHOD(gpiobus_acquire_bus, gpiobus_acquire_bus),
diff --git a/freebsd/sys/dev/gpio/gpiobusvar.h b/freebsd/sys/dev/gpio/gpiobusvar.h
index 29677298..3ba8993e 100644
--- a/freebsd/sys/dev/gpio/gpiobusvar.h
+++ b/freebsd/sys/dev/gpio/gpiobusvar.h
@@ -107,10 +107,22 @@ struct gpiobus_ivar
{
struct resource_list rl; /* isr resource list */
uint32_t npins; /* pins total */
- uint32_t *flags; /* pins flags */
uint32_t *pins; /* pins map */
};
+enum gpiobus_ivars {
+ GPIOBUS_IVAR_NPINS = 10500,
+ GPIOBUS_IVAR_PINS,
+};
+
+#define GPIOBUS_ACCESSOR(var, ivar, type) \
+ __BUS_ACCESSOR(gpiobus, var, GPIOBUS, ivar, type)
+
+GPIOBUS_ACCESSOR(npins, NPINS, uint32_t)
+GPIOBUS_ACCESSOR(pins, PINS, const uint32_t *)
+
+#undef GPIOBUS_ACCESSOR
+
#ifdef FDT
struct ofw_gpiobus_devinfo {
struct gpiobus_ivar opd_dinfo;
diff --git a/freebsd/sys/dev/gpio/ofw_gpiobus.c b/freebsd/sys/dev/gpio/ofw_gpiobus.c
index ac0ea9bf..1cf3aa82 100644
--- a/freebsd/sys/dev/gpio/ofw_gpiobus.c
+++ b/freebsd/sys/dev/gpio/ofw_gpiobus.c
@@ -327,10 +327,8 @@ ofw_gpiobus_setup_devinfo(device_t bus, device_t child, phandle_t node)
ofw_gpiobus_destroy_devinfo(bus, dinfo);
return (NULL);
}
- for (i = 0; i < devi->npins; i++) {
- devi->flags[i] = pins[i].flags;
+ for (i = 0; i < devi->npins; i++)
devi->pins[i] = pins[i].pin;
- }
free(pins, M_DEVBUF);
/* Parse the interrupt resources. */
if (ofw_bus_intr_to_rl(bus, node, &dinfo->opd_dinfo.rl, NULL) != 0) {
diff --git a/freebsd/sys/dev/iicbus/iicbus.c b/freebsd/sys/dev/iicbus/iicbus.c
index d1e55483..db8c8d92 100644
--- a/freebsd/sys/dev/iicbus/iicbus.c
+++ b/freebsd/sys/dev/iicbus/iicbus.c
@@ -196,9 +196,6 @@ iicbus_read_ivar(device_t bus, device_t child, int which, uintptr_t *result)
case IICBUS_IVAR_ADDR:
*result = devi->addr;
break;
- case IICBUS_IVAR_NOSTOP:
- *result = devi->nostop;
- break;
}
return (0);
}
@@ -215,9 +212,6 @@ iicbus_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
if (devi->addr != 0)
return (EINVAL);
devi->addr = value;
- case IICBUS_IVAR_NOSTOP:
- devi->nostop = value;
- break;
}
return (0);
}
diff --git a/freebsd/sys/dev/iicbus/iicbus.h b/freebsd/sys/dev/iicbus/iicbus.h
index 503305c7..c6382b63 100644
--- a/freebsd/sys/dev/iicbus/iicbus.h
+++ b/freebsd/sys/dev/iicbus/iicbus.h
@@ -41,6 +41,7 @@ struct iicbus_softc
{
device_t dev; /* Myself */
device_t owner; /* iicbus owner device structure */
+ device_t busydev; /* iicbus_release_bus calls unbusy on this */
u_int owncount; /* iicbus ownership nesting count */
u_char started; /* address of the 'started' slave
* 0 if no start condition succeeded */
@@ -54,24 +55,27 @@ struct iicbus_ivar
{
uint32_t addr;
struct resource_list rl;
- bool nostop;
};
enum {
- IICBUS_IVAR_ADDR, /* Address or base address */
- IICBUS_IVAR_NOSTOP, /* nostop defaults */
+ IICBUS_IVAR_ADDR /* Address or base address */
};
#define IICBUS_ACCESSOR(A, B, T) \
__BUS_ACCESSOR(iicbus, A, IICBUS, B, T)
IICBUS_ACCESSOR(addr, ADDR, uint32_t)
-IICBUS_ACCESSOR(nostop, NOSTOP, bool)
#define IICBUS_LOCK(sc) mtx_lock(&(sc)->lock)
#define IICBUS_UNLOCK(sc) mtx_unlock(&(sc)->lock)
#define IICBUS_ASSERT_LOCKED(sc) mtx_assert(&(sc)->lock, MA_OWNED)
+#ifdef FDT
+#define IICBUS_FDT_PNP_INFO(t) FDTCOMPAT_PNP_INFO(t, iicbus)
+#else
+#define IICBUS_FDT_PNP_INFO(t)
+#endif
+
int iicbus_generic_intr(device_t dev, int event, char *buf);
void iicbus_init_frequency(device_t dev, u_int bus_freq);
diff --git a/freebsd/sys/dev/iicbus/iiconf.c b/freebsd/sys/dev/iicbus/iiconf.c
index afdce118..c4926852 100644
--- a/freebsd/sys/dev/iicbus/iiconf.c
+++ b/freebsd/sys/dev/iicbus/iiconf.c
@@ -44,6 +44,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/iicbus_if.h>
/*
+ * Encode a system errno value into the IIC_Exxxxx space by setting the
+ * IIC_ERRNO marker bit, so that iic2errno() can turn it back into a plain
+ * system errno value later. This lets controller- and bus-layer code get
+ * important system errno values (such as EINTR/ERESTART) back to the caller.
+ */
+int
+errno2iic(int error)
+{
+ return ((error == 0) ? 0 : error | IIC_ERRNO);
+}
+
+/*
* Translate IIC_Exxxxx status values to vaguely-equivelent errno values.
*/
int
@@ -61,7 +73,22 @@ iic2errno(int iic_status)
case IIC_ENOTSUPP: return (EOPNOTSUPP);
case IIC_ENOADDR: return (EADDRNOTAVAIL);
case IIC_ERESOURCE: return (ENOMEM);
- default: return (EIO);
+ default:
+ /*
+ * If the high bit is set, that means it's a system errno value
+ * that was encoded into the IIC_Exxxxxx space by setting the
+ * IIC_ERRNO marker bit. If lots of high-order bits are set,
+ * then it's one of the negative pseudo-errors such as ERESTART
+ * and we return it as-is. Otherwise it's a plain "small
+ * positive integer" errno, so just remove the IIC_ERRNO marker
+ * bit. If it's some unknown number without the high bit set,
+ * there isn't much we can do except call it an I/O error.
+ */
+ if ((iic_status & IIC_ERRNO) == 0)
+ return (EIO);
+ if ((iic_status & 0xFFFF0000) != 0)
+ return (iic_status);
+ return (iic_status & ~IIC_ERRNO);
}
}
@@ -99,7 +126,7 @@ iicbus_poll(struct iicbus_softc *sc, int how)
return (IIC_EBUSBSY);
}
- return (error);
+ return (errno2iic(error));
}
/*
@@ -130,6 +157,18 @@ iicbus_request_bus(device_t bus, device_t dev, int how)
++sc->owncount;
if (sc->owner == NULL) {
sc->owner = dev;
+ /*
+ * Mark the device busy while it owns the bus, to
+ * prevent detaching the device, bus, or hardware
+ * controller, until ownership is relinquished. If the
+ * device is doing IO from its probe method before
+ * attaching, it cannot be busied; mark the bus busy.
+ */
+ if (device_get_state(dev) < DS_ATTACHING)
+ sc->busydev = bus;
+ else
+ sc->busydev = dev;
+ device_busy(sc->busydev);
/*
* Drop the lock around the call to the bus driver, it
* should be allowed to sleep in the IIC_WAIT case.
@@ -146,6 +185,7 @@ iicbus_request_bus(device_t bus, device_t dev, int how)
sc->owner = NULL;
sc->owncount = 0;
wakeup_one(sc);
+ device_unbusy(sc->busydev);
}
}
}
@@ -179,6 +219,7 @@ iicbus_release_bus(device_t bus, device_t dev)
IICBUS_LOCK(sc);
sc->owner = NULL;
wakeup_one(sc);
+ device_unbusy(sc->busydev);
}
IICBUS_UNLOCK(sc);
return (0);
@@ -422,7 +463,7 @@ iicbus_transfer_gen(device_t dev, struct iic_msg *msgs, uint32_t nmsgs)
{
int i, error, lenread, lenwrote, nkid, rpstart, addr;
device_t *children, bus;
- bool nostop, started;
+ bool started;
if ((error = device_get_children(dev, &children, &nkid)) != 0)
return (IIC_ERESOURCE);
@@ -433,7 +474,6 @@ iicbus_transfer_gen(device_t dev, struct iic_msg *msgs, uint32_t nmsgs)
bus = children[0];
rpstart = 0;
free(children, M_TEMP);
- nostop = iicbus_get_nostop(dev);
started = false;
for (i = 0, error = 0; i < nmsgs && error == 0; i++) {
addr = msgs[i].slave;
@@ -461,12 +501,11 @@ iicbus_transfer_gen(device_t dev, struct iic_msg *msgs, uint32_t nmsgs)
if (error != 0)
break;
- if ((msgs[i].flags & IIC_M_NOSTOP) != 0 ||
- (nostop && i + 1 < nmsgs)) {
- rpstart = 1; /* Next message gets repeated start */
- } else {
+ if (!(msgs[i].flags & IIC_M_NOSTOP)) {
rpstart = 0;
iicbus_stop(bus);
+ } else {
+ rpstart = 1; /* Next message gets repeated start */
}
}
if (error != 0 && started)
diff --git a/freebsd/sys/dev/iicbus/iiconf.h b/freebsd/sys/dev/iicbus/iiconf.h
index c264183e..5fbfcea5 100644
--- a/freebsd/sys/dev/iicbus/iiconf.h
+++ b/freebsd/sys/dev/iicbus/iiconf.h
@@ -96,12 +96,14 @@
#define IIC_ENOTSUPP 0x8 /* request not supported */
#define IIC_ENOADDR 0x9 /* no address assigned to the interface */
#define IIC_ERESOURCE 0xa /* resources (memory, whatever) unavailable */
+#define IIC_ERRNO INT_MIN /* marker bit: errno is in low-order bits */
/*
* Note that all iicbus functions return IIC_Exxxxx status values,
* except iic2errno() (obviously) and iicbus_started() (returns bool).
*/
extern int iic2errno(int);
+extern int errno2iic(int);
extern int iicbus_request_bus(device_t, device_t, int);
extern int iicbus_release_bus(device_t, device_t);
extern device_t iicbus_alloc_bus(device_t);
diff --git a/freebsd/sys/dev/led/led.c b/freebsd/sys/dev/led/led.c
index 70de95bb..43ae2f66 100644
--- a/freebsd/sys/dev/led/led.c
+++ b/freebsd/sys/dev/led/led.c
@@ -17,16 +17,19 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/conf.h>
+#include <sys/ctype.h>
#include <sys/kernel.h>
-#include <sys/systm.h>
#include <sys/limits.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/ctype.h>
-#include <sys/sbuf.h>
+#include <sys/mutex.h>
#include <sys/queue.h>
-#include <dev/led/led.h>
-#include <sys/uio.h>
+#include <sys/sbuf.h>
#include <sys/sx.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <dev/led/led.h>
struct ledsc {
LIST_ENTRY(ledsc) list;
@@ -263,7 +266,7 @@ led_set(char const *name, char const *cmd)
mtx_unlock(&led_mtx);
if (sb != NULL)
sbuf_delete(sb);
- return (0);
+ return (error);
}
static struct cdevsw led_cdevsw = {
diff --git a/freebsd/sys/dev/mii/micphy.c b/freebsd/sys/dev/mii/micphy.c
index 01e75357..f9493332 100644
--- a/freebsd/sys/dev/mii/micphy.c
+++ b/freebsd/sys/dev/mii/micphy.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2014 Ruslan Bukin <br@bsdpad.com>
+ * Copyright (c) 2014,2019 Ruslan Bukin <br@bsdpad.com>
* All rights reserved.
*
* This software was developed by SRI International and the University of
@@ -34,7 +34,7 @@
__FBSDID("$FreeBSD$");
/*
- * Micrel KSZ9021 Gigabit Ethernet Transceiver
+ * Micrel KSZ8081/KSZ9021/KSZ9031 Gigabit Ethernet Transceiver
*/
#include <sys/param.h>
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/mii/mii_fdt.h>
#define MII_KSZPHY_EXTREG 0x0b
#define KSZPHY_EXTREG_WRITE (1 << 15)
@@ -253,6 +254,7 @@ micphy_probe(device_t dev)
static int
micphy_attach(device_t dev)
{
+ mii_fdt_phy_config_t *cfg;
struct mii_softc *sc;
phandle_t node;
device_t miibus;
@@ -273,10 +275,12 @@ micphy_attach(device_t dev)
if ((node = ofw_bus_get_node(parent)) == -1)
return (ENXIO);
+ cfg = mii_fdt_get_config(dev);
+
if (sc->mii_mpd_model == MII_MODEL_MICREL_KSZ9031)
- ksz9031_load_values(sc, node);
+ ksz9031_load_values(sc, cfg->phynode);
else
- ksz9021_load_values(sc, node);
+ ksz9021_load_values(sc, cfg->phynode);
return (0);
}
diff --git a/freebsd/sys/dev/mmc/bridge.h b/freebsd/sys/dev/mmc/bridge.h
index 7af811f1..d32abbac 100644
--- a/freebsd/sys/dev/mmc/bridge.h
+++ b/freebsd/sys/dev/mmc/bridge.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/mmc/mmc.c b/freebsd/sys/dev/mmc/mmc.c
index 4c8aefcf..5bc3bbf7 100644
--- a/freebsd/sys/dev/mmc/mmc.c
+++ b/freebsd/sys/dev/mmc/mmc.c
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
* Copyright (c) 2017 Marius Strobl <marius@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/mmc/mmc_private.h b/freebsd/sys/dev/mmc/mmc_private.h
index 633d0784..a633d235 100644
--- a/freebsd/sys/dev/mmc/mmc_private.h
+++ b/freebsd/sys/dev/mmc/mmc_private.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/mmc/mmc_subr.c b/freebsd/sys/dev/mmc/mmc_subr.c
index 76a14028..1e3bef16 100644
--- a/freebsd/sys/dev/mmc/mmc_subr.c
+++ b/freebsd/sys/dev/mmc/mmc_subr.c
@@ -2,7 +2,7 @@
/*-
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/mmc/mmc_subr.h b/freebsd/sys/dev/mmc/mmc_subr.h
index 33ea6760..80c1ce2f 100644
--- a/freebsd/sys/dev/mmc/mmc_subr.h
+++ b/freebsd/sys/dev/mmc/mmc_subr.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/mmc/mmcbrvar.h b/freebsd/sys/dev/mmc/mmcbrvar.h
index acddd3a3..f2f107c7 100644
--- a/freebsd/sys/dev/mmc/mmcbrvar.h
+++ b/freebsd/sys/dev/mmc/mmcbrvar.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/mmc/mmcreg.h b/freebsd/sys/dev/mmc/mmcreg.h
index 4b1f8a0e..f6031410 100644
--- a/freebsd/sys/dev/mmc/mmcreg.h
+++ b/freebsd/sys/dev/mmc/mmcreg.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
* Copyright (c) 2017 Marius Strobl <marius@FreeBSD.org>
* Copyright (c) 2015-2016 Ilya Bakulin <kibab@FreeBSD.org>
*
@@ -200,7 +200,10 @@ struct mmc_data {
#define MMC_DATA_READ (1UL << 1)
#define MMC_DATA_STREAM (1UL << 2)
#define MMC_DATA_MULTI (1UL << 3)
+#define MMC_DATA_BLOCK_SIZE (1UL << 4)
struct mmc_request *mrq;
+ size_t block_size; /* block size for CMD53 */
+ size_t block_count; /* block count for CMD53 */
};
struct mmc_request {
@@ -554,30 +557,39 @@ struct mmc_request {
#define SD_IO_RW_LEN(x) (((x) & 0xFF) << 0)
#define SD_IOE_RW_LEN(x) (((x) & 0x1FF) << 0)
+#define SD_IOE_RW_ADR(x) (((x) & 0x1FFFF) << 9)
+#define SD_IOE_RW_INCR (1u << 26)
#define SD_IOE_RW_BLK (1u << 27)
+#define SD_IOE_RW_FUNC(x) (((x) & 0x7) << 28)
+#define SD_IOE_RW_WR (1u << 31)
/* Card Common Control Registers (CCCR) */
-#define SD_IO_CCCR_START 0x00000
-#define SD_IO_CCCR_SIZE 0x100
-#define SD_IO_CCCR_FN_ENABLE 0x02
-#define SD_IO_CCCR_FN_READY 0x03
-#define SD_IO_CCCR_INT_ENABLE 0x04
-#define SD_IO_CCCR_INT_PENDING 0x05
-#define SD_IO_CCCR_CTL 0x06
-#define CCCR_CTL_RES (1 << 3)
-#define SD_IO_CCCR_BUS_WIDTH 0x07
+#define SD_IO_CCCR_START 0x00000 /* Offset in F0 address space */
+#define SD_IO_CCCR_SIZE 0x100 /* Total size of CCCR */
+#define SD_IO_CCCR_FN_ENABLE 0x02 /* Enabled functions */
+#define SD_IO_CCCR_FN_READY 0x03 /* Function ready status */
+#define SD_IO_CCCR_INT_ENABLE 0x04 /* Per-function interrupt enable */
+#define SD_IO_CCCR_INT_PENDING 0x05 /* Per-function interrupt pending */
+#define SD_IO_CCCR_CTL 0x06 /* I/O Abort register */
+#define CCCR_CTL_RES (1 << 3) /* Perform SDIO reset */
+#define SD_IO_CCCR_BUS_WIDTH 0x07 /* Bus Width register */
#define CCCR_BUS_WIDTH_4 (1 << 1)
#define CCCR_BUS_WIDTH_1 (1 << 0)
-#define SD_IO_CCCR_CARDCAP 0x08
-#define SD_IO_CCCR_CISPTR 0x09 /* XXX 9-10, 10-11, or 9-12 */
-
+#define SD_IO_CCCR_CARDCAP 0x08 /* SDIO card capabilities */
+#define CCCR_CC_SMB (1 << 1) /* CMD53 block mode support */
+#define SD_IO_CCCR_CISPTR 0x09 /* 0x09 - 0x0B */
+#define SD_IO_CCCR_FN0_BLKSZ 0x10 /* 0x10 - 0x11 */
/* Function Basic Registers (FBR) */
-#define SD_IO_FBR_START 0x00100
-#define SD_IO_FBR_SIZE 0x00700
+#define SD_IO_FBR_START 0x00100 /* Offset in F0 address space */
+#define SD_IO_FBR_SIZE 0x00700 /* Total size of FBR */
+#define SD_IO_FBR_F_SIZE 0x00100 /* Size of each function */
+#define SD_IO_FBR_START_F(n) (SD_IO_FBR_START + (n-1) * SD_IO_FBR_F_SIZE)
+#define SD_IO_FBR_CIS_OFFSET 0x9 /* Offset of this function's info block within CIS area */
+#define SD_IO_FBR_IOBLKSZ 0x10 /* Block size for CMD53 block mode operations */
/* Card Information Structure (CIS) */
-#define SD_IO_CIS_START 0x01000
-#define SD_IO_CIS_SIZE 0x17000
+#define SD_IO_CIS_START 0x01000 /* Offset in F0 address space */
+#define SD_IO_CIS_SIZE 0x17000 /* Total size of CIS */
/* CIS tuple codes (based on PC Card 16) */
#define SD_IO_CISTPL_VERS_1 0x15
diff --git a/freebsd/sys/dev/mmc/mmcsd.c b/freebsd/sys/dev/mmc/mmcsd.c
index e469c1d5..8fc06eb2 100644
--- a/freebsd/sys/dev/mmc/mmcsd.c
+++ b/freebsd/sys/dev/mmc/mmcsd.c
@@ -4,7 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
* Copyright (c) 2017 Marius Strobl <marius@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
@@ -813,7 +813,7 @@ mmcsd_add_part(struct mmcsd_softc *sc, u_int type, const char *name, u_int cnt,
speed / 1000000, (speed / 100000) % 10,
mmcsd_bus_bit_width(dev), sc->max_data);
} else if (type == EXT_CSD_PART_CONFIG_ACC_RPMB) {
- printf("%s: %ju%sB partion %d%s at %s\n", part->name, bytes,
+ printf("%s: %ju%sB partition %d%s at %s\n", part->name, bytes,
unit, type, ro ? " (read-only)" : "",
device_get_nameunit(dev));
} else {
@@ -849,12 +849,12 @@ mmcsd_add_part(struct mmcsd_softc *sc, u_int type, const char *name, u_int cnt,
}
}
if (ext == NULL)
- printf("%s%d: %ju%sB partion %d%s%s at %s\n",
+ printf("%s%d: %ju%sB partition %d%s%s at %s\n",
part->name, cnt, bytes, unit, type, enh ?
" enhanced" : "", ro ? " (read-only)" : "",
device_get_nameunit(dev));
else
- printf("%s%d: %ju%sB partion %d extended 0x%x "
+ printf("%s%d: %ju%sB partition %d extended 0x%x "
"(%s)%s at %s\n", part->name, cnt, bytes, unit,
type, extattr, ext, ro ? " (read-only)" : "",
device_get_nameunit(dev));
diff --git a/freebsd/sys/dev/mmc/mmcvar.h b/freebsd/sys/dev/mmc/mmcvar.h
index 1604c306..8d8c5547 100644
--- a/freebsd/sys/dev/mmc/mmcvar.h
+++ b/freebsd/sys/dev/mmc/mmcvar.h
@@ -2,7 +2,7 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 Bernd Walter. All rights reserved.
- * Copyright (c) 2006 M. Warner Losh. All rights reserved.
+ * Copyright (c) 2006 M. Warner Losh.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/dev/nvme/nvme.h b/freebsd/sys/dev/nvme/nvme.h
index 845ba75b..0d4a18b7 100644
--- a/freebsd/sys/dev/nvme/nvme.h
+++ b/freebsd/sys/dev/nvme/nvme.h
@@ -40,6 +40,7 @@
#define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command)
#define NVME_RESET_CONTROLLER _IO('n', 1)
+#define NVME_GET_NSID _IOR('n', 2, struct nvme_get_nsid)
#define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test)
#define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test)
@@ -69,15 +70,39 @@
#define NVME_CAP_LO_REG_AMS_MASK (0x3)
#define NVME_CAP_LO_REG_TO_SHIFT (24)
#define NVME_CAP_LO_REG_TO_MASK (0xFF)
+#define NVME_CAP_LO_MQES(x) \
+ (((x) >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK)
+#define NVME_CAP_LO_CQR(x) \
+ (((x) >> NVME_CAP_LO_REG_CQR_SHIFT) & NVME_CAP_LO_REG_CQR_MASK)
+#define NVME_CAP_LO_AMS(x) \
+ (((x) >> NVME_CAP_LO_REG_AMS_SHIFT) & NVME_CAP_LO_REG_AMS_MASK)
+#define NVME_CAP_LO_TO(x) \
+ (((x) >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK)
#define NVME_CAP_HI_REG_DSTRD_SHIFT (0)
#define NVME_CAP_HI_REG_DSTRD_MASK (0xF)
+#define NVME_CAP_HI_REG_NSSRS_SHIFT (4)
+#define NVME_CAP_HI_REG_NSSRS_MASK (0x1)
#define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5)
#define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1)
+#define NVME_CAP_HI_REG_BPS_SHIFT (13)
+#define NVME_CAP_HI_REG_BPS_MASK (0x1)
#define NVME_CAP_HI_REG_MPSMIN_SHIFT (16)
#define NVME_CAP_HI_REG_MPSMIN_MASK (0xF)
#define NVME_CAP_HI_REG_MPSMAX_SHIFT (20)
#define NVME_CAP_HI_REG_MPSMAX_MASK (0xF)
+#define NVME_CAP_HI_REG_PMRS_SHIFT (24)
+#define NVME_CAP_HI_REG_PMRS_MASK (0x1)
+#define NVME_CAP_HI_REG_CMBS_SHIFT (25)
+#define NVME_CAP_HI_REG_CMBS_MASK (0x1)
+#define NVME_CAP_HI_DSTRD(x) \
+ (((x) >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK)
+#define NVME_CAP_HI_CSS_NVM(x) \
+ (((x) >> NVME_CAP_HI_REG_CSS_NVM_SHIFT) & NVME_CAP_HI_REG_CSS_NVM_MASK)
+#define NVME_CAP_HI_MPSMIN(x) \
+ (((x) >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & NVME_CAP_HI_REG_MPSMIN_MASK)
+#define NVME_CAP_HI_MPSMAX(x) \
+ (((x) >> NVME_CAP_HI_REG_MPSMAX_SHIFT) & NVME_CAP_HI_REG_MPSMAX_MASK)
#define NVME_CC_REG_EN_SHIFT (0)
#define NVME_CC_REG_EN_MASK (0x1)
@@ -100,6 +125,10 @@
#define NVME_CSTS_REG_CFS_MASK (0x1)
#define NVME_CSTS_REG_SHST_SHIFT (2)
#define NVME_CSTS_REG_SHST_MASK (0x3)
+#define NVME_CSTS_REG_NVSRO_SHIFT (4)
+#define NVME_CSTS_REG_NVSRO_MASK (0x1)
+#define NVME_CSTS_REG_PP_SHIFT (5)
+#define NVME_CSTS_REG_PP_MASK (0x1)
#define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK)
@@ -119,6 +148,8 @@
#define NVME_STATUS_SC_MASK (0xFF)
#define NVME_STATUS_SCT_SHIFT (9)
#define NVME_STATUS_SCT_MASK (0x7)
+#define NVME_STATUS_CRD_SHIFT (12)
+#define NVME_STATUS_CRD_MASK (0x3)
#define NVME_STATUS_M_SHIFT (14)
#define NVME_STATUS_M_MASK (0x1)
#define NVME_STATUS_DNR_SHIFT (15)
@@ -159,6 +190,9 @@
/* SR-IOV Virtual Function */
#define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT (2)
#define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK (0x1)
+/* Asymmetric Namespace Access Reporting */
+#define NVME_CTRLR_DATA_MIC_ANAR_SHIFT (3)
+#define NVME_CTRLR_DATA_MIC_ANAR_MASK (0x1)
/** OACS - optional admin command support */
/* supports security send/receive commands */
@@ -188,6 +222,9 @@
/* supports Doorbell Buffer Config */
#define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT (8)
#define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK (0x1)
+/* supports Get LBA Status */
+#define NVME_CTRLR_DATA_OACS_GETLBA_SHIFT (9)
+#define NVME_CTRLR_DATA_OACS_GETLBA_MASK (0x1)
/** firmware updates */
/* first slot is read-only */
@@ -196,6 +233,9 @@
/* number of firmware slots */
#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1)
#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7)
+/* firmware activation without reset */
+#define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_SHIFT (4)
+#define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_MASK (0x1)
/** log page attributes */
/* per namespace smart/health log page */
@@ -212,6 +252,26 @@
#define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0)
#define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1)
+/** Sanitize Capabilities */
+/* Crypto Erase Support */
+#define NVME_CTRLR_DATA_SANICAP_CES_SHIFT (0)
+#define NVME_CTRLR_DATA_SANICAP_CES_MASK (0x1)
+/* Block Erase Support */
+#define NVME_CTRLR_DATA_SANICAP_BES_SHIFT (1)
+#define NVME_CTRLR_DATA_SANICAP_BES_MASK (0x1)
+/* Overwrite Support */
+#define NVME_CTRLR_DATA_SANICAP_OWS_SHIFT (2)
+#define NVME_CTRLR_DATA_SANICAP_OWS_MASK (0x1)
+/* No-Deallocate Inhibited */
+#define NVME_CTRLR_DATA_SANICAP_NDI_SHIFT (29)
+#define NVME_CTRLR_DATA_SANICAP_NDI_MASK (0x1)
+/* No-Deallocate Modifies Media After Sanitize */
+#define NVME_CTRLR_DATA_SANICAP_NODMMAS_SHIFT (30)
+#define NVME_CTRLR_DATA_SANICAP_NODMMAS_MASK (0x3)
+#define NVME_CTRLR_DATA_SANICAP_NODMMAS_UNDEF (0)
+#define NVME_CTRLR_DATA_SANICAP_NODMMAS_NO (1)
+#define NVME_CTRLR_DATA_SANICAP_NODMMAS_YES (2)
+
/** submission queue entry size */
#define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0)
#define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF)
@@ -239,6 +299,8 @@
#define NVME_CTRLR_DATA_ONCS_RESERV_MASK (0x1)
#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT (6)
#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_VERIFY_SHIFT (7)
+#define NVME_CTRLR_DATA_ONCS_VERIFY_MASK (0x1)
/** Fused Operation Support */
#define NVME_CTRLR_DATA_FUSES_CNW_SHIFT (0)
@@ -253,8 +315,15 @@
#define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK (0x1)
/** volatile write cache */
+/* volatile write cache present */
#define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0)
#define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1)
+/* flush all namespaces supported */
+#define NVME_CTRLR_DATA_VWC_ALL_SHIFT (1)
+#define NVME_CTRLR_DATA_VWC_ALL_MASK (0x3)
+#define NVME_CTRLR_DATA_VWC_ALL_UNKNOWN (0)
+#define NVME_CTRLR_DATA_VWC_ALL_NO (2)
+#define NVME_CTRLR_DATA_VWC_ALL_YES (3)
/** namespace features */
/* thin provisioning */
@@ -269,6 +338,9 @@
/* NGUID and EUI64 fields are not reusable */
#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT (3)
#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK (0x1)
+/* NPWG, NPWA, NPDG, NPDA, and NOWS are valid */
+#define NVME_NS_DATA_NSFEAT_NPVALID_SHIFT (4)
+#define NVME_NS_DATA_NSFEAT_NPVALID_MASK (0x1)
/** formatted lba size */
#define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0)
@@ -349,6 +421,20 @@
#define NVME_NS_DATA_FPI_SUPP_SHIFT (7)
#define NVME_NS_DATA_FPI_SUPP_MASK (0x1)
+/** Deallocate Logical Block Features */
+/* deallocated logical block read behavior */
+#define NVME_NS_DATA_DLFEAT_READ_SHIFT (0)
+#define NVME_NS_DATA_DLFEAT_READ_MASK (0x07)
+#define NVME_NS_DATA_DLFEAT_READ_NR (0x00)
+#define NVME_NS_DATA_DLFEAT_READ_00 (0x01)
+#define NVME_NS_DATA_DLFEAT_READ_FF (0x02)
+/* supports the Deallocate bit in the Write Zeroes */
+#define NVME_NS_DATA_DLFEAT_DWZ_SHIFT (3)
+#define NVME_NS_DATA_DLFEAT_DWZ_MASK (0x01)
+/* Guard field for deallocated logical blocks is set to the CRC */
+#define NVME_NS_DATA_DLFEAT_GCRC_SHIFT (4)
+#define NVME_NS_DATA_DLFEAT_GCRC_MASK (0x01)
+
/** lba format support */
/* metadata size */
#define NVME_NS_DATA_LBAF_MS_SHIFT (0)
@@ -373,6 +459,35 @@ enum nvme_critical_warning_state {
#define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0)
#define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7)
+/* Commands Supported and Effects */
+#define NVME_CE_PAGE_CSUP_SHIFT (0)
+#define NVME_CE_PAGE_CSUP_MASK (0x1)
+#define NVME_CE_PAGE_LBCC_SHIFT (1)
+#define NVME_CE_PAGE_LBCC_MASK (0x1)
+#define NVME_CE_PAGE_NCC_SHIFT (2)
+#define NVME_CE_PAGE_NCC_MASK (0x1)
+#define NVME_CE_PAGE_NIC_SHIFT (3)
+#define NVME_CE_PAGE_NIC_MASK (0x1)
+#define NVME_CE_PAGE_CCC_SHIFT (4)
+#define NVME_CE_PAGE_CCC_MASK (0x1)
+#define NVME_CE_PAGE_CSE_SHIFT (16)
+#define NVME_CE_PAGE_CSE_MASK (0x7)
+#define NVME_CE_PAGE_UUID_SHIFT (19)
+#define NVME_CE_PAGE_UUID_MASK (0x1)
+
+/* Sanitize Status */
+#define NVME_SS_PAGE_SSTAT_STATUS_SHIFT (0)
+#define NVME_SS_PAGE_SSTAT_STATUS_MASK (0x7)
+#define NVME_SS_PAGE_SSTAT_STATUS_NEVER (0)
+#define NVME_SS_PAGE_SSTAT_STATUS_COMPLETED (1)
+#define NVME_SS_PAGE_SSTAT_STATUS_INPROG (2)
+#define NVME_SS_PAGE_SSTAT_STATUS_FAILED (3)
+#define NVME_SS_PAGE_SSTAT_STATUS_COMPLETEDWD (4)
+#define NVME_SS_PAGE_SSTAT_PASSES_SHIFT (3)
+#define NVME_SS_PAGE_SSTAT_PASSES_MASK (0x1f)
+#define NVME_SS_PAGE_SSTAT_GDE_SHIFT (8)
+#define NVME_SS_PAGE_SSTAT_GDE_MASK (0x1)
+
/* CC register SHN field values */
enum shn_value {
NVME_SHN_NORMAL = 0x1,
@@ -388,34 +503,37 @@ enum shst_value {
struct nvme_registers
{
- /** controller capabilities */
- uint32_t cap_lo;
- uint32_t cap_hi;
-
- uint32_t vs; /* version */
- uint32_t intms; /* interrupt mask set */
- uint32_t intmc; /* interrupt mask clear */
-
- /** controller configuration */
- uint32_t cc;
-
- uint32_t reserved1;
-
- /** controller status */
- uint32_t csts;
-
- uint32_t reserved2;
-
- /** admin queue attributes */
- uint32_t aqa;
-
- uint64_t asq; /* admin submission queue base addr */
- uint64_t acq; /* admin completion queue base addr */
- uint32_t reserved3[0x3f2];
-
+ uint32_t cap_lo; /* controller capabilities */
+ uint32_t cap_hi;
+ uint32_t vs; /* version */
+ uint32_t intms; /* interrupt mask set */
+ uint32_t intmc; /* interrupt mask clear */
+ uint32_t cc; /* controller configuration */
+ uint32_t reserved1;
+ uint32_t csts; /* controller status */
+ uint32_t nssr; /* NVM Subsystem Reset */
+ uint32_t aqa; /* admin queue attributes */
+ uint64_t asq; /* admin submission queue base addr */
+ uint64_t acq; /* admin completion queue base addr */
+ uint32_t cmbloc; /* Controller Memory Buffer Location */
+ uint32_t cmbsz; /* Controller Memory Buffer Size */
+ uint32_t bpinfo; /* Boot Partition Information */
+ uint32_t bprsel; /* Boot Partition Read Select */
+ uint64_t bpmbl; /* Boot Partition Memory Buffer Location */
+ uint64_t cmbmsc; /* Controller Memory Buffer Memory Space Control */
+ uint32_t cmbsts; /* Controller Memory Buffer Status */
+ uint8_t reserved3[3492]; /* 5Ch - DFFh */
+ uint32_t pmrcap; /* Persistent Memory Capabilities */
+ uint32_t pmrctl; /* Persistent Memory Region Control */
+ uint32_t pmrsts; /* Persistent Memory Region Status */
+ uint32_t pmrebs; /* Persistent Memory Region Elasticity Buffer Size */
+ uint32_t pmrswtp; /* Persistent Memory Region Sustained Write Throughput */
+ uint32_t pmrmsc_lo; /* Persistent Memory Region Controller Memory Space Control */
+ uint32_t pmrmsc_hi;
+ uint8_t reserved4[484]; /* E1Ch - FFFh */
struct {
- uint32_t sq_tdbl; /* submission queue tail doorbell */
- uint32_t cq_hdbl; /* completion queue head doorbell */
+ uint32_t sq_tdbl; /* submission queue tail doorbell */
+ uint32_t cq_hdbl; /* completion queue head doorbell */
} doorbell[1] __packed;
} __packed;
@@ -490,6 +608,7 @@ enum nvme_status_code_type {
NVME_SCT_GENERIC = 0x0,
NVME_SCT_COMMAND_SPECIFIC = 0x1,
NVME_SCT_MEDIA_ERROR = 0x2,
+ NVME_SCT_PATH_RELATED = 0x3,
/* 0x4-0x6 - reserved */
NVME_SCT_VENDOR_SPECIFIC = 0x7,
};
@@ -528,6 +647,9 @@ enum nvme_generic_command_status_code {
NVME_SC_SANITIZE_IN_PROGRESS = 0x1d,
NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID = 0x1e,
NVME_SC_NOT_SUPPORTED_IN_CMB = 0x1f,
+ NVME_SC_NAMESPACE_IS_WRITE_PROTECTED = 0x20,
+ NVME_SC_COMMAND_INTERRUPTED = 0x21,
+ NVME_SC_TRANSIENT_TRANSPORT_ERROR = 0x22,
NVME_SC_LBA_OUT_OF_RANGE = 0x80,
NVME_SC_CAPACITY_EXCEEDED = 0x81,
@@ -573,6 +695,9 @@ enum nvme_command_specific_status_code {
NVME_SC_INVALID_SEC_CTRLR_STATE = 0x20,
NVME_SC_INVALID_NUM_OF_CTRLR_RESRC = 0x21,
NVME_SC_INVALID_RESOURCE_ID = 0x22,
+ NVME_SC_SANITIZE_PROHIBITED_WPMRE = 0x23,
+ NVME_SC_ANA_GROUP_ID_INVALID = 0x24,
+ NVME_SC_ANA_ATTACH_FAILED = 0x25,
NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
NVME_SC_INVALID_PROTECTION_INFO = 0x81,
@@ -591,6 +716,17 @@ enum nvme_media_error_status_code {
NVME_SC_DEALLOCATED_OR_UNWRITTEN = 0x87,
};
+/*
+ * path related status codes
+ *
+ * NVME_SC_COMMAND_ABOTHED_BY_HOST is a historical misspelling; it is kept
+ * as an alias of NVME_SC_COMMAND_ABORTED_BY_HOST so existing users compile.
+ */
+enum nvme_path_related_status_code {
+ NVME_SC_INTERNAL_PATH_ERROR = 0x00,
+ NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS = 0x01,
+ NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE = 0x02,
+ NVME_SC_ASYMMETRIC_ACCESS_TRANSITION = 0x03,
+ NVME_SC_CONTROLLER_PATHING_ERROR = 0x60,
+ NVME_SC_HOST_PATHING_ERROR = 0x70,
+ NVME_SC_COMMAND_ABORTED_BY_HOST = 0x71,
+ NVME_SC_COMMAND_ABOTHED_BY_HOST = NVME_SC_COMMAND_ABORTED_BY_HOST,
+};
+
/* admin opcodes */
enum nvme_admin_opcode {
NVME_OPC_DELETE_IO_SQ = 0x00,
@@ -610,20 +746,27 @@ enum nvme_admin_opcode {
/* 0x0e-0x0f - reserved */
NVME_OPC_FIRMWARE_ACTIVATE = 0x10,
NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11,
+ /* 0x12-0x13 - reserved */
NVME_OPC_DEVICE_SELF_TEST = 0x14,
NVME_OPC_NAMESPACE_ATTACHMENT = 0x15,
+ /* 0x16-0x17 - reserved */
NVME_OPC_KEEP_ALIVE = 0x18,
NVME_OPC_DIRECTIVE_SEND = 0x19,
NVME_OPC_DIRECTIVE_RECEIVE = 0x1a,
+ /* 0x1b - reserved */
NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c,
NVME_OPC_NVME_MI_SEND = 0x1d,
NVME_OPC_NVME_MI_RECEIVE = 0x1e,
+ /* 0x1f-0x7b - reserved */
NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c,
NVME_OPC_FORMAT_NVM = 0x80,
NVME_OPC_SECURITY_SEND = 0x81,
NVME_OPC_SECURITY_RECEIVE = 0x82,
+ /* 0x83 - reserved */
NVME_OPC_SANITIZE = 0x84,
+ /* 0x85 - reserved */
+ NVME_OPC_GET_LBA_STATUS = 0x86,
};
/* nvme nvm opcodes */
@@ -634,11 +777,11 @@ enum nvme_nvm_opcode {
/* 0x03 - reserved */
NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
NVME_OPC_COMPARE = 0x05,
- /* 0x06 - reserved */
+ /* 0x06-0x07 - reserved */
NVME_OPC_WRITE_ZEROES = 0x08,
- /* 0x07 - reserved */
NVME_OPC_DATASET_MANAGEMENT = 0x09,
- /* 0x0a-0x0c - reserved */
+ /* 0x0a-0x0b - reserved */
+ NVME_OPC_VERIFY = 0x0c,
NVME_OPC_RESERVATION_REGISTER = 0x0d,
NVME_OPC_RESERVATION_REPORT = 0x0e,
/* 0x0f-0x10 - reserved */
@@ -666,10 +809,21 @@ enum nvme_feature {
NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F,
NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT = 0x10,
NVME_FEAT_NON_OP_POWER_STATE_CONFIG = 0x11,
- /* 0x12-0x77 - reserved */
+ NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG = 0x12,
+ NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG = 0x13,
+ NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW = 0x14,
+ NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES = 0x15,
+ NVME_FEAT_HOST_BEHAVIOR_SUPPORT = 0x16,
+ NVME_FEAT_SANITIZE_CONFIG = 0x17,
+ NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION = 0x18,
+ /* 0x19-0x77 - reserved */
/* 0x78-0x7f - NVMe Management Interface */
NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
- /* 0x81-0xBF - command set specific (reserved) */
+ NVME_FEAT_HOST_IDENTIFIER = 0x81,
+ NVME_FEAT_RESERVATION_NOTIFICATION_MASK = 0x82,
+ NVME_FEAT_RESERVATION_PERSISTENCE = 0x83,
+ NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG = 0x84,
+ /* 0x85-0xBF - command set specific (reserved) */
/* 0xC0-0xFF - vendor specific */
};
@@ -763,12 +917,27 @@ struct nvme_controller_data {
/** Controller Attributes */
uint32_t ctratt; /* bitfield really */
- uint8_t reserved1[12];
+ /** Read Recovery Levels Supported */
+ uint16_t rrls;
+
+ uint8_t reserved1[9];
+
+ /** Controller Type */
+ uint8_t cntrltype;
/** FRU Globally Unique Identifier */
uint8_t fguid[16];
- uint8_t reserved2[128];
+ /** Command Retry Delay Time 1 */
+ uint16_t crdt1;
+
+ /** Command Retry Delay Time 2 */
+ uint16_t crdt2;
+
+ /** Command Retry Delay Time 3 */
+ uint16_t crdt3;
+
+ uint8_t reserved2[122];
/* bytes 256-511: admin command set attributes */
@@ -848,7 +1017,34 @@ struct nvme_controller_data {
/** Sanitize Capabilities */
uint32_t sanicap; /* Really a bitfield */
- uint8_t reserved3[180];
+ /** Host Memory Buffer Minimum Descriptor Entry Size */
+ uint32_t hmminds;
+
+ /** Host Memory Maximum Descriptors Entries */
+ uint16_t hmmaxd;
+
+ /** NVM Set Identifier Maximum */
+ uint16_t nsetidmax;
+
+ /** Endurance Group Identifier Maximum */
+ uint16_t endgidmax;
+
+ /** ANA Transition Time */
+ uint8_t anatt;
+
+ /** Asymmetric Namespace Access Capabilities */
+ uint8_t anacap;
+
+ /** ANA Group Identifier Maximum */
+ uint32_t anagrpmax;
+
+ /** Number of ANA Group Identifiers */
+ uint32_t nanagrpid;
+
+ /** Persistent Event Log Size */
+ uint32_t pels;
+
+ uint8_t reserved3[156];
/* bytes 512-703: nvm command set attributes */
/** submission queue entry size */
@@ -883,7 +1079,9 @@ struct nvme_controller_data {
/** NVM Vendor Specific Command Configuration */
uint8_t nvscc;
- uint8_t reserved5;
+
+ /** Namespace Write Protection Capabilities */
+ uint8_t nwpc;
/** Atomic Compare & Write Unit */
uint16_t acwu;
@@ -892,8 +1090,11 @@ struct nvme_controller_data {
/** SGL Support */
uint32_t sgls;
+ /** Maximum Number of Allowed Namespaces */
+ uint32_t mnan;
+
/* bytes 544-767: Reserved */
- uint8_t reserved7[228];
+ uint8_t reserved7[224];
/** NVM Subsystem NVMe Qualified Name */
uint8_t subnqn[256];
@@ -978,8 +1179,38 @@ struct nvme_namespace_data {
/** NVM Capacity */
uint8_t nvmcap[16];
- /* bytes 64-103: Reserved */
- uint8_t reserved5[40];
+ /** Namespace Preferred Write Granularity */
+ uint16_t npwg;
+
+ /** Namespace Preferred Write Alignment */
+ uint16_t npwa;
+
+ /** Namespace Preferred Deallocate Granularity */
+ uint16_t npdg;
+
+ /** Namespace Preferred Deallocate Alignment */
+ uint16_t npda;
+
+ /** Namespace Optimal Write Size */
+ uint16_t nows;
+
+ /* bytes 74-91: Reserved */
+ uint8_t reserved5[18];
+
+ /** ANA Group Identifier */
+ uint32_t anagrpid;
+
+ /* bytes 96-98: Reserved */
+ uint8_t reserved6[3];
+
+ /** Namespace Attributes */
+ uint8_t nsattr;
+
+ /** NVM Set Identifier */
+ uint16_t nvmsetid;
+
+ /** Endurance Group Identifier */
+ uint16_t endgid;
/** Namespace Globally Unique Identifier */
uint8_t nguid[16];
@@ -990,7 +1221,7 @@ struct nvme_namespace_data {
/** lba format support */
uint32_t lbaf[16];
- uint8_t reserved6[192];
+ uint8_t reserved7[192];
uint8_t vendor_specific[3712];
} __packed __aligned(4);
@@ -1005,9 +1236,21 @@ enum nvme_log_page {
NVME_LOG_FIRMWARE_SLOT = 0x03,
NVME_LOG_CHANGED_NAMESPACE = 0x04,
NVME_LOG_COMMAND_EFFECT = 0x05,
+ NVME_LOG_DEVICE_SELF_TEST = 0x06,
+ NVME_LOG_TELEMETRY_HOST_INITIATED = 0x07,
+ NVME_LOG_TELEMETRY_CONTROLLER_INITIATED = 0x08,
+ NVME_LOG_ENDURANCE_GROUP_INFORMATION = 0x09,
+ NVME_LOG_PREDICTABLE_LATENCY_PER_NVM_SET = 0x0a,
+ NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE = 0x0b,
+ NVME_LOG_ASYMMETRIC_NAMESPAVE_ACCESS = 0x0c,
+ NVME_LOG_PERSISTENT_EVENT_LOG = 0x0d,
+ NVME_LOG_LBA_STATUS_INFORMATION = 0x0e,
+ NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE = 0x0f,
/* 0x10-0x7F - reserved */
/* 0x80-0xBF - I/O command set specific */
NVME_LOG_RES_NOTIFICATION = 0x80,
+ NVME_LOG_SANITIZE_STATUS = 0x81,
+ /* 0x82-0xBF - reserved */
/* 0xC0-0xFF - vendor specific */
/*
@@ -1036,7 +1279,11 @@ struct nvme_error_information_entry {
uint64_t lba;
uint32_t nsid;
uint8_t vendor_specific;
- uint8_t reserved[35];
+ uint8_t trtype;
+ uint16_t reserved30;
+ uint64_t csi;
+ uint16_t ttsi;
+ uint8_t reserved[22];
} __packed __aligned(4);
_Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");
@@ -1072,8 +1319,16 @@ struct nvme_health_information_page {
uint32_t warning_temp_time;
uint32_t error_temp_time;
uint16_t temp_sensor[8];
-
- uint8_t reserved2[296];
+ /* Thermal Management Temperature 1 Transition Count */
+ uint32_t tmt1tc;
+ /* Thermal Management Temperature 2 Transition Count */
+ uint32_t tmt2tc;
+ /* Total Time For Thermal Management Temperature 1 */
+ uint32_t ttftmt1;
+ /* Total Time For Thermal Management Temperature 2 */
+ uint32_t ttftmt2;
+
+ uint8_t reserved2[280];
} __packed __aligned(4);
_Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");
@@ -1094,6 +1349,43 @@ struct nvme_ns_list {
_Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list");
+struct nvme_command_effects_page {
+ uint32_t acs[256];
+ uint32_t iocs[256];
+ uint8_t reserved[2048];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_command_effects_page) == 4096,
+ "bad size for nvme_command_effects_page");
+
+/*
+ * Reservation Notification log page.
+ *
+ * The structure is packed, so field offsets follow directly from the member
+ * sizes: bytes 0-7 log page count, 8 log page type, 9 available log pages,
+ * 10-11 reserved, 12-15 namespace ID, 16-63 reserved.  Two reserved bytes
+ * are needed before nsid to place it at offset 12.
+ */
+struct nvme_res_notification_page {
+ uint64_t log_page_count;
+ uint8_t log_page_type;
+ uint8_t available_log_pages;
+ uint8_t reserved2[2];
+ uint32_t nsid;
+ uint8_t reserved[48];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_res_notification_page) == 64,
+ "bad size for nvme_res_notification_page");
+
+struct nvme_sanitize_status_page {
+ uint16_t sprog;
+ uint16_t sstat;
+ uint32_t scdw10;
+ uint32_t etfo;
+ uint32_t etfbe;
+ uint32_t etfce;
+ uint32_t etfownd;
+ uint32_t etfbewnd;
+ uint32_t etfcewnd;
+ uint8_t reserved[480];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_sanitize_status_page) == 512,
+ "bad size for nvme_sanitize_status_page");
+
struct intel_log_temp_stats
{
uint64_t current;
@@ -1109,6 +1401,56 @@ struct intel_log_temp_stats
_Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");
+struct nvme_resv_reg_ctrlr
+{
+ uint16_t ctrlr_id; /* Controller ID */
+ uint8_t rcsts; /* Reservation Status */
+ uint8_t reserved3[5];
+ uint64_t hostid; /* Host Identifier */
+ uint64_t rkey; /* Reservation Key */
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_resv_reg_ctrlr) == 24, "bad size for nvme_resv_reg_ctrlr");
+
+struct nvme_resv_reg_ctrlr_ext
+{
+ uint16_t ctrlr_id; /* Controller ID */
+ uint8_t rcsts; /* Reservation Status */
+ uint8_t reserved3[5];
+ uint64_t rkey; /* Reservation Key */
+ uint64_t hostid[2]; /* Host Identifier */
+ uint8_t reserved32[32];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_resv_reg_ctrlr_ext) == 64, "bad size for nvme_resv_reg_ctrlr_ext");
+
+struct nvme_resv_status
+{
+ uint32_t gen; /* Generation */
+ uint8_t rtype; /* Reservation Type */
+ uint8_t regctl[2]; /* Number of Registered Controllers */
+ uint8_t reserved7[2];
+ uint8_t ptpls; /* Persist Through Power Loss State */
+ uint8_t reserved10[14];
+ struct nvme_resv_reg_ctrlr ctrlr[0];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_resv_status) == 24, "bad size for nvme_resv_status");
+
+struct nvme_resv_status_ext
+{
+ uint32_t gen; /* Generation */
+ uint8_t rtype; /* Reservation Type */
+ uint8_t regctl[2]; /* Number of Registered Controllers */
+ uint8_t reserved7[2];
+ uint8_t ptpls; /* Persist Through Power Loss State */
+ uint8_t reserved10[14];
+ uint8_t reserved24[40];
+ struct nvme_resv_reg_ctrlr_ext ctrlr[0];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_resv_status_ext) == 64, "bad size for nvme_resv_status_ext");
+
#define NVME_TEST_MAX_THREADS 128
struct nvme_io_test {
@@ -1184,6 +1526,11 @@ struct nvme_pt_command {
struct mtx * driver_lock;
};
+struct nvme_get_nsid {
+ char cdev[SPECNAMELEN + 1];
+ uint32_t nsid;
+};
+
#define nvme_completion_is_error(cpl) \
(NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
@@ -1192,6 +1539,7 @@ void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
#ifdef _KERNEL
struct bio;
+struct thread;
struct nvme_namespace;
struct nvme_controller;
@@ -1281,6 +1629,8 @@ uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns);
int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
nvme_cb_fn_t cb_fn);
+int nvme_ns_ioctl_process(struct nvme_namespace *ns, u_long cmd,
+ caddr_t arg, int flag, struct thread *td);
/*
* Command building helper functions -- shared with CAM
@@ -1372,6 +1722,10 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
s->rtd3e = le32toh(s->rtd3e);
s->oaes = le32toh(s->oaes);
s->ctratt = le32toh(s->ctratt);
+ s->rrls = le16toh(s->rrls);
+ s->crdt1 = le16toh(s->crdt1);
+ s->crdt2 = le16toh(s->crdt2);
+ s->crdt3 = le16toh(s->crdt3);
s->oacs = le16toh(s->oacs);
s->wctemp = le16toh(s->wctemp);
s->cctemp = le16toh(s->cctemp);
@@ -1385,6 +1739,13 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
s->mntmt = le16toh(s->mntmt);
s->mxtmt = le16toh(s->mxtmt);
s->sanicap = le32toh(s->sanicap);
+ s->hmminds = le32toh(s->hmminds);
+ s->hmmaxd = le16toh(s->hmmaxd);
+ s->nsetidmax = le16toh(s->nsetidmax);
+ s->endgidmax = le16toh(s->endgidmax);
+ s->anagrpmax = le32toh(s->anagrpmax);
+ s->nanagrpid = le32toh(s->nanagrpid);
+ s->pels = le32toh(s->pels);
s->maxcmd = le16toh(s->maxcmd);
s->nn = le32toh(s->nn);
s->oncs = le16toh(s->oncs);
@@ -1393,6 +1754,7 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
s->awupf = le16toh(s->awupf);
s->acwu = le16toh(s->acwu);
s->sgls = le32toh(s->sgls);
+ s->mnan = le32toh(s->mnan);
for (i = 0; i < 32; i++)
nvme_power_state_swapbytes(&s->power_state[i]);
}
@@ -1412,6 +1774,14 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s)
s->nabo = le16toh(s->nabo);
s->nabspf = le16toh(s->nabspf);
s->noiob = le16toh(s->noiob);
+ s->npwg = le16toh(s->npwg);
+ s->npwa = le16toh(s->npwa);
+ s->npdg = le16toh(s->npdg);
+ s->npda = le16toh(s->npda);
+ s->nows = le16toh(s->nows);
+ s->anagrpid = le32toh(s->anagrpid);
+ s->nvmsetid = le16toh(s->nvmsetid);
+ s->endgid = le16toh(s->endgid);
for (i = 0; i < 16; i++)
s->lbaf[i] = le32toh(s->lbaf[i]);
}
@@ -1427,6 +1797,8 @@ void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry
s->error_location = le16toh(s->error_location);
s->lba = le64toh(s->lba);
s->nsid = le32toh(s->nsid);
+ s->csi = le64toh(s->csi);
+ s->ttsi = le16toh(s->ttsi);
}
static inline
@@ -1467,6 +1839,10 @@ void nvme_health_information_page_swapbytes(struct nvme_health_information_page
s->error_temp_time = le32toh(s->error_temp_time);
for (i = 0; i < 8; i++)
s->temp_sensor[i] = le16toh(s->temp_sensor[i]);
+ s->tmt1tc = le32toh(s->tmt1tc);
+ s->tmt2tc = le32toh(s->tmt2tc);
+ s->ttftmt1 = le32toh(s->ttftmt1);
+ s->ttftmt2 = le32toh(s->ttftmt2);
}
@@ -1489,6 +1865,38 @@ void nvme_ns_list_swapbytes(struct nvme_ns_list *s)
}
static inline
+void nvme_command_effects_page_swapbytes(struct nvme_command_effects_page *s)
+{
+ int i;
+
+ for (i = 0; i < 256; i++)
+ s->acs[i] = le32toh(s->acs[i]);
+ for (i = 0; i < 256; i++)
+ s->iocs[i] = le32toh(s->iocs[i]);
+}
+
+static inline
+void nvme_res_notification_page_swapbytes(struct nvme_res_notification_page *s)
+{
+ s->log_page_count = le64toh(s->log_page_count);
+ s->nsid = le32toh(s->nsid);
+}
+
+static inline
+void nvme_sanitize_status_page_swapbytes(struct nvme_sanitize_status_page *s)
+{
+ s->sprog = le16toh(s->sprog);
+ s->sstat = le16toh(s->sstat);
+ s->scdw10 = le32toh(s->scdw10);
+ s->etfo = le32toh(s->etfo);
+ s->etfbe = le32toh(s->etfbe);
+ s->etfce = le32toh(s->etfce);
+ s->etfownd = le32toh(s->etfownd);
+ s->etfbewnd = le32toh(s->etfbewnd);
+ s->etfcewnd = le32toh(s->etfcewnd);
+}
+
+static inline
void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s)
{
@@ -1503,4 +1911,34 @@ void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s)
s->est_offset = le64toh(s->est_offset);
}
+/*
+ * Convert a reservation status structure from little-endian to host byte
+ * order in place.  "size" is treated as the number of bytes available at
+ * "s"; only controller entries that fit completely after the fixed header
+ * are converted, even if REGCTL announces more.
+ */
+static inline
+void nvme_resv_status_swapbytes(struct nvme_resv_status *s, size_t size)
+{
+ u_int i, n;
+
+ s->gen = le32toh(s->gen);
+ /* No room for any entry; also prevents size_t underflow below. */
+ if (size < sizeof(*s))
+ return;
+ /* REGCTL is stored as two bytes, least significant first. */
+ n = (s->regctl[1] << 8) | s->regctl[0];
+ /* Subtract the header size, sizeof(*s), not the size of the pointer. */
+ n = MIN(n, (size - sizeof(*s)) / sizeof(s->ctrlr[0]));
+ for (i = 0; i < n; i++) {
+ s->ctrlr[i].ctrlr_id = le16toh(s->ctrlr[i].ctrlr_id);
+ s->ctrlr[i].hostid = le64toh(s->ctrlr[i].hostid);
+ s->ctrlr[i].rkey = le64toh(s->ctrlr[i].rkey);
+ }
+}
+
+/*
+ * Convert an extended reservation status structure from little-endian to
+ * host byte order in place.  "size" is treated as the number of bytes
+ * available at "s"; only controller entries that fit completely after the
+ * fixed header are converted, even if REGCTL announces more.
+ */
+static inline
+void nvme_resv_status_ext_swapbytes(struct nvme_resv_status_ext *s, size_t size)
+{
+ u_int i, n;
+
+ s->gen = le32toh(s->gen);
+ /* No room for any entry; also prevents size_t underflow below. */
+ if (size < sizeof(*s))
+ return;
+ /* REGCTL is stored as two bytes, least significant first. */
+ n = (s->regctl[1] << 8) | s->regctl[0];
+ /* Subtract the header size, sizeof(*s), not the size of the pointer. */
+ n = MIN(n, (size - sizeof(*s)) / sizeof(s->ctrlr[0]));
+ for (i = 0; i < n; i++) {
+ s->ctrlr[i].ctrlr_id = le16toh(s->ctrlr[i].ctrlr_id);
+ s->ctrlr[i].rkey = le64toh(s->ctrlr[i].rkey);
+ /* The 128-bit host identifier is converted by the shared helper. */
+ nvme_le128toh((void *)s->ctrlr[i].hostid);
+ }
+}
+
#endif /* __NVME_H__ */
diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.h b/freebsd/sys/dev/ofw/ofw_bus_subr.h
index 468fdc39..218ba710 100644
--- a/freebsd/sys/dev/ofw/ofw_bus_subr.h
+++ b/freebsd/sys/dev/ofw/ofw_bus_subr.h
@@ -65,9 +65,11 @@ struct intr_map_data_fdt {
};
#endif
-#define SIMPLEBUS_PNP_DESCR "Z:compat;P:#;"
-#define SIMPLEBUS_PNP_INFO(t) \
- MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t) / sizeof(t[0]));
+#define FDTCOMPAT_PNP_DESCR "Z:compat;P:#;"
+#define FDTCOMPAT_PNP_INFO(t, busname) \
+ MODULE_PNP_INFO(FDTCOMPAT_PNP_DESCR, busname, t, t, sizeof(t) / sizeof(t[0]));
+
+#define SIMPLEBUS_PNP_INFO(t) FDTCOMPAT_PNP_INFO(t, simplebus)
/* Generic implementation of ofw_bus_if.m methods and helper routines */
int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t);
diff --git a/freebsd/sys/dev/ofw/ofw_subr.c b/freebsd/sys/dev/ofw/ofw_subr.c
index 4a20727c..7483a2d2 100644
--- a/freebsd/sys/dev/ofw/ofw_subr.c
+++ b/freebsd/sys/dev/ofw/ofw_subr.c
@@ -81,7 +81,8 @@ int
ofw_reg_to_paddr(phandle_t dev, int regno, bus_addr_t *paddr,
bus_size_t *psize, pcell_t *ppci_hi)
{
- pcell_t cell[32], pci_hi;
+ static pcell_t cell[256];
+ pcell_t pci_hi;
uint64_t addr, raddr, baddr;
uint64_t size, rsize;
uint32_t c, nbridge, naddr, nsize;
diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c
index f2a46d03..5402cb66 100644
--- a/freebsd/sys/dev/pci/pci.c
+++ b/freebsd/sys/dev/pci/pci.c
@@ -33,20 +33,22 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_acpi.h>
#include <rtems/bsd/local/opt_bus.h>
#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/limits.h>
-#include <sys/linker.h>
-#include <sys/fcntl.h>
#include <sys/conf.h>
+#include <sys/endian.h>
+#include <sys/eventhandler.h>
+#include <sys/fcntl.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/linker.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
-#include <sys/endian.h>
+#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -137,6 +139,10 @@ static int pci_remap_intr_method(device_t bus, device_t dev,
static void pci_hint_device_unit(device_t acdev, device_t child,
const char *name, int *unitp);
#endif /* __rtems__ */
+static int pci_reset_post(device_t dev, device_t child);
+static int pci_reset_prepare(device_t dev, device_t child);
+static int pci_reset_child(device_t dev, device_t child,
+ int flags);
static int pci_get_id_method(device_t dev, device_t child,
enum pci_id_type type, uintptr_t *rid);
@@ -161,6 +167,9 @@ static device_method_t pci_methods[] = {
DEVMETHOD(bus_driver_added, pci_driver_added),
DEVMETHOD(bus_setup_intr, pci_setup_intr),
DEVMETHOD(bus_teardown_intr, pci_teardown_intr),
+ DEVMETHOD(bus_reset_prepare, pci_reset_prepare),
+ DEVMETHOD(bus_reset_post, pci_reset_post),
+ DEVMETHOD(bus_reset_child, pci_reset_child),
DEVMETHOD(bus_get_dma_tag, pci_get_dma_tag),
DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
@@ -355,7 +364,7 @@ SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
" enable these bits correctly. We'd like to do this all the time, but"
" there are some peripherals that this causes problems with.");
-static int pci_do_realloc_bars = 0;
+static int pci_do_realloc_bars = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
&pci_do_realloc_bars, 0,
"Attempt to allocate a new range for any BARs whose original "
@@ -1678,10 +1687,13 @@ pci_mask_msix(device_t dev, u_int index)
KASSERT(msix->msix_msgnum > index, ("bogus index"));
offset = msix->msix_table_offset + index * 16 + 12;
val = bus_read_4(msix->msix_table_res, offset);
- if (!(val & PCIM_MSIX_VCTRL_MASK)) {
- val |= PCIM_MSIX_VCTRL_MASK;
- bus_write_4(msix->msix_table_res, offset, val);
- }
+ val |= PCIM_MSIX_VCTRL_MASK;
+
+ /*
+ * Some devices (e.g. Samsung PM961) do not support reads of this
+ * register, so always write the new value.
+ */
+ bus_write_4(msix->msix_table_res, offset, val);
}
void
@@ -1694,10 +1706,13 @@ pci_unmask_msix(device_t dev, u_int index)
KASSERT(msix->msix_table_len > index, ("bogus index"));
offset = msix->msix_table_offset + index * 16 + 12;
val = bus_read_4(msix->msix_table_res, offset);
- if (val & PCIM_MSIX_VCTRL_MASK) {
- val &= ~PCIM_MSIX_VCTRL_MASK;
- bus_write_4(msix->msix_table_res, offset, val);
- }
+ val &= ~PCIM_MSIX_VCTRL_MASK;
+
+ /*
+ * Some devices (e.g. Samsung PM961) do not support reads of this
+ * register, so always write the new value.
+ */
+ bus_write_4(msix->msix_table_res, offset, val);
}
int
@@ -4359,9 +4374,6 @@ pci_attach_common(device_t dev)
{
struct pci_softc *sc;
int busno, domain;
-#ifdef PCI_DMA_BOUNDARY
- int error, tag_valid;
-#endif
#ifdef PCI_RES_BUS
int rid;
#endif
@@ -4381,23 +4393,7 @@ pci_attach_common(device_t dev)
if (bootverbose)
device_printf(dev, "domain=%d, physical bus=%d\n",
domain, busno);
-#ifdef PCI_DMA_BOUNDARY
- tag_valid = 0;
- if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
- devclass_find("pci")) {
- error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
- PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
- NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
- BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
- if (error)
- device_printf(dev, "Failed to create DMA tag: %d\n",
- error);
- else
- tag_valid = 1;
- }
- if (!tag_valid)
-#endif
- sc->sc_dma_tag = bus_get_dma_tag(dev);
+ sc->sc_dma_tag = bus_get_dma_tag(dev);
return (0);
}
@@ -5730,6 +5726,26 @@ pci_get_resource_list (device_t dev, device_t child)
return (&dinfo->resources);
}
+#ifdef ACPI_DMAR
+bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
+bus_dma_tag_t
+pci_get_dma_tag(device_t bus, device_t dev)
+{
+ bus_dma_tag_t tag;
+ struct pci_softc *sc;
+
+ if (device_get_parent(dev) == bus) {
+ /* try dmar and return if it works */
+ tag = dmar_get_dma_tag(bus, dev);
+ } else
+ tag = NULL;
+ if (tag == NULL) {
+ sc = device_get_softc(bus);
+ tag = sc->sc_dma_tag;
+ }
+ return (tag);
+}
+#else
bus_dma_tag_t
pci_get_dma_tag(device_t bus, device_t dev)
{
@@ -5737,6 +5753,7 @@ pci_get_dma_tag(device_t bus, device_t dev)
return (sc->sc_dma_tag);
}
+#endif
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
@@ -6402,6 +6419,94 @@ pcie_flr(device_t dev, u_int max_delay, bool force)
return (true);
}
+/*
+ * Attempt a power-management reset by cycling the device in/out of D3
+ * state. PCI spec says we can only go into D3 state from D0 state.
+ * Transition from D[12] into D0 before going to D3 state.
+ */
+int
+pci_power_reset(device_t dev)
+{
+ int ps;
+
+ ps = pci_get_powerstate(dev);
+ if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
+ pci_set_powerstate(dev, PCI_POWERSTATE_D0);
+ pci_set_powerstate(dev, PCI_POWERSTATE_D3);
+ pci_set_powerstate(dev, ps);
+ return (0);
+}
+
+/*
+ * Try link drop and retrain of the downstream port of upstream
+ * switch, for PCIe. According to the PCIe 3.0 spec 6.6.1, this must
+ * cause Conventional Hot reset of the device in the slot.
+ * Alternative, for PCIe, could be the secondary bus reset initiated
+ * on the upstream switch PCIR_BRIDGECTL_1, bit 6.
+ */
+int
+pcie_link_reset(device_t port, int pcie_location)
+{
+ uint16_t v;
+
+ v = pci_read_config(port, pcie_location + PCIER_LINK_CTL, 2);
+ v |= PCIEM_LINK_CTL_LINK_DIS;
+ pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
+ pause_sbt("pcier1", mstosbt(20), 0, 0);
+ v &= ~PCIEM_LINK_CTL_LINK_DIS;
+ v |= PCIEM_LINK_CTL_RETRAIN_LINK;
+ pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
+ pause_sbt("pcier2", mstosbt(100), 0, 0); /* 100 ms */
+ v = pci_read_config(port, pcie_location + PCIER_LINK_STA, 2);
+ return ((v & PCIEM_LINK_STA_TRAINING) != 0 ? ETIMEDOUT : 0);
+}
+
+static int
+pci_reset_post(device_t dev, device_t child)
+{
+
+ if (dev == device_get_parent(child))
+ pci_restore_state(child);
+ return (0);
+}
+
+static int
+pci_reset_prepare(device_t dev, device_t child)
+{
+
+ if (dev == device_get_parent(child))
+ pci_save_state(child);
+ return (0);
+}
+
+static int
+pci_reset_child(device_t dev, device_t child, int flags)
+{
+ int error;
+
+ if (dev == NULL || device_get_parent(child) != dev)
+ return (0);
+ if ((flags & DEVF_RESET_DETACH) != 0) {
+ error = device_get_state(child) == DS_ATTACHED ?
+ device_detach(child) : 0;
+ } else {
+ error = BUS_SUSPEND_CHILD(dev, child);
+ }
+ if (error == 0) {
+ if (!pcie_flr(child, 1000, false)) {
+ error = BUS_RESET_PREPARE(dev, child);
+ if (error == 0)
+ pci_power_reset(child);
+ BUS_RESET_POST(dev, child);
+ }
+ if ((flags & DEVF_RESET_DETACH) != 0)
+ device_probe_and_attach(child);
+ else
+ BUS_RESUME_CHILD(dev, child);
+ }
+ return (error);
+}
+
const struct pci_device_table *
pci_match_device(device_t child, const struct pci_device_table *id, size_t nelt)
{
diff --git a/freebsd/sys/dev/pci/pci_pci.c b/freebsd/sys/dev/pci/pci_pci.c
index 607a0614..cdcba150 100644
--- a/freebsd/sys/dev/pci/pci_pci.c
+++ b/freebsd/sys/dev/pci/pci_pci.c
@@ -44,8 +44,11 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pciio.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@@ -82,6 +85,7 @@ static void pcib_pcie_dll_timeout(void *arg);
#endif
static int pcib_request_feature_default(device_t pcib, device_t dev,
enum pci_feature feature);
+static int pcib_reset_child(device_t dev, device_t child, int flags);
static device_method_t pcib_methods[] = {
/* Device interface */
@@ -108,6 +112,7 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+ DEVMETHOD(bus_reset_child, pcib_reset_child),
/* pcib interface */
DEVMETHOD(pcib_maxslots, pcib_ari_maxslots),
@@ -1167,9 +1172,11 @@ pcib_pcie_intr_hotplug(void *arg)
{
struct pcib_softc *sc;
device_t dev;
+ uint16_t old_slot_sta;
sc = arg;
dev = sc->dev;
+ old_slot_sta = sc->pcie_slot_sta;
sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
/* Clear the events just reported. */
@@ -1185,7 +1192,8 @@ pcib_pcie_intr_hotplug(void *arg)
"Attention Button Pressed: Detach Cancelled\n");
sc->flags &= ~PCIB_DETACH_PENDING;
callout_stop(&sc->pcie_ab_timer);
- } else {
+ } else if (old_slot_sta & PCIEM_SLOT_STA_PDS) {
+ /* Only initiate detach sequence if device present. */
device_printf(dev,
"Attention Button Pressed: Detaching in 5 seconds\n");
sc->flags |= PCIB_DETACH_PENDING;
@@ -1266,11 +1274,8 @@ pcib_pcie_cc_timeout(void *arg)
mtx_assert(&Giant, MA_OWNED);
sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
if (!(sta & PCIEM_SLOT_STA_CC)) {
- device_printf(dev,
- "HotPlug Command Timed Out - forcing detach\n");
- sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING);
- sc->flags |= PCIB_DETACHING;
- pcib_pcie_hotplug_update(sc, 0, 0, true);
+ device_printf(dev, "HotPlug Command Timed Out\n");
+ sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING;
} else {
device_printf(dev,
"Missed HotPlug interrupt waiting for Command Completion\n");
@@ -2911,3 +2916,31 @@ pcib_request_feature_default(device_t pcib, device_t dev,
bus = device_get_parent(pcib);
return (PCIB_REQUEST_FEATURE(device_get_parent(bus), dev, feature));
}
+
+static int
+pcib_reset_child(device_t dev, device_t child, int flags)
+{
+ struct pci_devinfo *pdinfo;
+ int error;
+
+ error = 0;
+ if (dev == NULL || device_get_parent(child) != dev)
+ goto out;
+ error = ENXIO;
+ if (device_get_devclass(child) != devclass_find("pci"))
+ goto out;
+ pdinfo = device_get_ivars(dev);
+ if (pdinfo->cfg.pcie.pcie_location != 0 &&
+ (pdinfo->cfg.pcie.pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT ||
+ pdinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)) {
+ error = bus_helper_reset_prepare(child, flags);
+ if (error == 0) {
+ error = pcie_link_reset(dev,
+ pdinfo->cfg.pcie.pcie_location);
+ /* XXXKIB call _post even if error != 0 ? */
+ bus_helper_reset_post(child, flags);
+ }
+ }
+out:
+ return (error);
+}
diff --git a/freebsd/sys/dev/pci/pci_user.c b/freebsd/sys/dev/pci/pci_user.c
index 3e2c3c7e..bdf8a935 100644
--- a/freebsd/sys/dev/pci/pci_user.c
+++ b/freebsd/sys/dev/pci/pci_user.c
@@ -858,6 +858,7 @@ pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm)
struct thread *td;
struct sglist *sg;
struct pci_map *pm;
+ vm_paddr_t membase;
vm_paddr_t pbase;
vm_size_t plen;
vm_offset_t addr;
@@ -880,8 +881,9 @@ pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm)
return (EBUSY); /* XXXKIB enable if _ACTIVATE */
if (!PCI_BAR_MEM(pm->pm_value))
return (EIO);
- pbase = trunc_page(pm->pm_value);
- plen = round_page(pm->pm_value + ((pci_addr_t)1 << pm->pm_size)) -
+ membase = pm->pm_value & PCIM_BAR_MEM_BASE;
+ pbase = trunc_page(membase);
+ plen = round_page(membase + ((pci_addr_t)1 << pm->pm_size)) -
pbase;
prot = VM_PROT_READ | (((pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0) ?
VM_PROT_WRITE : 0);
@@ -913,7 +915,7 @@ pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm)
}
pbm->pbm_map_base = (void *)addr;
pbm->pbm_map_length = plen;
- pbm->pbm_bar_off = pm->pm_value - pbase;
+ pbm->pbm_bar_off = membase - pbase;
pbm->pbm_bar_length = (pci_addr_t)1 << pm->pm_size;
out:
diff --git a/freebsd/sys/dev/pci/pcivar.h b/freebsd/sys/dev/pci/pcivar.h
index 2ea7b877..d27cd1d2 100644
--- a/freebsd/sys/dev/pci/pcivar.h
+++ b/freebsd/sys/dev/pci/pcivar.h
@@ -33,7 +33,7 @@
#define _PCIVAR_H_
#include <sys/queue.h>
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
/* some PCI bus constants */
#define PCI_MAXMAPS_0 6 /* max. no. of memory/port maps */
@@ -259,6 +259,13 @@ typedef struct {
extern uint32_t pci_numdevs;
+/*
+ * The bitfield has to be stable and match the fields below (so that
+ * match_flag_vendor must be bit 0) so we have to do the endian dance. We can't
+ * use enums or #define constants because then the macros for subsetting matches
+ * wouldn't work. These tables are parsed by devmatch and others to connect
+ * modules with devices on the PCI bus.
+ */
struct pci_device_table {
#if BYTE_ORDER == LITTLE_ENDIAN
uint16_t
@@ -674,6 +681,7 @@ int pci_get_max_read_req(device_t dev);
void pci_restore_state(device_t dev);
void pci_save_state(device_t dev);
int pci_set_max_read_req(device_t dev, int size);
+int pci_power_reset(device_t dev);
uint32_t pcie_read_config(device_t dev, int reg, int width);
void pcie_write_config(device_t dev, int reg, uint32_t value, int width);
uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
@@ -681,17 +689,10 @@ uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
bool pcie_flr(device_t dev, u_int max_delay, bool force);
int pcie_get_max_completion_timeout(device_t dev);
bool pcie_wait_for_pending_transactions(device_t dev, u_int max_delay);
+int pcie_link_reset(device_t port, int pcie_location);
void pci_print_faulted_dev(void);
-#ifdef BUS_SPACE_MAXADDR
-#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
-#define PCI_DMA_BOUNDARY 0x100000000
-#else
-#define PCI_DMA_BOUNDARY 0
-#endif
-#endif
-
#endif /* _SYS_BUS_H_ */
/*
diff --git a/freebsd/sys/dev/rtwn/if_rtwn.c b/freebsd/sys/dev/rtwn/if_rtwn.c
index 79868dc0..0ca83e8a 100644
--- a/freebsd/sys/dev/rtwn/if_rtwn.c
+++ b/freebsd/sys/dev/rtwn/if_rtwn.c
@@ -155,9 +155,6 @@ static void rtwn_stop(struct rtwn_softc *);
MALLOC_DEFINE(M_RTWN_PRIV, "rtwn_priv", "rtwn driver private state");
-static const uint8_t rtwn_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static const uint16_t wme2reg[] =
{ R92C_EDCA_BE_PARAM, R92C_EDCA_BK_PARAM,
R92C_EDCA_VI_PARAM, R92C_EDCA_VO_PARAM };
@@ -1536,9 +1533,8 @@ rtwn_getradiocaps(struct ieee80211com *ic,
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
setbit(bands, IEEE80211_MODE_11NG);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- rtwn_chan_2ghz, nitems(rtwn_chan_2ghz), bands,
- !!(ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40));
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans,
+ bands, !!(ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40));
/* XXX workaround add_channel_list() limitations */
setbit(bands, IEEE80211_MODE_11A);
@@ -1566,10 +1562,6 @@ rtwn_set_channel(struct ieee80211com *ic)
RTWN_LOCK(sc);
rtwn_set_chan(sc, c);
- sc->sc_rxtap.wr_chan_freq = htole16(c->ic_freq);
- sc->sc_rxtap.wr_chan_flags = htole16(c->ic_flags);
- sc->sc_txtap.wt_chan_freq = htole16(c->ic_freq);
- sc->sc_txtap.wt_chan_flags = htole16(c->ic_flags);
RTWN_UNLOCK(sc);
}
diff --git a/freebsd/sys/dev/rtwn/if_rtwnvar.h b/freebsd/sys/dev/rtwn/if_rtwnvar.h
index 3ebcba52..a6e7ea9f 100644
--- a/freebsd/sys/dev/rtwn/if_rtwnvar.h
+++ b/freebsd/sys/dev/rtwn/if_rtwnvar.h
@@ -62,9 +62,10 @@ struct rtwn_rx_radiotap_header {
struct rtwn_tx_radiotap_header {
struct ieee80211_radiotap_header wt_ihdr;
uint8_t wt_flags;
+ uint8_t wt_pad;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define RTWN_TX_RADIOTAP_PRESENT \
(1 << IEEE80211_RADIOTAP_FLAGS | \
diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.h b/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.h
index 6df5812e..796d8c4d 100644
--- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.h
+++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.h
@@ -17,9 +17,11 @@
*/
void r92ce_attach(struct rtwn_pci_softc *);
+void r88ee_attach(struct rtwn_pci_softc *);
enum {
RTWN_CHIP_RTL8192CE,
+ RTWN_CHIP_RTL8188EE,
RTWN_CHIP_MAX_PCI
};
@@ -32,13 +34,16 @@ struct rtwn_pci_ident {
static const struct rtwn_pci_ident rtwn_pci_ident_table[] = {
{ 0x10ec, 0x8176, "Realtek RTL8188CE", RTWN_CHIP_RTL8192CE },
+ { 0x10ec, 0x8179, "Realtek RTL8188EE", RTWN_CHIP_RTL8188EE },
+ { 0x10ec, 0x8178, "Realtek RTL8192CE", RTWN_CHIP_RTL8192CE },
{ 0, 0, NULL, RTWN_CHIP_MAX_PCI }
};
typedef void (*chip_pci_attach)(struct rtwn_pci_softc *);
static const chip_pci_attach rtwn_chip_pci_attach[RTWN_CHIP_MAX_PCI] = {
- [RTWN_CHIP_RTL8192CE] = r92ce_attach
+ [RTWN_CHIP_RTL8192CE] = r92ce_attach,
+ [RTWN_CHIP_RTL8188EE] = r88ee_attach
};
static __inline void
diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_reg.c b/freebsd/sys/dev/rtwn/pci/rtwn_pci_reg.c
index 8ce54f0d..5b998b5a 100644
--- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_reg.c
+++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_reg.c
@@ -120,6 +120,6 @@ rtwn_pci_delay(struct rtwn_softc *sc, int usec)
DELAY(usec);
else {
(void) mtx_sleep(sc, &sc->sc_mtx, 0, "rtwn_pci",
- MAX(msecs_to_ticks(usec / 1000), 1));
+ msecs_to_ticks(usec / 1000));
}
}
diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c b/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c
index 1934b741..f97fea44 100644
--- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c
+++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c
@@ -85,12 +85,12 @@ rtwn_pci_setup_rx_desc(struct rtwn_pci_softc *pc,
}
static void
-rtwn_pci_rx_frame(struct rtwn_softc *sc, struct rtwn_rx_stat_pci *rx_desc,
- int desc_idx)
+rtwn_pci_rx_frame(struct rtwn_pci_softc *pc)
{
- struct rtwn_pci_softc *pc = RTWN_PCI_SOFTC(sc);
+ struct rtwn_softc *sc = &pc->pc_sc;
struct rtwn_rx_ring *ring = &pc->rx_ring;
- struct rtwn_rx_data *rx_data = &ring->rx_data[desc_idx];
+ struct rtwn_rx_stat_pci *rx_desc = &ring->desc[ring->cur];
+ struct rtwn_rx_data *rx_data = &ring->rx_data[ring->cur];
struct ieee80211com *ic = &sc->sc_ic;
struct ieee80211_node *ni;
uint32_t rxdw0;
@@ -150,9 +150,6 @@ rtwn_pci_rx_frame(struct rtwn_softc *sc, struct rtwn_rx_stat_pci *rx_desc,
panic("%s: could not load old RX mbuf",
device_get_name(sc->sc_dev));
- /* Physical address may have changed. */
- rtwn_pci_setup_rx_desc(pc, rx_desc, rx_data->paddr,
- MJUMPAGESIZE, desc_idx);
goto fail;
}
@@ -167,10 +164,6 @@ rtwn_pci_rx_frame(struct rtwn_softc *sc, struct rtwn_rx_stat_pci *rx_desc,
"%s: Rx frame len %d, infosz %d, shift %d\n",
__func__, pktlen, infosz, shift);
- /* Update RX descriptor. */
- rtwn_pci_setup_rx_desc(pc, rx_desc, rx_data->paddr, MJUMPAGESIZE,
- desc_idx);
-
/* Send the frame to the 802.11 layer. */
RTWN_UNLOCK(sc);
if (ni != NULL) {
@@ -188,6 +181,72 @@ fail:
counter_u64_add(ic->ic_ierrors, 1);
}
+static int
+rtwn_pci_rx_buf_copy(struct rtwn_pci_softc *pc)
+{
+ struct rtwn_rx_ring *ring = &pc->rx_ring;
+ struct rtwn_rx_stat_pci *rx_desc = &ring->desc[ring->cur];
+ struct rtwn_rx_data *rx_data = &ring->rx_data[ring->cur];
+ uint32_t rxdw0;
+ int desc_size, pktlen;
+
+ /*
+ * NB: tx_report() / c2h_report() expects to see USB Rx
+ * descriptor - same as for PCIe, but without rxbufaddr* fields.
+ */
+ desc_size = sizeof(struct rtwn_rx_stat_common);
+ KASSERT(sizeof(pc->pc_rx_buf) >= desc_size,
+ ("adjust size for PCIe Rx buffer!"));
+
+ memcpy(pc->pc_rx_buf, rx_desc, desc_size);
+
+ rxdw0 = le32toh(rx_desc->rxdw0);
+ pktlen = MS(rxdw0, RTWN_RXDW0_PKTLEN);
+
+ if (pktlen > sizeof(pc->pc_rx_buf) - desc_size)
+ {
+ /* Looks like an ordinary Rx frame. */
+ return (desc_size);
+ }
+
+ bus_dmamap_sync(ring->data_dmat, rx_data->map, BUS_DMASYNC_POSTREAD);
+ memcpy(pc->pc_rx_buf + desc_size, mtod(rx_data->m, void *), pktlen);
+
+ return (desc_size + pktlen);
+}
+
+static void
+rtwn_pci_tx_report(struct rtwn_pci_softc *pc, int len)
+{
+ struct rtwn_softc *sc = &pc->pc_sc;
+
+ if (sc->sc_ratectl != RTWN_RATECTL_NET80211) {
+ /* shouldn't happen */
+ device_printf(sc->sc_dev,
+ "%s called while ratectl = %d!\n",
+ __func__, sc->sc_ratectl);
+ return;
+ }
+
+ RTWN_NT_LOCK(sc);
+ rtwn_handle_tx_report(sc, pc->pc_rx_buf, len);
+ RTWN_NT_UNLOCK(sc);
+
+#ifdef IEEE80211_SUPPORT_SUPERG
+ /*
+ * NB: this will executed only when 'report' bit is set.
+ */
+ if (sc->sc_tx_n_active > 0 && --sc->sc_tx_n_active <= 1)
+ rtwn_cmd_sleepable(sc, NULL, 0, rtwn_ff_flush_all);
+#endif
+}
+
+static void
+rtwn_pci_c2h_report(struct rtwn_pci_softc *pc, int len)
+{
+ rtwn_handle_c2h_report(&pc->pc_sc, pc->pc_rx_buf, len);
+}
+
static void
rtwn_pci_tx_done(struct rtwn_softc *sc, int qid)
{
@@ -199,7 +258,8 @@ rtwn_pci_tx_done(struct rtwn_softc *sc, int qid)
RTWN_DPRINTF(sc, RTWN_DEBUG_INTR, "%s: qid %d, last %d, cur %d\n",
__func__, qid, ring->last, ring->cur);
- bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_sync(ring->desc_dmat, ring->desc_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
while(ring->last != ring->cur) {
data = &ring->tx_data[ring->last];
@@ -264,21 +324,50 @@ rtwn_pci_rx_done(struct rtwn_softc *sc)
{
struct rtwn_pci_softc *pc = RTWN_PCI_SOFTC(sc);
struct rtwn_rx_ring *ring = &pc->rx_ring;
+ struct rtwn_rx_stat_pci *rx_desc;
+ struct rtwn_rx_data *rx_data;
+ int len;
bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTREAD);
for (;;) {
- struct rtwn_rx_stat_pci *rx_desc = &ring->desc[ring->cur];
+ rx_desc = &ring->desc[ring->cur];
+ rx_data = &ring->rx_data[ring->cur];
if (le32toh(rx_desc->rxdw0) & RTWN_RXDW0_OWN)
break;
- rtwn_pci_rx_frame(sc, rx_desc, ring->cur);
+ len = rtwn_pci_rx_buf_copy(pc);
+
+ switch (rtwn_classify_intr(sc, pc->pc_rx_buf, len)) {
+ case RTWN_RX_DATA:
+ rtwn_pci_rx_frame(pc);
+ break;
+ case RTWN_RX_TX_REPORT:
+ rtwn_pci_tx_report(pc, len);
+ break;
+ case RTWN_RX_OTHER:
+ rtwn_pci_c2h_report(pc, len);
+ break;
+ default:
+ /* NOTREACHED */
+ KASSERT(0, ("unknown Rx classification code"));
+ break;
+ }
+
+ /* Update / reset RX descriptor (and set OWN bit). */
+ rtwn_pci_setup_rx_desc(pc, rx_desc, rx_data->paddr,
+ MJUMPAGESIZE, ring->cur);
if (!(sc->sc_flags & RTWN_RUNNING))
return;
- ring->cur = (ring->cur + 1) % RTWN_PCI_RX_LIST_COUNT;
+ /* NB: device can reuse current descriptor. */
+ bus_dmamap_sync(ring->desc_dmat, ring->desc_map,
+ BUS_DMASYNC_POSTREAD);
+
+ if (le32toh(rx_desc->rxdw0) & RTWN_RXDW0_OWN)
+ ring->cur = (ring->cur + 1) % RTWN_PCI_RX_LIST_COUNT;
}
}
@@ -290,13 +379,13 @@ rtwn_pci_intr(void *arg)
int i, status, tx_rings;
RTWN_LOCK(sc);
- status = rtwn_classify_intr(sc, &tx_rings, 0);
+ status = rtwn_pci_get_intr_status(pc, &tx_rings);
RTWN_DPRINTF(sc, RTWN_DEBUG_INTR, "%s: status %08X, tx_rings %08X\n",
__func__, status, tx_rings);
if (status == 0 && tx_rings == 0)
goto unlock;
- if (status & RTWN_PCI_INTR_RX) {
+ if (status & (RTWN_PCI_INTR_RX | RTWN_PCI_INTR_TX_REPORT)) {
rtwn_pci_rx_done(sc);
if (!(sc->sc_flags & RTWN_RUNNING))
goto unlock;
diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_tx.c b/freebsd/sys/dev/rtwn/pci/rtwn_pci_tx.c
index 50e915ee..1d3852ca 100644
--- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_tx.c
+++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_tx.c
@@ -176,8 +176,8 @@ rtwn_pci_tx_start_frame(struct rtwn_softc *sc, struct ieee80211_node *ni,
rtwn_dump_tx_desc(sc, txd);
bus_dmamap_sync(ring->desc_dmat, ring->desc_map,
- BUS_DMASYNC_POSTWRITE);
- bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE);
data->m = m;
data->ni = ni;
diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_var.h b/freebsd/sys/dev/rtwn/pci/rtwn_pci_var.h
index 194fab4a..95b2effe 100644
--- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_var.h
+++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_var.h
@@ -26,6 +26,9 @@
#define RTWN_PCI_RX_LIST_COUNT 256
#define RTWN_PCI_TX_LIST_COUNT 256
+/* sizeof(struct rtwn_rx_stat_common) + R88E_INTR_MSG_LEN */
+#define RTWN_PCI_RX_TMP_BUF_SIZE 84
+
struct rtwn_rx_data {
bus_dmamap_t map;
struct mbuf *m;
@@ -95,8 +98,8 @@ enum {
/* Shortcuts */
/* Vendor driver treats RX errors like ROK... */
#define RTWN_PCI_INTR_RX \
- (RTWN_PCI_INTR_RX_OVERFLOW | RTWN_PCI_INTR_RX_DESC_UNAVAIL | \
- RTWN_PCI_INTR_RX_DONE)
+ (RTWN_PCI_INTR_RX_ERROR | RTWN_PCI_INTR_RX_OVERFLOW | \
+ RTWN_PCI_INTR_RX_DESC_UNAVAIL | RTWN_PCI_INTR_RX_DONE)
struct rtwn_pci_softc {
@@ -109,6 +112,7 @@ struct rtwn_pci_softc {
void *pc_ih;
bus_size_t pc_mapsize;
+ uint8_t pc_rx_buf[RTWN_PCI_RX_TMP_BUF_SIZE];
struct rtwn_rx_ring rx_ring;
struct rtwn_tx_ring tx_ring[RTWN_PCI_NTXQUEUES];
@@ -122,6 +126,8 @@ struct rtwn_pci_softc {
void *, bus_dma_segment_t *);
void (*pc_copy_tx_desc)(void *, const void *);
void (*pc_enable_intr)(struct rtwn_pci_softc *);
+ int (*pc_get_intr_status)(struct rtwn_pci_softc *,
+ int *);
};
#define RTWN_PCI_SOFTC(sc) ((struct rtwn_pci_softc *)(sc))
@@ -133,5 +139,7 @@ struct rtwn_pci_softc {
(((_pc)->pc_copy_tx_desc)((_dest), (_src)))
#define rtwn_pci_enable_intr(_pc) \
(((_pc)->pc_enable_intr)((_pc)))
+#define rtwn_pci_get_intr_status(_pc, _tx_rings) \
+ (((_pc)->pc_get_intr_status)((_pc), (_tx_rings)))
#endif /* RTWN_PCI_VAR_H */
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e.h b/freebsd/sys/dev/rtwn/rtl8188e/r88e.h
index 3a3c0865..ce9fa19a 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e.h
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e.h
@@ -24,9 +24,7 @@
/*
* Global definitions.
*/
-#define R88E_PUBQ_NPAGES 142
#define R88E_TXPKTBUF_COUNT 177
-#define R88E_TX_PAGE_COUNT 169
#define R88E_MACID_MAX 63
#define R88E_RX_DMA_BUFFER_SIZE 0x2400
@@ -67,9 +65,8 @@ int r88e_set_pwrmode(struct rtwn_softc *, struct ieee80211vap *, int);
#endif
/* r88e_init.c */
-void r88e_init_bb(struct rtwn_softc *);
+void r88e_init_bb_common(struct rtwn_softc *);
void r88e_init_rf(struct rtwn_softc *);
-int r88e_power_on(struct rtwn_softc *);
/* r88e_led.c */
void r88e_set_led(struct rtwn_softc *, int, int);
@@ -81,6 +78,7 @@ void r88e_rf_write(struct rtwn_softc *, int, uint8_t, uint32_t);
void r88e_parse_rom(struct rtwn_softc *, uint8_t *);
/* r88e_rx.c */
+int r88e_classify_intr(struct rtwn_softc *, void *, int);
void r88e_ratectl_tx_complete(struct rtwn_softc *, uint8_t *, int);
void r88e_handle_c2h_report(struct rtwn_softc *, uint8_t *, int);
int8_t r88e_get_rssi_cck(struct rtwn_softc *, void *);
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_calib.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_calib.c
index 592f391a..94974983 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_calib.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_calib.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2016 Andriy Voskoboinyk <avos@FreeBSD.org>
+ * Copyright (c) 2016-2019 Andriy Voskoboinyk <avos@FreeBSD.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -44,16 +44,343 @@ __FBSDID("$FreeBSD$");
#include <dev/rtwn/if_rtwnreg.h>
#include <dev/rtwn/if_rtwnvar.h>
+#include <dev/rtwn/if_rtwn_debug.h>
#include <dev/rtwn/rtl8188e/r88e.h>
#include <dev/rtwn/rtl8188e/r88e_reg.h>
+/* Registers to save and restore during IQ calibration. */
+struct r88e_iq_cal_reg_vals {
+ uint32_t adda[16];
+ uint8_t txpause;
+ uint8_t bcn_ctrl[2];
+ uint32_t gpio_muxcfg;
+ uint32_t cck0_afesetting;
+ uint32_t ofdm0_trxpathena;
+ uint32_t ofdm0_trmuxpar;
+ uint32_t fpga0_rfifacesw0;
+ uint32_t fpga0_rfifacesw1;
+ uint32_t fpga0_rfifaceoe0;
+ uint32_t fpga0_rfifaceoe1;
+ uint32_t config_ant0;
+ uint32_t config_ant1;
+};
+
+static int
+r88e_iq_calib_chain(struct rtwn_softc *sc, uint16_t tx[2], uint16_t rx[2])
+{
+ uint32_t status;
+
+ /* Set Rx IQ calibration mode table. */
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
+ rtwn_rf_write(sc, 0, R88E_RF_WE_LUT, 0x800a0);
+ rtwn_rf_write(sc, 0, R92C_RF_RCK_OS, 0x30000);
+ rtwn_rf_write(sc, 0, R92C_RF_TXPA_G(0), 0xf);
+ rtwn_rf_write(sc, 0, R92C_RF_TXPA_G(1), 0xf117b);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+
+ /* IQ calibration settings. */
+ rtwn_bb_write(sc, R92C_TX_IQK, 0x01007c00);
+ rtwn_bb_write(sc, R92C_RX_IQK, 0x81004800);
+
+ /* IQ calibration settings for chain 0. */
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(0), 0x10008c1c);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(0), 0x30008c1c);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(0), 0x82160804);
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160000);
+
+ /* LO calibration settings. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_RSP, 0x0046a911);
+
+ /* We're doing LO and IQ calibration in one shot. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf9000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf8000000);
+
+ /* Give LO and IQ calibrations the time to complete. */
+ rtwn_delay(sc, 10000);
+
+ /* Read IQ calibration status. */
+ status = rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(0));
+ if (status & (1 << 28))
+ return (0); /* Tx failed. */
+
+ /* Read Tx IQ calibration results. */
+ tx[0] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_BEFORE(0)),
+ R92C_POWER_IQK_RESULT);
+ tx[1] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_AFTER(0)),
+ R92C_POWER_IQK_RESULT);
+ if (tx[0] == 0x142 || tx[1] == 0x042)
+ return (0); /* Tx failed. */
+
+ rtwn_bb_write(sc, R92C_TX_IQK, 0x80007c00 | (tx[0] << 16) | tx[1]);
+
+ /* Set Rx IQ calibration mode table. */
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
+ rtwn_rf_write(sc, 0, R88E_RF_WE_LUT, 0x800a0);
+ rtwn_rf_write(sc, 0, R92C_RF_RCK_OS, 0x30000);
+ rtwn_rf_write(sc, 0, R92C_RF_TXPA_G(0), 0xf);
+ rtwn_rf_write(sc, 0, R92C_RF_TXPA_G(1), 0xf7ffa);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+
+ /* IQ calibration settings. */
+ rtwn_bb_write(sc, R92C_RX_IQK, 0x01004800);
+
+ /* IQ calibration settings for chain 0. */
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(0), 0x30008c1c);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(0), 0x10008c1c);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(0), 0x82160c05);
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160c05);
+
+ /* LO calibration settings. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_RSP, 0x0046a911);
+
+ /* We're doing LO and IQ calibration in one shot. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf9000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf8000000);
+
+ /* Give LO and IQ calibrations the time to complete. */
+ rtwn_delay(sc, 10000);
+
+ /* Read IQ calibration status. */
+ status = rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(0));
+ if (status & (1 << 27))
+ return (1); /* Rx failed. */
+
+ /* Read Rx IQ calibration results. */
+ rx[0] = MS(rtwn_bb_read(sc, R92C_RX_POWER_IQK_BEFORE(0)),
+ R92C_POWER_IQK_RESULT);
+ rx[1] = MS(status, R92C_POWER_IQK_RESULT);
+ if (rx[0] == 0x132 || rx[1] == 0x036)
+ return (1); /* Rx failed. */
+
+ return (3); /* Both Tx and Rx succeeded. */
+}
+
+static void
+r88e_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2],
+ uint16_t rx[2], struct r88e_iq_cal_reg_vals *vals)
+{
+ /* Registers to save and restore during IQ calibration. */
+ static const uint16_t reg_adda[16] = {
+ 0x85c, 0xe6c, 0xe70, 0xe74,
+ 0xe78, 0xe7c, 0xe80, 0xe84,
+ 0xe88, 0xe8c, 0xed0, 0xed4,
+ 0xed8, 0xedc, 0xee0, 0xeec
+ };
+ int i;
+ uint32_t hssi_param1;
+
+ if (n == 0) {
+ for (i = 0; i < nitems(reg_adda); i++)
+ vals->adda[i] = rtwn_bb_read(sc, reg_adda[i]);
+
+ vals->txpause = rtwn_read_1(sc, R92C_TXPAUSE);
+ vals->bcn_ctrl[0] = rtwn_read_1(sc, R92C_BCN_CTRL(0));
+ vals->bcn_ctrl[1] = rtwn_read_1(sc, R92C_BCN_CTRL(1));
+ vals->gpio_muxcfg = rtwn_read_4(sc, R92C_GPIO_MUXCFG);
+ }
+
+ rtwn_bb_write(sc, reg_adda[0], 0x0b1b25a0);
+ for (i = 1; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], 0x0bdb25a0);
+
+ hssi_param1 = rtwn_bb_read(sc, R92C_HSSI_PARAM1(0));
+ if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(0),
+ hssi_param1 | R92C_HSSI_PARAM1_PI);
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(1),
+ hssi_param1 | R92C_HSSI_PARAM1_PI);
+ }
+
+ if (n == 0) {
+ vals->cck0_afesetting = rtwn_bb_read(sc, R92C_CCK0_AFESETTING);
+ vals->ofdm0_trxpathena =
+ rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA);
+ vals->ofdm0_trmuxpar = rtwn_bb_read(sc, R92C_OFDM0_TRMUXPAR);
+ vals->fpga0_rfifacesw0 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(0));
+ vals->fpga0_rfifacesw1 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(1));
+ vals->fpga0_rfifaceoe0 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(0));
+ vals->fpga0_rfifaceoe1 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(1));
+ vals->config_ant0 = rtwn_bb_read(sc, R92C_CONFIG_ANT(0));
+ vals->config_ant1 = rtwn_bb_read(sc, R92C_CONFIG_ANT(1));
+ }
+
+ rtwn_bb_setbits(sc, R92C_CCK0_AFESETTING, 0, 0x0f000000);
+ rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, 0x03a05600);
+ rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, 0x000800e4);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), 0x22204000);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACESW(0), 0, 0x04000400);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACEOE(0), 0x400, 0);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACEOE(1), 0x400, 0);
+
+ rtwn_write_1(sc, R92C_TXPAUSE,
+ R92C_TX_QUEUE_AC | R92C_TX_QUEUE_MGT | R92C_TX_QUEUE_HIGH);
+ rtwn_write_1(sc, R92C_BCN_CTRL(0),
+ vals->bcn_ctrl[0] & ~R92C_BCN_CTRL_EN_BCN);
+ rtwn_write_1(sc, R92C_BCN_CTRL(1),
+ vals->bcn_ctrl[1] & ~R92C_BCN_CTRL_EN_BCN);
+ rtwn_write_1(sc, R92C_GPIO_MUXCFG,
+ vals->gpio_muxcfg & ~R92C_GPIO_MUXCFG_ENBT);
+
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(0), 0x0f600000);
+
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+ rtwn_bb_write(sc, R92C_TX_IQK, 0x01007c00);
+ rtwn_bb_write(sc, R92C_RX_IQK, 0x01004800);
+
+ /* Run IQ calibration twice. */
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = r88e_iq_calib_chain(sc, tx, rx);
+ if (ret == 0) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB, "%s: Tx failed.\n",
+ __func__);
+ tx[0] = 0xff;
+ tx[1] = 0xff;
+ rx[0] = 0xff;
+ rx[1] = 0xff;
+ } else if (ret == 1) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB, "%s: Rx failed.\n",
+ __func__);
+ rx[0] = 0xff;
+ rx[1] = 0xff;
+ } else if (ret == 3) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB, "%s: Both Tx and Rx"
+ " succeeded.\n", __func__);
+ }
+ }
+
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB,
+ "%s: results for run %d: tx[0] 0x%x, tx[1] 0x%x, rx[0] 0x%x, "
+ "rx[1] 0x%x\n", __func__, n, tx[0], tx[1], rx[0], rx[1]);
+
+ rtwn_bb_write(sc, R92C_CCK0_AFESETTING, vals->cck0_afesetting);
+ rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, vals->ofdm0_trxpathena);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(0), vals->fpga0_rfifacesw0);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), vals->fpga0_rfifacesw1);
+ rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, vals->ofdm0_trmuxpar);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(0), vals->fpga0_rfifaceoe0);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(1), vals->fpga0_rfifaceoe1);
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(0), vals->config_ant0);
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(1), vals->config_ant1);
+
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00032ed3);
+
+ if (n != 0) {
+ if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(0), hssi_param1);
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(1), hssi_param1);
+ }
+
+ for (i = 0; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], vals->adda[i]);
+
+ rtwn_write_1(sc, R92C_TXPAUSE, vals->txpause);
+ rtwn_write_1(sc, R92C_BCN_CTRL(0), vals->bcn_ctrl[0]);
+ rtwn_write_1(sc, R92C_BCN_CTRL(1), vals->bcn_ctrl[1]);
+ rtwn_write_4(sc, R92C_GPIO_MUXCFG, vals->gpio_muxcfg);
+ }
+}
+
+#define RTWN_IQ_CAL_MAX_TOLERANCE 5
+static int
+r88e_iq_calib_compare_results(struct rtwn_softc *sc, uint16_t tx1[2],
+ uint16_t rx1[2], uint16_t tx2[2], uint16_t rx2[2])
+{
+ int i, tx_ok, rx_ok;
+
+ tx_ok = rx_ok = 0;
+ for (i = 0; i < 2; i++) {
+ if (tx1[i] == 0xff || tx2[i] == 0xff ||
+ rx1[i] == 0xff || rx2[i] == 0xff)
+ continue;
+
+ tx_ok = (abs(tx1[i] - tx2[i]) <= RTWN_IQ_CAL_MAX_TOLERANCE);
+ rx_ok = (abs(rx1[i] - rx2[i]) <= RTWN_IQ_CAL_MAX_TOLERANCE);
+ }
+
+ return (tx_ok && rx_ok);
+}
+#undef RTWN_IQ_CAL_MAX_TOLERANCE
+
+static void
+r88e_iq_calib_write_results(struct rtwn_softc *sc, uint16_t tx[2],
+ uint16_t rx[2])
+{
+ uint32_t reg, val, x;
+ long y, tx_c;
+
+ if (tx[0] == 0xff || tx[1] == 0xff)
+ return;
+
+ reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(0));
+ val = ((reg >> 22) & 0x3ff);
+ x = tx[0];
+ if (x & 0x00000200)
+ x |= 0xfffffc00;
+ reg = (((x * val) >> 8) & 0x3ff);
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXIQIMBALANCE(0), 0x3ff, reg);
+ rtwn_bb_setbits(sc, R92C_OFDM0_ECCATHRESHOLD, 0x80000000,
+ ((x * val) & 0x80) << 24);
+
+ y = tx[1];
+ if (y & 0x00000200)
+ y |= 0xfffffc00;
+ tx_c = (y * val) >> 8;
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXAFE(0), 0xf0000000,
+ (tx_c & 0x3c0) << 22);
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXIQIMBALANCE(0), 0x003f0000,
+ (tx_c & 0x3f) << 16);
+ rtwn_bb_setbits(sc, R92C_OFDM0_ECCATHRESHOLD, 0x20000000,
+ ((y * val) & 0x80) << 22);
+
+ if (rx[0] == 0xff || rx[1] == 0xff)
+ return;
+
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQIMBALANCE(0), 0x3ff,
+ rx[0] & 0x3ff);
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQIMBALANCE(0), 0xfc00,
+ (rx[1] & 0x3f) << 10);
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQEXTANTA, 0xf0000000,
+ (rx[1] & 0x3c0) << 22);
+}
+
+#define RTWN_IQ_CAL_NRUN 3
void
r88e_iq_calib(struct rtwn_softc *sc)
{
- /* XXX TODO */
+ struct r88e_iq_cal_reg_vals vals;
+ uint16_t tx[RTWN_IQ_CAL_NRUN][2], rx[RTWN_IQ_CAL_NRUN][2];
+ int n, valid;
+
+ KASSERT(sc->ntxchains == 1,
+ ("%s: only 1T1R configuration is supported!\n", __func__));
+
+ valid = 0;
+ for (n = 0; n < RTWN_IQ_CAL_NRUN; n++) {
+ r88e_iq_calib_run(sc, n, tx[n], rx[n], &vals);
+
+ if (n == 0)
+ continue;
+
+ /* Valid results remain stable after consecutive runs. */
+ valid = r88e_iq_calib_compare_results(sc, tx[n - 1],
+ rx[n - 1], tx[n], rx[n]);
+ if (valid)
+ break;
+ }
+
+ if (valid)
+ r88e_iq_calib_write_results(sc, tx[n], rx[n]);
}
+#undef RTWN_IQ_CAL_NRUN
void
r88e_temp_measure(struct rtwn_softc *sc)
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c
index ddc9d0a4..ff48d244 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c
@@ -111,7 +111,11 @@ r88e_fw_reset(struct rtwn_softc *sc, int reason)
reg = rtwn_read_2(sc, R92C_SYS_FUNC_EN);
rtwn_write_2(sc, R92C_SYS_FUNC_EN, reg & ~R92C_SYS_FUNC_EN_CPUEN);
- rtwn_write_2(sc, R92C_SYS_FUNC_EN, reg | R92C_SYS_FUNC_EN_CPUEN);
+
+ if (reason != RTWN_FW_RESET_SHUTDOWN) {
+ rtwn_write_2(sc, R92C_SYS_FUNC_EN,
+ reg | R92C_SYS_FUNC_EN_CPUEN);
+ }
}
void
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_init.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_init.c
index 5225c43f..8c5bbac2 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_init.c
@@ -72,20 +72,8 @@ r88e_crystalcap_write(struct rtwn_softc *sc)
}
void
-r88e_init_bb(struct rtwn_softc *sc)
+r88e_init_bb_common(struct rtwn_softc *sc)
{
-
- /* Enable BB and RF. */
- rtwn_setbits_2(sc, R92C_SYS_FUNC_EN, 0,
- R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST |
- R92C_SYS_FUNC_EN_DIO_RF);
-
- rtwn_write_1(sc, R92C_RF_CTRL,
- R92C_RF_CTRL_EN | R92C_RF_CTRL_RSTB | R92C_RF_CTRL_SDMRSTB);
- rtwn_write_1(sc, R92C_SYS_FUNC_EN,
- R92C_SYS_FUNC_EN_USBA | R92C_SYS_FUNC_EN_USBD |
- R92C_SYS_FUNC_EN_BB_GLB_RST | R92C_SYS_FUNC_EN_BBRSTB);
-
r92c_init_bb_common(sc);
rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(0), 0x69553422);
@@ -95,66 +83,3 @@ r88e_init_bb(struct rtwn_softc *sc)
r88e_crystalcap_write(sc);
}
-
-int
-r88e_power_on(struct rtwn_softc *sc)
-{
-#define RTWN_CHK(res) do { \
- if (res != 0) \
- return (EIO); \
-} while(0)
- int ntries;
-
- /* Wait for power ready bit. */
- for (ntries = 0; ntries < 5000; ntries++) {
- if (rtwn_read_4(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_SUS_HOST)
- break;
- rtwn_delay(sc, 10);
- }
- if (ntries == 5000) {
- device_printf(sc->sc_dev,
- "timeout waiting for chip power up\n");
- return (ETIMEDOUT);
- }
-
- /* Reset BB. */
- RTWN_CHK(rtwn_setbits_1(sc, R92C_SYS_FUNC_EN,
- R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST, 0));
-
- RTWN_CHK(rtwn_setbits_1(sc, R92C_AFE_XTAL_CTRL + 2, 0, 0x80));
-
- /* Disable HWPDN. */
- RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
- R92C_APS_FSMCO_APDM_HPDN, 0, 1));
-
- /* Disable WL suspend. */
- RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
- R92C_APS_FSMCO_AFSM_HSUS | R92C_APS_FSMCO_AFSM_PCIE, 0, 1));
-
- RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
- 0, R92C_APS_FSMCO_APFM_ONMAC, 1));
- for (ntries = 0; ntries < 5000; ntries++) {
- if (!(rtwn_read_2(sc, R92C_APS_FSMCO) &
- R92C_APS_FSMCO_APFM_ONMAC))
- break;
- rtwn_delay(sc, 10);
- }
- if (ntries == 5000)
- return (ETIMEDOUT);
-
- /* Enable LDO normal mode. */
- RTWN_CHK(rtwn_setbits_1(sc, R92C_LPLDO_CTRL,
- R92C_LPLDO_CTRL_SLEEP, 0));
-
- /* Enable MAC DMA/WMAC/SCHEDULE/SEC blocks. */
- RTWN_CHK(rtwn_write_2(sc, R92C_CR, 0));
- RTWN_CHK(rtwn_setbits_2(sc, R92C_CR, 0,
- R92C_CR_HCI_TXDMA_EN | R92C_CR_TXDMA_EN |
- R92C_CR_HCI_RXDMA_EN | R92C_CR_RXDMA_EN |
- R92C_CR_PROTOCOL_EN | R92C_CR_SCHEDULE_EN |
- ((sc->sc_hwcrypto != RTWN_CRYPTO_SW) ? R92C_CR_ENSEC : 0) |
- R92C_CR_CALTMR_EN));
-
- return (0);
-#undef RTWN_CHK
-}
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_priv.h b/freebsd/sys/dev/rtwn/rtl8188e/r88e_priv.h
index 28f4b1fb..8ec5502d 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_priv.h
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_priv.h
@@ -37,7 +37,7 @@ struct rtwn_r88e_txpwr {
/*
* MAC initialization values.
*/
-static const struct rtwn_mac_prog rtl8188eu_mac[] = {
+static const struct rtwn_mac_prog rtl8188e_mac[] = {
{ 0x026, 0x41 }, { 0x027, 0x35 }, { 0x040, 0x00 }, { 0x428, 0x0a },
{ 0x429, 0x10 }, { 0x430, 0x00 }, { 0x431, 0x01 }, { 0x432, 0x02 },
{ 0x433, 0x04 }, { 0x434, 0x05 }, { 0x435, 0x06 }, { 0x436, 0x07 },
@@ -66,7 +66,7 @@ static const struct rtwn_mac_prog rtl8188eu_mac[] = {
/*
* Baseband initialization values.
*/
-static const uint16_t rtl8188eu_bb_regs[] = {
+static const uint16_t rtl8188e_bb_regs[] = {
0x800, 0x804, 0x808, 0x80c, 0x810, 0x814, 0x818, 0x81c,
0x820, 0x824, 0x828, 0x82c, 0x830, 0x834, 0x838, 0x83c,
0x840, 0x844, 0x848, 0x84c, 0x850, 0x854, 0x858, 0x85c,
@@ -93,7 +93,7 @@ static const uint16_t rtl8188eu_bb_regs[] = {
0xed8, 0xedc, 0xee0, 0xee8, 0xeec, 0xf14, 0xf4c, 0xf00
};
-static const uint32_t rtl8188eu_bb_vals[] = {
+static const uint32_t rtl8188e_bb_vals[] = {
0x80040000, 0x00000003, 0x0000fc00, 0x0000000a, 0x10001331,
0x020c3d10, 0x02200385, 0x00000000, 0x01000100, 0x00390204,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -135,17 +135,17 @@ static const uint32_t rtl8188eu_bb_vals[] = {
0x00000000, 0x00000300
};
-static const struct rtwn_bb_prog rtl8188eu_bb[] = {
+static const struct rtwn_bb_prog rtl8188e_bb[] = {
{
- nitems(rtl8188eu_bb_regs),
- rtl8188eu_bb_regs,
- rtl8188eu_bb_vals,
+ nitems(rtl8188e_bb_regs),
+ rtl8188e_bb_regs,
+ rtl8188e_bb_vals,
{ 0 },
NULL
}
};
-static const uint32_t rtl8188eu_agc_vals[] = {
+static const uint32_t rtl8188e_agc_vals[] = {
0xfb000001, 0xfb010001, 0xfb020001, 0xfb030001, 0xfb040001,
0xfb050001, 0xfa060001, 0xf9070001, 0xf8080001, 0xf7090001,
0xf60a0001, 0xf50b0001, 0xf40c0001, 0xf30d0001, 0xf20e0001,
@@ -174,10 +174,10 @@ static const uint32_t rtl8188eu_agc_vals[] = {
0x407d0001, 0x407e0001, 0x407f0001
};
-static const struct rtwn_agc_prog rtl8188eu_agc[] = {
+static const struct rtwn_agc_prog rtl8188e_agc[] = {
{
- nitems(rtl8188eu_agc_vals),
- rtl8188eu_agc_vals,
+ nitems(rtl8188e_agc_vals),
+ rtl8188e_agc_vals,
{ 0 },
NULL
}
@@ -186,7 +186,7 @@ static const struct rtwn_agc_prog rtl8188eu_agc[] = {
/*
* RF initialization values.
*/
-static const uint8_t rtl8188eu_rf_regs[] = {
+static const uint8_t rtl8188e_rf_regs[] = {
0x00, 0x08, 0x18, 0x19, 0x1e, 0x1f, 0x2f, 0x3f, 0x42, 0x57,
0x58, 0x67, 0x83, 0xb0, 0xb1, 0xb2, 0xb4, 0xb6, 0xb7, 0xb8,
0xb9, 0xba, 0xbb, 0xbf, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
@@ -199,7 +199,7 @@ static const uint8_t rtl8188eu_rf_regs[] = {
0x1f, 0xfe, 0xfe, 0x1e, 0x1f, 0x00
};
-static const uint32_t rtl8188eu_rf_vals[] = {
+static const uint32_t rtl8188e_rf_vals[] = {
0x30000, 0x84000, 0x00407, 0x00012, 0x80009, 0x00880, 0x1a060,
0x00000, 0x060c0, 0xd0000, 0xbe180, 0x01552, 0x00000, 0xff8fc,
0x54400, 0xccc19, 0x43003, 0x4953e, 0x1c718, 0x060ff, 0x80001,
@@ -216,11 +216,11 @@ static const uint32_t rtl8188eu_rf_vals[] = {
0x0c350, 0x0c350, 0x00001, 0x80000, 0x33e60
};
-static const struct rtwn_rf_prog rtl8188eu_rf[] = {
+static const struct rtwn_rf_prog rtl8188e_rf[] = {
{
- nitems(rtl8188eu_rf_regs),
- rtl8188eu_rf_regs,
- rtl8188eu_rf_vals,
+ nitems(rtl8188e_rf_regs),
+ rtl8188e_rf_regs,
+ rtl8188e_rf_vals,
{ 0 },
NULL
},
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_reg.h b/freebsd/sys/dev/rtwn/rtl8188e/r88e_reg.h
index f6f26fa4..94b9a660 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_reg.h
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_reg.h
@@ -32,6 +32,7 @@
#define R88E_HISR 0x0b4
#define R88E_HIMRE 0x0b8
#define R88E_HISRE 0x0bc
+#define R88E_XCK_OUT_CTRL 0x07c
/* MAC General Configuration. */
#define R88E_32K_CTRL 0x194
#define R88E_HMEBOX_EXT(idx) (0x1f0 + (idx) * 4)
@@ -45,16 +46,49 @@
/* Bits for R88E_HIMR. */
-#define R88E_HIMR_CPWM 0x00000100
-#define R88E_HIMR_CPWM2 0x00000200
-#define R88E_HIMR_TBDER 0x04000000
-#define R88E_HIMR_PSTIMEOUT 0x20000000
+#define R88E_HIMR_ROK 0x00000001 /* receive DMA OK */
+#define R88E_HIMR_RDU 0x00000002 /* Rx descriptor unavailable */
+#define R88E_HIMR_VODOK 0x00000004 /* AC_VO DMA OK */
+#define R88E_HIMR_VIDOK 0x00000008 /* AC_VI DMA OK */
+#define R88E_HIMR_BEDOK 0x00000010 /* AC_BE DMA OK */
+#define R88E_HIMR_BKDOK 0x00000020 /* AC_BK DMA OK */
+#define R88E_HIMR_MGNTDOK 0x00000040 /* management queue DMA OK */
+#define R88E_HIMR_HIGHDOK 0x00000080 /* high queue DMA OK */
+#define R88E_HIMR_CPWM 0x00000100 /* CPU power mode intr 1 */
+#define R88E_HIMR_CPWM2 0x00000200 /* CPU power mode intr 2 */
+#define R88E_HIMR_C2HCMD 0x00000400 /* C2H command interrupt */
+#define R88E_HIMR_HISR 0x00000800 /* (HISR & HIMR) != 0 */
+#define R88E_HIMR_ATIMEND 0x00001000 /* ATIM window end interrupt */
+#define R88E_HIMR_HSISR 0x00008000 /* (HSIMR & HSISR) != 0 */
+#define R88E_HIMR_BCNDERR 0x00010000 /* beacon queue DMA error */
+#define R88E_HIMR_BCNINT 0x00100000 /* beacon DMA interrupt 0 */
+#define R88E_HIMR_TSF32 0x01000000 /* TSF 32 bit interrupt */
+#define R88E_HIMR_TBDOK 0x02000000 /* beacon transmit OK */
+#define R88E_HIMR_TBDER 0x04000000 /* beacon transmit error */
+#define R88E_HIMR_GTIMER3 0x08000000 /* GTIMER3 interrupt */
+#define R88E_HIMR_GTIMER4 0x10000000 /* GTIMER4 interrupt */
+#define R88E_HIMR_PSTIMEOUT 0x20000000 /* powersave timeout */
+#define R88E_HIMR_TXRPT 0x40000000 /* Tx report interrupt */
/* Bits for R88E_HIMRE.*/
-#define R88E_HIMRE_RXFOVW 0x00000100
-#define R88E_HIMRE_TXFOVW 0x00000200
-#define R88E_HIMRE_RXERR 0x00000400
-#define R88E_HIMRE_TXERR 0x00000800
+#define R88E_HIMRE_RXFOVW 0x00000100 /* receive FIFO overflow */
+#define R88E_HIMRE_TXFOVW 0x00000200 /* transmit FIFO overflow */
+#define R88E_HIMRE_RXERR 0x00000400 /* receive error */
+#define R88E_HIMRE_TXERR 0x00000800 /* transmit error */
+#define R88E_HIMRE_BCNDOK1 0x00004000 /* beacon queue DMA OK (1) */
+#define R88E_HIMRE_BCNDOK2 0x00008000 /* beacon queue DMA OK (2) */
+#define R88E_HIMRE_BCNDOK3 0x00010000 /* beacon queue DMA OK (3) */
+#define R88E_HIMRE_BCNDOK4 0x00020000 /* beacon queue DMA OK (4) */
+#define R88E_HIMRE_BCNDOK5 0x00040000 /* beacon queue DMA OK (5) */
+#define R88E_HIMRE_BCNDOK6 0x00080000 /* beacon queue DMA OK (6) */
+#define R88E_HIMRE_BCNDOK7 0x00100000 /* beacon queue DMA OK (7) */
+#define R88E_HIMRE_BCNDMAINT1 0x00200000 /* beacon DMA interrupt 1 */
+#define R88E_HIMRE_BCNDMAINT2 0x00400000 /* beacon DMA interrupt 2 */
+#define R88E_HIMRE_BCNDMAINT3 0x00800000 /* beacon DMA interrupt 3 */
+#define R88E_HIMRE_BCNDMAINT4 0x01000000 /* beacon DMA interrupt 4 */
+#define R88E_HIMRE_BCNDMAINT5 0x02000000 /* beacon DMA interrupt 5 */
+#define R88E_HIMRE_BCNDMAINT6 0x04000000 /* beacon DMA interrupt 6 */
+#define R88E_HIMRE_BCNDMAINT7 0x08000000 /* beacon DMA interrupt 7 */
/* Bits for R88E_TX_RPT_CTRL. */
#define R88E_TX_RPT1_ENA 0x01
@@ -79,6 +113,7 @@
* RF (6052) registers.
*/
#define R88E_RF_T_METER 0x42
+#define R88E_RF_WE_LUT 0xef
/* Bits for R92C_RF_CHNLBW. */
#define R88E_RF_CHNLBW_BW20 0x00c00
@@ -88,4 +123,7 @@
#define R88E_RF_T_METER_VAL_S 10
#define R88E_RF_T_METER_START 0x30000
+/* Bits for R88E_XCK_OUT_CTRL. */
+#define R88E_XCK_OUT_CTRL_EN 1
+
#endif /* R88E_REG_H */
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom.c
index e1337dba..3afa2910 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom.c
@@ -83,5 +83,6 @@ r88e_parse_rom(struct rtwn_softc *sc, uint8_t *buf)
__func__,rs->regulatory);
sc->thermal_meter = rom->thermal_meter;
- IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, rom->macaddr);
+
+ rtwn_r92c_set_rom_opts(sc, buf);
}
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom_image.h b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom_image.h
index c80028e0..d5d97ffb 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom_image.h
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rom_image.h
@@ -44,11 +44,24 @@ struct r88e_rom {
uint8_t reserved4[3];
uint8_t rf_ant_opt;
uint8_t reserved5[6];
- uint16_t vid;
- uint16_t pid;
- uint8_t usb_opt;
- uint8_t reserved6[2];
- uint8_t macaddr[IEEE80211_ADDR_LEN];
+
+ union {
+ struct {
+ uint16_t vid;
+ uint16_t pid;
+ uint8_t usb_opt;
+ uint8_t reserved6[2];
+ uint8_t macaddr[IEEE80211_ADDR_LEN];
+ } __packed usb;
+
+ struct {
+ uint8_t macaddr[IEEE80211_ADDR_LEN];
+ uint16_t vid;
+ uint16_t pid;
+ uint8_t reserved6[3];
+ } __packed pci;
+ } __packed diff_d0;
+
uint8_t reserved7[2];
uint8_t string[33]; /* "realtek 802.11n NIC" */
uint8_t reserved8[256];
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c
index 53cc722f..eee2f63f 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c
@@ -58,6 +58,25 @@ __FBSDID("$FreeBSD$");
#include <dev/rtwn/rtl8188e/r88e_rx_desc.h>
+/*
+ * Classify a received buffer by the report-selector field
+ * (R88E_RXDW3_RPT) of Rx descriptor dword 3; 'buf' is interpreted as a
+ * struct r92c_rx_stat.
+ *
+ * Returns RTWN_RX_DATA for R88E_RXDW3_RPT_RX (and for any unrecognized
+ * selector), RTWN_RX_TX_REPORT for either Tx report selector and
+ * RTWN_RX_OTHER for R88E_RXDW3_RPT_HIS.  'sc' and 'len' are not used.
+ */
+int
+r88e_classify_intr(struct rtwn_softc *sc, void *buf, int len)
+{
+ struct r92c_rx_stat *stat = buf;
+ int report_sel = MS(le32toh(stat->rxdw3), R88E_RXDW3_RPT);
+
+ switch (report_sel) {
+ case R88E_RXDW3_RPT_RX:
+ return (RTWN_RX_DATA);
+ case R88E_RXDW3_RPT_TX1: /* per-packet Tx report */
+ case R88E_RXDW3_RPT_TX2: /* periodical Tx report */
+ return (RTWN_RX_TX_REPORT);
+ case R88E_RXDW3_RPT_HIS:
+ return (RTWN_RX_OTHER);
+ default: /* shut up the compiler */
+ return (RTWN_RX_DATA);
+ }
+}
+
void
r88e_ratectl_tx_complete(struct rtwn_softc *sc, uint8_t *buf, int len)
{
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu.h b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu.h
index 85b637cb..e79b1387 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu.h
+++ b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu.h
@@ -25,15 +25,21 @@
/*
+ * Global definitions.
+ */
+#define R88EU_PUBQ_NPAGES 142
+#define R88EU_TX_PAGE_COUNT 169
+
+
+/*
* Function declarations.
*/
/* r88eu_init.c */
+void r88eu_init_bb(struct rtwn_softc *);
+int r88eu_power_on(struct rtwn_softc *);
void r88eu_power_off(struct rtwn_softc *);
void r88eu_init_intr(struct rtwn_softc *);
void r88eu_init_rx_agg(struct rtwn_softc *);
void r88eu_post_init(struct rtwn_softc *);
-/* r88eu_rx.c */
-int r88eu_classify_intr(struct rtwn_softc *, void *, int);
-
#endif /* RTL8188EU_H */
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
index 73cc7856..6b153bcd 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <dev/rtwn/rtl8192c/usb/r92cu_tx_desc.h>
#include <dev/rtwn/rtl8188e/r88e_priv.h>
+#include <dev/rtwn/rtl8188e/r88e_rom_image.h> /* for 'macaddr' field */
#include <dev/rtwn/rtl8188e/usb/r88eu.h>
@@ -68,6 +69,14 @@ static struct rtwn_r88e_txpwr r88e_txpwr;
void r88eu_attach(struct rtwn_usb_softc *);
+/*
+ * Copy the MAC address out of the USB variant of the ROM layout
+ * (diff_d0.usb.macaddr) into sc->sc_ic.ic_macaddr.  'buf' is
+ * interpreted as a struct r88e_rom.
+ */
static void
+r88eu_set_macaddr(struct rtwn_softc *sc, uint8_t *buf)
+{
+ struct r88e_rom *rom = (struct r88e_rom *)buf;
+
+ IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, rom->diff_d0.usb.macaddr);
+}
+
+static void
r88e_postattach(struct rtwn_softc *sc)
{
struct r92c_softc *rs = sc->sc_priv;
@@ -95,7 +104,7 @@ r88eu_attach_private(struct rtwn_softc *sc)
rs->rs_tx_enable_ampdu = r88e_tx_enable_ampdu;
rs->rs_tx_setup_hwseq = r88e_tx_setup_hwseq;
rs->rs_tx_setup_macid = r88e_tx_setup_macid;
- rs->rs_set_name = rtwn_nop_softc; /* not used */
+ rs->rs_set_rom_opts = r88eu_set_macaddr;
rs->rf_read_delay[0] = 10;
rs->rf_read_delay[1] = 100;
@@ -132,7 +141,7 @@ r88eu_attach(struct rtwn_usb_softc *uc)
sc->sc_get_rx_stats = r88e_get_rx_stats;
sc->sc_get_rssi_cck = r88e_get_rssi_cck;
sc->sc_get_rssi_ofdm = r88e_get_rssi_ofdm;
- sc->sc_classify_intr = r88eu_classify_intr;
+ sc->sc_classify_intr = r88e_classify_intr;
sc->sc_handle_tx_report = r88e_ratectl_tx_complete;
sc->sc_handle_c2h_report = r88e_handle_c2h_report;
sc->sc_check_frame = rtwn_nop_int_softc_mbuf;
@@ -142,7 +151,7 @@ r88eu_attach(struct rtwn_usb_softc *uc)
sc->sc_efuse_postread = rtwn_nop_softc;
sc->sc_parse_rom = r88e_parse_rom;
sc->sc_set_led = r88e_set_led;
- sc->sc_power_on = r88e_power_on;
+ sc->sc_power_on = r88eu_power_on;
sc->sc_power_off = r88eu_power_off;
#ifndef RTWN_WITHOUT_UCODE
sc->sc_fw_reset = r88e_fw_reset;
@@ -151,7 +160,7 @@ r88eu_attach(struct rtwn_usb_softc *uc)
sc->sc_llt_init = r92c_llt_init;
sc->sc_set_page_size = r92c_set_page_size;
sc->sc_lc_calib = r92c_lc_calib;
- sc->sc_iq_calib = r88e_iq_calib; /* XXX TODO */
+ sc->sc_iq_calib = r88e_iq_calib;
sc->sc_read_chipid_vendor = rtwn_nop_softc_uint32;
sc->sc_adj_devcaps = r88eu_adj_devcaps;
sc->sc_vap_preattach = rtwn_nop_softc_vap;
@@ -174,29 +183,29 @@ r88eu_attach(struct rtwn_usb_softc *uc)
sc->sc_init_ampdu = rtwn_nop_softc;
sc->sc_init_intr = r88eu_init_intr;
sc->sc_init_edca = r92c_init_edca;
- sc->sc_init_bb = r88e_init_bb;
+ sc->sc_init_bb = r88eu_init_bb;
sc->sc_init_rf = r92c_init_rf;
sc->sc_init_antsel = rtwn_nop_softc;
sc->sc_post_init = r88eu_post_init;
sc->sc_init_bcnq1_boundary = rtwn_nop_int_softc;
- sc->mac_prog = &rtl8188eu_mac[0];
- sc->mac_size = nitems(rtl8188eu_mac);
- sc->bb_prog = &rtl8188eu_bb[0];
- sc->bb_size = nitems(rtl8188eu_bb);
- sc->agc_prog = &rtl8188eu_agc[0];
- sc->agc_size = nitems(rtl8188eu_agc);
- sc->rf_prog = &rtl8188eu_rf[0];
+ sc->mac_prog = &rtl8188e_mac[0];
+ sc->mac_size = nitems(rtl8188e_mac);
+ sc->bb_prog = &rtl8188e_bb[0];
+ sc->bb_size = nitems(rtl8188e_bb);
+ sc->agc_prog = &rtl8188e_agc[0];
+ sc->agc_size = nitems(rtl8188e_agc);
+ sc->rf_prog = &rtl8188e_rf[0];
sc->name = "RTL8188EU";
sc->fwname = "rtwn-rtl8188eufw";
sc->fwsig = 0x88e;
- sc->page_count = R88E_TX_PAGE_COUNT;
+ sc->page_count = R88EU_TX_PAGE_COUNT;
sc->pktbuf_count = R88E_TXPKTBUF_COUNT;
sc->ackto = 0x40;
- sc->npubqpages = R88E_PUBQ_NPAGES;
+ sc->npubqpages = R88EU_PUBQ_NPAGES;
sc->page_size = R92C_TX_PAGE_SIZE;
sc->txdesc_len = sizeof(struct r92cu_tx_desc);
diff --git a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
index 2c776671..bfe3681e 100644
--- a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_init.c
@@ -57,6 +57,87 @@ __FBSDID("$FreeBSD$");
+/*
+ * USB-specific baseband bring-up: program R92C_SYS_FUNC_EN and
+ * R92C_RF_CTRL, then hand over to r88e_init_bb_common().
+ */
void
+r88eu_init_bb(struct rtwn_softc *sc)
+{
+
+ /* Enable BB and RF. */
+ rtwn_setbits_2(sc, R92C_SYS_FUNC_EN, 0,
+ R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST |
+ R92C_SYS_FUNC_EN_DIO_RF);
+
+ rtwn_write_1(sc, R92C_RF_CTRL,
+ R92C_RF_CTRL_EN | R92C_RF_CTRL_RSTB | R92C_RF_CTRL_SDMRSTB);
+ /* Single-byte write: replaces the low byte of SYS_FUNC_EN outright. */
+ rtwn_write_1(sc, R92C_SYS_FUNC_EN,
+ R92C_SYS_FUNC_EN_USBA | R92C_SYS_FUNC_EN_USBD |
+ R92C_SYS_FUNC_EN_BB_GLB_RST | R92C_SYS_FUNC_EN_BBRSTB);
+
+ r88e_init_bb_common(sc);
+}
+
+/*
+ * Power-on sequence for the USB variant.
+ *
+ * Returns 0 on success, ETIMEDOUT when either polling loop below runs
+ * out of tries (5000 tries with rtwn_delay(sc, 10) between them), and
+ * EIO when any register helper wrapped in RTWN_CHK() returns non-zero.
+ */
+int
+r88eu_power_on(struct rtwn_softc *sc)
+{
+/* Bail out of the function with EIO as soon as a register access fails. */
+#define RTWN_CHK(res) do { \
+ if (res != 0) \
+ return (EIO); \
+} while(0)
+ int ntries;
+
+ /* Wait for power ready bit. */
+ for (ntries = 0; ntries < 5000; ntries++) {
+ if (rtwn_read_4(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_SUS_HOST)
+ break;
+ rtwn_delay(sc, 10);
+ }
+ if (ntries == 5000) {
+ device_printf(sc->sc_dev,
+ "timeout waiting for chip power up\n");
+ return (ETIMEDOUT);
+ }
+
+ /* Reset BB. */
+ RTWN_CHK(rtwn_setbits_1(sc, R92C_SYS_FUNC_EN,
+ R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST, 0));
+
+ RTWN_CHK(rtwn_setbits_1(sc, R92C_AFE_XTAL_CTRL + 2, 0, 0x80));
+
+ /* Disable HWPDN. */
+ RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
+ R92C_APS_FSMCO_APDM_HPDN, 0, 1));
+
+ /* Disable WL suspend. */
+ RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
+ R92C_APS_FSMCO_AFSM_HSUS | R92C_APS_FSMCO_AFSM_PCIE, 0, 1));
+
+ /* Set APFM_ONMAC, then poll until the bit reads back as zero. */
+ RTWN_CHK(rtwn_setbits_1_shift(sc, R92C_APS_FSMCO,
+ 0, R92C_APS_FSMCO_APFM_ONMAC, 1));
+ for (ntries = 0; ntries < 5000; ntries++) {
+ if (!(rtwn_read_2(sc, R92C_APS_FSMCO) &
+ R92C_APS_FSMCO_APFM_ONMAC))
+ break;
+ rtwn_delay(sc, 10);
+ }
+ /* NOTE(review): unlike the first timeout, this one is not logged. */
+ if (ntries == 5000)
+ return (ETIMEDOUT);
+
+ /* Enable LDO normal mode. */
+ RTWN_CHK(rtwn_setbits_1(sc, R92C_LPLDO_CTRL,
+ R92C_LPLDO_CTRL_SLEEP, 0));
+
+ /* Enable MAC DMA/WMAC/SCHEDULE/SEC blocks. */
+ RTWN_CHK(rtwn_write_2(sc, R92C_CR, 0));
+ /* R92C_CR_ENSEC is only set when hardware crypto is in use. */
+ RTWN_CHK(rtwn_setbits_2(sc, R92C_CR, 0,
+ R92C_CR_HCI_TXDMA_EN | R92C_CR_TXDMA_EN |
+ R92C_CR_HCI_RXDMA_EN | R92C_CR_RXDMA_EN |
+ R92C_CR_PROTOCOL_EN | R92C_CR_SCHEDULE_EN |
+ ((sc->sc_hwcrypto != RTWN_CRYPTO_SW) ? R92C_CR_ENSEC : 0) |
+ R92C_CR_CALTMR_EN));
+
+ return (0);
+#undef RTWN_CHK
+}
+
+void
r88eu_power_off(struct rtwn_softc *sc)
{
uint8_t reg;
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce.h b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce.h
index 93379f8b..5d13f160 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce.h
+++ b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce.h
@@ -60,7 +60,7 @@ void r92ce_post_init(struct rtwn_softc *);
void r92ce_set_led(struct rtwn_softc *, int, int);
/* r92ce_rx.c */
-int r92ce_classify_intr(struct rtwn_softc *, void *, int);
+int r92ce_get_intr_status(struct rtwn_pci_softc *, int *);
void r92ce_enable_intr(struct rtwn_pci_softc *);
void r92ce_start_xfers(struct rtwn_softc *);
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c
index e43d3ac0..61791c41 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c
@@ -94,7 +94,7 @@ r92ce_postattach(struct rtwn_softc *sc)
}
static void
-r92ce_set_name(struct rtwn_softc *sc)
+r92ce_set_name(struct rtwn_softc *sc, uint8_t *buf)
{
struct r92c_softc *rs = sc->sc_priv;
@@ -119,7 +119,7 @@ r92ce_attach_private(struct rtwn_softc *sc)
rs->rs_tx_enable_ampdu = r92c_tx_enable_ampdu;
rs->rs_tx_setup_hwseq = r92c_tx_setup_hwseq;
rs->rs_tx_setup_macid = r92c_tx_setup_macid;
- rs->rs_set_name = r92ce_set_name;
+ rs->rs_set_rom_opts = r92ce_set_name;
/* XXX TODO: test with net80211 ratectl! */
#ifndef RTWN_WITHOUT_UCODE
@@ -157,6 +157,7 @@ r92ce_attach(struct rtwn_pci_softc *pc)
pc->pc_tx_postsetup = r92ce_tx_postsetup;
pc->pc_copy_tx_desc = r92ce_copy_tx_desc;
pc->pc_enable_intr = r92ce_enable_intr;
+ pc->pc_get_intr_status = r92ce_get_intr_status;
pc->pc_qmap = 0xf771;
pc->tcr =
@@ -177,7 +178,7 @@ r92ce_attach(struct rtwn_pci_softc *pc)
sc->sc_get_rx_stats = r92c_get_rx_stats;
sc->sc_get_rssi_cck = r92c_get_rssi_cck;
sc->sc_get_rssi_ofdm = r92c_get_rssi_ofdm;
- sc->sc_classify_intr = r92ce_classify_intr;
+ sc->sc_classify_intr = r92c_classify_intr;
sc->sc_handle_tx_report = rtwn_nop_softc_uint8_int;
sc->sc_handle_c2h_report = rtwn_nop_softc_uint8_int;
sc->sc_check_frame = rtwn_nop_int_softc_mbuf;
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_calib.c b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_calib.c
index 070f6e1d..f1cd42e8 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_calib.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_calib.c
@@ -77,55 +77,58 @@ r92ce_iq_calib_chain(struct rtwn_softc *sc, int chain, uint16_t tx[2],
uint16_t rx[2])
{
uint32_t status;
- int offset = chain * 0x20;
if (chain == 0) { /* IQ calibration for chain 0. */
/* IQ calibration settings for chain 0. */
- rtwn_bb_write(sc, 0xe30, 0x10008c1f);
- rtwn_bb_write(sc, 0xe34, 0x10008c1f);
- rtwn_bb_write(sc, 0xe38, 0x82140102);
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(0), 0x10008c1f);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(0), 0x10008c1f);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(0), 0x82140102);
if (sc->ntxchains > 1) {
- rtwn_bb_write(sc, 0xe3c, 0x28160202); /* 2T */
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160202);
/* IQ calibration settings for chain 1. */
- rtwn_bb_write(sc, 0xe50, 0x10008c22);
- rtwn_bb_write(sc, 0xe54, 0x10008c22);
- rtwn_bb_write(sc, 0xe58, 0x82140102);
- rtwn_bb_write(sc, 0xe5c, 0x28160202);
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(1), 0x10008c22);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(1), 0x10008c22);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(1), 0x82140102);
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(1), 0x28160202);
} else
- rtwn_bb_write(sc, 0xe3c, 0x28160502); /* 1T */
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160502);
/* LO calibration settings. */
- rtwn_bb_write(sc, 0xe4c, 0x001028d1);
+ rtwn_bb_write(sc, R92C_IQK_AGC_RSP, 0x001028d1);
/* We're doing LO and IQ calibration in one shot. */
- rtwn_bb_write(sc, 0xe48, 0xf9000000);
- rtwn_bb_write(sc, 0xe48, 0xf8000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf9000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf8000000);
} else { /* IQ calibration for chain 1. */
/* We're doing LO and IQ calibration in one shot. */
- rtwn_bb_write(sc, 0xe60, 0x00000002);
- rtwn_bb_write(sc, 0xe60, 0x00000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_CONT, 2);
+ rtwn_bb_write(sc, R92C_IQK_AGC_CONT, 0);
}
/* Give LO and IQ calibrations the time to complete. */
rtwn_delay(sc, 1000);
/* Read IQ calibration status. */
- status = rtwn_bb_read(sc, 0xeac);
+ status = rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(0));
if (status & (1 << (28 + chain * 3)))
return (0); /* Tx failed. */
/* Read Tx IQ calibration results. */
- tx[0] = (rtwn_bb_read(sc, 0xe94 + offset) >> 16) & 0x3ff;
- tx[1] = (rtwn_bb_read(sc, 0xe9c + offset) >> 16) & 0x3ff;
+ tx[0] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_BEFORE(chain)),
+ R92C_POWER_IQK_RESULT);
+ tx[1] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_AFTER(chain)),
+ R92C_POWER_IQK_RESULT);
if (tx[0] == 0x142 || tx[1] == 0x042)
return (0); /* Tx failed. */
if (status & (1 << (27 + chain * 3)))
return (1); /* Rx failed. */
/* Read Rx IQ calibration results. */
- rx[0] = (rtwn_bb_read(sc, 0xea4 + offset) >> 16) & 0x3ff;
- rx[1] = (rtwn_bb_read(sc, 0xeac + offset) >> 16) & 0x3ff;
+ rx[0] = MS(rtwn_bb_read(sc, R92C_RX_POWER_IQK_BEFORE(chain)),
+ R92C_POWER_IQK_RESULT);
+ rx[1] = MS(rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(chain)),
+ R92C_POWER_IQK_RESULT);
if (rx[0] == 0x132 || rx[1] == 0x036)
return (1); /* Rx failed. */
@@ -202,18 +205,18 @@ r92ce_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2][2],
if (sc->ntxchains > 1)
rtwn_bb_write(sc, 0x0b6c, 0x00080000);
- rtwn_bb_write(sc, 0x0e28, 0x80800000);
- rtwn_bb_write(sc, 0x0e40, 0x01007c00);
- rtwn_bb_write(sc, 0x0e44, 0x01004800);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+ rtwn_bb_write(sc, R92C_TX_IQK, 0x01007c00);
+ rtwn_bb_write(sc, R92C_RX_IQK, 0x01004800);
rtwn_bb_write(sc, 0x0b68, 0x00080000);
for (chain = 0; chain < sc->ntxchains; chain++) {
if (chain > 0) {
/* Put chain 0 on standby. */
- rtwn_bb_write(sc, 0x0e28, 0x00);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000);
- rtwn_bb_write(sc, 0x0e28, 0x80800000);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
/* Enable chain 1. */
for (i = 0; i < nitems(reg_adda); i++)
@@ -259,7 +262,7 @@ r92ce_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2][2],
vals->fpga0_rfifacesw1);
rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, vals->ofdm0_trmuxpar);
- rtwn_bb_write(sc, 0x0e28, 0x00);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00032ed3);
if (sc->ntxchains > 1)
rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00032ed3);
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_rx.c b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_rx.c
index 203b0bf8..0f65d1bf 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_rx.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_rx.c
@@ -60,10 +60,10 @@ __FBSDID("$FreeBSD$");
int
-r92ce_classify_intr(struct rtwn_softc *sc, void *arg, int len __unused)
+r92ce_get_intr_status(struct rtwn_pci_softc *pc, int *rings)
{
+ struct rtwn_softc *sc = &pc->pc_sc;
uint32_t status;
- int *rings = arg;
int ret;
*rings = 0;
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c.h
index d8f7afc8..f215e34f 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c.h
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c.h
@@ -102,6 +102,7 @@ void r92c_efuse_postread(struct rtwn_softc *);
void r92c_parse_rom(struct rtwn_softc *, uint8_t *);
/* r92c_rx.c */
+int r92c_classify_intr(struct rtwn_softc *, void *, int);
int8_t r92c_get_rssi_cck(struct rtwn_softc *, void *);
int8_t r92c_get_rssi_ofdm(struct rtwn_softc *, void *);
uint8_t r92c_rx_radiotap_flags(const void *);
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_calib.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_calib.c
index 6c18a606..ebb4f5cf 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_calib.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_calib.c
@@ -48,16 +48,366 @@ __FBSDID("$FreeBSD$");
#include <dev/rtwn/if_rtwnreg.h>
#include <dev/rtwn/if_rtwnvar.h>
+#include <dev/rtwn/if_rtwn_debug.h>
#include <dev/rtwn/rtl8192c/r92c.h>
#include <dev/rtwn/rtl8192c/r92c_reg.h>
+/*
+ * Registers to save and restore during IQ calibration.
+ * Filled in by r92c_iq_calib_run() on its first run (n == 0).
+ */
+struct r92c_iq_cal_reg_vals {
+ uint32_t adda[16]; /* one per entry of reg_adda[] */
+ uint8_t txpause; /* R92C_TXPAUSE */
+ uint8_t bcn_ctrl[2]; /* R92C_BCN_CTRL(0) and (1) */
+ uint32_t gpio_muxcfg; /* R92C_GPIO_MUXCFG */
+ uint32_t cck0_afesetting; /* R92C_CCK0_AFESETTING */
+ uint32_t ofdm0_trxpathena; /* R92C_OFDM0_TRXPATHENA */
+ uint32_t ofdm0_trmuxpar; /* R92C_OFDM0_TRMUXPAR */
+ uint32_t fpga0_rfifacesw0; /* R92C_FPGA0_RFIFACESW(0) */
+ uint32_t fpga0_rfifacesw1; /* R92C_FPGA0_RFIFACESW(1) */
+ uint32_t fpga0_rfifaceoe0; /* R92C_FPGA0_RFIFACEOE(0) */
+ uint32_t fpga0_rfifaceoe1; /* R92C_FPGA0_RFIFACEOE(1) */
+ uint32_t config_ant0; /* R92C_CONFIG_ANT(0) */
+ uint32_t config_ant1; /* R92C_CONFIG_ANT(1) */
+};
+
+/*
+ * Run one LO/IQ calibration pass for a single chain and fetch the
+ * results.
+ *
+ * Returns 0 when Tx calibration failed (rx[] is not touched), 1 when Tx
+ * succeeded but Rx failed (tx[] holds the Tx results) and 3 when both
+ * succeeded (tx[] and rx[] are filled in).
+ *
+ * XXX TODO: merge
+ * (presumably with r92ce_iq_calib_chain(), which differs in using a
+ * 1000 rather than 10000 delay - confirm)
+ */
+static int
+r92c_iq_calib_chain(struct rtwn_softc *sc, int chain, uint16_t tx[2],
+ uint16_t rx[2])
+{
+ uint32_t status;
+
+ if (chain == 0) { /* IQ calibration for chain 0. */
+ /* IQ calibration settings for chain 0. */
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(0), 0x10008c1f);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(0), 0x10008c1f);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(0), 0x82140102);
+
+ if (sc->ntxchains > 1) {
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160202);
+ /* IQ calibration settings for chain 1. */
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(1), 0x10008c22);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(1), 0x10008c22);
+ rtwn_bb_write(sc, R92C_TX_IQK_PI(1), 0x82140102);
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(1), 0x28160202);
+ } else
+ rtwn_bb_write(sc, R92C_RX_IQK_PI(0), 0x28160502);
+
+ /* LO calibration settings. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_RSP, 0x001028d1);
+ /* We're doing LO and IQ calibration in one shot. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf9000000);
+ rtwn_bb_write(sc, R92C_IQK_AGC_PTS, 0xf8000000);
+
+ } else { /* IQ calibration for chain 1. */
+ /* We're doing LO and IQ calibration in one shot. */
+ rtwn_bb_write(sc, R92C_IQK_AGC_CONT, 2);
+ rtwn_bb_write(sc, R92C_IQK_AGC_CONT, 0);
+ }
+
+ /* Give LO and IQ calibrations the time to complete. */
+ rtwn_delay(sc, 10000);
+
+ /*
+ * Read IQ calibration status.  The status word always comes from
+ * the chain 0 register; the per-chain failure flags tested below
+ * are bit (28 + chain * 3) for Tx and bit (27 + chain * 3) for Rx.
+ */
+ status = rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(0));
+
+ if (status & (1 << (28 + chain * 3)))
+ return (0); /* Tx failed. */
+ /* Read Tx IQ calibration results. */
+ tx[0] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_BEFORE(chain)),
+ R92C_POWER_IQK_RESULT);
+ tx[1] = MS(rtwn_bb_read(sc, R92C_TX_POWER_IQK_AFTER(chain)),
+ R92C_POWER_IQK_RESULT);
+ /* These particular result values are also treated as a failure. */
+ if (tx[0] == 0x142 || tx[1] == 0x042)
+ return (0); /* Tx failed. */
+
+ if (status & (1 << (27 + chain * 3)))
+ return (1); /* Rx failed. */
+ /* Read Rx IQ calibration results. */
+ rx[0] = MS(rtwn_bb_read(sc, R92C_RX_POWER_IQK_BEFORE(chain)),
+ R92C_POWER_IQK_RESULT);
+ rx[1] = MS(rtwn_bb_read(sc, R92C_RX_POWER_IQK_AFTER(chain)),
+ R92C_POWER_IQK_RESULT);
+ /* These particular result values are also treated as a failure. */
+ if (rx[0] == 0x132 || rx[1] == 0x036)
+ return (1); /* Rx failed. */
+
+ return (3); /* Both Tx and Rx succeeded. */
+}
+
+/*
+ * Perform calibration run number 'n' on every Tx chain.
+ *
+ * On the first run (n == 0) the registers touched by the calibration
+ * are saved into 'vals'.  The BB registers are written back from 'vals'
+ * after every run; the ADDA and MAC registers (TXPAUSE, BCN_CTRL,
+ * GPIO_MUXCFG) are written back only when n != 0.
+ *
+ * tx[chain][] / rx[chain][] receive the results; entries belonging to a
+ * failed calibration are set to 0xff.
+ */
+static void
+r92c_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2][2],
+ uint16_t rx[2][2], struct r92c_iq_cal_reg_vals *vals)
+{
+ /* Registers to save and restore during IQ calibration. */
+ static const uint16_t reg_adda[16] = {
+ 0x85c, 0xe6c, 0xe70, 0xe74,
+ 0xe78, 0xe7c, 0xe80, 0xe84,
+ 0xe88, 0xe8c, 0xed0, 0xed4,
+ 0xed8, 0xedc, 0xee0, 0xeec
+ };
+ int i, chain;
+ uint32_t hssi_param1;
+
+ /* First run only: remember the ADDA and MAC register contents. */
+ if (n == 0) {
+ for (i = 0; i < nitems(reg_adda); i++)
+ vals->adda[i] = rtwn_bb_read(sc, reg_adda[i]);
+
+ vals->txpause = rtwn_read_1(sc, R92C_TXPAUSE);
+ vals->bcn_ctrl[0] = rtwn_read_1(sc, R92C_BCN_CTRL(0));
+ vals->bcn_ctrl[1] = rtwn_read_1(sc, R92C_BCN_CTRL(1));
+ vals->gpio_muxcfg = rtwn_read_4(sc, R92C_GPIO_MUXCFG);
+ }
+
+ /* Load the ADDA registers; the values depend on the chain count. */
+ if (sc->ntxchains == 1) {
+ rtwn_bb_write(sc, reg_adda[0], 0x0b1b25a0);
+ for (i = 1; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], 0x0bdb25a0);
+ } else {
+ for (i = 0; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], 0x04db25a4);
+ }
+
+ /*
+ * Make sure R92C_HSSI_PARAM1_PI is set for the calibration.  The
+ * value read from chain 0 is the one written to both chains.
+ */
+ hssi_param1 = rtwn_bb_read(sc, R92C_HSSI_PARAM1(0));
+ if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(0),
+ hssi_param1 | R92C_HSSI_PARAM1_PI);
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(1),
+ hssi_param1 | R92C_HSSI_PARAM1_PI);
+ }
+
+ /* First run only: remember the BB register contents. */
+ if (n == 0) {
+ vals->cck0_afesetting = rtwn_bb_read(sc, R92C_CCK0_AFESETTING);
+ vals->ofdm0_trxpathena =
+ rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA);
+ vals->ofdm0_trmuxpar = rtwn_bb_read(sc, R92C_OFDM0_TRMUXPAR);
+ vals->fpga0_rfifacesw0 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(0));
+ vals->fpga0_rfifacesw1 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(1));
+ vals->fpga0_rfifaceoe0 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(0));
+ vals->fpga0_rfifaceoe1 =
+ rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(1));
+ vals->config_ant0 = rtwn_bb_read(sc, R92C_CONFIG_ANT(0));
+ vals->config_ant1 = rtwn_bb_read(sc, R92C_CONFIG_ANT(1));
+ }
+
+ rtwn_bb_setbits(sc, R92C_CCK0_AFESETTING, 0, 0x0f000000);
+ rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, 0x03a05600);
+ rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, 0x000800e4);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), 0x22204000);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACESW(0), 0, 0x04000400);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACEOE(0), 0x400, 0);
+ rtwn_bb_setbits(sc, R92C_FPGA0_RFIFACEOE(1), 0x400, 0);
+
+ if (sc->ntxchains > 1) {
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000);
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00010000);
+ }
+
+ rtwn_write_1(sc, R92C_TXPAUSE,
+ R92C_TX_QUEUE_AC | R92C_TX_QUEUE_MGT | R92C_TX_QUEUE_HIGH);
+ rtwn_write_1(sc, R92C_BCN_CTRL(0),
+ vals->bcn_ctrl[0] & ~R92C_BCN_CTRL_EN_BCN);
+ rtwn_write_1(sc, R92C_BCN_CTRL(1),
+ vals->bcn_ctrl[1] & ~R92C_BCN_CTRL_EN_BCN);
+ /*
+ * NOTE(review): gpio_muxcfg is saved with rtwn_read_4() and restored
+ * with rtwn_write_4(), but this write uses rtwn_write_1() - confirm
+ * that a single-byte access is what is intended here.
+ */
+ rtwn_write_1(sc, R92C_GPIO_MUXCFG,
+ vals->gpio_muxcfg & ~R92C_GPIO_MUXCFG_ENBT);
+
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(0), 0x00080000);
+ if (sc->ntxchains > 1)
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(1), 0x00080000);
+
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+ rtwn_bb_write(sc, R92C_TX_IQK, 0x01007c00);
+ rtwn_bb_write(sc, R92C_RX_IQK, 0x01004800);
+
+ for (chain = 0; chain < sc->ntxchains; chain++) {
+ if (chain > 0) {
+ /* Put chain 0 on standby. */
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000);
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0x80800000);
+
+ /* Enable chain 1. */
+ for (i = 0; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], 0x0b1b25a4);
+ }
+
+ /*
+ * Run IQ calibration twice.
+ * NOTE(review): the loop never breaks, so what is left in
+ * tx[chain] / rx[chain] reflects the second pass even when
+ * the first one succeeded - confirm this is intended.
+ */
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = r92c_iq_calib_chain(sc, chain,
+ tx[chain], rx[chain]);
+ if (ret == 0) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB,
+ "%s: chain %d: Tx failed.\n",
+ __func__, chain);
+ tx[chain][0] = 0xff;
+ tx[chain][1] = 0xff;
+ rx[chain][0] = 0xff;
+ rx[chain][1] = 0xff;
+ } else if (ret == 1) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB,
+ "%s: chain %d: Rx failed.\n",
+ __func__, chain);
+ rx[chain][0] = 0xff;
+ rx[chain][1] = 0xff;
+ } else if (ret == 3) {
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB,
+ "%s: chain %d: Both Tx and Rx "
+ "succeeded.\n", __func__, chain);
+ }
+ }
+
+ RTWN_DPRINTF(sc, RTWN_DEBUG_CALIB,
+ "%s: results for run %d chain %d: tx[0] 0x%x, "
+ "tx[1] 0x%x, rx[0] 0x%x, rx[1] 0x%x\n", __func__, n, chain,
+ tx[chain][0], tx[chain][1], rx[chain][0], rx[chain][1]);
+ }
+
+ /* Write back the BB registers saved on the first run. */
+ rtwn_bb_write(sc, R92C_CCK0_AFESETTING, vals->cck0_afesetting);
+ rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, vals->ofdm0_trxpathena);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(0), vals->fpga0_rfifacesw0);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), vals->fpga0_rfifacesw1);
+ rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, vals->ofdm0_trmuxpar);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(0), vals->fpga0_rfifaceoe0);
+ rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(1), vals->fpga0_rfifaceoe1);
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(0), vals->config_ant0);
+ rtwn_bb_write(sc, R92C_CONFIG_ANT(1), vals->config_ant1);
+
+ rtwn_bb_write(sc, R92C_FPGA0_IQK, 0);
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00032ed3);
+ if (sc->ntxchains > 1)
+ rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00032ed3);
+
+ /* Every run but the first also restores HSSI, ADDA and MAC state. */
+ if (n != 0) {
+ if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(0), hssi_param1);
+ rtwn_bb_write(sc, R92C_HSSI_PARAM1(1), hssi_param1);
+ }
+
+ for (i = 0; i < nitems(reg_adda); i++)
+ rtwn_bb_write(sc, reg_adda[i], vals->adda[i]);
+
+ rtwn_write_1(sc, R92C_TXPAUSE, vals->txpause);
+ rtwn_write_1(sc, R92C_BCN_CTRL(0), vals->bcn_ctrl[0]);
+ rtwn_write_1(sc, R92C_BCN_CTRL(1), vals->bcn_ctrl[1]);
+ rtwn_write_4(sc, R92C_GPIO_MUXCFG, vals->gpio_muxcfg);
+
+ rtwn_bb_write(sc, R92C_TX_IQK_TONE(0), 0x01008c00);
+ rtwn_bb_write(sc, R92C_RX_IQK_TONE(0), 0x01008c00);
+ }
+}
+
+/* Largest difference between two runs that still counts as a match. */
+#define RTWN_IQ_CAL_MAX_TOLERANCE 5
+/*
+ * Compare the results of two calibration runs (tx1/rx1 against
+ * tx2/rx2).  Returns non-zero when the Tx and Rx flags of chain 0 - and
+ * of chain 1 as well when sc->ntxchains > 1 - are all set, i.e. the
+ * compared values differ by at most RTWN_IQ_CAL_MAX_TOLERANCE.
+ */
+static int
+r92c_iq_calib_compare_results(struct rtwn_softc *sc, uint16_t tx1[2][2],
+ uint16_t rx1[2][2], uint16_t tx2[2][2], uint16_t rx2[2][2])
+{
+ int chain, i, tx_ok[2], rx_ok[2];
+
+ tx_ok[0] = tx_ok[1] = rx_ok[0] = rx_ok[1] = 0;
+ for (chain = 0; chain < sc->ntxchains; chain++) {
+ for (i = 0; i < 2; i++) {
+ /* Skip entries marked invalid (0xff) in either run. */
+ if (tx1[chain][i] == 0xff || tx2[chain][i] == 0xff ||
+ rx1[chain][i] == 0xff || rx2[chain][i] == 0xff)
+ continue;
+
+ /*
+ * NOTE(review): the flags are assigned, not
+ * accumulated, so for each chain only the last
+ * index that was not skipped decides the outcome;
+ * a mismatch at i == 0 is overwritten by a match
+ * at i == 1 - confirm this is intended.
+ */
+ tx_ok[chain] = (abs(tx1[chain][i] - tx2[chain][i]) <=
+ RTWN_IQ_CAL_MAX_TOLERANCE);
+
+ rx_ok[chain] = (abs(rx1[chain][i] - rx2[chain][i]) <=
+ RTWN_IQ_CAL_MAX_TOLERANCE);
+ }
+ }
+
+ if (sc->ntxchains > 1)
+ return (tx_ok[0] && tx_ok[1] && rx_ok[0] && rx_ok[1]);
+ else
+ return (tx_ok[0] && rx_ok[0]);
+}
+#undef RTWN_IQ_CAL_MAX_TOLERANCE
+
+/*
+ * Program the calibration results of one chain into the OFDM0
+ * registers.  Nothing is written when the Tx results are marked invalid
+ * (0xff); the Rx part is skipped when the Rx results are marked invalid.
+ */
+static void
+r92c_iq_calib_write_results(struct rtwn_softc *sc, uint16_t tx[2],
+ uint16_t rx[2], int chain)
+{
+ uint32_t reg, val, x;
+ long y, tx_c;
+
+ if (tx[0] == 0xff || tx[1] == 0xff)
+ return;
+
+ /* 'val' is the 10-bit field at bits 31:22 of TXIQIMBALANCE. */
+ reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(chain));
+ val = ((reg >> 22) & 0x3ff);
+ x = tx[0];
+ /* Bit 9 set: extend the 10-bit value across the upper bits. */
+ if (x & 0x00000200)
+ x |= 0xfffffc00;
+ reg = (((x * val) >> 8) & 0x3ff);
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXIQIMBALANCE(chain), 0x3ff, reg);
+ rtwn_bb_setbits(sc, R92C_OFDM0_ECCATHRESHOLD, 0x80000000,
+ ((x * val) & 0x80) << 24);
+
+ y = tx[1];
+ /*
+ * NOTE(review): 'y' is a long; where long is 64 bits wide this OR
+ * does not yield a negative value.  Only low-order bits of the
+ * products are consumed below, so it looks harmless - confirm.
+ */
+ if (y & 0x00000200)
+ y |= 0xfffffc00;
+ tx_c = (y * val) >> 8;
+ /* tx_c is split: bits 9:6 go to TXAFE, bits 5:0 to TXIQIMBALANCE. */
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXAFE(chain), 0xf0000000,
+ (tx_c & 0x3c0) << 22);
+ rtwn_bb_setbits(sc, R92C_OFDM0_TXIQIMBALANCE(chain), 0x003f0000,
+ (tx_c & 0x3f) << 16);
+ rtwn_bb_setbits(sc, R92C_OFDM0_ECCATHRESHOLD, 0x20000000,
+ ((y * val) & 0x80) << 22);
+
+ if (rx[0] == 0xff || rx[1] == 0xff)
+ return;
+
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQIMBALANCE(chain), 0x3ff,
+ rx[0] & 0x3ff);
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQIMBALANCE(chain), 0xfc00,
+ (rx[1] & 0x3f) << 10);
+
+ /* Bits 9:6 of rx[1] live in a different register for each chain. */
+ if (chain == 0) {
+ rtwn_bb_setbits(sc, R92C_OFDM0_RXIQEXTANTA, 0xf0000000,
+ (rx[1] & 0x3c0) << 22);
+ } else {
+ rtwn_bb_setbits(sc, R92C_OFDM0_AGCRSSITABLE, 0xf000,
+ (rx[1] & 0x3c0) << 6);
+ }
+}
+
+/* Upper bound on the number of calibration runs. */
+#define RTWN_IQ_CAL_NRUN 3
+/*
+ * IQ calibration entry point: perform up to RTWN_IQ_CAL_NRUN runs and
+ * stop as soon as two consecutive runs agree.  The results of the last
+ * run are then programmed for chain 0, and for chain 1 when
+ * sc->ntxchains > 1.  If no two consecutive runs agree, nothing is
+ * programmed.
+ */
void
r92c_iq_calib(struct rtwn_softc *sc)
{
- /* XXX TODO */
+ struct r92c_iq_cal_reg_vals vals;
+ uint16_t tx[RTWN_IQ_CAL_NRUN][2][2], rx[RTWN_IQ_CAL_NRUN][2][2];
+ int n, valid;
+
+ valid = 0;
+ for (n = 0; n < RTWN_IQ_CAL_NRUN; n++) {
+ r92c_iq_calib_run(sc, n, tx[n], rx[n], &vals);
+
+ /* The first run has no predecessor to compare against. */
+ if (n == 0)
+ continue;
+
+ /* Valid results remain stable after consecutive runs. */
+ valid = r92c_iq_calib_compare_results(sc, tx[n - 1],
+ rx[n - 1], tx[n], rx[n]);
+ if (valid)
+ break;
+ }
+
+ /* 'n' indexes the run that passed the comparison when valid is set. */
+ if (valid) {
+ r92c_iq_calib_write_results(sc, tx[n][0], rx[n][0], 0);
+ if (sc->ntxchains > 1)
+ r92c_iq_calib_write_results(sc, tx[n][1], rx[n][1], 1);
+ }
}
+#undef RTWN_IQ_CAL_NRUN
void
r92c_lc_calib(struct rtwn_softc *sc)
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c
index 199a419f..8cd55fbf 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c
@@ -324,6 +324,7 @@ r92c_init_antsel(struct rtwn_softc *sc)
rtwn_bb_setbits(sc, R92C_FPGA0_RFPARAM(0), 0, 0x2000);
reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(0));
sc->sc_ant = MS(reg, R92C_FPGA0_RFIFACEOE0_ANT); /* XXX */
+ rtwn_setbits_1(sc, R92C_LEDCFG2, 0x80, 0);
}
void
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h
index 34a4b80c..c3def33e 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h
@@ -66,6 +66,7 @@
#define R92C_HSIMR 0x058
#define R92C_HSISR 0x05c
#define R92C_MULTI_FUNC_CTRL 0x068
+#define R92C_AFE_XTAL_CTRL_EXT 0x078
#define R92C_LDO_SWR_CTRL 0x07c
#define R92C_MCUFWDL 0x080
#define R92C_HMEBOX_EXT(idx) (0x088 + (idx) * 2)
@@ -147,6 +148,7 @@
#define R92C_RD_RESP_PKT_TH 0x463
#define R92C_INIRTS_RATE_SEL 0x480
#define R92C_INIDATA_RATE_SEL(macid) (0x484 + (macid))
+#define R92C_POWER_STATUS 0x4a4
#define R92C_QUEUE_CTRL 0x4c6
#define R92C_MAX_AGGR_NUM 0x4ca
#define R92C_BAR_MODE_CTRL 0x4cc
@@ -347,6 +349,7 @@
/* Bits for R92C_GPIO_MUXCFG. */
#define R92C_GPIO_MUXCFG_ENBT 0x0020
+#define R92C_GPIO_MUXCFG_ENSIC 0x1000
/* Bits for R92C_LEDCFG0. */
#define R92C_LEDCFG0_DIS 0x08
@@ -691,6 +694,7 @@
#define R92C_FPGA1_TXINFO 0x90c
#define R92C_CCK0_SYSTEM 0xa00
#define R92C_CCK0_AFESETTING 0xa04
+#define R92C_CONFIG_ANT(chain) (0xb68 + (chain) * 4)
#define R92C_OFDM0_TRXPATHENA 0xc04
#define R92C_OFDM0_TRMUXPAR 0xc08
#define R92C_OFDM0_RXIQIMBALANCE(chain) (0xc14 + (chain) * 8)
@@ -703,6 +707,20 @@
#define R92C_OFDM0_RXIQEXTANTA 0xca0
#define R92C_OFDM0_TXPSEUDONOISEWGT 0xce4
#define R92C_OFDM1_LSTF 0xd00
+#define R92C_FPGA0_IQK 0xe28
+#define R92C_TX_IQK_TONE(chain) (0xe30 + (chain) * 32)
+#define R92C_RX_IQK_TONE(chain) (0xe34 + (chain) * 32)
+#define R92C_TX_IQK_PI(chain) (0xe38 + (chain) * 32)
+#define R92C_RX_IQK_PI(chain) (0xe3c + (chain) * 32)
+#define R92C_TX_IQK 0xe40
+#define R92C_RX_IQK 0xe44
+#define R92C_IQK_AGC_PTS 0xe48
+#define R92C_IQK_AGC_RSP 0xe4c
+#define R92C_IQK_AGC_CONT 0xe60
+#define R92C_TX_POWER_IQK_BEFORE(chain) (0xe94 + (chain) * 32)
+#define R92C_TX_POWER_IQK_AFTER(chain) (0xe9c + (chain) * 32)
+#define R92C_RX_POWER_IQK_BEFORE(chain) (0xea4 + (chain) * 32)
+#define R92C_RX_POWER_IQK_AFTER(chain) (0xeac + (chain) * 32)
/* Bits for R92C_FPGA[01]_RFMOD. */
#define R92C_RFMOD_40MHZ 0x00000001
@@ -828,6 +846,10 @@
#define R92C_OFDM0_AGCCORE1_GAIN_M 0x0000007f
#define R92C_OFDM0_AGCCORE1_GAIN_S 0
+/* Bits for R92C_[RT]X_POWER_IQK*. */
+#define R92C_POWER_IQK_RESULT_S 16
+#define R92C_POWER_IQK_RESULT_M 0x03ff0000
+
/*
* RF (6052) registers.
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rom.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rom.c
index 1eb5ca12..bc355e30 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rom.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rom.c
@@ -98,7 +98,7 @@ r92c_parse_rom(struct rtwn_softc *sc, uint8_t *buf)
__func__, rs->regulatory);
/* Need to be set before postinit() (but after preinit()). */
- rtwn_r92c_set_name(sc);
+ rtwn_r92c_set_rom_opts(sc, buf);
r92c_set_chains(sc);
for (j = 0; j < R92C_GROUP_2G; j++) {
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c
index c98a0203..cbc8eb56 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c
@@ -54,6 +54,13 @@ __FBSDID("$FreeBSD$");
#include <dev/rtwn/rtl8192c/r92c_rx_desc.h>
+int
+r92c_classify_intr(struct rtwn_softc *sc, void *buf, int len)
+{
+ /* NB: reports are fetched from C2H_MSG register. */
+ return (RTWN_RX_DATA);
+}
+
int8_t
r92c_get_rssi_cck(struct rtwn_softc *sc, void *physt)
{
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx.c
index 5c80af90..36450e80 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx.c
@@ -105,7 +105,7 @@ r92c_tx_protection(struct rtwn_softc *sc, struct r92c_tx_desc *txd,
rate = rtwn_ctl_mcsrate(ic->ic_rt, ridx);
else
rate = ieee80211_ctl_rate(ic->ic_rt, ridx2rate[ridx]);
- ridx = rate2ridx(rate);
+ ridx = rate2ridx(IEEE80211_RV(rate));
txd->txdw4 |= htole32(SM(R92C_TXDW4_RTSRATE, ridx));
/* RTS rate fallback limit (max). */
@@ -213,6 +213,12 @@ r92c_tx_setup_macid(void *buf, int id)
struct r92c_tx_desc *txd = (struct r92c_tx_desc *)buf;
txd->txdw1 |= htole32(SM(R92C_TXDW1_MACID, id));
+
+ /* XXX does not belong here */
+ /* XXX temporary (I hope) */
+ /* Force CCK1 for RTS / CTS frames (driver bug) */
+ txd->txdw4 &= ~htole32(SM(R92C_TXDW4_RTSRATE, R92C_TXDW4_RTSRATE_M));
+ txd->txdw4 &= ~htole32(R92C_TXDW4_RTS_SHORT);
}
void
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_var.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c_var.h
index 79592449..2b35ccea 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_var.h
+++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_var.h
@@ -58,7 +58,7 @@ struct r92c_softc {
void (*rs_tx_enable_ampdu)(void *, int);
void (*rs_tx_setup_hwseq)(void *);
void (*rs_tx_setup_macid)(void *, int);
- void (*rs_set_name)(struct rtwn_softc *);
+ void (*rs_set_rom_opts)(struct rtwn_softc *, uint8_t *);
int rf_read_delay[3];
uint32_t rf_chnlbw[R92C_MAX_CHAINS];
@@ -77,7 +77,7 @@ struct r92c_softc {
((R92C_SOFTC(_sc)->rs_tx_setup_hwseq)((_buf)))
#define rtwn_r92c_tx_setup_macid(_sc, _buf, _id) \
((R92C_SOFTC(_sc)->rs_tx_setup_macid)((_buf), (_id)))
-#define rtwn_r92c_set_name(_sc) \
- ((R92C_SOFTC(_sc)->rs_set_name)((_sc)))
+#define rtwn_r92c_set_rom_opts(_sc, _buf) \
+ ((R92C_SOFTC(_sc)->rs_set_rom_opts)((_sc), (_buf)))
#endif /* R92C_VAR_H */
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu.h b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu.h
index 2486d7fa..4cc01049 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu.h
+++ b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu.h
@@ -47,7 +47,6 @@ void r92cu_post_init(struct rtwn_softc *);
void r92cu_set_led(struct rtwn_softc *, int, int);
/* r92cu_rx.c */
-int r92cu_classify_intr(struct rtwn_softc *, void *, int);
int r92cu_align_rx(int, int);
/* r92cu_tx.c */
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c
index 4696e8c4..c8270184 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c
@@ -96,7 +96,7 @@ r92cu_postattach(struct rtwn_softc *sc)
}
static void
-r92cu_set_name(struct rtwn_softc *sc)
+r92cu_set_name(struct rtwn_softc *sc, uint8_t *buf)
{
struct r92c_softc *rs = sc->sc_priv;
@@ -126,7 +126,7 @@ r92cu_attach_private(struct rtwn_softc *sc)
rs->rs_tx_enable_ampdu = r92c_tx_enable_ampdu;
rs->rs_tx_setup_hwseq = r92c_tx_setup_hwseq;
rs->rs_tx_setup_macid = r92c_tx_setup_macid;
- rs->rs_set_name = r92cu_set_name;
+ rs->rs_set_rom_opts = r92cu_set_name;
#ifndef RTWN_WITHOUT_UCODE
rs->rs_c2h_timeout = hz;
@@ -170,7 +170,7 @@ r92cu_attach(struct rtwn_usb_softc *uc)
sc->sc_get_rx_stats = r92c_get_rx_stats;
sc->sc_get_rssi_cck = r92c_get_rssi_cck;
sc->sc_get_rssi_ofdm = r92c_get_rssi_ofdm;
- sc->sc_classify_intr = r92cu_classify_intr;
+ sc->sc_classify_intr = r92c_classify_intr;
sc->sc_handle_tx_report = rtwn_nop_softc_uint8_int;
sc->sc_handle_c2h_report = rtwn_nop_softc_uint8_int;
sc->sc_check_frame = rtwn_nop_int_softc_mbuf;
@@ -189,7 +189,7 @@ r92cu_attach(struct rtwn_usb_softc *uc)
sc->sc_llt_init = r92c_llt_init;
sc->sc_set_page_size = r92c_set_page_size;
sc->sc_lc_calib = r92c_lc_calib;
- sc->sc_iq_calib = r92c_iq_calib; /* XXX TODO */
+ sc->sc_iq_calib = r92c_iq_calib;
sc->sc_read_chipid_vendor = r92c_read_chipid_vendor;
sc->sc_adj_devcaps = r92cu_adj_devcaps;
sc->sc_vap_preattach = rtwn_nop_softc_vap;
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_init.c b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_init.c
index 61dd1aa0..08259875 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_init.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_init.c
@@ -359,6 +359,8 @@ void
r92cu_post_init(struct rtwn_softc *sc)
{
+ rtwn_write_4(sc, R92C_POWER_STATUS, 0x5);
+
/* Perform LO and IQ calibrations. */
r92c_iq_calib(sc);
/* Perform LC calibration. */
diff --git a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_rx.c b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_rx.c
index 4d040a4e..8d8489c2 100644
--- a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_rx.c
+++ b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_rx.c
@@ -52,13 +52,6 @@ __FBSDID("$FreeBSD$");
int
-r92cu_classify_intr(struct rtwn_softc *sc, void *buf, int len)
-{
- /* NB: reports are fetched from C2H_MSG register. */
- return (RTWN_RX_DATA);
-}
-
-int
r92cu_align_rx(int totlen, int len)
{
return (roundup2(totlen, 128));
diff --git a/freebsd/sys/dev/rtwn/rtl8192e/usb/r92eu_attach.c b/freebsd/sys/dev/rtwn/rtl8192e/usb/r92eu_attach.c
index ce1e49f4..f906d9fa 100644
--- a/freebsd/sys/dev/rtwn/rtl8192e/usb/r92eu_attach.c
+++ b/freebsd/sys/dev/rtwn/rtl8192e/usb/r92eu_attach.c
@@ -137,7 +137,7 @@ r92eu_attach(struct rtwn_usb_softc *uc)
sc->sc_llt_init = r92e_llt_init;
sc->sc_set_page_size = rtwn_nop_int_softc;
sc->sc_lc_calib = r92c_lc_calib;
- sc->sc_iq_calib = r88e_iq_calib; /* XXX TODO */
+ sc->sc_iq_calib = rtwn_nop_softc; /* XXX TODO */
sc->sc_read_chipid_vendor = rtwn_nop_softc_uint32;
sc->sc_adj_devcaps = r92eu_adj_devcaps;
sc->sc_vap_preattach = rtwn_nop_softc_vap;
diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c b/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c
index 77e9e423..910a06eb 100644
--- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c
+++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c
@@ -113,7 +113,7 @@ r12a_tx_protection(struct rtwn_softc *sc, struct r12a_tx_desc *txd,
rate = rtwn_ctl_mcsrate(ic->ic_rt, ridx);
else
rate = ieee80211_ctl_rate(ic->ic_rt, ridx2rate[ridx]);
- ridx = rate2ridx(rate);
+ ridx = rate2ridx(IEEE80211_RV(rate));
txd->txdw4 |= htole32(SM(R12A_TXDW4_RTSRATE, ridx));
/* RTS rate fallback limit (max). */
diff --git a/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h b/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h
index 432d7e97..c757cb79 100644
--- a/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h
+++ b/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h
@@ -107,6 +107,7 @@ static const STRUCT_USB_HOST_ID rtwn_devs[] = {
{ USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, RTWN_CHIP_RTL8192EU) }
RTWN_RTL8192EU_DEV(DLINK, DWA131E1),
RTWN_RTL8192EU_DEV(REALTEK, RTL8192EU),
+ RTWN_RTL8192EU_DEV(TPLINK, WN821NV5),
RTWN_RTL8192EU_DEV(TPLINK, WN822NV4),
RTWN_RTL8192EU_DEV(TPLINK, WN823NV2),
#undef RTWN_RTL8192EU_DEV
@@ -155,7 +156,10 @@ static const STRUCT_USB_HOST_ID rtwn_devs[] = {
RTWN_RTL8821AU_DEV(EDIMAX, EW7811UTC_2),
RTWN_RTL8821AU_DEV(HAWKING, HD65U),
RTWN_RTL8821AU_DEV(MELCO, WIU2433DM),
- RTWN_RTL8821AU_DEV(NETGEAR, A6100)
+ RTWN_RTL8821AU_DEV(NETGEAR, A6100),
+ RTWN_RTL8821AU_DEV(REALTEK, RTL8821AU_1),
+ RTWN_RTL8821AU_DEV(REALTEK, RTL8821AU_2),
+ RTWN_RTL8821AU_DEV(TPLINK, T2UNANO)
#undef RTWN_RTL8821AU_DEV
};
diff --git a/freebsd/sys/dev/rtwn/usb/rtwn_usb_reg.c b/freebsd/sys/dev/rtwn/usb/rtwn_usb_reg.c
index 8225282e..c7cb255e 100644
--- a/freebsd/sys/dev/rtwn/usb/rtwn_usb_reg.c
+++ b/freebsd/sys/dev/rtwn/usb/rtwn_usb_reg.c
@@ -174,8 +174,6 @@ rtwn_usb_delay(struct rtwn_softc *sc, int usec)
/* 1ms delay as default is too big. */
if (usec < 1000)
DELAY(usec);
- else {
- usb_pause_mtx(&sc->sc_mtx,
- MAX(msecs_to_ticks(usec / 1000), 1));
- }
+ else
+ usb_pause_mtx(&sc->sc_mtx, msecs_to_ticks(usec / 1000));
}
diff --git a/freebsd/sys/dev/sdhci/fsl_sdhci.c b/freebsd/sys/dev/sdhci/fsl_sdhci.c
index 84665b41..be3d1de3 100644
--- a/freebsd/sys/dev/sdhci/fsl_sdhci.c
+++ b/freebsd/sys/dev/sdhci/fsl_sdhci.c
@@ -1016,7 +1016,7 @@ static driver_t fsl_sdhci_driver = {
DRIVER_MODULE(sdhci_fsl, simplebus, fsl_sdhci_driver, fsl_sdhci_devclass,
NULL, NULL);
-MODULE_DEPEND(sdhci_fsl, sdhci, 1, 1, 1);
+SDHCI_DEPEND(sdhci_fsl);
#ifndef MMCCAM
MMC_DECLARE_BRIDGE(sdhci_fsl);
diff --git a/freebsd/sys/dev/sdhci/sdhci.c b/freebsd/sys/dev/sdhci/sdhci.c
index 0bb9edc7..cf853360 100644
--- a/freebsd/sys/dev/sdhci/sdhci.c
+++ b/freebsd/sys/dev/sdhci/sdhci.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
+#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -110,19 +111,20 @@ static void sdhci_retune(void *arg);
static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock);
static void sdhci_set_power(struct sdhci_slot *slot, u_char power);
static void sdhci_set_transfer_mode(struct sdhci_slot *slot,
- struct mmc_data *data);
+ const struct mmc_data *data);
static void sdhci_start(struct sdhci_slot *slot);
static void sdhci_timeout(void *arg);
static void sdhci_start_command(struct sdhci_slot *slot,
struct mmc_command *cmd);
-static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data);
+static void sdhci_start_data(struct sdhci_slot *slot,
+ const struct mmc_data *data);
static void sdhci_write_block_pio(struct sdhci_slot *slot);
static void sdhci_transfer_pio(struct sdhci_slot *slot);
#ifdef MMCCAM
/* CAM-related */
static void sdhci_cam_action(struct cam_sim *sim, union ccb *ccb);
-static int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot,
+static int sdhci_cam_get_possible_host_clock(const struct sdhci_slot *slot,
int proposed_clock);
static void sdhci_cam_handle_mmcio(struct cam_sim *sim, union ccb *ccb);
static void sdhci_cam_poll(struct cam_sim *sim);
@@ -132,12 +134,14 @@ static int sdhci_cam_update_ios(struct sdhci_slot *slot);
#endif
/* helper routines */
+static int sdhci_dma_alloc(struct sdhci_slot *slot);
+static void sdhci_dma_free(struct sdhci_slot *slot);
static void sdhci_dumpregs(struct sdhci_slot *slot);
static void sdhci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs,
int error);
-static int slot_printf(struct sdhci_slot *slot, const char * fmt, ...)
+static int slot_printf(const struct sdhci_slot *slot, const char * fmt, ...)
__printflike(2, 3);
-static uint32_t sdhci_tuning_intmask(struct sdhci_slot *slot);
+static uint32_t sdhci_tuning_intmask(const struct sdhci_slot *slot);
#define SDHCI_LOCK(_slot) mtx_lock(&(_slot)->mtx)
#define SDHCI_UNLOCK(_slot) mtx_unlock(&(_slot)->mtx)
@@ -181,17 +185,22 @@ sdhci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
}
static int
-slot_printf(struct sdhci_slot *slot, const char * fmt, ...)
+slot_printf(const struct sdhci_slot *slot, const char * fmt, ...)
{
+ char buf[128];
va_list ap;
int retval;
- retval = printf("%s-slot%d: ",
- device_get_nameunit(slot->bus), slot->num);
-
+ /*
+ * Make sure we print a single line all together rather than in two
+ * halves to avoid console gibberish bingo.
+ */
va_start(ap, fmt);
- retval += vprintf(fmt, ap);
+ retval = vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
+
+ retval += printf("%s-slot%d: %s",
+ device_get_nameunit(slot->bus), slot->num, buf);
return (retval);
}
@@ -292,7 +301,7 @@ sdhci_reset(struct sdhci_slot *slot, uint8_t mask)
}
static uint32_t
-sdhci_tuning_intmask(struct sdhci_slot *slot)
+sdhci_tuning_intmask(const struct sdhci_slot *slot)
{
uint32_t intmask;
@@ -474,7 +483,7 @@ sdhci_set_power(struct sdhci_slot *slot, u_char power)
DELAY(100);
}
if (!(RD1(slot, SDHCI_POWER_CONTROL) & SDHCI_POWER_ON))
- slot_printf(slot, "Bus power failed to enable");
+ slot_printf(slot, "Bus power failed to enable\n");
if (slot->quirks & SDHCI_QUIRK_INTEL_POWER_UP_RESET) {
WR1(slot, SDHCI_POWER_CONTROL, pwr | 0x10);
@@ -494,7 +503,13 @@ sdhci_read_block_pio(struct sdhci_slot *slot)
buffer = slot->curcmd->data->data;
buffer += slot->offset;
/* Transfer one block at a time. */
- left = min(512, slot->curcmd->data->len - slot->offset);
+#ifdef MMCCAM
+ if (slot->curcmd->data->flags & MMC_DATA_BLOCK_SIZE)
+ left = min(slot->curcmd->data->block_size,
+ slot->curcmd->data->len - slot->offset);
+ else
+#endif
+ left = min(512, slot->curcmd->data->len - slot->offset);
slot->offset += left;
/* If we are too fast, broken controllers return zeroes. */
@@ -537,7 +552,13 @@ sdhci_write_block_pio(struct sdhci_slot *slot)
buffer = slot->curcmd->data->data;
buffer += slot->offset;
/* Transfer one block at a time. */
- left = min(512, slot->curcmd->data->len - slot->offset);
+#ifdef MMCCAM
+ if (slot->curcmd->data->flags & MMC_DATA_BLOCK_SIZE) {
+ left = min(slot->curcmd->data->block_size,
+ slot->curcmd->data->len - slot->offset);
+ } else
+#endif
+ left = min(512, slot->curcmd->data->len - slot->offset);
slot->offset += left;
/* Handle unaligned and aligned buffer cases. */
@@ -739,55 +760,94 @@ sdhci_card_poll(void *arg)
sdhci_card_poll, slot);
}
-int
-sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
+static int
+sdhci_dma_alloc(struct sdhci_slot *slot)
{
- kobjop_desc_t kobj_desc;
- kobj_method_t *kobj_method;
- uint32_t caps, caps2, freq, host_caps;
int err;
- SDHCI_LOCK_INIT(slot);
-
- slot->num = num;
- slot->bus = dev;
+ if (!(slot->quirks & SDHCI_QUIRK_BROKEN_SDMA_BOUNDARY)) {
+ if (MAXPHYS <= 1024 * 4)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_4K;
+ else if (MAXPHYS <= 1024 * 8)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_8K;
+ else if (MAXPHYS <= 1024 * 16)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_16K;
+ else if (MAXPHYS <= 1024 * 32)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_32K;
+ else if (MAXPHYS <= 1024 * 64)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_64K;
+ else if (MAXPHYS <= 1024 * 128)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_128K;
+ else if (MAXPHYS <= 1024 * 256)
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_256K;
+ else
+ slot->sdma_boundary = SDHCI_BLKSZ_SDMA_BNDRY_512K;
+ }
+ slot->sdma_bbufsz = SDHCI_SDMA_BNDRY_TO_BBUFSZ(slot->sdma_boundary);
- /* Allocate DMA tag. */
- err = bus_dma_tag_create(bus_get_dma_tag(dev),
- DMA_BLOCK_SIZE, 0, BUS_SPACE_MAXADDR_32BIT,
- BUS_SPACE_MAXADDR, NULL, NULL,
- DMA_BLOCK_SIZE, 1, DMA_BLOCK_SIZE,
- BUS_DMA_ALLOCNOW, NULL, NULL,
- &slot->dmatag);
+ /*
+ * Allocate the DMA tag for an SDMA bounce buffer.
+ * Note that the SDHCI specification doesn't state any alignment
+ * constraint for the SDMA system address. However, controllers
+ * typically ignore the SDMA boundary bits in SDHCI_DMA_ADDRESS when
+ * forming the actual address of data, requiring the SDMA buffer to
+ * be aligned to the SDMA boundary.
+ */
+ err = bus_dma_tag_create(bus_get_dma_tag(slot->bus), slot->sdma_bbufsz,
+ 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ slot->sdma_bbufsz, 1, slot->sdma_bbufsz, BUS_DMA_ALLOCNOW,
+ NULL, NULL, &slot->dmatag);
if (err != 0) {
- device_printf(dev, "Can't create DMA tag\n");
- SDHCI_LOCK_DESTROY(slot);
+ slot_printf(slot, "Can't create DMA tag for SDMA\n");
return (err);
}
- /* Allocate DMA memory. */
+ /* Allocate DMA memory for the SDMA bounce buffer. */
err = bus_dmamem_alloc(slot->dmatag, (void **)&slot->dmamem,
BUS_DMA_NOWAIT, &slot->dmamap);
if (err != 0) {
- device_printf(dev, "Can't alloc DMA memory\n");
+ slot_printf(slot, "Can't alloc DMA memory for SDMA\n");
bus_dma_tag_destroy(slot->dmatag);
- SDHCI_LOCK_DESTROY(slot);
return (err);
}
- /* Map the memory. */
+ /* Map the memory of the SDMA bounce buffer. */
err = bus_dmamap_load(slot->dmatag, slot->dmamap,
- (void *)slot->dmamem, DMA_BLOCK_SIZE,
- sdhci_getaddr, &slot->paddr, 0);
+ (void *)slot->dmamem, slot->sdma_bbufsz, sdhci_getaddr,
+ &slot->paddr, 0);
if (err != 0 || slot->paddr == 0) {
- device_printf(dev, "Can't load DMA memory\n");
+ slot_printf(slot, "Can't load DMA memory for SDMA\n");
bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap);
bus_dma_tag_destroy(slot->dmatag);
- SDHCI_LOCK_DESTROY(slot);
if (err)
return (err);
else
return (EFAULT);
}
+ return (0);
+}
+
+static void
+sdhci_dma_free(struct sdhci_slot *slot)
+{
+
+ bus_dmamap_unload(slot->dmatag, slot->dmamap);
+ bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap);
+ bus_dma_tag_destroy(slot->dmatag);
+}
+
+int
+sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
+{
+ kobjop_desc_t kobj_desc;
+ kobj_method_t *kobj_method;
+ uint32_t caps, caps2, freq, host_caps;
+ int err;
+
+ SDHCI_LOCK_INIT(slot);
+
+ slot->num = num;
+ slot->bus = dev;
+
slot->version = (RD2(slot, SDHCI_HOST_VERSION)
>> SDHCI_SPEC_VER_SHIFT) & SDHCI_SPEC_VER_MASK;
if (slot->quirks & SDHCI_QUIRK_MISSING_CAPS) {
@@ -803,12 +863,8 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
if (slot->version >= SDHCI_SPEC_300) {
if ((caps & SDHCI_SLOTTYPE_MASK) != SDHCI_SLOTTYPE_REMOVABLE &&
(caps & SDHCI_SLOTTYPE_MASK) != SDHCI_SLOTTYPE_EMBEDDED) {
- device_printf(dev,
+ slot_printf(slot,
"Driver doesn't support shared bus slots\n");
- bus_dmamap_unload(slot->dmatag, slot->dmamap);
- bus_dmamem_free(slot->dmatag, slot->dmamem,
- slot->dmamap);
- bus_dma_tag_destroy(slot->dmatag);
SDHCI_LOCK_DESTROY(slot);
return (ENXIO);
} else if ((caps & SDHCI_SLOTTYPE_MASK) ==
@@ -832,7 +888,7 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
*/
if (slot->max_clk == 0) {
slot->max_clk = SDHCI_DEFAULT_MAX_FREQ * 1000000;
- device_printf(dev, "Hardware doesn't specify base clock "
+ slot_printf(slot, "Hardware doesn't specify base clock "
"frequency, using %dMHz as default.\n",
SDHCI_DEFAULT_MAX_FREQ);
}
@@ -853,7 +909,7 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
* max timeout, but still mention it.
*/
if (slot->timeout_clk == 0) {
- device_printf(dev, "Hardware doesn't specify timeout clock "
+ slot_printf(slot, "Hardware doesn't specify timeout clock "
"frequency, setting BROKEN_TIMEOUT quirk.\n");
slot->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
}
@@ -869,7 +925,7 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
if ((caps & SDHCI_CAN_VDD_180) && (slot->opt & SDHCI_SLOT_EMBEDDED))
slot->host.host_ocr |= MMC_OCR_LOW_VOLTAGE;
if (slot->host.host_ocr == 0) {
- device_printf(dev, "Hardware doesn't report any "
+ slot_printf(slot, "Hardware doesn't report any "
"support voltages.\n");
}
@@ -955,7 +1011,7 @@ no_tuning:
slot->retune_count = (caps2 & SDHCI_RETUNE_CNT_MASK) >>
SDHCI_RETUNE_CNT_SHIFT;
if (slot->retune_count > 0xb) {
- device_printf(dev, "Unknown re-tuning count "
+ slot_printf(slot, "Unknown re-tuning count "
"%x, using 1 sec\n", slot->retune_count);
slot->retune_count = 1;
} else if (slot->retune_count != 0)
@@ -1014,6 +1070,19 @@ no_tuning:
if (slot->opt & SDHCI_PLATFORM_TRANSFER)
slot->opt &= ~SDHCI_HAVE_DMA;
+ if (slot->opt & SDHCI_HAVE_DMA) {
+ err = sdhci_dma_alloc(slot);
+ if (err != 0) {
+ if (slot->opt & SDHCI_TUNING_SUPPORTED) {
+ free(slot->tune_req, M_DEVBUF);
+ free(slot->tune_cmd, M_DEVBUF);
+ free(slot->tune_data, M_DEVBUF);
+ }
+ SDHCI_LOCK_DESTROY(slot);
+ return (err);
+ }
+ }
+
if (bootverbose || sdhci_debug) {
slot_printf(slot,
"%uMHz%s %s VDD:%s%s%s VCCQ: 3.3V%s%s DRV: B%s%s%s %s %s\n",
@@ -1061,7 +1130,7 @@ no_tuning:
slot->timeout = 10;
SYSCTL_ADD_INT(device_get_sysctl_ctx(slot->bus),
SYSCTL_CHILDREN(device_get_sysctl_tree(slot->bus)), OID_AUTO,
- "timeout", CTLFLAG_RW, &slot->timeout, 0,
+ "timeout", CTLFLAG_RWTUN, &slot->timeout, 0,
"Maximum timeout for SDHCI transfers (in secs)");
TASK_INIT(&slot->card_task, 0, sdhci_card_task, slot);
TIMEOUT_TASK_INIT(taskqueue_swi_giant, &slot->card_delayed_task, 0,
@@ -1111,9 +1180,8 @@ sdhci_cleanup_slot(struct sdhci_slot *slot)
SDHCI_LOCK(slot);
sdhci_reset(slot, SDHCI_RESET_ALL);
SDHCI_UNLOCK(slot);
- bus_dmamap_unload(slot->dmatag, slot->dmamap);
- bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap);
- bus_dma_tag_destroy(slot->dmatag);
+ if (slot->opt & SDHCI_HAVE_DMA)
+ sdhci_dma_free(slot);
if (slot->opt & SDHCI_TUNING_SUPPORTED) {
free(slot->tune_req, M_DEVBUF);
free(slot->tune_cmd, M_DEVBUF);
@@ -1177,7 +1245,7 @@ sdhci_generic_get_card_present(device_t brdev __unused, struct sdhci_slot *slot)
void
sdhci_generic_set_uhs_timing(device_t brdev __unused, struct sdhci_slot *slot)
{
- struct mmc_ios *ios;
+ const struct mmc_ios *ios;
uint16_t hostctrl2;
if (slot->version < SDHCI_SPEC_300)
@@ -1310,7 +1378,7 @@ int
sdhci_generic_tune(device_t brdev __unused, device_t reqdev, bool hs400)
{
struct sdhci_slot *slot = device_get_ivars(reqdev);
- struct mmc_ios *ios = &slot->host.ios;
+ const struct mmc_ios *ios = &slot->host.ios;
struct mmc_command *tune_cmd;
struct mmc_data *tune_data;
uint32_t opcode;
@@ -1513,23 +1581,23 @@ sdhci_retune(void *arg)
static void
sdhci_req_done(struct sdhci_slot *slot)
{
- union ccb *ccb;
+ union ccb *ccb;
if (__predict_false(sdhci_debug > 1))
slot_printf(slot, "%s\n", __func__);
if (slot->ccb != NULL && slot->curcmd != NULL) {
callout_stop(&slot->timeout_callout);
- ccb = slot->ccb;
- slot->ccb = NULL;
+ ccb = slot->ccb;
+ slot->ccb = NULL;
slot->curcmd = NULL;
- /* Tell CAM the request is finished */
- struct ccb_mmcio *mmcio;
- mmcio = &ccb->mmcio;
+ /* Tell CAM the request is finished */
+ struct ccb_mmcio *mmcio;
+ mmcio = &ccb->mmcio;
- ccb->ccb_h.status =
- (mmcio->cmd.error == 0 ? CAM_REQ_CMP : CAM_REQ_CMP_ERR);
- xpt_done(ccb);
+ ccb->ccb_h.status =
+ (mmcio->cmd.error == 0 ? CAM_REQ_CMP : CAM_REQ_CMP_ERR);
+ xpt_done(ccb);
}
}
#else
@@ -1579,7 +1647,7 @@ sdhci_timeout(void *arg)
}
static void
-sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data)
+sdhci_set_transfer_mode(struct sdhci_slot *slot, const struct mmc_data *data)
{
uint16_t mode;
@@ -1587,9 +1655,9 @@ sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data)
return;
mode = SDHCI_TRNS_BLK_CNT_EN;
- if (data->len > 512) {
+ if (data->len > 512 || data->block_count > 1) {
mode |= SDHCI_TRNS_MULTI;
- if (__predict_true(
+ if (data->block_count == 0 && __predict_true(
#ifdef MMCCAM
slot->ccb->mmcio.stop.opcode == MMC_STOP_TRANSMISSION &&
#else
@@ -1714,7 +1782,9 @@ sdhci_start_command(struct sdhci_slot *slot, struct mmc_command *cmd)
/* Set data transfer mode. */
sdhci_set_transfer_mode(slot, cmd->data);
if (__predict_false(sdhci_debug > 1))
- slot_printf(slot, "Starting command!\n");
+ slot_printf(slot, "Starting command opcode %#04x flags %#04x\n",
+ cmd->opcode, flags);
+
/* Start command. */
WR2(slot, SDHCI_COMMAND_FLAGS, (cmd->opcode << 8) | (flags & 0xff));
/* Start timeout callout. */
@@ -1730,7 +1800,7 @@ sdhci_finish_command(struct sdhci_slot *slot)
uint8_t extra;
if (__predict_false(sdhci_debug > 1))
- slot_printf(slot, "%s: called, err %d flags %d\n",
+ slot_printf(slot, "%s: called, err %d flags %#04x\n",
__func__, slot->curcmd->error, slot->curcmd->flags);
slot->cmd_done = 1;
/*
@@ -1771,7 +1841,7 @@ sdhci_finish_command(struct sdhci_slot *slot)
slot->curcmd->resp[0] = RD4(slot, SDHCI_RESPONSE);
}
if (__predict_false(sdhci_debug > 1))
- printf("Resp: %02x %02x %02x %02x\n",
+ slot_printf(slot, "Resp: %#04x %#04x %#04x %#04x\n",
slot->curcmd->resp[0], slot->curcmd->resp[1],
slot->curcmd->resp[2], slot->curcmd->resp[3]);
@@ -1781,9 +1851,9 @@ sdhci_finish_command(struct sdhci_slot *slot)
}
static void
-sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data)
+sdhci_start_data(struct sdhci_slot *slot, const struct mmc_data *data)
{
- uint32_t target_timeout, current_timeout;
+ uint32_t blkcnt, blksz, current_timeout, sdma_bbufsz, target_timeout;
uint8_t div;
if (data == NULL && (slot->curcmd->flags & MMC_RSP_BUSY) == 0) {
@@ -1819,7 +1889,7 @@ sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data)
/* Use DMA if possible. */
if ((slot->opt & SDHCI_HAVE_DMA))
slot->flags |= SDHCI_USE_DMA;
- /* If data is small, broken DMA may return zeroes instead of data, */
+ /* If data is small, broken DMA may return zeroes instead of data. */
if ((slot->quirks & SDHCI_QUIRK_BROKEN_TIMINGS) &&
(data->len <= 512))
slot->flags &= ~SDHCI_USE_DMA;
@@ -1829,20 +1899,22 @@ sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data)
slot->flags &= ~SDHCI_USE_DMA;
/* Load DMA buffer. */
if (slot->flags & SDHCI_USE_DMA) {
+ sdma_bbufsz = slot->sdma_bbufsz;
if (data->flags & MMC_DATA_READ)
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_PREREAD);
else {
- memcpy(slot->dmamem, data->data,
- (data->len < DMA_BLOCK_SIZE) ?
- data->len : DMA_BLOCK_SIZE);
+ memcpy(slot->dmamem, data->data, ulmin(data->len,
+ sdma_bbufsz));
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_PREWRITE);
}
WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr);
- /* Interrupt aggregation: Mask border interrupt
- * for the last page and unmask else. */
- if (data->len == DMA_BLOCK_SIZE)
+ /*
+ * Interrupt aggregation: Mask border interrupt for the last
+ * bounce buffer and unmask otherwise.
+ */
+ if (data->len == sdma_bbufsz)
slot->intmask &= ~SDHCI_INT_DMA_END;
else
slot->intmask |= SDHCI_INT_DMA_END;
@@ -1850,16 +1922,27 @@ sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data)
}
/* Current data offset for both PIO and DMA. */
slot->offset = 0;
- /* Set block size and request IRQ on 4K border. */
- WR2(slot, SDHCI_BLOCK_SIZE, SDHCI_MAKE_BLKSZ(DMA_BOUNDARY,
- (data->len < 512) ? data->len : 512));
- /* Set block count. */
- WR2(slot, SDHCI_BLOCK_COUNT, (data->len + 511) / 512);
+#ifdef MMCCAM
+ if (data->flags & MMC_DATA_BLOCK_SIZE) {
+ /* Set block size and request border interrupts on the SDMA boundary. */
+ blksz = SDHCI_MAKE_BLKSZ(slot->sdma_boundary, data->block_size);
+ blkcnt = data->block_count;
+ if (__predict_false(sdhci_debug > 0))
+ slot_printf(slot, "SDIO Custom block params: blksz: "
+ "%#10x, blk cnt: %#10x\n", blksz, blkcnt);
+ } else
+#endif
+ {
+ /* Set block size and request border interrupts on the SDMA boundary. */
+ blksz = SDHCI_MAKE_BLKSZ(slot->sdma_boundary, ulmin(data->len, 512));
+ blkcnt = howmany(data->len, 512);
+ }
+ WR2(slot, SDHCI_BLOCK_SIZE, blksz);
+ WR2(slot, SDHCI_BLOCK_COUNT, blkcnt);
if (__predict_false(sdhci_debug > 1))
- slot_printf(slot, "Block size: %02x, count %lu\n",
- (unsigned int)SDHCI_MAKE_BLKSZ(DMA_BOUNDARY, (data->len < 512) ? data->len : 512),
- (unsigned long)(data->len + 511) / 512);
+ slot_printf(slot, "Blk size: 0x%08x | Blk cnt: 0x%08x\n",
+ blksz, blkcnt);
}
void
@@ -1883,7 +1966,7 @@ sdhci_finish_data(struct sdhci_slot *slot)
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_POSTREAD);
memcpy((u_char*)data->data + slot->offset, slot->dmamem,
- (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE);
+ ulmin(left, slot->sdma_bbufsz));
} else
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_POSTWRITE);
@@ -1907,15 +1990,14 @@ sdhci_finish_data(struct sdhci_slot *slot)
static void
sdhci_start(struct sdhci_slot *slot)
{
- union ccb *ccb;
+ union ccb *ccb;
+ struct ccb_mmcio *mmcio;
ccb = slot->ccb;
if (ccb == NULL)
return;
- struct ccb_mmcio *mmcio;
mmcio = &ccb->mmcio;
-
if (!(slot->flags & CMD_STARTED)) {
slot->flags |= CMD_STARTED;
sdhci_start_command(slot, &mmcio->cmd);
@@ -1947,7 +2029,7 @@ sdhci_start(struct sdhci_slot *slot)
static void
sdhci_start(struct sdhci_slot *slot)
{
- struct mmc_request *req;
+ const struct mmc_request *req;
req = slot->req;
if (req == NULL)
@@ -2076,6 +2158,7 @@ sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask)
{
struct mmc_data *data;
size_t left;
+ uint32_t sdma_bbufsz;
if (!slot->curcmd) {
slot_printf(slot, "Got data interrupt 0x%08x, but "
@@ -2130,6 +2213,7 @@ sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask)
/* Handle DMA border. */
if (intmask & SDHCI_INT_DMA_END) {
data = slot->curcmd->data;
+ sdma_bbufsz = slot->sdma_bbufsz;
/* Unload DMA buffer ... */
left = data->len - slot->offset;
@@ -2137,26 +2221,28 @@ sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask)
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_POSTREAD);
memcpy((u_char*)data->data + slot->offset, slot->dmamem,
- (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE);
+ ulmin(left, sdma_bbufsz));
} else {
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_POSTWRITE);
}
/* ... and reload it again. */
- slot->offset += DMA_BLOCK_SIZE;
+ slot->offset += sdma_bbufsz;
left = data->len - slot->offset;
if (data->flags & MMC_DATA_READ) {
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_PREREAD);
} else {
memcpy(slot->dmamem, (u_char*)data->data + slot->offset,
- (left < DMA_BLOCK_SIZE)? left : DMA_BLOCK_SIZE);
+ ulmin(left, sdma_bbufsz));
bus_dmamap_sync(slot->dmatag, slot->dmamap,
BUS_DMASYNC_PREWRITE);
}
- /* Interrupt aggregation: Mask border interrupt
- * for the last page. */
- if (left == DMA_BLOCK_SIZE) {
+ /*
+ * Interrupt aggregation: Mask border interrupt for the last
+ * bounce buffer.
+ */
+ if (left == sdma_bbufsz) {
slot->intmask &= ~SDHCI_INT_DMA_END;
WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask);
}
@@ -2279,7 +2365,7 @@ int
sdhci_generic_read_ivar(device_t bus, device_t child, int which,
uintptr_t *result)
{
- struct sdhci_slot *slot = device_get_ivars(child);
+ const struct sdhci_slot *slot = device_get_ivars(child);
switch (which) {
default:
@@ -2442,47 +2528,58 @@ sdhci_generic_write_ivar(device_t bus, device_t child, int which,
+/*
+ * Set up the CAM side of a slot: allocate the SIM queue and the SIM,
+ * register the bus, then call sdhci_card_task().  On failure the SIM and
+ * queue allocated so far are released exactly once.
+ */
void
sdhci_start_slot(struct sdhci_slot *slot)
{
- if ((slot->devq = cam_simq_alloc(1)) == NULL) {
- goto fail;
- }
-
- mtx_init(&slot->sim_mtx, "sdhcisim", NULL, MTX_DEF);
- slot->sim = cam_sim_alloc(sdhci_cam_action, sdhci_cam_poll,
- "sdhci_slot", slot, device_get_unit(slot->bus),
- &slot->sim_mtx, 1, 1, slot->devq);
-
- if (slot->sim == NULL) {
- cam_simq_free(slot->devq);
- slot_printf(slot, "cannot allocate CAM SIM\n");
- goto fail;
- }
-
- mtx_lock(&slot->sim_mtx);
- if (xpt_bus_register(slot->sim, slot->bus, 0) != 0) {
- slot_printf(slot,
- "cannot register SCSI pass-through bus\n");
- cam_sim_free(slot->sim, FALSE);
- cam_simq_free(slot->devq);
- mtx_unlock(&slot->sim_mtx);
- goto fail;
- }
-
- mtx_unlock(&slot->sim_mtx);
- /* End CAM-specific init */
+
+ if ((slot->devq = cam_simq_alloc(1)) == NULL)
+ goto fail;
+
+ mtx_init(&slot->sim_mtx, "sdhcisim", NULL, MTX_DEF);
+ slot->sim = cam_sim_alloc_dev(sdhci_cam_action, sdhci_cam_poll,
+ "sdhci_slot", slot, slot->bus,
+ &slot->sim_mtx, 1, 1, slot->devq);
+
+ if (slot->sim == NULL) {
+ cam_simq_free(slot->devq);
+ slot->devq = NULL; /* fail: must not free it again */
+ slot_printf(slot, "cannot allocate CAM SIM\n");
+ goto fail;
+ }
+
+ mtx_lock(&slot->sim_mtx);
+ if (xpt_bus_register(slot->sim, slot->bus, 0) != 0) {
+ slot_printf(slot, "cannot register SCSI pass-through bus\n");
+ cam_sim_free(slot->sim, FALSE);
+ cam_simq_free(slot->devq);
+ /*
+ * Both objects are gone; clear the pointers so the fail path
+ * neither deregisters the freed SIM nor frees either twice.
+ */
+ slot->sim = NULL;
+ slot->devq = NULL;
+ mtx_unlock(&slot->sim_mtx);
+ goto fail;
+ }
+ mtx_unlock(&slot->sim_mtx);
+
+ /* End CAM-specific init */
slot->card_present = 0;
sdhci_card_task(slot, 0);
- return;
+ return;
fail:
- if (slot->sim != NULL) {
- mtx_lock(&slot->sim_mtx);
- xpt_bus_deregister(cam_sim_path(slot->sim));
- cam_sim_free(slot->sim, FALSE);
- mtx_unlock(&slot->sim_mtx);
- }
-
- if (slot->devq != NULL)
- cam_simq_free(slot->devq);
+ if (slot->sim != NULL) {
+ mtx_lock(&slot->sim_mtx);
+ xpt_bus_deregister(cam_sim_path(slot->sim));
+ cam_sim_free(slot->sim, FALSE);
+ mtx_unlock(&slot->sim_mtx);
+ }
+
+ if (slot->devq != NULL)
+ cam_simq_free(slot->devq);
}
static void
@@ -2541,6 +2626,7 @@ sdhci_cam_action(struct cam_sim *sim, union ccb *ccb)
case XPT_GET_TRAN_SETTINGS:
{
struct ccb_trans_settings *cts = &ccb->cts;
+ uint32_t max_data;
if (sdhci_debug > 1)
slot_printf(slot, "Got XPT_GET_TRAN_SETTINGS\n");
@@ -2554,6 +2640,19 @@ sdhci_cam_action(struct cam_sim *sim, union ccb *ccb)
cts->proto_specific.mmc.host_f_min = slot->host.f_min;
cts->proto_specific.mmc.host_f_max = slot->host.f_max;
cts->proto_specific.mmc.host_caps = slot->host.caps;
+ /*
+ * Re-tuning modes 1 and 2 restrict the maximum data length
+ * per read/write command to 4 MiB.
+ */
+ if (slot->opt & SDHCI_TUNING_ENABLED &&
+ (slot->retune_mode == SDHCI_RETUNE_MODE_1 ||
+ slot->retune_mode == SDHCI_RETUNE_MODE_2)) {
+ max_data = 4 * 1024 * 1024 / MMC_SECTOR_SIZE;
+ } else {
+ max_data = 65535;
+ }
+ cts->proto_specific.mmc.host_max_data = max_data;
+
memcpy(&cts->proto_specific.mmc.ios, &slot->host.ios, sizeof(struct mmc_ios));
ccb->ccb_h.status = CAM_REQ_CMP;
break;
@@ -2601,7 +2700,8 @@ sdhci_cam_poll(struct cam_sim *sim)
}
static int
-sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock)
+sdhci_cam_get_possible_host_clock(const struct sdhci_slot *slot,
+ int proposed_clock)
{
int max_clock, clock, i;
@@ -2611,15 +2711,13 @@ sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock)
clock = max_clock;
if (slot->version < SDHCI_SPEC_300) {
- for (i = 0; i < SDHCI_200_MAX_DIVIDER;
- i <<= 1) {
+ for (i = 1; i < SDHCI_200_MAX_DIVIDER; i <<= 1) {
if (clock <= proposed_clock)
break;
clock >>= 1;
}
} else {
- for (i = 0; i < SDHCI_300_MAX_DIVIDER;
- i += 2) {
+ for (i = 0; i < SDHCI_300_MAX_DIVIDER; i += 2) {
if (clock <= proposed_clock)
break;
clock = max_clock / (i + 2);
@@ -2628,15 +2726,14 @@ sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock)
return clock;
}
-int
+static int
sdhci_cam_settran_settings(struct sdhci_slot *slot, union ccb *ccb)
{
struct mmc_ios *ios;
- struct mmc_ios *new_ios;
- struct ccb_trans_settings_mmc *cts;
+ const struct mmc_ios *new_ios;
+ const struct ccb_trans_settings_mmc *cts;
ios = &slot->host.ios;
-
cts = &ccb->cts.proto_specific.mmc;
new_ios = &cts->ios;
@@ -2670,11 +2767,11 @@ sdhci_cam_settran_settings(struct sdhci_slot *slot, union ccb *ccb)
slot_printf(slot, "Bus mode => %d\n", ios->bus_mode);
}
- /* XXX Provide a way to call a chip-specific IOS update, required for TI */
+ /* XXX Provide a way to call a chip-specific IOS update, required for TI */
return (sdhci_cam_update_ios(slot));
}
-int
+static int
sdhci_cam_update_ios(struct sdhci_slot *slot)
{
struct mmc_ios *ios = &slot->host.ios;
@@ -2716,10 +2813,10 @@ sdhci_cam_update_ios(struct sdhci_slot *slot)
return (0);
}
-int
+static int
sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb)
{
- struct ccb_mmcio *mmcio;
+ const struct ccb_mmcio *mmcio;
mmcio = &ccb->mmcio;
@@ -2730,15 +2827,18 @@ sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb)
}
*/
if (__predict_false(sdhci_debug > 1)) {
- slot_printf(slot, "CMD%u arg %#x flags %#x dlen %u dflags %#x\n",
- mmcio->cmd.opcode, mmcio->cmd.arg, mmcio->cmd.flags,
- mmcio->cmd.data != NULL ? (unsigned int) mmcio->cmd.data->len : 0,
- mmcio->cmd.data != NULL ? mmcio->cmd.data->flags: 0);
+ slot_printf(slot, "CMD%u arg %#x flags %#x dlen %u dflags %#x "
+ "blksz=%zu blkcnt=%zu\n",
+ mmcio->cmd.opcode, mmcio->cmd.arg, mmcio->cmd.flags,
+ mmcio->cmd.data != NULL ? (unsigned int) mmcio->cmd.data->len : 0,
+ mmcio->cmd.data != NULL ? mmcio->cmd.data->flags : 0,
+ mmcio->cmd.data != NULL ? mmcio->cmd.data->block_size : 0,
+ mmcio->cmd.data != NULL ? mmcio->cmd.data->block_count : 0);
}
if (mmcio->cmd.data != NULL) {
if (mmcio->cmd.data->len == 0 || mmcio->cmd.data->flags == 0)
panic("data->len = %d, data->flags = %d -- something is b0rked",
- (int)mmcio->cmd.data->len, mmcio->cmd.data->flags);
+ (int)mmcio->cmd.data->len, mmcio->cmd.data->flags);
}
slot->ccb = ccb;
slot->flags = 0;
@@ -2754,4 +2854,4 @@ sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb)
}
#endif /* MMCCAM */
-MODULE_VERSION(sdhci, 1);
+MODULE_VERSION(sdhci, SDHCI_VERSION);
diff --git a/freebsd/sys/dev/sdhci/sdhci.h b/freebsd/sys/dev/sdhci/sdhci.h
index a22e0235..38c5e1b9 100644
--- a/freebsd/sys/dev/sdhci/sdhci.h
+++ b/freebsd/sys/dev/sdhci/sdhci.h
@@ -32,8 +32,8 @@
#include <rtems/bsd/local/opt_mmccam.h>
-#define DMA_BLOCK_SIZE 4096
-#define DMA_BOUNDARY 0 /* DMA reload every 4K */
+/* Macro for sizing the SDMA bounce buffer on the SDMA buffer boundary. */
+#define SDHCI_SDMA_BNDRY_TO_BBUFSZ(bndry) (4096 * (1 << (bndry)))
/* Controller doesn't honor resets unless we touch the clock register */
#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1 << 0)
@@ -95,6 +95,8 @@
#define SDHCI_QUIRK_BROKEN_AUTO_STOP (1 << 28)
/* Controller supports eMMC HS400 mode if SDHCI_CAN_SDR104 is set. */
#define SDHCI_QUIRK_MMC_HS400_IF_CAN_SDR104 (1 << 29)
+/* SDMA boundary in SDHCI_BLOCK_SIZE broken - use front-end supplied value. */
+#define SDHCI_QUIRK_BROKEN_SDMA_BOUNDARY (1 << 30)
/*
* Controller registers
@@ -102,6 +104,14 @@
#define SDHCI_DMA_ADDRESS 0x00
#define SDHCI_BLOCK_SIZE 0x04
+#define SDHCI_BLKSZ_SDMA_BNDRY_4K 0x00
+#define SDHCI_BLKSZ_SDMA_BNDRY_8K 0x01
+#define SDHCI_BLKSZ_SDMA_BNDRY_16K 0x02
+#define SDHCI_BLKSZ_SDMA_BNDRY_32K 0x03
+#define SDHCI_BLKSZ_SDMA_BNDRY_64K 0x04
+#define SDHCI_BLKSZ_SDMA_BNDRY_128K 0x05
+#define SDHCI_BLKSZ_SDMA_BNDRY_256K 0x06
+#define SDHCI_BLKSZ_SDMA_BNDRY_512K 0x07
#define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF))
#define SDHCI_BLOCK_COUNT 0x06
@@ -362,6 +372,8 @@ struct sdhci_slot {
bus_dmamap_t dmamap;
u_char *dmamem;
bus_addr_t paddr; /* DMA buffer address */
+ uint32_t sdma_bbufsz; /* SDMA bounce buffer size */
+ uint8_t sdma_boundary; /* SDMA boundary */
struct task card_task; /* Card presence check task */
struct timeout_task
card_delayed_task;/* Card insert delayed task */
@@ -401,10 +413,10 @@ struct sdhci_slot {
#ifdef MMCCAM
/* CAM stuff */
union ccb *ccb;
- struct cam_devq *devq;
- struct cam_sim *sim;
- struct mtx sim_mtx;
- u_char card_present; /* XXX Maybe derive this from elsewhere? */
+ struct cam_devq *devq;
+ struct cam_sim *sim;
+ struct mtx sim_mtx;
+ u_char card_present; /* XXX Maybe derive this from elsewhere? */
#endif
};
@@ -434,4 +446,9 @@ bool sdhci_generic_get_card_present(device_t brdev, struct sdhci_slot *slot);
void sdhci_generic_set_uhs_timing(device_t brdev, struct sdhci_slot *slot);
void sdhci_handle_card_present(struct sdhci_slot *slot, bool is_present);
+#define SDHCI_VERSION 2
+
+#define SDHCI_DEPEND(name) \
+ MODULE_DEPEND(name, sdhci, SDHCI_VERSION, SDHCI_VERSION, SDHCI_VERSION);
+
#endif /* __SDHCI_H__ */
diff --git a/freebsd/sys/dev/usb/net/if_aue.c b/freebsd/sys/dev/usb/net/if_aue.c
index 5454e2aa..d33176d9 100644
--- a/freebsd/sys/dev/usb/net/if_aue.c
+++ b/freebsd/sys/dev/usb/net/if_aue.c
@@ -94,6 +94,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
@@ -107,6 +111,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
#include <dev/usb/net/if_auereg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#ifdef USB_DEBUG
static int aue_debug = 0;
diff --git a/freebsd/sys/dev/usb/net/if_axe.c b/freebsd/sys/dev/usb/net/if_axe.c
index cf54e96e..643b46ce 100644
--- a/freebsd/sys/dev/usb/net/if_axe.c
+++ b/freebsd/sys/dev/usb/net/if_axe.c
@@ -118,6 +118,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
#include <dev/usb/net/if_axereg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
/*
* AXE_178_MAX_FRAME_BURST
* max frame burst size for Ax88178 and Ax88772
@@ -1149,7 +1151,7 @@ axe_rxeof(struct usb_ether *ue, struct usb_page_cache *pc, unsigned int offset,
}
}
- _IF_ENQUEUE(&ue->ue_rxq, m);
+ (void)mbufq_enqueue(&ue->ue_rxq, m);
return (0);
}
diff --git a/freebsd/sys/dev/usb/net/if_axge.c b/freebsd/sys/dev/usb/net/if_axge.c
index f8ed34ae..564d5221 100644
--- a/freebsd/sys/dev/usb/net/if_axge.c
+++ b/freebsd/sys/dev/usb/net/if_axge.c
@@ -50,6 +50,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
@@ -63,6 +67,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
#include <dev/usb/net/if_axgereg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
/*
* Various supported device vendors/products.
*/
@@ -1037,7 +1043,7 @@ axge_rxeof(struct usb_ether *ue, struct usb_page_cache *pc, unsigned int offset,
}
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
- _IF_ENQUEUE(&ue->ue_rxq, m);
+ (void)mbufq_enqueue(&ue->ue_rxq, m);
}
static void
diff --git a/freebsd/sys/dev/usb/net/if_cdce.c b/freebsd/sys/dev/usb/net/if_cdce.c
index 9a15da69..6abe6653 100644
--- a/freebsd/sys/dev/usb/net/if_cdce.c
+++ b/freebsd/sys/dev/usb/net/if_cdce.c
@@ -52,11 +52,11 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/gsb_crc32.h>
+#include <sys/eventhandler.h>
#include <sys/stdint.h>
#include <sys/stddef.h>
-#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/types.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/kernel.h>
@@ -278,6 +278,7 @@ static const STRUCT_USB_HOST_ID cdce_host_devs[] = {
{USB_VPI(USB_VENDOR_SHARP, USB_PRODUCT_SHARP_SLA300, CDCE_FLAG_ZAURUS | CDCE_FLAG_NO_UNION)},
{USB_VPI(USB_VENDOR_SHARP, USB_PRODUCT_SHARP_SLC700, CDCE_FLAG_ZAURUS | CDCE_FLAG_NO_UNION)},
{USB_VPI(USB_VENDOR_SHARP, USB_PRODUCT_SHARP_SLC750, CDCE_FLAG_ZAURUS | CDCE_FLAG_NO_UNION)},
+ {USB_VPI(USB_VENDOR_REALTEK, USB_PRODUCT_REALTEK_RTL8156, 0)},
{USB_VENDOR(USB_VENDOR_HUAWEI), USB_IFACE_CLASS(UICLASS_VENDOR),
USB_IFACE_SUBCLASS(0x02), USB_IFACE_PROTOCOL(0x16),
diff --git a/freebsd/sys/dev/usb/net/if_mos.c b/freebsd/sys/dev/usb/net/if_mos.c
index 3dd0e5ef..3ab0e62e 100644
--- a/freebsd/sys/dev/usb/net/if_mos.c
+++ b/freebsd/sys/dev/usb/net/if_mos.c
@@ -122,6 +122,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
@@ -134,6 +138,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
+#include <rtems/bsd/local/miibus_if.h>
+
//#include <dev/usb/net/if_mosreg.h>
#include "if_mosreg.h"
diff --git a/freebsd/sys/dev/usb/net/if_rue.c b/freebsd/sys/dev/usb/net/if_rue.c
index 810a98c8..8d095c86 100644
--- a/freebsd/sys/dev/usb/net/if_rue.c
+++ b/freebsd/sys/dev/usb/net/if_rue.c
@@ -91,6 +91,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
@@ -104,6 +108,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
#include <dev/usb/net/if_ruereg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#ifdef USB_DEBUG
static int rue_debug = 0;
diff --git a/freebsd/sys/dev/usb/net/if_smsc.c b/freebsd/sys/dev/usb/net/if_smsc.c
index 87e181d8..bbf28ad8 100644
--- a/freebsd/sys/dev/usb/net/if_smsc.c
+++ b/freebsd/sys/dev/usb/net/if_smsc.c
@@ -89,6 +89,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -99,6 +103,7 @@ __FBSDID("$FreeBSD$");
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
+#include <dev/usb/usb_fdt_support.h>
#endif
#include <dev/usb/usb.h>
@@ -114,6 +119,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/if_smscreg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#ifdef USB_DEBUG
static int smsc_debug = 0;
@@ -1561,147 +1568,6 @@ smsc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (rc);
}
-#ifdef FDT
-/*
- * This is FreeBSD-specific compatibility strings for RPi/RPi2
- */
-static phandle_t
-smsc_fdt_find_eth_node(phandle_t start)
-{
- phandle_t child, node;
-
- /* Traverse through entire tree to find usb ethernet nodes. */
- for (node = OF_child(start); node != 0; node = OF_peer(node)) {
- if ((ofw_bus_node_is_compatible(node, "net,ethernet") &&
- ofw_bus_node_is_compatible(node, "usb,device")) ||
- ofw_bus_node_is_compatible(node, "usb424,ec00"))
- return (node);
- child = smsc_fdt_find_eth_node(node);
- if (child != -1)
- return (child);
- }
-
- return (-1);
-}
-
-/*
- * Check if node's path is <*>/usb/hub/ethernet
- */
-static int
-smsc_fdt_is_usb_eth(phandle_t node)
-{
- char name[16];
- int len;
-
- memset(name, 0, sizeof(name));
- len = OF_getprop(node, "name", name, sizeof(name));
- if (len <= 0)
- return (0);
-
- if (strcmp(name, "ethernet"))
- return (0);
-
- node = OF_parent(node);
- if (node == -1)
- return (0);
- len = OF_getprop(node, "name", name, sizeof(name));
- if (len <= 0)
- return (0);
-
- if (strcmp(name, "hub"))
- return (0);
-
- node = OF_parent(node);
- if (node == -1)
- return (0);
- len = OF_getprop(node, "name", name, sizeof(name));
- if (len <= 0)
- return (0);
-
- if (strcmp(name, "usb"))
- return (0);
-
- return (1);
-}
-
-static phandle_t
-smsc_fdt_find_eth_node_by_path(phandle_t start)
-{
- phandle_t child, node;
-
- /* Traverse through entire tree to find usb ethernet nodes. */
- for (node = OF_child(start); node != 0; node = OF_peer(node)) {
- if (smsc_fdt_is_usb_eth(node))
- return (node);
- child = smsc_fdt_find_eth_node_by_path(node);
- if (child != -1)
- return (child);
- }
-
- return (-1);
-}
-
-/*
- * Look through known names that can contain mac address
- * return 0 if valid MAC address has been found
- */
-static int
-smsc_fdt_read_mac_property(phandle_t node, unsigned char *mac)
-{
- int len;
-
- /* Check if there is property */
- if ((len = OF_getproplen(node, "local-mac-address")) > 0) {
- if (len != ETHER_ADDR_LEN)
- return (EINVAL);
-
- OF_getprop(node, "local-mac-address", mac,
- ETHER_ADDR_LEN);
- return (0);
- }
-
- if ((len = OF_getproplen(node, "mac-address")) > 0) {
- if (len != ETHER_ADDR_LEN)
- return (EINVAL);
-
- OF_getprop(node, "mac-address", mac,
- ETHER_ADDR_LEN);
- return (0);
- }
-
- return (ENXIO);
-}
-
-/**
- * Get MAC address from FDT blob. Firmware or loader should fill
- * mac-address or local-mac-address property. Returns 0 if MAC address
- * obtained, error code otherwise.
- */
-static int
-smsc_fdt_find_mac(unsigned char *mac)
-{
- phandle_t node, root;
-
- root = OF_finddevice("/");
- node = smsc_fdt_find_eth_node(root);
- if (node != -1) {
- if (smsc_fdt_read_mac_property(node, mac) == 0)
- return (0);
- }
-
- /*
- * If it's not FreeBSD FDT blob for RPi, try more
- * generic .../usb/hub/ethernet
- */
- node = smsc_fdt_find_eth_node_by_path(root);
-
- if (node != -1)
- return smsc_fdt_read_mac_property(node, mac);
-
- return (ENXIO);
-}
-#endif
-
/**
* smsc_attach_post - Called after the driver attached to the USB interface
* @ue: the USB ethernet device
@@ -1750,7 +1616,7 @@ smsc_attach_post(struct usb_ether *ue)
err = smsc_eeprom_read(sc, 0x01, sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN);
#ifdef FDT
if ((err != 0) || (!ETHER_IS_VALID(sc->sc_ue.ue_eaddr)))
- err = smsc_fdt_find_mac(sc->sc_ue.ue_eaddr);
+ err = usb_fdt_get_mac_addr(sc->sc_ue.ue_dev, &sc->sc_ue);
#endif
if ((err != 0) || (!ETHER_IS_VALID(sc->sc_ue.ue_eaddr))) {
read_random(sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN);
diff --git a/freebsd/sys/dev/usb/net/if_udav.c b/freebsd/sys/dev/usb/net/if_udav.c
index a4e683ac..8a2ad81f 100644
--- a/freebsd/sys/dev/usb/net/if_udav.c
+++ b/freebsd/sys/dev/usb/net/if_udav.c
@@ -72,12 +72,18 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
#include <dev/usb/usbdi_util.h>
#include <rtems/bsd/local/usbdevs.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#define USB_DEBUG_VAR udav_debug
#include <dev/usb/usb_debug.h>
#include <dev/usb/usb_process.h>
diff --git a/freebsd/sys/dev/usb/net/if_ure.c b/freebsd/sys/dev/usb/net/if_ure.c
index 136b61f9..0130a7bb 100644
--- a/freebsd/sys/dev/usb/net/if_ure.c
+++ b/freebsd/sys/dev/usb/net/if_ure.c
@@ -43,6 +43,10 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
#include <dev/usb/usb.h>
#include <dev/usb/usbdi.h>
@@ -56,6 +60,8 @@ __FBSDID("$FreeBSD$");
#include <dev/usb/net/usb_ethernet.h>
#include <dev/usb/net/if_urereg.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#ifdef USB_DEBUG
static int ure_debug = 0;
@@ -64,6 +70,9 @@ SYSCTL_INT(_hw_usb_ure, OID_AUTO, debug, CTLFLAG_RWTUN, &ure_debug, 0,
"Debug level");
#endif
+#define ETHER_IS_ZERO(addr) \
+ (!((addr)[0] | (addr)[1] | (addr)[2] | (addr)[3] | (addr)[4] | (addr)[5]))
+
/*
* Various supported device vendors/products.
*/
@@ -71,6 +80,7 @@ static const STRUCT_USB_HOST_ID ure_devs[] = {
#define URE_DEV(v,p,i) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, i) }
URE_DEV(LENOVO, RTL8153, 0),
URE_DEV(LENOVO, TBT3LAN, 0),
+ URE_DEV(LENOVO, ONELINK, 0),
URE_DEV(LENOVO, USBCLAN, 0),
URE_DEV(NVIDIA, RTL8153, 0),
URE_DEV(REALTEK, RTL8152, URE_FLAG_8152),
@@ -674,12 +684,20 @@ ure_attach_post(struct usb_ether *ue)
else
ure_rtl8153_init(sc);
- if (sc->sc_chip & URE_CHIP_VER_4C00)
+ if ((sc->sc_chip & URE_CHIP_VER_4C00) ||
+ (sc->sc_chip & URE_CHIP_VER_4C10))
ure_read_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA,
ue->ue_eaddr, 8);
else
ure_read_mem(sc, URE_PLA_BACKUP, URE_MCU_TYPE_PLA,
ue->ue_eaddr, 8);
+
+ if (ETHER_IS_ZERO(sc->sc_ue.ue_eaddr)) {
+ device_printf(sc->sc_ue.ue_dev, "MAC assigned randomly\n");
+ arc4rand(sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN, 0);
+ sc->sc_ue.ue_eaddr[0] &= ~0x01; /* unicast */
+ sc->sc_ue.ue_eaddr[0] |= 0x02; /* locally administered */
+ }
}
static int
@@ -725,8 +743,10 @@ ure_init(struct usb_ether *ue)
ure_reset(sc);
/* Set MAC address. */
+ ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_CONFIG);
ure_write_mem(sc, URE_PLA_IDR, URE_MCU_TYPE_PLA | URE_BYTE_EN_SIX_BYTES,
IF_LLADDR(ifp), 8);
+ ure_write_1(sc, URE_PLA_CRWECR, URE_MCU_TYPE_PLA, URE_CRWECR_NORAML);
/* Reset the packet filter. */
ure_write_2(sc, URE_PLA_FMC, URE_MCU_TYPE_PLA,
diff --git a/freebsd/sys/dev/usb/net/if_urereg.h b/freebsd/sys/dev/usb/net/if_urereg.h
index 8eff1c25..cc70093f 100644
--- a/freebsd/sys/dev/usb/net/if_urereg.h
+++ b/freebsd/sys/dev/usb/net/if_urereg.h
@@ -176,7 +176,7 @@
#define URE_EEEP_CR_EEEP_TX 0x0002
/* PLA_WDT6_CTRL */
-#define URE_WDT6_SET_MODE 0x001
+#define URE_WDT6_SET_MODE 0x0010
/* PLA_TCR0 */
#define URE_TCR0_TX_EMPTY 0x0800
diff --git a/freebsd/sys/dev/usb/net/usb_ethernet.c b/freebsd/sys/dev/usb/net/usb_ethernet.c
index 9ce60eff..2bbcdedc 100644
--- a/freebsd/sys/dev/usb/net/usb_ethernet.c
+++ b/freebsd/sys/dev/usb/net/usb_ethernet.c
@@ -221,6 +221,7 @@ ue_attach_post_task(struct usb_proc_msg *_task)
ue->ue_unit = alloc_unr(ueunit);
usb_callout_init_mtx(&ue->ue_watchdog, ue->ue_mtx, 0);
sysctl_ctx_init(&ue->ue_sysctl_ctx);
+ mbufq_init(&ue->ue_rxq, 0 /* unlimited length */);
error = 0;
CURVNET_SET_QUIET(vnet0);
@@ -286,6 +287,11 @@ ue_attach_post_task(struct usb_proc_msg *_task)
fail:
CURVNET_RESTORE();
+
+ /* drain mbuf queue */
+ mbufq_drain(&ue->ue_rxq);
+
+ /* free unit */
free_unr(ueunit, ue->ue_unit);
if (ue->ue_ifp != NULL) {
if_free(ue->ue_ifp);
@@ -332,6 +338,9 @@ uether_ifdetach(struct usb_ether *ue)
/* free sysctl */
sysctl_ctx_free(&ue->ue_sysctl_ctx);
+ /* drain mbuf queue */
+ mbufq_drain(&ue->ue_rxq);
+
/* free unit */
free_unr(ueunit, ue->ue_unit);
}
@@ -600,7 +609,7 @@ uether_rxmbuf(struct usb_ether *ue, struct mbuf *m,
m->m_pkthdr.len = m->m_len = len;
/* enqueue for later when the lock can be released */
- _IF_ENQUEUE(&ue->ue_rxq, m);
+ (void)mbufq_enqueue(&ue->ue_rxq, m);
return (0);
}
@@ -630,7 +639,7 @@ uether_rxbuf(struct usb_ether *ue, struct usb_page_cache *pc,
m->m_pkthdr.len = m->m_len = len;
/* enqueue for later when the lock can be released */
- _IF_ENQUEUE(&ue->ue_rxq, m);
+ (void)mbufq_enqueue(&ue->ue_rxq, m);
return (0);
}
@@ -643,7 +652,7 @@ uether_rxflush(struct usb_ether *ue)
UE_LOCK_ASSERT(ue, MA_OWNED);
for (;;) {
- _IF_DEQUEUE(&ue->ue_rxq, m);
+ m = mbufq_dequeue(&ue->ue_rxq);
if (m == NULL)
break;
diff --git a/freebsd/sys/dev/usb/net/usb_ethernet.h b/freebsd/sys/dev/usb/net/usb_ethernet.h
index 9839db16..87886559 100644
--- a/freebsd/sys/dev/usb/net/usb_ethernet.h
+++ b/freebsd/sys/dev/usb/net/usb_ethernet.h
@@ -48,11 +48,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
-#include <rtems/bsd/local/miibus_if.h>
-
-#include <dev/mii/mii.h>
-#include <dev/mii/miivar.h>
-
+struct mii_data;
struct usb_ether;
struct usb_device_request;
@@ -91,7 +87,7 @@ struct usb_ether {
struct usb_process ue_tq;
struct sysctl_ctx_list ue_sysctl_ctx;
- struct ifqueue ue_rxq;
+ struct mbufq ue_rxq;
struct usb_callout ue_watchdog;
struct usb_ether_cfg_task ue_sync_task[2];
struct usb_ether_cfg_task ue_media_task[2];
diff --git a/freebsd/sys/dev/usb/quirk/usb_quirk.c b/freebsd/sys/dev/usb/quirk/usb_quirk.c
index 2aea57c8..6c1589e9 100644
--- a/freebsd/sys/dev/usb/quirk/usb_quirk.c
+++ b/freebsd/sys/dev/usb/quirk/usb_quirk.c
@@ -98,10 +98,12 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(TELEX, MIC1, 0x009, 0x009, UQ_AU_NO_FRAC),
USB_QUIRK(SILICONPORTALS, YAPPHONE, 0x100, 0x100, UQ_AU_INP_ASYNC),
USB_QUIRK(LOGITECH, UN53B, 0x0000, 0xffff, UQ_NO_STRINGS),
+ USB_QUIRK(LOGITECH, G510S, 0x0000, 0xFFFF, UQ_KBD_BOOTPROTO),
USB_QUIRK(REALTEK, RTL8196EU, 0x0000, 0xffff, UQ_CFG_INDEX_1),
USB_QUIRK(ELSA, MODEM1, 0x0000, 0xffff, UQ_CFG_INDEX_1),
USB_QUIRK(PLANEX2, MZKUE150N, 0x0000, 0xffff, UQ_CFG_INDEX_1),
USB_QUIRK(CISCOLINKSYS, USB3GIGV1, 0x0000, 0xffff, UQ_CFG_INDEX_1),
+ USB_QUIRK(REALTEK, RTL8156, 0x0000, 0xffff, UQ_CFG_INDEX_2),
/* Quirks for printer devices */
USB_QUIRK(HP, 895C, 0x0000, 0xffff, UQ_BROKEN_BIDIR),
USB_QUIRK(HP, 880C, 0x0000, 0xffff, UQ_BROKEN_BIDIR),
@@ -112,8 +114,19 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(XEROX, WCM15, 0x0000, 0xffff, UQ_BROKEN_BIDIR),
/* Devices which should be ignored by uhid */
USB_QUIRK(APC, UPS, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6H375USB, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(BELKIN, F6C550AVR, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C1250TWRK, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C1500TWRK, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C900UNV, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C100UNV, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C120UNV, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C800UNV, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(BELKIN, F6C1100UNV, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(CYBERPOWER, BC900D, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(CYBERPOWER, 1500CAVRLCD, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(CYBERPOWER, OR2200LCDRM2U, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(DELL2, VARIOUS_UPS, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(CYPRESS, SILVERSHIELD, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(DELORME, EARTHMATE, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(DREAMLINK, DL100B, 0x0000, 0xffff, UQ_HID_IGNORE),
@@ -121,8 +134,26 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(ITUNERNET, USBLCD4X20, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(LIEBERT, POWERSURE_PXT, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(LIEBERT2, PSI1000, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(LIEBERT2, POWERSURE_PSA, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(MGE, UPS1, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(MGE, UPS2, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(POWERCOM, IMPERIAL_SERIES, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(POWERCOM, SMART_KING_PRO, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(POWERCOM, WOW, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(POWERCOM, VANGUARD, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(POWERCOM, BLACK_KNIGHT_PRO, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, AVR550U, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, AVR750U, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, ECO550UPS, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, T750_INTL, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, RT_2200_INTL, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, OMNI1000LCD, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, OMNI900LCD, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, SMART_2200RMXL2U, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, UPS_3014, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, SU1500RTXL2UA, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, SU6000RT4U, 0x0000, 0xffff, UQ_HID_IGNORE),
+ USB_QUIRK(TRIPPLITE2, SU1500RTXL2UA_2, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(APPLE, IPHONE, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(APPLE, IPHONE_3G, 0x0000, 0xffff, UQ_HID_IGNORE),
USB_QUIRK(MEGATEC, UPS, 0x0000, 0xffff, UQ_HID_IGNORE),
@@ -137,12 +168,15 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(MICROSOFT, WLINTELLIMOUSE, 0x0000, 0xffff, UQ_MS_LEADING_BYTE),
/* Quirk for Corsair Vengeance K60 keyboard */
USB_QUIRK(CORSAIR, K60, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
+ /* Quirk for Corsair Gaming K68 keyboard */
+ USB_QUIRK(CORSAIR, K68, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
/* Quirk for Corsair Vengeance K70 keyboard */
USB_QUIRK(CORSAIR, K70, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
/* Quirk for Corsair K70 RGB keyboard */
USB_QUIRK(CORSAIR, K70_RGB, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
/* Quirk for Corsair STRAFE Gaming keyboard */
USB_QUIRK(CORSAIR, STRAFE, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
+ USB_QUIRK(CORSAIR, STRAFE2, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
/* umodem(4) device quirks */
USB_QUIRK(METRICOM, RICOCHET_GS, 0x100, 0x100, UQ_ASSUME_CM_OVER_DATA),
USB_QUIRK(SANYO, SCP4900, 0x000, 0x000, UQ_ASSUME_CM_OVER_DATA),
@@ -188,6 +222,8 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(CENTURY, EX35QUAT, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ,
UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE),
+ USB_QUIRK(CREATIVE, NOMAD, 0x0001, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
+ UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1),
USB_QUIRK(CYPRESS, XX6830XX, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN,
UQ_MSC_NO_SYNC_CACHE),
USB_QUIRK(DESKNOTE, UCR_61S2B, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
@@ -240,7 +276,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
UQ_MSC_FORCE_PROTO_RBC),
USB_QUIRK(INSYSTEM, STORAGE_V2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI,
UQ_MSC_FORCE_PROTO_RBC),
- USB_QUIRK(INTENSO, MEMORY_BOX, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY),
+ USB_QUIRK(VIALABS, VL701, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY),
USB_QUIRK(IODATA, IU_CD2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI),
USB_QUIRK(IODATA, DVR_UEH8, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
@@ -248,6 +284,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(IOMEGA, ZIP100, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI,
UQ_MSC_NO_TEST_UNIT_READY), /* XXX ZIP drives can also use ATAPI */
+ USB_QUIRK(JMICRON, JMS566, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN),
USB_QUIRK(JMICRON, JMS567, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN),
USB_QUIRK(JMICRON, JM20337, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI,
@@ -362,6 +399,8 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE),
USB_QUIRK(SANDISK, SDCZ4_256, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE),
+ USB_QUIRK(SANDISK, SDCZ48_32, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE,
+ UQ_MSC_NO_TEST_UNIT_READY),
USB_QUIRK(SANDISK, SDDR31, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1),
USB_QUIRK(SANDISK, IMAGEMATE_SDDR289, 0x0000, 0xffff,
@@ -498,6 +537,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(VIALABS, USB30SATABRIDGE, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE),
USB_QUIRK(QUALCOMMINC, ZTE_MF730M, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN,
UQ_MSC_NO_INQUIRY, UQ_CFG_INDEX_0),
+ USB_QUIRK(SMART2, G2MEMKEY, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY),
/* Non-standard USB MIDI devices */
USB_QUIRK(ROLAND, UM1, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(ROLAND, SC8850, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
@@ -528,6 +568,8 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(MAUDIO, FASTTRACKULTRA, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(MAUDIO, FASTTRACKULTRA8R, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(CMEDIA, CM6206, 0x0000, 0xffff, UQ_AU_SET_SPDIF_CM6206),
+ USB_QUIRK(PLOYTEC, SPL_CRIMSON_1, 0x0000, 0xffff, UQ_CFG_INDEX_1),
+ USB_QUIRK(ROLAND, UA25EX_AD, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
/*
* Quirks for manufacturers which USB devices does not respond
diff --git a/freebsd/sys/dev/usb/serial/u3g.c b/freebsd/sys/dev/usb/serial/u3g.c
index 8d72ef49..dd82a2b2 100644
--- a/freebsd/sys/dev/usb/serial/u3g.c
+++ b/freebsd/sys/dev/usb/serial/u3g.c
@@ -33,11 +33,11 @@
*/
+#include <sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/stdint.h>
#include <sys/stddef.h>
-#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
diff --git a/freebsd/sys/dev/usb/serial/ugensa.c b/freebsd/sys/dev/usb/serial/ugensa.c
index 2737227c..10a3ea33 100644
--- a/freebsd/sys/dev/usb/serial/ugensa.c
+++ b/freebsd/sys/dev/usb/serial/ugensa.c
@@ -72,6 +72,7 @@
#define UGENSA_CONFIG_INDEX 0
#define UGENSA_IFACE_INDEX 0
#define UGENSA_IFACE_MAX 8 /* exclusivly */
+#define UGENSA_PORT_MAX 8 /* exclusivly */
enum {
UGENSA_BULK_DT_WR,
@@ -86,11 +87,11 @@ struct ugensa_sub_softc {
struct ugensa_softc {
struct ucom_super_softc sc_super_ucom;
- struct ucom_softc sc_ucom[UGENSA_IFACE_MAX];
- struct ugensa_sub_softc sc_sub[UGENSA_IFACE_MAX];
+ struct ucom_softc sc_ucom[UGENSA_PORT_MAX];
+ struct ugensa_sub_softc sc_sub[UGENSA_PORT_MAX];
struct mtx sc_mtx;
- uint8_t sc_niface;
+ uint8_t sc_nports;
};
/* prototypes */
@@ -156,12 +157,13 @@ static driver_t ugensa_driver = {
.size = sizeof(struct ugensa_softc),
};
+/* Driver-info is max number of serial ports per interface */
static const STRUCT_USB_HOST_ID ugensa_devs[] = {
- {USB_VPI(USB_VENDOR_AIRPRIME, USB_PRODUCT_AIRPRIME_PC5220, 0)},
- {USB_VPI(USB_VENDOR_CMOTECH, USB_PRODUCT_CMOTECH_CDMA_MODEM1, 0)},
- {USB_VPI(USB_VENDOR_KYOCERA2, USB_PRODUCT_KYOCERA2_CDMA_MSM_K, 0)},
- {USB_VPI(USB_VENDOR_HP, USB_PRODUCT_HP_49GPLUS, 0)},
- {USB_VPI(USB_VENDOR_NOVATEL2, USB_PRODUCT_NOVATEL2_FLEXPACKGPS, 0)},
+ {USB_VPI(USB_VENDOR_AIRPRIME, USB_PRODUCT_AIRPRIME_PC5220, 1)},
+ {USB_VPI(USB_VENDOR_CMOTECH, USB_PRODUCT_CMOTECH_CDMA_MODEM1, 1)},
+ {USB_VPI(USB_VENDOR_KYOCERA2, USB_PRODUCT_KYOCERA2_CDMA_MSM_K, 1)},
+ {USB_VPI(USB_VENDOR_HP, USB_PRODUCT_HP_49GPLUS, 1)},
+ {USB_VPI(USB_VENDOR_NOVATEL2, USB_PRODUCT_NOVATEL2_FLEXPACKGPS, 3)},
};
DRIVER_MODULE(ugensa, uhub, ugensa_driver, ugensa_devclass, NULL, 0);
@@ -194,65 +196,68 @@ ugensa_attach(device_t dev)
struct ugensa_softc *sc = device_get_softc(dev);
struct ugensa_sub_softc *ssc;
struct usb_interface *iface;
+ struct usb_config xfer_config[UGENSA_N_TRANSFER];
int32_t error;
uint8_t iface_index;
- int x, cnt;
+ int x, maxports;
+ maxports = USB_GET_DRIVER_INFO(uaa);
device_set_usb_desc(dev);
mtx_init(&sc->sc_mtx, "ugensa", NULL, MTX_DEF);
ucom_ref(&sc->sc_super_ucom);
- /* Figure out how many interfaces this device has got */
- for (cnt = 0; cnt < UGENSA_IFACE_MAX; cnt++) {
- if ((usbd_get_endpoint(uaa->device, cnt, ugensa_xfer_config + 0) == NULL) ||
- (usbd_get_endpoint(uaa->device, cnt, ugensa_xfer_config + 1) == NULL)) {
- /* we have reached the end */
- break;
- }
- }
+ for (iface_index = UGENSA_IFACE_INDEX; iface_index < UGENSA_IFACE_MAX; iface_index++) {
- if (cnt == 0) {
- device_printf(dev, "No interfaces\n");
- goto detach;
- }
- for (x = 0; x < cnt; x++) {
- iface = usbd_get_iface(uaa->device, x);
- if (iface->idesc->bInterfaceClass != UICLASS_VENDOR)
+ iface = usbd_get_iface(uaa->device, iface_index);
+ if (iface == NULL || iface->idesc->bInterfaceClass != UICLASS_VENDOR)
/* Not a serial port, most likely a SD reader */
continue;
- ssc = sc->sc_sub + sc->sc_niface;
- ssc->sc_ucom_ptr = sc->sc_ucom + sc->sc_niface;
-
- iface_index = (UGENSA_IFACE_INDEX + x);
- error = usbd_transfer_setup(uaa->device,
- &iface_index, ssc->sc_xfer, ugensa_xfer_config,
- UGENSA_N_TRANSFER, ssc, &sc->sc_mtx);
-
- if (error) {
- device_printf(dev, "allocating USB "
- "transfers failed\n");
- goto detach;
+ /* Loop over all endpoints pairwise */
+ for (x = 0; x < maxports && sc->sc_nports < UGENSA_PORT_MAX; x++) {
+
+ ssc = sc->sc_sub + sc->sc_nports;
+ ssc->sc_ucom_ptr = sc->sc_ucom + sc->sc_nports;
+
+ memcpy(xfer_config, ugensa_xfer_config, sizeof ugensa_xfer_config);
+ xfer_config[UGENSA_BULK_DT_RD].ep_index = x;
+ xfer_config[UGENSA_BULK_DT_WR].ep_index = x;
+
+ error = usbd_transfer_setup(uaa->device,
+ &iface_index, ssc->sc_xfer, xfer_config,
+ UGENSA_N_TRANSFER, ssc, &sc->sc_mtx);
+
+ if (error) {
+ if (x == 0) {
+ device_printf(dev, "allocating USB "
+ "transfers failed (%d)\n", error);
+ goto detach;
+ }
+ break;
+ }
+
+ /* clear stall at first run */
+ mtx_lock(&sc->sc_mtx);
+ usbd_xfer_set_stall(ssc->sc_xfer[UGENSA_BULK_DT_WR]);
+ usbd_xfer_set_stall(ssc->sc_xfer[UGENSA_BULK_DT_RD]);
+ mtx_unlock(&sc->sc_mtx);
+
+ /* initialize port number */
+ ssc->sc_ucom_ptr->sc_portno = sc->sc_nports;
+ if (iface_index != uaa->info.bIfaceIndex) {
+ usbd_set_parent_iface(uaa->device, iface_index,
+ uaa->info.bIfaceIndex);
+ }
+ sc->sc_nports++;
}
- /* clear stall at first run */
- mtx_lock(&sc->sc_mtx);
- usbd_xfer_set_stall(ssc->sc_xfer[UGENSA_BULK_DT_WR]);
- usbd_xfer_set_stall(ssc->sc_xfer[UGENSA_BULK_DT_RD]);
- mtx_unlock(&sc->sc_mtx);
-
- /* initialize port number */
- ssc->sc_ucom_ptr->sc_portno = sc->sc_niface;
- sc->sc_niface++;
- if (x != uaa->info.bIfaceIndex)
- usbd_set_parent_iface(uaa->device, x,
- uaa->info.bIfaceIndex);
}
- device_printf(dev, "Found %d interfaces.\n", sc->sc_niface);
+ device_printf(dev, "Found %d serial ports.\n", sc->sc_nports);
- error = ucom_attach(&sc->sc_super_ucom, sc->sc_ucom, sc->sc_niface, sc,
+ error = ucom_attach(&sc->sc_super_ucom, sc->sc_ucom, sc->sc_nports, sc,
&ugensa_callback, &sc->sc_mtx);
+
if (error) {
- DPRINTF("attach failed\n");
+ DPRINTF("ucom attach failed\n");
goto detach;
}
ucom_set_pnpinfo_usb(&sc->sc_super_ucom, dev);
@@ -272,7 +277,7 @@ ugensa_detach(device_t dev)
ucom_detach(&sc->sc_super_ucom, sc->sc_ucom);
- for (x = 0; x < sc->sc_niface; x++) {
+ for (x = 0; x < sc->sc_nports; x++) {
usbd_transfer_unsetup(sc->sc_sub[x].sc_xfer, UGENSA_N_TRANSFER);
}
diff --git a/freebsd/sys/dev/usb/serial/umcs.c b/freebsd/sys/dev/usb/serial/umcs.c
index 8f083ce3..3a5fc70f 100644
--- a/freebsd/sys/dev/usb/serial/umcs.c
+++ b/freebsd/sys/dev/usb/serial/umcs.c
@@ -501,7 +501,9 @@ umcs7840_cfg_open(struct ucom_softc *ucom)
* Enable DTR/RTS on modem control, enable modem interrupts --
* documented
*/
- sc->sc_ports[pn].sc_mcr = MCS7840_UART_MCR_DTR | MCS7840_UART_MCR_RTS | MCS7840_UART_MCR_IE;
+ sc->sc_ports[pn].sc_mcr = MCS7840_UART_MCR_IE;
+ if (ucom->sc_tty == NULL || (ucom->sc_tty->t_termios.c_cflag & CNO_RTSDTR) == 0)
+ sc->sc_ports[pn].sc_mcr |= MCS7840_UART_MCR_DTR | MCS7840_UART_MCR_RTS;
if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, sc->sc_ports[pn].sc_mcr))
return;
diff --git a/freebsd/sys/dev/usb/serial/usb_serial.c b/freebsd/sys/dev/usb/serial/usb_serial.c
index a3f9b5de..c649056a 100644
--- a/freebsd/sys/dev/usb/serial/usb_serial.c
+++ b/freebsd/sys/dev/usb/serial/usb_serial.c
@@ -810,7 +810,8 @@ ucom_open(struct tty *tp)
&sc->sc_start_task[0].hdr,
&sc->sc_start_task[1].hdr);
- ucom_modem(tp, SER_DTR | SER_RTS, 0);
+ if (sc->sc_tty == NULL || (sc->sc_tty->t_termios.c_cflag & CNO_RTSDTR) == 0)
+ ucom_modem(tp, SER_DTR | SER_RTS, 0);
ucom_ring(sc, 0);
diff --git a/freebsd/sys/dev/usb/usb.h b/freebsd/sys/dev/usb/usb.h
index ff33cf00..0075d429 100644
--- a/freebsd/sys/dev/usb/usb.h
+++ b/freebsd/sys/dev/usb/usb.h
@@ -444,6 +444,7 @@ typedef struct usb_interface_assoc_descriptor usb_interface_assoc_descriptor_t;
#define UIPROTO_CDC_NONE 0
#define UIPROTO_CDC_AT 1
+#define UIPROTO_CDC_EEM 7
#define UICLASS_HID 0x03
#define UISUBCLASS_BOOT 1
diff --git a/freebsd/sys/dev/usb/usb_bus.h b/freebsd/sys/dev/usb/usb_bus.h
index 710436c1..9f8586e6 100644
--- a/freebsd/sys/dev/usb/usb_bus.h
+++ b/freebsd/sys/dev/usb/usb_bus.h
@@ -131,6 +131,7 @@ struct usb_bus {
uint8_t do_probe; /* set if USB should be re-probed */
uint8_t no_explore; /* don't explore USB ports */
uint8_t dma_bits; /* number of DMA address lines */
+ uint8_t control_ep_quirk; /* need 64kByte buffer for data stage */
};
#endif /* _USB_BUS_H_ */
diff --git a/freebsd/sys/dev/usb/usb_device.c b/freebsd/sys/dev/usb/usb_device.c
index 5d6b9d0f..ee240949 100644
--- a/freebsd/sys/dev/usb/usb_device.c
+++ b/freebsd/sys/dev/usb/usb_device.c
@@ -34,8 +34,8 @@
#include <sys/stdint.h>
#include <sys/stddef.h>
#include <sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/queue.h>
-#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
diff --git a/freebsd/sys/sys/capability.h b/freebsd/sys/dev/usb/usb_fdt_support.h
index 39195e03..e4249e85 100644
--- a/freebsd/sys/sys/capability.h
+++ b/freebsd/sys/dev/usb/usb_fdt_support.h
@@ -1,11 +1,7 @@
/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ * SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2014 Robert N. M. Watson
- * All rights reserved.
- *
- * This software was developed at the University of Cambridge Computer
- * Laboratory with support from a grant from Google, Inc.
+ * Copyright (c) 2019 Ian Lepore <ian@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,17 +27,22 @@
* $FreeBSD$
*/
+#ifndef _USB_FDT_SUPPORT_H_
+#define _USB_FDT_SUPPORT_H_
+
+struct usb_device;
+struct usb_ether;
+
/*
- * Historically, the key userspace and kernel Capsicum definitions were found
- * in this file. However, it conflicted with POSIX.1e's capability.h, so has
- * been renamed capsicum.h. The file remains for backwards compatibility
- * reasons as a nested include. It is expected to be removed before
- * FreeBSD 13.
+ * Get the device's MAC address from the FDT data. Fills in ue->ue_eaddr and
+ * returns 0 on success, otherwise leaves ue_eaddr untouched and returns
+ * non-zero. This first attempts to get the address from the "mac-address"
+ * property, and if that's not valid it tries the "local-mac-address" property;
+ * this matches the linux interpretation of the precedence of those properties.
*/
-#ifndef _SYS_CAPABILITY_H_
-#define _SYS_CAPABILITY_H_
+int usb_fdt_get_mac_addr(device_t dev, struct usb_ether* ue);
-#warning this file includes <sys/capability.h> which is deprecated
-#include <sys/capsicum.h>
+/* Get the FDT node for dev. Returns -1 if dev is not in the FDT data. */
+phandle_t usb_fdt_get_node(device_t dev, struct usb_device* udev);
-#endif /* !_SYS_CAPABILITY_H_ */
+#endif
diff --git a/freebsd/sys/dev/usb/usb_generic.c b/freebsd/sys/dev/usb/usb_generic.c
index 29a27d53..6d359226 100644
--- a/freebsd/sys/dev/usb/usb_generic.c
+++ b/freebsd/sys/dev/usb/usb_generic.c
@@ -185,7 +185,8 @@ ugen_open(struct usb_fifo *f, int fflags)
struct usb_endpoint_descriptor *ed = ep->edesc;
uint8_t type;
- DPRINTFN(6, "flag=0x%x\n", fflags);
+ DPRINTFN(1, "flag=0x%x pid=%d name=%s\n", fflags,
+ curthread->td_proc->p_pid, curthread->td_proc->p_comm);
mtx_lock(f->priv_mtx);
switch (usbd_get_speed(f->udev)) {
@@ -215,7 +216,9 @@ ugen_open(struct usb_fifo *f, int fflags)
static void
ugen_close(struct usb_fifo *f, int fflags)
{
- DPRINTFN(6, "flag=0x%x\n", fflags);
+
+ DPRINTFN(1, "flag=0x%x pid=%d name=%s\n", fflags,
+ curthread->td_proc->p_pid, curthread->td_proc->p_comm);
/* cleanup */
@@ -1218,6 +1221,40 @@ complete:
}
static int
+ugen_fs_copy_out_cancelled(struct usb_fs_endpoint *fs_ep_uptr)
+{
+ struct usb_fs_endpoint fs_ep;
+ int error;
+
+ error = copyin(fs_ep_uptr, &fs_ep, sizeof(fs_ep));
+ if (error)
+ return (error);
+
+ fs_ep.status = USB_ERR_CANCELLED;
+ fs_ep.aFrames = 0;
+ fs_ep.isoc_time_complete = 0;
+
+ /* update "aFrames" */
+ error = copyout(&fs_ep.aFrames, &fs_ep_uptr->aFrames,
+ sizeof(fs_ep.aFrames));
+ if (error)
+ goto done;
+
+ /* update "isoc_time_complete" */
+ error = copyout(&fs_ep.isoc_time_complete,
+ &fs_ep_uptr->isoc_time_complete,
+ sizeof(fs_ep.isoc_time_complete));
+ if (error)
+ goto done;
+
+ /* update "status" */
+ error = copyout(&fs_ep.status, &fs_ep_uptr->status,
+ sizeof(fs_ep.status));
+done:
+ return (error);
+}
+
+static int
ugen_fs_copy_out(struct usb_fifo *f, uint8_t ep_index)
{
struct usb_device_request *req;
@@ -1242,7 +1279,12 @@ ugen_fs_copy_out(struct usb_fifo *f, uint8_t ep_index)
return (EINVAL);
mtx_lock(f->priv_mtx);
- if (usbd_transfer_pending(xfer)) {
+ if (!xfer->flags_int.transferring &&
+ !xfer->flags_int.started) {
+ mtx_unlock(f->priv_mtx);
+ DPRINTF("Returning fake cancel event\n");
+ return (ugen_fs_copy_out_cancelled(f->fs_ep_ptr + ep_index));
+ } else if (usbd_transfer_pending(xfer)) {
mtx_unlock(f->priv_mtx);
return (EBUSY); /* should not happen */
}
@@ -1363,6 +1405,7 @@ complete:
sizeof(fs_ep.isoc_time_complete));
if (error)
goto done;
+
/* update "status" */
error = copyout(&fs_ep.status, &fs_ep_uptr->status,
sizeof(fs_ep.status));
@@ -1451,12 +1494,15 @@ ugen_ioctl(struct usb_fifo *f, u_long cmd, void *addr, int fflags)
xfer = f->fs_xfer[u.pstart->ep_index];
if (usbd_transfer_pending(xfer)) {
usbd_transfer_stop(xfer);
+
/*
* Check if the USB transfer was stopped
- * before it was even started. Else a cancel
- * callback will be pending.
+ * before it was even started and fake a
+ * cancel event.
*/
- if (!xfer->flags_int.transferring) {
+ if (!xfer->flags_int.transferring &&
+ !xfer->flags_int.started) {
+ DPRINTF("Issuing fake completion event\n");
ugen_fs_set_complete(xfer->priv_sc,
USB_P2U(xfer->priv_fifo));
}
diff --git a/freebsd/sys/dev/usb/usb_hid.c b/freebsd/sys/dev/usb/usb_hid.c
index 7ae052b7..21289f0d 100644
--- a/freebsd/sys/dev/usb/usb_hid.c
+++ b/freebsd/sys/dev/usb/usb_hid.c
@@ -76,7 +76,7 @@ static uint8_t hid_get_byte(struct hid_data *s, const uint16_t wSize);
#define MAXUSAGE 64
#define MAXPUSH 4
#define MAXID 16
-#define MAXLOCCNT 1024
+#define MAXLOCCNT 2048
struct hid_pos_data {
int32_t rid;
diff --git a/freebsd/sys/dev/usb/usb_hub.c b/freebsd/sys/dev/usb/usb_hub.c
index a0ad462f..b3543a8f 100644
--- a/freebsd/sys/dev/usb/usb_hub.c
+++ b/freebsd/sys/dev/usb/usb_hub.c
@@ -77,14 +77,9 @@
#include <dev/usb/usb_bus.h>
#endif /* USB_GLOBAL_INCLUDE_FILE */
-#define UHUB_INTR_INTERVAL 250 /* ms */
-enum {
- UHUB_INTR_TRANSFER,
-#if USB_HAVE_TT_SUPPORT
- UHUB_RESET_TT_TRANSFER,
-#endif
- UHUB_N_TRANSFER,
-};
+
+#include <dev/usb/usb_hub_private.h>
+
#ifdef USB_DEBUG
static int uhub_debug = 0;
@@ -113,27 +108,6 @@ SYSCTL_INT(_hw_usb, OID_AUTO, disable_port_power, CTLFLAG_RWTUN,
&usb_disable_port_power, 0, "Set to disable all USB port power.");
#endif
-struct uhub_current_state {
- uint16_t port_change;
- uint16_t port_status;
-};
-
-struct uhub_softc {
- struct uhub_current_state sc_st;/* current state */
-#if (USB_HAVE_FIXED_PORT != 0)
- struct usb_hub sc_hub;
-#endif
- device_t sc_dev; /* base device */
- struct mtx sc_mtx; /* our mutex */
- struct usb_device *sc_udev; /* USB device */
- struct usb_xfer *sc_xfer[UHUB_N_TRANSFER]; /* interrupt xfer */
-#if USB_HAVE_DISABLE_ENUM
- int sc_disable_enumeration;
- int sc_disable_port_power;
-#endif
- uint8_t sc_flags;
-#define UHUB_FLAG_DID_EXPLORE 0x01
-};
#define UHUB_PROTO(sc) ((sc)->sc_udev->ddesc.bDeviceProtocol)
#define UHUB_IS_HIGH_SPEED(sc) (UHUB_PROTO(sc) != UDPROTO_FSHUB)
@@ -143,14 +117,10 @@ struct uhub_softc {
/* prototypes for type checking: */
-static device_probe_t uhub_probe;
-static device_attach_t uhub_attach;
-static device_detach_t uhub_detach;
static device_suspend_t uhub_suspend;
static device_resume_t uhub_resume;
static bus_driver_added_t uhub_driver_added;
-static bus_child_location_str_t uhub_child_location_string;
static bus_child_pnpinfo_str_t uhub_child_pnpinfo_string;
static usb_callback_t uhub_intr_callback;
@@ -207,7 +177,7 @@ static device_method_t uhub_methods[] = {
DEVMETHOD_END
};
-static driver_t uhub_driver = {
+driver_t uhub_driver = {
.name = "uhub",
.methods = uhub_methods,
.size = sizeof(struct uhub_softc)
@@ -589,13 +559,25 @@ uhub_read_port_status(struct uhub_softc *sc, uint8_t portno)
struct usb_port_status ps;
usb_error_t err;
+ if (sc->sc_usb_port_errors >= UHUB_USB_PORT_ERRORS_MAX) {
+ DPRINTFN(4, "port %d, HUB looks dead, too many errors\n", portno);
+ sc->sc_st.port_status = 0;
+ sc->sc_st.port_change = 0;
+ return (USB_ERR_TIMEOUT);
+ }
+
err = usbd_req_get_port_status(
sc->sc_udev, NULL, &ps, portno);
- /* update status regardless of error */
-
- sc->sc_st.port_status = UGETW(ps.wPortStatus);
- sc->sc_st.port_change = UGETW(ps.wPortChange);
+ if (err == 0) {
+ sc->sc_st.port_status = UGETW(ps.wPortStatus);
+ sc->sc_st.port_change = UGETW(ps.wPortChange);
+ sc->sc_usb_port_errors = 0;
+ } else {
+ sc->sc_st.port_status = 0;
+ sc->sc_st.port_change = 0;
+ sc->sc_usb_port_errors++;
+ }
/* debugging print */
@@ -1126,7 +1108,7 @@ uhub_explore(struct usb_device *udev)
return (USB_ERR_NORMAL_COMPLETION);
}
-static int
+int
uhub_probe(device_t dev)
{
struct usb_attach_arg *uaa = device_get_ivars(dev);
@@ -1140,7 +1122,7 @@ uhub_probe(device_t dev)
*/
if (uaa->info.bConfigIndex == 0 &&
uaa->info.bDeviceClass == UDCLASS_HUB)
- return (0);
+ return (BUS_PROBE_DEFAULT);
return (ENXIO);
}
@@ -1206,7 +1188,7 @@ uhub_query_info(struct usb_device *udev, uint8_t *pnports, uint8_t *ptt)
return (err);
}
-static int
+int
uhub_attach(device_t dev)
{
struct uhub_softc *sc = device_get_softc(dev);
@@ -1552,7 +1534,7 @@ error:
* Called from process context when the hub is gone.
* Detach all devices on active ports.
*/
-static int
+int
uhub_detach(device_t dev)
{
struct uhub_softc *sc = device_get_softc(dev);
@@ -1622,13 +1604,7 @@ uhub_driver_added(device_t dev, driver_t *driver)
usb_needs_explore_all();
}
-struct hub_result {
- struct usb_device *udev;
- uint8_t portno;
- uint8_t iface_index;
-};
-
-static void
+void
uhub_find_iface_index(struct usb_hub *hub, device_t child,
struct hub_result *res)
{
@@ -1661,7 +1637,7 @@ uhub_find_iface_index(struct usb_hub *hub, device_t child,
res->portno = 0;
}
-static int
+int
uhub_child_location_string(device_t parent, device_t child,
char *buf, size_t buflen)
{
diff --git a/freebsd/sys/dev/usb/usb_hub_private.h b/freebsd/sys/dev/usb/usb_hub_private.h
new file mode 100644
index 00000000..1151ed7d
--- /dev/null
+++ b/freebsd/sys/dev/usb/usb_hub_private.h
@@ -0,0 +1,86 @@
+/* $FreeBSD$ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-NetBSD
+ *
+ * Copyright (c) 1998 The NetBSD Foundation, Inc. All rights reserved.
+ * Copyright (c) 1998 Lennart Augustsson. All rights reserved.
+ * Copyright (c) 2008-2010 Hans Petter Selasky. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * USB spec: http://www.usb.org/developers/docs/usbspec.zip
+ */
+
+#ifndef USB_HUB_PRIVATE_H_
+#define USB_HUB_PRIVATE_H_
+#define UHUB_INTR_INTERVAL 250 /* ms */
+
+enum {
+ UHUB_INTR_TRANSFER,
+#if USB_HAVE_TT_SUPPORT
+ UHUB_RESET_TT_TRANSFER,
+#endif
+ UHUB_N_TRANSFER,
+};
+
+
+struct uhub_current_state {
+ uint16_t port_change;
+ uint16_t port_status;
+};
+
+struct uhub_softc {
+ struct uhub_current_state sc_st; /* current state */
+#if (USB_HAVE_FIXED_PORT != 0)
+ struct usb_hub sc_hub;
+#endif
+ device_t sc_dev; /* base device */
+ struct mtx sc_mtx; /* our mutex */
+ struct usb_device *sc_udev; /* USB device */
+ struct usb_xfer *sc_xfer[UHUB_N_TRANSFER]; /* interrupt xfer */
+#if USB_HAVE_DISABLE_ENUM
+ int sc_disable_enumeration;
+ int sc_disable_port_power;
+#endif
+ uint8_t sc_usb_port_errors; /* error counter */
+#define UHUB_USB_PORT_ERRORS_MAX 4
+ uint8_t sc_flags;
+#define UHUB_FLAG_DID_EXPLORE 0x01
+};
+struct hub_result {
+ struct usb_device *udev;
+ uint8_t portno;
+ uint8_t iface_index;
+};
+
+void
+uhub_find_iface_index(struct usb_hub *hub, device_t child,
+ struct hub_result *res);
+
+device_probe_t uhub_probe;
+device_attach_t uhub_attach;
+device_detach_t uhub_detach;
+bus_child_location_str_t uhub_child_location_string;
+
+#endif
diff --git a/freebsd/sys/dev/usb/usb_ioctl.h b/freebsd/sys/dev/usb/usb_ioctl.h
index fcd31e31..c4023cab 100644
--- a/freebsd/sys/dev/usb/usb_ioctl.h
+++ b/freebsd/sys/dev/usb/usb_ioctl.h
@@ -70,6 +70,7 @@ enum {
USB_TEMP_SERIALNET, /* USB CDC Ethernet and Modem */
USB_TEMP_MIDI, /* USB MIDI */
USB_TEMP_MULTI, /* USB Ethernet, serial, and storage */
+ USB_TEMP_CDCEEM, /* USB Ethernet Emulation Model */
USB_TEMP_MAX,
};
@@ -223,7 +224,7 @@ struct usb_fs_uninit {
} USB_IOCTL_STRUCT_ALIGN(1);
struct usb_fs_open {
-#define USB_FS_MAX_BUFSIZE (1 << 18)
+#define USB_FS_MAX_BUFSIZE (1 << 25) /* 32 MBytes */
uint32_t max_bufsize;
#define USB_FS_MAX_FRAMES (1U << 12)
#define USB_FS_MAX_FRAMES_PRE_SCALE (1U << 31) /* for ISOCHRONOUS transfers */
diff --git a/freebsd/sys/dev/usb/usb_request.c b/freebsd/sys/dev/usb/usb_request.c
index d2a15f3c..f288378e 100644
--- a/freebsd/sys/dev/usb/usb_request.c
+++ b/freebsd/sys/dev/usb/usb_request.c
@@ -1603,8 +1603,9 @@ usbd_req_get_port_status(struct usb_device *udev, struct mtx *mtx,
USETW(req.wValue, 0);
req.wIndex[0] = port;
req.wIndex[1] = 0;
- USETW(req.wLength, sizeof *ps);
- return (usbd_do_request(udev, mtx, &req, ps));
+ USETW(req.wLength, sizeof(*ps));
+
+ return (usbd_do_request_flags(udev, mtx, &req, ps, 0, NULL, 1000));
}
/*------------------------------------------------------------------------*
diff --git a/freebsd/sys/dev/usb/usb_transfer.c b/freebsd/sys/dev/usb/usb_transfer.c
index 7ea25337..3b67c20c 100644
--- a/freebsd/sys/dev/usb/usb_transfer.c
+++ b/freebsd/sys/dev/usb/usb_transfer.c
@@ -111,6 +111,33 @@ static const struct usb_config usb_control_ep_cfg[USB_CTRL_XFER_MAX] = {
},
};
+static const struct usb_config usb_control_ep_quirk_cfg[USB_CTRL_XFER_MAX] = {
+
+ /* This transfer is used for generic control endpoint transfers */
+
+ [0] = {
+ .type = UE_CONTROL,
+ .endpoint = 0x00, /* Control endpoint */
+ .direction = UE_DIR_ANY,
+ .bufsize = 65535, /* bytes */
+ .callback = &usb_request_callback,
+ .usb_mode = USB_MODE_DUAL, /* both modes */
+ },
+
+ /* This transfer is used for generic clear stall only */
+
+ [1] = {
+ .type = UE_CONTROL,
+ .endpoint = 0x00, /* Control pipe */
+ .direction = UE_DIR_ANY,
+ .bufsize = sizeof(struct usb_device_request),
+ .callback = &usb_do_clear_stall_callback,
+ .timeout = 1000, /* 1 second */
+ .interval = 50, /* 50ms */
+ .usb_mode = USB_MODE_HOST,
+ },
+};
+
/* function prototypes */
static void usbd_update_max_frame_size(struct usb_xfer *);
@@ -1051,7 +1078,8 @@ usbd_transfer_setup(struct usb_device *udev,
* context, else there is a chance of
* deadlock!
*/
- if (setup_start == usb_control_ep_cfg)
+ if (setup_start == usb_control_ep_cfg ||
+ setup_start == usb_control_ep_quirk_cfg)
info->done_p =
USB_BUS_CONTROL_XFER_PROC(udev->bus);
else if (xfer_mtx == &Giant)
@@ -3179,7 +3207,8 @@ repeat:
*/
iface_index = 0;
if (usbd_transfer_setup(udev, &iface_index,
- udev->ctrl_xfer, usb_control_ep_cfg, USB_CTRL_XFER_MAX, NULL,
+ udev->ctrl_xfer, udev->bus->control_ep_quirk ?
+ usb_control_ep_quirk_cfg : usb_control_ep_cfg, USB_CTRL_XFER_MAX, NULL,
&udev->device_mtx)) {
DPRINTFN(0, "could not setup default "
"USB transfer\n");
diff --git a/freebsd/sys/dev/usb/usbdi.h b/freebsd/sys/dev/usb/usbdi.h
index d5648c03..0a393844 100644
--- a/freebsd/sys/dev/usb/usbdi.h
+++ b/freebsd/sys/dev/usb/usbdi.h
@@ -105,7 +105,7 @@ typedef void (usb_fifo_filter_t)(struct usb_fifo *fifo, struct usb_mbuf *m);
/* USB events */
#ifndef USB_GLOBAL_INCLUDE_FILE
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
#endif
typedef void (*usb_dev_configured_t)(void *, struct usb_device *,
struct usb_attach_arg *);
diff --git a/freebsd/sys/dev/usb/wlan/if_rsu.c b/freebsd/sys/dev/usb/wlan/if_rsu.c
index b730ce59..112b8675 100644
--- a/freebsd/sys/dev/usb/wlan/if_rsu.c
+++ b/freebsd/sys/dev/usb/wlan/if_rsu.c
@@ -44,13 +44,9 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/bus.h>
-#include <sys/rman.h>
#include <sys/firmware.h>
#include <sys/module.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -291,9 +287,6 @@ MODULE_DEPEND(rsu, firmware, 1, 1, 1);
MODULE_VERSION(rsu, 1);
USB_PNP_HOST_INFO(rsu_devs);
-static const uint8_t rsu_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static uint8_t rsu_wme_ac_xfer_map[4] = {
[WME_AC_BE] = RSU_BULK_TX_BE_BK,
[WME_AC_BK] = RSU_BULK_TX_BE_BK,
@@ -789,9 +782,8 @@ rsu_getradiocaps(struct ieee80211com *ic,
setbit(bands, IEEE80211_MODE_11G);
if (sc->sc_ht)
setbit(bands, IEEE80211_MODE_11NG);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- rsu_chan_2ghz, nitems(rsu_chan_2ghz), bands,
- (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) != 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans,
+ bands, (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) != 0);
}
static void
@@ -2460,8 +2452,6 @@ rsu_rx_frame(struct rsu_softc *sc, struct mbuf *m)
tap->wr_rate = rxs.c_rate;
tap->wr_dbm_antsignal = rssi;
- tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags);
};
(void) ieee80211_add_rx_params(m, &rxs);
@@ -2762,15 +2752,16 @@ static int
rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
struct mbuf *m0, struct rsu_data *data)
{
- struct ieee80211com *ic = &sc->sc_ic;
+ const struct ieee80211_txparam *tp = ni->ni_txparms;
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211_frame *wh;
struct ieee80211_key *k = NULL;
struct r92s_tx_desc *txd;
- uint8_t type, cipher;
+ uint8_t rate, ridx, type, cipher, qos;
int prio = 0;
uint8_t which;
int hasqos;
+ int ismcast;
int xferlen;
int qid;
@@ -2778,10 +2769,26 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
wh = mtod(m0, struct ieee80211_frame *);
type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
+ ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
RSU_DPRINTF(sc, RSU_DEBUG_TX, "%s: data=%p, m=%p\n",
__func__, data, m0);
+ /* Choose a TX rate index. */
+ if (type == IEEE80211_FC0_TYPE_MGT ||
+ type == IEEE80211_FC0_TYPE_CTL ||
+ (m0->m_flags & M_EAPOL) != 0)
+ rate = tp->mgmtrate;
+ else if (ismcast)
+ rate = tp->mcastrate;
+ else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE)
+ rate = tp->ucastrate;
+ else
+ rate = 0;
+
+ if (rate != 0)
+ ridx = rate2ridx(rate);
+
if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
k = ieee80211_crypto_encap(ni, m0);
if (k == NULL) {
@@ -2799,12 +2806,14 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
prio = M_WME_GETAC(m0);
which = rsu_wme_ac_xfer_map[prio];
hasqos = 1;
+ qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0];
} else {
/* Non-QoS TID */
/* XXX TODO: tid=0 for non-qos TID? */
which = rsu_wme_ac_xfer_map[WME_AC_BE];
hasqos = 0;
prio = 0;
+ qos = 0;
}
qid = rsu_ac2qid[prio];
@@ -2860,8 +2869,23 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
}
/* XXX todo: set AGGEN bit if appropriate? */
txd->txdw2 |= htole32(R92S_TXDW2_BK);
- if (IEEE80211_IS_MULTICAST(wh->i_addr1))
+ if (ismcast)
txd->txdw2 |= htole32(R92S_TXDW2_BMCAST);
+
+ if (!ismcast && (!qos || (qos & IEEE80211_QOS_ACKPOLICY) !=
+ IEEE80211_QOS_ACKPOLICY_NOACK)) {
+ txd->txdw2 |= htole32(R92S_TXDW2_RTY_LMT_ENA);
+ txd->txdw2 |= htole32(SM(R92S_TXDW2_RTY_LMT, tp->maxretry));
+ }
+
+ /* Force mgmt / mcast / ucast rate if needed. */
+ if (rate != 0) {
+ /* Data rate fallback limit (max). */
+ txd->txdw5 |= htole32(SM(R92S_TXDW5_DATARATE_FB_LMT, 0x1f));
+ txd->txdw5 |= htole32(SM(R92S_TXDW5_DATARATE, ridx));
+ txd->txdw4 |= htole32(R92S_TXDW4_DRVRATE);
+ }
+
/*
* Firmware will use and increment the sequence number for the
* specified priority.
@@ -2872,8 +2896,6 @@ rsu_tx_start(struct rsu_softc *sc, struct ieee80211_node *ni,
struct rsu_tx_radiotap_header *tap = &sc->sc_txtap;
tap->wt_flags = 0;
- tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags);
ieee80211_radiotap_tx(vap, m0);
}
diff --git a/freebsd/sys/dev/usb/wlan/if_rsureg.h b/freebsd/sys/dev/usb/wlan/if_rsureg.h
index 973280cf..246b06b7 100644
--- a/freebsd/sys/dev/usb/wlan/if_rsureg.h
+++ b/freebsd/sys/dev/usb/wlan/if_rsureg.h
@@ -688,6 +688,9 @@ struct r92s_tx_desc {
#define R92S_TXDW1_HWPC 0x80000000
uint32_t txdw2;
+#define R92S_TXDW2_RTY_LMT_M 0x0000003f
+#define R92S_TXDW2_RTY_LMT_S 0
+#define R92S_TXDW2_RTY_LMT_ENA 0x00000040
#define R92S_TXDW2_BMCAST 0x00000080
#define R92S_TXDW2_AGGEN 0x20000000
#define R92S_TXDW2_BK 0x40000000
@@ -700,9 +703,14 @@ struct r92s_tx_desc {
uint32_t txdw4;
#define R92S_TXDW4_TXBW 0x00040000
+#define R92S_TXDW4_DRVRATE 0x80000000
uint32_t txdw5;
-#define R92S_TXDW5_DISFB 0x00008000
+#define R92S_TXDW5_DATARATE_M 0x00007e00
+#define R92S_TXDW5_DATARATE_S 9
+#define R92S_TXDW5_DISFB 0x00008000
+#define R92S_TXDW5_DATARATE_FB_LMT_M 0x001f0000
+#define R92S_TXDW5_DATARATE_FB_LMT_S 16
uint16_t ipchksum;
uint16_t tcpchksum;
@@ -792,9 +800,10 @@ struct rsu_rx_radiotap_header {
struct rsu_tx_radiotap_header {
struct ieee80211_radiotap_header wt_ihdr;
uint8_t wt_flags;
+ uint8_t wt_pad;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define RSU_TX_RADIOTAP_PRESENT \
(1 << IEEE80211_RADIOTAP_FLAGS | \
diff --git a/freebsd/sys/dev/usb/wlan/if_rum.c b/freebsd/sys/dev/usb/wlan/if_rum.c
index 5c826cac..c2f68406 100644
--- a/freebsd/sys/dev/usb/wlan/if_rum.c
+++ b/freebsd/sys/dev/usb/wlan/if_rum.c
@@ -46,10 +46,6 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -344,9 +340,6 @@ static const struct {
{ 107, 0x04 }
};
-static const uint8_t rum_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static const uint8_t rum_chan_5ghz[] =
{ 34, 36, 38, 40, 42, 44, 46, 48, 52, 56, 60, 64,
100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140,
@@ -3222,8 +3215,7 @@ rum_getradiocaps(struct ieee80211com *ic,
memset(bands, 0, sizeof(bands));
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- rum_chan_2ghz, nitems(rum_chan_2ghz), bands, 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans, bands, 0);
if (sc->rf_rev == RT2573_RF_5225 || sc->rf_rev == RT2573_RF_5226) {
setbit(bands, IEEE80211_MODE_11A);
diff --git a/freebsd/sys/dev/usb/wlan/if_rumvar.h b/freebsd/sys/dev/usb/wlan/if_rumvar.h
index 4ff831f4..e19a7088 100644
--- a/freebsd/sys/dev/usb/wlan/if_rumvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_rumvar.h
@@ -49,7 +49,7 @@ struct rum_tx_radiotap_header {
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
uint8_t wt_antenna;
-} __packed __aligned(8);
+} __packed;
#define RT2573_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/dev/usb/wlan/if_run.c b/freebsd/sys/dev/usb/wlan/if_run.c
index 3bd247e3..9f11a3a3 100644
--- a/freebsd/sys/dev/usb/wlan/if_run.c
+++ b/freebsd/sys/dev/usb/wlan/if_run.c
@@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_wlan.h>
#include <sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
@@ -46,10 +47,6 @@ __FBSDID("$FreeBSD$");
#include <sys/firmware.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -470,6 +467,7 @@ static void run_usb_timeout_cb(void *);
static void run_reset_livelock(struct run_softc *);
static void run_enable_tsf_sync(struct run_softc *);
static void run_enable_tsf(struct run_softc *);
+static void run_disable_tsf(struct run_softc *);
static void run_get_tsf(struct run_softc *, uint64_t *);
static void run_enable_mrr(struct run_softc *);
static void run_set_txpreamble(struct run_softc *);
@@ -2035,7 +2033,8 @@ run_read_eeprom(struct run_softc *sc)
static struct ieee80211_node *
run_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN])
{
- return malloc(sizeof (struct run_node), M_DEVBUF, M_NOWAIT | M_ZERO);
+ return malloc(sizeof (struct run_node), M_80211_NODE,
+ M_NOWAIT | M_ZERO);
}
static int
@@ -2095,7 +2094,6 @@ run_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
struct run_vap *rvp = RUN_VAP(vap);
enum ieee80211_state ostate;
uint32_t sta[3];
- uint32_t tmp;
uint8_t ratectl;
uint8_t restart_ratectl = 0;
uint8_t bid = 1 << rvp->rvp_id;
@@ -2128,12 +2126,8 @@ run_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
sc->runbmap &= ~bid;
/* abort TSF synchronization if there is no vap running */
- if (--sc->running == 0) {
- run_read(sc, RT2860_BCN_TIME_CFG, &tmp);
- run_write(sc, RT2860_BCN_TIME_CFG,
- tmp & ~(RT2860_BCN_TX_EN | RT2860_TSF_TIMER_EN |
- RT2860_TBTT_TIMER_EN));
- }
+ if (--sc->running == 0)
+ run_disable_tsf(sc);
break;
case IEEE80211_S_RUN:
@@ -2826,76 +2820,83 @@ run_rx_frame(struct run_softc *sc, struct mbuf *m, uint32_t dmalen)
uint8_t ant, rssi;
int8_t nf;
- rxwi = mtod(m, struct rt2860_rxwi *);
- len = le16toh(rxwi->len) & 0xfff;
rxwisize = sizeof(struct rt2860_rxwi);
if (sc->mac_ver == 0x5592)
rxwisize += sizeof(uint64_t);
else if (sc->mac_ver == 0x3593)
rxwisize += sizeof(uint32_t);
- if (__predict_false(len > dmalen)) {
- m_freem(m);
- counter_u64_add(ic->ic_ierrors, 1);
+
+ if (__predict_false(dmalen <
+ rxwisize + sizeof(struct ieee80211_frame_ack))) {
+ RUN_DPRINTF(sc, RUN_DEBUG_RECV,
+ "payload is too short: dma length %u < %zu\n",
+ dmalen, rxwisize + sizeof(struct ieee80211_frame_ack));
+ goto fail;
+ }
+
+ rxwi = mtod(m, struct rt2860_rxwi *);
+ len = le16toh(rxwi->len) & 0xfff;
+
+ if (__predict_false(len > dmalen - rxwisize)) {
RUN_DPRINTF(sc, RUN_DEBUG_RECV,
"bad RXWI length %u > %u\n", len, dmalen);
- return;
+ goto fail;
}
+
/* Rx descriptor is located at the end */
rxd = (struct rt2870_rxd *)(mtod(m, caddr_t) + dmalen);
flags = le32toh(rxd->flags);
if (__predict_false(flags & (RT2860_RX_CRCERR | RT2860_RX_ICVERR))) {
- m_freem(m);
- counter_u64_add(ic->ic_ierrors, 1);
RUN_DPRINTF(sc, RUN_DEBUG_RECV, "%s error.\n",
(flags & RT2860_RX_CRCERR)?"CRC":"ICV");
- return;
+ goto fail;
+ }
+
+ if (flags & RT2860_RX_L2PAD) {
+ RUN_DPRINTF(sc, RUN_DEBUG_RECV,
+ "received RT2860_RX_L2PAD frame\n");
+ len += 2;
}
m->m_data += rxwisize;
- m->m_pkthdr.len = m->m_len -= rxwisize;
+ m->m_pkthdr.len = m->m_len = len;
wh = mtod(m, struct ieee80211_frame *);
- if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
+ if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) != 0 &&
+ (flags & RT2860_RX_DEC) != 0) {
wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED;
m->m_flags |= M_WEP;
}
- if (flags & RT2860_RX_L2PAD) {
- RUN_DPRINTF(sc, RUN_DEBUG_RECV,
- "received RT2860_RX_L2PAD frame\n");
- len += 2;
- }
-
- ni = ieee80211_find_rxnode(ic,
- mtod(m, struct ieee80211_frame_min *));
+ if (len >= sizeof(struct ieee80211_frame_min)) {
+ ni = ieee80211_find_rxnode(ic,
+ mtod(m, struct ieee80211_frame_min *));
+ } else
+ ni = NULL;
if (__predict_false(flags & RT2860_RX_MICERR)) {
/* report MIC failures to net80211 for TKIP */
if (ni != NULL)
ieee80211_notify_michael_failure(ni->ni_vap, wh,
rxwi->keyidx);
- m_freem(m);
- counter_u64_add(ic->ic_ierrors, 1);
RUN_DPRINTF(sc, RUN_DEBUG_RECV,
"MIC error. Someone is lying.\n");
- return;
+ goto fail;
}
ant = run_maxrssi_chain(sc, rxwi);
rssi = rxwi->rssi[ant];
nf = run_rssi2dbm(sc, rssi, ant);
- m->m_pkthdr.len = m->m_len = len;
-
if (__predict_false(ieee80211_radiotap_active(ic))) {
struct run_rx_radiotap_header *tap = &sc->sc_rxtap;
uint16_t phy;
tap->wr_flags = 0;
- tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags);
+ if (flags & RT2860_RX_L2PAD)
+ tap->wr_flags |= IEEE80211_RADIOTAP_F_DATAPAD;
tap->wr_antsignal = rssi;
tap->wr_antenna = ant;
tap->wr_dbm_antsignal = run_rssi2dbm(sc, rssi, ant);
@@ -2936,6 +2937,12 @@ run_rx_frame(struct run_softc *sc, struct mbuf *m, uint32_t dmalen)
} else {
(void)ieee80211_input_all(ic, m, rssi, nf);
}
+
+ return;
+
+fail:
+ m_freem(m);
+ counter_u64_add(ic->ic_ierrors, 1);
}
static void
@@ -2945,7 +2952,7 @@ run_bulk_rx_callback(struct usb_xfer *xfer, usb_error_t error)
struct ieee80211com *ic = &sc->sc_ic;
struct mbuf *m = NULL;
struct mbuf *m0;
- uint32_t dmalen;
+ uint32_t dmalen, mbuf_len;
uint16_t rxwisize;
int xferlen;
@@ -3051,6 +3058,14 @@ tr_setup:
break;
}
+ mbuf_len = dmalen + sizeof(struct rt2870_rxd);
+ if (__predict_false(mbuf_len > MCLBYTES)) {
+ RUN_DPRINTF(sc, RUN_DEBUG_RECV_DESC | RUN_DEBUG_USB,
+ "payload is too big: mbuf_len %u\n", mbuf_len);
+ counter_u64_add(ic->ic_ierrors, 1);
+ break;
+ }
+
/* copy aggregated frames to another mbuf */
m0 = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (__predict_false(m0 == NULL)) {
@@ -3060,14 +3075,13 @@ tr_setup:
break;
}
m_copydata(m, 4 /* skip 32-bit DMA-len header */,
- dmalen + sizeof(struct rt2870_rxd), mtod(m0, caddr_t));
- m0->m_pkthdr.len = m0->m_len =
- dmalen + sizeof(struct rt2870_rxd);
+ mbuf_len, mtod(m0, caddr_t));
+ m0->m_pkthdr.len = m0->m_len = mbuf_len;
run_rx_frame(sc, m0, dmalen);
/* update data ptr */
- m->m_data += dmalen + 8;
- m->m_pkthdr.len = m->m_len -= dmalen + 8;
+ m->m_data += mbuf_len + 4;
+ m->m_pkthdr.len = m->m_len -= mbuf_len + 4;
}
/* make sure we free the source buffer, if any */
@@ -3149,16 +3163,23 @@ tr_setup:
vap = data->ni->ni_vap;
if (ieee80211_radiotap_active_vap(vap)) {
+ const struct ieee80211_frame *wh;
struct run_tx_radiotap_header *tap = &sc->sc_txtap;
struct rt2860_txwi *txwi =
(struct rt2860_txwi *)(&data->desc + sizeof(struct rt2870_txd));
+ int has_l2pad;
+
+ wh = mtod(m, struct ieee80211_frame *);
+ has_l2pad = IEEE80211_HAS_ADDR4(wh) !=
+ IEEE80211_QOS_HAS_SEQ(wh);
+
tap->wt_flags = 0;
tap->wt_rate = rt2860_rates[data->ridx].rate;
- tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags);
tap->wt_hwqueue = index;
if (le16toh(txwi->phy) & RT2860_PHY_SHPRE)
tap->wt_flags |= IEEE80211_RADIOTAP_F_SHORTPRE;
+ if (has_l2pad)
+ tap->wt_flags |= IEEE80211_RADIOTAP_F_DATAPAD;
ieee80211_radiotap_tx(vap, m);
}
@@ -3350,11 +3371,7 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
if ((hasqos = IEEE80211_QOS_HAS_SEQ(wh))) {
uint8_t *frm;
- if(IEEE80211_HAS_ADDR4(wh))
- frm = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos;
- else
- frm =((struct ieee80211_qosframe *)wh)->i_qos;
-
+ frm = ieee80211_getqos(wh);
qos = le16toh(*(const uint16_t *)frm);
tid = qos & IEEE80211_QOS_TID;
qid = TID_TO_WME_AC(tid);
@@ -4837,8 +4854,7 @@ run_getradiocaps(struct ieee80211com *ic,
memset(bands, 0, sizeof(bands));
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- run_chan_2ghz, nitems(run_chan_2ghz), bands, 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans, bands, 0);
if (sc->rf_rev == RT2860_RF_2750 || sc->rf_rev == RT2860_RF_2850 ||
sc->rf_rev == RT3070_RF_3052 || sc->rf_rev == RT3593_RF_3053 ||
@@ -4853,15 +4869,11 @@ static void
run_scan_start(struct ieee80211com *ic)
{
struct run_softc *sc = ic->ic_softc;
- uint32_t tmp;
RUN_LOCK(sc);
/* abort TSF synchronization */
- run_read(sc, RT2860_BCN_TIME_CFG, &tmp);
- run_write(sc, RT2860_BCN_TIME_CFG,
- tmp & ~(RT2860_BCN_TX_EN | RT2860_TSF_TIMER_EN |
- RT2860_TBTT_TIMER_EN));
+ run_disable_tsf(sc);
run_set_bssid(sc, ieee80211broadcastaddr);
RUN_UNLOCK(sc);
@@ -5148,6 +5160,18 @@ run_enable_tsf(struct run_softc *sc)
}
+/*
+ * Abort TSF synchronization: clear the RT2860_BCN_TX_EN,
+ * RT2860_TSF_TIMER_EN and RT2860_TBTT_TIMER_EN bits in the
+ * RT2860_BCN_TIME_CFG register.  The register is written back only when
+ * run_read() returned 0; otherwise 'tmp' is left unused and nothing is
+ * written.  The result of run_write() is not reported to the caller.
+ */
static void
+run_disable_tsf(struct run_softc *sc)
+{
+ uint32_t tmp;
+
+ if (run_read(sc, RT2860_BCN_TIME_CFG, &tmp) == 0) {
+ tmp &= ~(RT2860_BCN_TX_EN | RT2860_TSF_TIMER_EN |
+ RT2860_TBTT_TIMER_EN);
+ run_write(sc, RT2860_BCN_TIME_CFG, tmp);
+ }
+}
+
+static void
run_get_tsf(struct run_softc *sc, uint64_t *buf)
{
run_read_region_1(sc, RT2860_TSF_TIMER_DW0, (uint8_t *)buf,
@@ -6098,10 +6122,7 @@ run_init_locked(struct run_softc *sc)
}
/* abort TSF synchronization */
- run_read(sc, RT2860_BCN_TIME_CFG, &tmp);
- tmp &= ~(RT2860_BCN_TX_EN | RT2860_TSF_TIMER_EN |
- RT2860_TBTT_TIMER_EN);
- run_write(sc, RT2860_BCN_TIME_CFG, tmp);
+ run_disable_tsf(sc);
/* clear RX WCID search table */
run_set_region_4(sc, RT2860_WCID_ENTRY(0), 0, 512);
diff --git a/freebsd/sys/dev/usb/wlan/if_runreg.h b/freebsd/sys/dev/usb/wlan/if_runreg.h
index c09aac8f..8561d2c1 100644
--- a/freebsd/sys/dev/usb/wlan/if_runreg.h
+++ b/freebsd/sys/dev/usb/wlan/if_runreg.h
@@ -1086,9 +1086,6 @@ struct rt2860_rxwi {
/*
* Channel map for run(4) driver; taken from the table below.
*/
-static const uint8_t run_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static const uint8_t run_chan_5ghz[] =
{ 36, 38, 40, 44, 46, 48, 52, 54, 56, 60, 62, 64, 100, 102, 104,
108, 110, 112, 116, 118, 120, 124, 126, 128, 132, 134, 136, 140,
diff --git a/freebsd/sys/dev/usb/wlan/if_runvar.h b/freebsd/sys/dev/usb/wlan/if_runvar.h
index 7209bfc7..a17d5b46 100644
--- a/freebsd/sys/dev/usb/wlan/if_runvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_runvar.h
@@ -71,7 +71,7 @@ struct run_tx_radiotap_header {
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
uint8_t wt_hwqueue;
-} __packed __aligned(8);
+} __packed;
#define IEEE80211_RADIOTAP_HWQUEUE 15
diff --git a/freebsd/sys/dev/usb/wlan/if_uath.c b/freebsd/sys/dev/usb/wlan/if_uath.c
index 2b97060e..9f0c9d5d 100644
--- a/freebsd/sys/dev/usb/wlan/if_uath.c
+++ b/freebsd/sys/dev/usb/wlan/if_uath.c
@@ -87,10 +87,6 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -1282,8 +1278,8 @@ uath_watchdog(void *arg)
if (sc->sc_tx_timer > 0) {
if (--sc->sc_tx_timer == 0) {
device_printf(sc->sc_dev, "device timeout\n");
- /*uath_init(sc); XXX needs a process context! */
counter_u64_add(ic->ic_oerrors, 1);
+ ieee80211_restart_all(ic);
return;
}
callout_reset(&sc->watchdog_ch, hz, uath_watchdog, sc);
diff --git a/freebsd/sys/dev/usb/wlan/if_uathvar.h b/freebsd/sys/dev/usb/wlan/if_uathvar.h
index a38f54fc..a0ef4eab 100644
--- a/freebsd/sys/dev/usb/wlan/if_uathvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_uathvar.h
@@ -67,9 +67,10 @@ struct uath_rx_radiotap_header {
struct uath_tx_radiotap_header {
struct ieee80211_radiotap_header wt_ihdr;
uint8_t wt_flags;
+ uint8_t wt_pad;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define UATH_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/dev/usb/wlan/if_upgt.c b/freebsd/sys/dev/usb/wlan/if_upgt.c
index e1923bab..c556d108 100644
--- a/freebsd/sys/dev/usb/wlan/if_upgt.c
+++ b/freebsd/sys/dev/usb/wlan/if_upgt.c
@@ -43,7 +43,6 @@
#include <net/if_types.h>
#include <sys/bus.h>
-#include <machine/bus.h>
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_phy.h>
@@ -1618,7 +1617,7 @@ upgt_fw_load(struct upgt_softc *sc)
data_cmd->buflen = bsize;
upgt_bulk_tx(sc, data_cmd);
- DPRINTF(sc, UPGT_DEBUG_FW, "FW offset=%d, read=%d, sent=%d\n",
+ DPRINTF(sc, UPGT_DEBUG_FW, "FW offset=%zu, read=%d, sent=%d\n",
offset, n, bsize);
bsize = n;
}
@@ -1775,7 +1774,7 @@ upgt_fw_verify(struct upgt_softc *sc)
}
DPRINTF(sc, UPGT_DEBUG_FW,
- "firmware Boot Record Area found at offset %d\n", offset);
+ "firmware Boot Record Area found at offset %zu\n", offset);
/*
* Parse Boot Record Area (BRA) options.
diff --git a/freebsd/sys/dev/usb/wlan/if_upgtvar.h b/freebsd/sys/dev/usb/wlan/if_upgtvar.h
index ce996f6a..9d4c85e6 100644
--- a/freebsd/sys/dev/usb/wlan/if_upgtvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_upgtvar.h
@@ -394,7 +394,7 @@ struct upgt_tx_radiotap_header {
uint8_t wt_rate;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define UPGT_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/dev/usb/wlan/if_ural.c b/freebsd/sys/dev/usb/wlan/if_ural.c
index 4de0a9c5..8897f148 100644
--- a/freebsd/sys/dev/usb/wlan/if_ural.c
+++ b/freebsd/sys/dev/usb/wlan/if_ural.c
@@ -47,10 +47,6 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -363,9 +359,6 @@ static const struct {
{ 161, 0x08808, 0x0242f, 0x00281 }
};
-static const uint8_t ural_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static const uint8_t ural_chan_5ghz[] =
{ 36, 40, 44, 48, 52, 56, 60, 64,
100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140,
@@ -1593,8 +1586,7 @@ ural_getradiocaps(struct ieee80211com *ic,
memset(bands, 0, sizeof(bands));
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- ural_chan_2ghz, nitems(ural_chan_2ghz), bands, 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans, bands, 0);
if (sc->rf_rev == RAL_RF_5222) {
setbit(bands, IEEE80211_MODE_11A);
diff --git a/freebsd/sys/dev/usb/wlan/if_uralvar.h b/freebsd/sys/dev/usb/wlan/if_uralvar.h
index dd863fe0..b59b7911 100644
--- a/freebsd/sys/dev/usb/wlan/if_uralvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_uralvar.h
@@ -51,7 +51,7 @@ struct ural_tx_radiotap_header {
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
uint8_t wt_antenna;
-} __packed __aligned(8);
+} __packed;
#define RAL_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/dev/usb/wlan/if_urtw.c b/freebsd/sys/dev/usb/wlan/if_urtw.c
index 309375f0..aba334af 100644
--- a/freebsd/sys/dev/usb/wlan/if_urtw.c
+++ b/freebsd/sys/dev/usb/wlan/if_urtw.c
@@ -36,10 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
@@ -217,9 +213,6 @@ static uint8_t urtw_8225z2_agc[] = {
0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31
};
-static const uint8_t urtw_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
static uint32_t urtw_8225_channel[] = {
0x0000, /* dummy channel 0 */
0x085c, /* 1 */
@@ -679,6 +672,7 @@ static void urtw_scan_end(struct ieee80211com *);
static void urtw_getradiocaps(struct ieee80211com *, int, int *,
struct ieee80211_channel[]);
static void urtw_set_channel(struct ieee80211com *);
+static void urtw_update_promisc(struct ieee80211com *);
static void urtw_update_mcast(struct ieee80211com *);
static int urtw_tx_start(struct urtw_softc *,
struct ieee80211_node *, struct mbuf *,
@@ -760,6 +754,7 @@ static void urtw_free_tx_data_list(struct urtw_softc *);
static void urtw_free_rx_data_list(struct urtw_softc *);
static void urtw_free_data_list(struct urtw_softc *,
struct urtw_data data[], int, int);
+static usb_error_t urtw_set_macaddr(struct urtw_softc *, const uint8_t *);
static usb_error_t urtw_adapter_start(struct urtw_softc *);
static usb_error_t urtw_adapter_start_b(struct urtw_softc *);
static usb_error_t urtw_set_mode(struct urtw_softc *, uint32_t);
@@ -905,6 +900,7 @@ urtw_attach(device_t dev)
ic->ic_updateslot = urtw_updateslot;
ic->ic_vap_create = urtw_vap_create;
ic->ic_vap_delete = urtw_vap_delete;
+ ic->ic_update_promisc = urtw_update_promisc;
ic->ic_update_mcast = urtw_update_mcast;
ic->ic_parent = urtw_parent;
ic->ic_transmit = urtw_transmit;
@@ -1194,9 +1190,23 @@ fail:
}
+/*
+ * Program a 6-byte station address into the URTW_MAC0 (first four bytes)
+ * and URTW_MAC4 (last two bytes) registers.
+ *
+ * The register values are assembled byte by byte, lowest address byte in
+ * the least significant position.  On little-endian hosts this yields the
+ * same values as loading the address through a (const uint32_t *) cast, but
+ * it never reads past the sixth byte and does not require 'macaddr' to be
+ * 32-bit aligned.
+ *
+ * Returns the status left in 'error'.  NOTE(review): this relies on the
+ * urtw_write32_m()/urtw_write16_m() macros assigning 'error' and jumping to
+ * the 'fail' label on failure, as their other users in this file suggest;
+ * confirm against the macro definitions.
+ */
static usb_error_t
+urtw_set_macaddr(struct urtw_softc *sc, const uint8_t *macaddr)
+{
+ usb_error_t error;
+ uint32_t mac0;
+ uint16_t mac4;
+
+ mac0 = (uint32_t)macaddr[0] | ((uint32_t)macaddr[1] << 8) |
+ ((uint32_t)macaddr[2] << 16) | ((uint32_t)macaddr[3] << 24);
+ mac4 = (uint16_t)(macaddr[4] | (macaddr[5] << 8));
+
+ urtw_write32_m(sc, URTW_MAC0, mac0);
+ urtw_write16_m(sc, URTW_MAC4, mac4);
+
+fail:
+ return (error);
+}
+
+static usb_error_t
urtw_adapter_start(struct urtw_softc *sc)
{
struct ieee80211com *ic = &sc->sc_ic;
+ struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
+ const uint8_t *macaddr;
usb_error_t error;
error = urtw_reset(sc);
@@ -1216,8 +1226,11 @@ urtw_adapter_start(struct urtw_softc *sc)
if (error)
goto fail;
/* applying MAC address again. */
- urtw_write32_m(sc, URTW_MAC0, ((uint32_t *)ic->ic_macaddr)[0]);
- urtw_write16_m(sc, URTW_MAC4, ((uint32_t *)ic->ic_macaddr)[1] & 0xffff);
+ /* use the first vap's address if one exists, else the device address */
+ macaddr = vap ? vap->iv_myaddr : ic->ic_macaddr;
+ error = urtw_set_macaddr(sc, macaddr);
+ if (error)
+ goto fail;
+
error = urtw_set_mode(sc, URTW_EPROM_CMD_NORMAL);
if (error)
goto fail;
@@ -1587,8 +1600,7 @@ urtw_getradiocaps(struct ieee80211com *ic,
memset(bands, 0, sizeof(bands));
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- urtw_chan_2ghz, nitems(urtw_chan_2ghz), bands, 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans, bands, 0);
}
static void
@@ -1641,6 +1653,17 @@ fail:
}
+/*
+ * Handler installed as ic->ic_update_promisc.  Takes the softc lock and,
+ * only while URTW_RUNNING is set in sc_flags, calls urtw_rx_setconf() to
+ * rewrite the receive configuration.  The value returned by
+ * urtw_rx_setconf() is not checked.
+ */
static void
+urtw_update_promisc(struct ieee80211com *ic)
+{
+ struct urtw_softc *sc = ic->ic_softc;
+
+ URTW_LOCK(sc);
+ if (sc->sc_flags & URTW_RUNNING)
+ urtw_rx_setconf(sc);
+ URTW_UNLOCK(sc);
+}
+
+static void
urtw_update_mcast(struct ieee80211com *ic)
{
@@ -1694,11 +1717,7 @@ urtw_tx_start(struct urtw_softc *sc, struct ieee80211_node *ni, struct mbuf *m0,
if (ieee80211_radiotap_active_vap(vap)) {
struct urtw_tx_radiotap_header *tap = &sc->sc_txtap;
- /* XXX Are variables correct? */
tap->wt_flags = 0;
- tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags);
-
ieee80211_radiotap_tx(vap, m0);
}
@@ -1892,11 +1911,13 @@ static void
urtw_watchdog(void *arg)
{
struct urtw_softc *sc = arg;
+ struct ieee80211com *ic = &sc->sc_ic;
if (sc->sc_txtimer > 0) {
if (--sc->sc_txtimer == 0) {
device_printf(sc->sc_dev, "device timeout\n");
- counter_u64_add(sc->sc_ic.ic_oerrors, 1);
+ counter_u64_add(ic->ic_oerrors, 1);
+ ieee80211_restart_all(ic);
return;
}
callout_reset(&sc->sc_watchdog_ch, hz, urtw_watchdog, sc);
@@ -3179,6 +3200,8 @@ static usb_error_t
urtw_8225v2b_rf_init(struct urtw_softc *sc)
{
struct ieee80211com *ic = &sc->sc_ic;
+ struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
+ const uint8_t *macaddr;
unsigned int i;
uint8_t data8;
usb_error_t error;
@@ -3226,8 +3249,10 @@ urtw_8225v2b_rf_init(struct urtw_softc *sc)
urtw_write8_m(sc, URTW_CONFIG1, data8);
/* applying MAC address again. */
- urtw_write32_m(sc, URTW_MAC0, ((uint32_t *)ic->ic_macaddr)[0]);
- urtw_write16_m(sc, URTW_MAC4, ((uint32_t *)ic->ic_macaddr)[1] & 0xffff);
+ macaddr = vap ? vap->iv_myaddr : ic->ic_macaddr;
+ error = urtw_set_macaddr(sc, macaddr);
+ if (error)
+ goto fail;
error = urtw_set_mode(sc, URTW_EPROM_CMD_NORMAL);
if (error)
@@ -3887,7 +3912,6 @@ urtw_rx_setconf(struct urtw_softc *sc)
if (sc->sc_flags & URTW_RTL8187B) {
data = data | URTW_RX_FILTER_MNG | URTW_RX_FILTER_DATA |
URTW_RX_FILTER_MCAST | URTW_RX_FILTER_BCAST |
- URTW_RX_FILTER_NICMAC | URTW_RX_CHECK_BSSID |
URTW_RX_FIFO_THRESHOLD_NONE |
URTW_MAX_RX_DMA_2048 |
URTW_RX_AUTORESETPHY | URTW_RCR_ONLYERLPKT;
@@ -3902,14 +3926,6 @@ urtw_rx_setconf(struct urtw_softc *sc)
if (sc->sc_crcmon == 1 && ic->ic_opmode == IEEE80211_M_MONITOR)
data = data | URTW_RX_FILTER_CRCERR;
- if (ic->ic_opmode == IEEE80211_M_MONITOR ||
- ic->ic_promisc > 0 || ic->ic_allmulti > 0) {
- data = data | URTW_RX_FILTER_ALLMAC;
- } else {
- data = data | URTW_RX_FILTER_NICMAC;
- data = data | URTW_RX_CHECK_BSSID;
- }
-
data = data &~ URTW_RX_FIFO_THRESHOLD_MASK;
data = data | URTW_RX_FIFO_THRESHOLD_NONE |
URTW_RX_AUTORESETPHY;
@@ -3917,6 +3933,16 @@ urtw_rx_setconf(struct urtw_softc *sc)
data = data | URTW_MAX_RX_DMA_2048 | URTW_RCR_ONLYERLPKT;
}
+ /* XXX allmulti should not be checked here... */
+ if (ic->ic_opmode == IEEE80211_M_MONITOR ||
+ ic->ic_promisc > 0 || ic->ic_allmulti > 0) {
+ data = data | URTW_RX_FILTER_CTL;
+ data = data | URTW_RX_FILTER_ALLMAC;
+ } else {
+ data = data | URTW_RX_FILTER_NICMAC;
+ data = data | URTW_RX_CHECK_BSSID;
+ }
+
urtw_write32_m(sc, URTW_RX, data);
fail:
return (error);
@@ -3932,50 +3958,56 @@ urtw_rxeof(struct usb_xfer *xfer, struct urtw_data *data, int *rssi_p,
struct urtw_softc *sc = data->sc;
struct ieee80211com *ic = &sc->sc_ic;
uint8_t noise = 0, rate;
+ uint64_t mactime;
usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL);
- if (actlen < (int)URTW_MIN_RXBUFSZ) {
- counter_u64_add(ic->ic_ierrors, 1);
- return (NULL);
- }
-
if (sc->sc_flags & URTW_RTL8187B) {
struct urtw_8187b_rxhdr *rx;
+ if (actlen < sizeof(*rx) + IEEE80211_ACK_LEN)
+ goto fail;
+
rx = (struct urtw_8187b_rxhdr *)(data->buf +
(actlen - (sizeof(struct urtw_8187b_rxhdr))));
flen = le32toh(rx->flag) & 0xfff;
- if (flen > actlen) {
- counter_u64_add(ic->ic_ierrors, 1);
- return (NULL);
- }
+ if (flen > actlen - sizeof(*rx))
+ goto fail;
+
rate = (le32toh(rx->flag) >> URTW_RX_FLAG_RXRATE_SHIFT) & 0xf;
/* XXX correct? */
rssi = rx->rssi & URTW_RX_RSSI_MASK;
noise = rx->noise;
+
+ if (ieee80211_radiotap_active(ic))
+ mactime = rx->mactime;
} else {
struct urtw_8187l_rxhdr *rx;
+ if (actlen < sizeof(*rx) + IEEE80211_ACK_LEN)
+ goto fail;
+
rx = (struct urtw_8187l_rxhdr *)(data->buf +
(actlen - (sizeof(struct urtw_8187l_rxhdr))));
flen = le32toh(rx->flag) & 0xfff;
- if (flen > actlen) {
- counter_u64_add(ic->ic_ierrors, 1);
- return (NULL);
- }
+ if (flen > actlen - sizeof(*rx))
+ goto fail;
rate = (le32toh(rx->flag) >> URTW_RX_FLAG_RXRATE_SHIFT) & 0xf;
/* XXX correct? */
rssi = rx->rssi & URTW_RX_8187L_RSSI_MASK;
noise = rx->noise;
+
+ if (ieee80211_radiotap_active(ic))
+ mactime = rx->mactime;
}
+ if (flen < IEEE80211_ACK_LEN)
+ goto fail;
+
mnew = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
- if (mnew == NULL) {
- counter_u64_add(ic->ic_ierrors, 1);
- return (NULL);
- }
+ if (mnew == NULL)
+ goto fail;
m = data->m;
data->m = mnew;
@@ -3987,20 +4019,23 @@ urtw_rxeof(struct usb_xfer *xfer, struct urtw_data *data, int *rssi_p,
if (ieee80211_radiotap_active(ic)) {
struct urtw_rx_radiotap_header *tap = &sc->sc_rxtap;
- /* XXX Are variables correct? */
- tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq);
- tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags);
+ tap->wr_tsf = mactime;
+ tap->wr_flags = 0;
tap->wr_dbm_antsignal = (int8_t)rssi;
}
wh = mtod(m, struct ieee80211_frame *);
- if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_DATA)
+ if (IEEE80211_IS_DATA(wh))
sc->sc_currate = (rate > 0) ? rate : sc->sc_currate;
*rssi_p = rssi;
*nf_p = noise; /* XXX correct? */
return (m);
+
+fail:
+ counter_u64_add(ic->ic_ierrors, 1);
+ return (NULL);
}
static void
@@ -4008,7 +4043,6 @@ urtw_bulk_rx_callback(struct usb_xfer *xfer, usb_error_t error)
{
struct urtw_softc *sc = usbd_xfer_softc(xfer);
struct ieee80211com *ic = &sc->sc_ic;
- struct ieee80211_frame *wh;
struct ieee80211_node *ni;
struct mbuf *m = NULL;
struct urtw_data *data;
@@ -4046,9 +4080,13 @@ setup:
*/
URTW_UNLOCK(sc);
if (m != NULL) {
- wh = mtod(m, struct ieee80211_frame *);
- ni = ieee80211_find_rxnode(ic,
- (struct ieee80211_frame_min *)wh);
+ if (m->m_pkthdr.len >=
+ sizeof(struct ieee80211_frame_min)) {
+ ni = ieee80211_find_rxnode(ic,
+ mtod(m, struct ieee80211_frame_min *));
+ } else
+ ni = NULL;
+
if (ni != NULL) {
(void) ieee80211_input(ni, m, rssi, nf);
/* node is no longer needed */
diff --git a/freebsd/sys/dev/usb/wlan/if_urtwvar.h b/freebsd/sys/dev/usb/wlan/if_urtwvar.h
index 08ffc8f3..87c24d64 100644
--- a/freebsd/sys/dev/usb/wlan/if_urtwvar.h
+++ b/freebsd/sys/dev/usb/wlan/if_urtwvar.h
@@ -47,10 +47,6 @@ struct urtw_data {
};
typedef STAILQ_HEAD(, urtw_data) urtw_datahead;
-/* XXX not correct.. */
-#define URTW_MIN_RXBUFSZ \
- (sizeof(struct ieee80211_frame_min))
-
#define URTW_RX_DATA_LIST_COUNT 4
#define URTW_TX_DATA_LIST_COUNT 16
#define URTW_RX_MAXSIZE 0x9c4
@@ -59,23 +55,27 @@ typedef STAILQ_HEAD(, urtw_data) urtw_datahead;
struct urtw_rx_radiotap_header {
struct ieee80211_radiotap_header wr_ihdr;
+ uint64_t wr_tsf;
uint8_t wr_flags;
+ uint8_t wr_pad;
uint16_t wr_chan_freq;
uint16_t wr_chan_flags;
int8_t wr_dbm_antsignal;
} __packed __aligned(8);
#define URTW_RX_RADIOTAP_PRESENT \
- ((1 << IEEE80211_RADIOTAP_FLAGS) | \
+ ((1 << IEEE80211_RADIOTAP_TSFT) | \
+ (1 << IEEE80211_RADIOTAP_FLAGS) | \
(1 << IEEE80211_RADIOTAP_CHANNEL) | \
(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL))
struct urtw_tx_radiotap_header {
struct ieee80211_radiotap_header wt_ihdr;
uint8_t wt_flags;
+ uint8_t wt_pad;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define URTW_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/dev/usb/wlan/if_zyd.c b/freebsd/sys/dev/usb/wlan/if_zyd.c
index 1835b58b..bb4a9e40 100644
--- a/freebsd/sys/dev/usb/wlan/if_zyd.c
+++ b/freebsd/sys/dev/usb/wlan/if_zyd.c
@@ -46,10 +46,6 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <sys/kdb.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/rman.h>
-
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -2891,8 +2887,7 @@ zyd_getradiocaps(struct ieee80211com *ic,
memset(bands, 0, sizeof(bands));
setbit(bands, IEEE80211_MODE_11B);
setbit(bands, IEEE80211_MODE_11G);
- ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
- zyd_chan_2ghz, nitems(zyd_chan_2ghz), bands, 0);
+ ieee80211_add_channels_default_2ghz(chans, maxchans, nchans, bands, 0);
}
static void
diff --git a/freebsd/sys/dev/usb/wlan/if_zydreg.h b/freebsd/sys/dev/usb/wlan/if_zydreg.h
index 724b8c57..a4523199 100644
--- a/freebsd/sys/dev/usb/wlan/if_zydreg.h
+++ b/freebsd/sys/dev/usb/wlan/if_zydreg.h
@@ -421,10 +421,6 @@
#define ZYD_CR254 0x93f8
#define ZYD_CR255 0x93fc
-/* nitems(ZYD_*_CHANTABLE) */
-static const uint8_t zyd_chan_2ghz[] =
- { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
-
/* copied nearly verbatim from the Linux driver rewrite */
#define ZYD_DEF_PHY \
{ \
@@ -1204,7 +1200,7 @@ struct zyd_tx_radiotap_header {
uint8_t wt_rate;
uint16_t wt_chan_freq;
uint16_t wt_chan_flags;
-} __packed __aligned(8);
+} __packed;
#define ZYD_TX_RADIOTAP_PRESENT \
((1 << IEEE80211_RADIOTAP_FLAGS) | \
diff --git a/freebsd/sys/fs/devfs/devfs_vnops.c b/freebsd/sys/fs/devfs/devfs_vnops.c
index 176664db..ac1f21ee 100644
--- a/freebsd/sys/fs/devfs/devfs_vnops.c
+++ b/freebsd/sys/fs/devfs/devfs_vnops.c
@@ -47,12 +47,14 @@
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
@@ -1441,7 +1443,6 @@ devfs_reclaim(struct vop_reclaim_args *ap)
vp->v_data = NULL;
}
mtx_unlock(&devfs_de_interlock);
- vnode_destroy_vobject(vp);
return (0);
}
diff --git a/freebsd/sys/i386/include/machine/cpufunc.h b/freebsd/sys/i386/include/machine/cpufunc.h
index c640b569..a029da3b 100644
--- a/freebsd/sys/i386/include/machine/cpufunc.h
+++ b/freebsd/sys/i386/include/machine/cpufunc.h
@@ -110,23 +110,49 @@ disable_intr(void)
__asm __volatile("cli" : : : "memory");
}
+#ifdef _KERNEL
static __inline void
do_cpuid(u_int ax, u_int *p)
{
__asm __volatile("cpuid"
- : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
- : "0" (ax));
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
}
static __inline void
cpuid_count(u_int ax, u_int cx, u_int *p)
{
__asm __volatile("cpuid"
- : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
- : "0" (ax), "c" (cx));
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax), "c" (cx));
+}
+#else
+/*
+ * Non-_KERNEL variant of do_cpuid(): execute CPUID with 'ax' in %eax and
+ * store the resulting %eax, %ebx, %ecx and %edx in p[0], p[1], p[2] and
+ * p[3].  %ebx is pushed before and popped after the instruction, and its
+ * value is copied out through operand 1, which the "=DS" constraint places
+ * in %edi or %esi, instead of naming %ebx as an output.
+ * NOTE(review): presumably because %ebx can be reserved outside the kernel
+ * (e.g. as the PIC register) -- confirm.
+ */
+static __inline void
+do_cpuid(u_int ax, u_int *p)
+{
+ __asm __volatile(
+ "pushl\t%%ebx\n\t"
+ "cpuid\n\t"
+ "movl\t%%ebx,%1\n\t"
+ "popl\t%%ebx"
+ : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
}
+/*
+ * Non-_KERNEL variant of cpuid_count(): like do_cpuid(), but additionally
+ * loads 'cx' into %ecx before executing CPUID.  Results are stored in
+ * p[0..3] as %eax, %ebx, %ecx, %edx; %ebx is saved/restored on the stack and
+ * its value is returned through %edi or %esi ("=DS").
+ */
static __inline void
+cpuid_count(u_int ax, u_int cx, u_int *p)
+{
+ __asm __volatile(
+ "pushl\t%%ebx\n\t"
+ "cpuid\n\t"
+ "movl\t%%ebx,%1\n\t"
+ "popl\t%%ebx"
+ : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax), "c" (cx));
+}
+#endif
+
+static __inline void
enable_intr(void)
{
@@ -708,6 +734,22 @@ intr_restore(register_t eflags)
}
#endif /* __rtems__ */
+/*
+ * Execute the RDPKRU instruction with %ecx set to 0 and return the value it
+ * leaves in %eax.  %edx is listed as clobbered because the instruction
+ * overwrites it.
+ */
+static __inline uint32_t
+rdpkru(void)
+{
+ uint32_t res;
+
+ __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx");
+ return (res);
+}
+
+/*
+ * Execute the WRPKRU instruction with 'mask' in %eax and both %ecx and %edx
+ * set to 0.
+ */
+static __inline void
+wrpkru(uint32_t mask)
+{
+
+ __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0));
+}
+
#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
#ifndef __rtems__
diff --git a/freebsd/sys/i386/include/machine/md_var.h b/freebsd/sys/i386/include/machine/md_var.h
index 53e1861c..b20446dc 100644
--- a/freebsd/sys/i386/include/machine/md_var.h
+++ b/freebsd/sys/i386/include/machine/md_var.h
@@ -69,6 +69,8 @@ void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
void fill_based_sd(struct segment_descriptor *sdp, uint32_t base);
void i686_pagezero(void *addr);
void sse2_pagezero(void *addr);
+int minidumpsys_nopae(struct dumperinfo *);
+int minidumpsys_pae(struct dumperinfo *);
void init_AMD_Elan_sc520(void);
vm_paddr_t kvtop(void *addr);
void panicifcpuunsupported(void);
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index c6a9e310..2265d89a 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/epoch.h>
+#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -110,6 +111,14 @@ struct thread0_storage thread0_st __aligned(32);
struct vmspace vmspace0;
struct proc *initproc;
+/*
+ * Do-nothing implementation used as the initial value of the
+ * lkpi_alloc_current hook below: ignores both arguments and returns 0.
+ */
+int
+linux_alloc_current_noop(struct thread *td __unused, int flags __unused)
+{
+ return (0);
+}
+/* Hook pointer; initialised to the no-op above. */
+int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop;
+
+
#ifndef BOOTHOWTO
#define BOOTHOWTO 0
#endif
@@ -155,11 +164,6 @@ SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
-EVENTHANDLER_LIST_DECLARE(process_init);
-EVENTHANDLER_LIST_DECLARE(thread_init);
-EVENTHANDLER_LIST_DECLARE(process_ctor);
-EVENTHANDLER_LIST_DECLARE(thread_ctor);
-
/*
* Merge a new sysinit set into the current set, reallocating it if
* necessary. This can only be called after malloc is running.
@@ -440,7 +444,6 @@ struct sysentvec null_sysvec = {
.sv_coredump = NULL,
.sv_imgact_try = NULL,
.sv_minsigstksz = 0,
- .sv_pagesize = PAGE_SIZE,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
@@ -482,7 +485,7 @@ proc0_init(void *dummy __unused)
GIANT_REQUIRED;
p = &proc0;
td = &thread0;
-
+
/*
* Initialize magic number and osrel.
*/
@@ -822,11 +825,9 @@ start_init(void *dummy)
}
/*
- * Like kproc_create(), but runs in its own address space.
- * We do this early to reserve pid 1.
- *
- * Note special case - do not make it runnable yet. Other work
- * in progress will change this more.
+ * Like kproc_create(), but runs in its own address space. We do this
+ * early to reserve pid 1. Note special case - do not make it
+ * runnable yet, init execution is started when userspace can be served.
*/
static void
create_init(const void *udata __unused)
diff --git a/freebsd/sys/kern/kern_conf.c b/freebsd/sys/kern/kern_conf.c
index 560a450a..26718648 100644
--- a/freebsd/sys/kern/kern_conf.c
+++ b/freebsd/sys/kern/kern_conf.c
@@ -656,7 +656,7 @@ prep_cdevsw(struct cdevsw *devsw, int flags)
return (0);
}
- if (devsw->d_version != D_VERSION_03) {
+ if (devsw->d_version != D_VERSION_04) {
printf(
"WARNING: Device driver \"%s\" has wrong version %s\n",
devsw->d_name == NULL ? "???" : devsw->d_name,
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 5c75c657..f0700e55 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 122e82bd..65b633f6 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -101,7 +101,7 @@ struct proc *intrproc;
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
-static int intr_storm_threshold = 1000;
+static int intr_storm_threshold = 0;
SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN,
&intr_storm_threshold, 0,
"Number of consecutive interrupts before storm protection is enabled");
@@ -231,10 +231,20 @@ intr_event_update(struct intr_event *ie)
}
/*
- * If the handler names were too long, add +'s to indicate missing
- * names. If we run out of room and still have +'s to add, change
- * the last character from a + to a *.
+ * If there is only one handler and its name is too long, just copy in
+ * as much of the end of the name (includes the unit number) as will
+ * fit. Otherwise, we have multiple handlers and not all of the names
+ * will fit. Add +'s to indicate missing names. If we run out of room
+ * and still have +'s to add, change the last character from a + to a *.
*/
+ if (missed == 1 && space == 1) {
+ ih = CK_SLIST_FIRST(&ie->ie_handlers);
+ missed = strlen(ie->ie_fullname) + strlen(ih->ih_name) + 2 -
+ sizeof(ie->ie_fullname);
+ strcat(ie->ie_fullname, (missed == 0) ? " " : "-");
+ strcat(ie->ie_fullname, &ih->ih_name[missed]);
+ missed = 0;
+ }
last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
while (missed-- > 0) {
if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
@@ -393,6 +403,25 @@ intr_event_bind_ithread(struct intr_event *ie, int cpu)
return (_intr_event_bind(ie, cpu, false, true));
}
+/*
+ * Bind an interrupt event's ithread to the specified cpuset.
+ */
+int
+intr_event_bind_ithread_cpuset(struct intr_event *ie, cpuset_t *cs)
+{
+ lwpid_t id;
+
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread != NULL) {
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ return (cpuset_setthread(id, cs));
+ } else {
+ mtx_unlock(&ie->ie_lock);
+ }
+ return (ENODEV);
+}
+
static struct intr_event *
intr_lookup(int irq)
{
diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c
index f94eda5b..85846acd 100644
--- a/freebsd/sys/kern/kern_mbuf.c
+++ b/freebsd/sys/kern/kern_mbuf.c
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_param.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <sys/param.h>
#include <sys/conf.h>
@@ -43,13 +44,20 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/refcount.h>
+#include <sys/sf_buf.h>
#include <sys/smp.h>
+#include <sys/socket.h>
#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -112,11 +120,20 @@ int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
+bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */
+SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
+ &mb_use_ext_pgs, 0,
+ "Use unmapped mbufs for sendfile(2) and TLS offload");
+
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
"Maximum real memory allocatable to various mbuf types");
+static counter_u64_t snd_tag_count;
+SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
+ &snd_tag_count, "# of active mbuf send tags");
+
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
*/
@@ -285,6 +302,7 @@ uma_zone_t zone_pack;
uma_zone_t zone_jumbop;
uma_zone_t zone_jumbo9;
uma_zone_t zone_jumbo16;
+uma_zone_t zone_extpgs;
/*
* Local prototypes.
@@ -302,6 +320,9 @@ static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
+_Static_assert(sizeof(struct mbuf_ext_pgs) == 256,
+ "mbuf_ext_pgs size mismatch");
+
/*
* Initialize FreeBSD Network buffer allocation.
*/
@@ -383,6 +404,15 @@ mbuf_init(void *dummy)
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
+ zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME,
+ sizeof(struct mbuf_ext_pgs),
+#ifdef INVARIANTS
+ trash_ctor, trash_dtor, trash_init, trash_fini,
+#else
+ NULL, NULL, NULL, NULL,
+#endif
+ UMA_ALIGN_CACHE, 0);
+
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
@@ -392,6 +422,8 @@ mbuf_init(void *dummy)
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
#endif /* __rtems__ */
+
+ snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@@ -697,14 +729,14 @@ mb_dtor_pack(void *mem, int size, void *arg)
#endif
/*
* If there are processes blocked on zone_clust, waiting for pages
- * to be freed up, * cause them to be woken up by draining the
- * packet zone. We are exposed to a race here * (in the check for
+ * to be freed up, cause them to be woken up by draining the
+ * packet zone. We are exposed to a race here (in the check for
* the UMA_ZFLAG_FULL) where we might miss the flag set, but that
* is deliberate. We don't want to acquire the zone lock for every
* mbuf free.
*/
if (uma_zone_exhausted_nolock(zone_clust))
- zone_drain(zone_pack);
+ uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
}
/*
@@ -832,6 +864,384 @@ mb_reclaim(uma_zone_t zone __unused, int pending __unused)
}
/*
+ * Free "count" units of I/O from an mbuf chain. They could be held
+ * in EXT_PGS or just as a normal mbuf. This code is intended to be
+ * called in an error path (I/O error, closed connection, etc).
+ */
+void
+mb_free_notready(struct mbuf *m, int count)
+{
+ int i;
+
+ for (i = 0; i < count && m != NULL; i++) {
+#ifndef __rtems__
+ if ((m->m_flags & M_EXT) != 0 &&
+ m->m_ext.ext_type == EXT_PGS) {
+ m->m_ext.ext_pgs->nrdy--;
+ if (m->m_ext.ext_pgs->nrdy != 0)
+ continue;
+ }
+#endif /* __rtems__ */
+ m = m_free(m);
+ }
+ KASSERT(i == count, ("Removed only %d items from %p", i, m));
+}
+
+#ifndef __rtems__
+/*
+ * Compress an unmapped mbuf into a simple mbuf when it holds a small
+ * amount of data. This is used as a DoS defense to avoid having
+ * small packets tie up wired pages, an ext_pgs structure, and an
+ * mbuf. Since this converts the existing mbuf in place, it can only
+ * be used if there are no other references to 'm'.
+ */
+int
+mb_unmapped_compress(struct mbuf *m)
+{
+ volatile u_int *refcnt;
+ struct mbuf m_temp;
+
+ /*
+ * Assert that 'm' does not have a packet header. If 'm' had
+ * a packet header, it would only be able to hold MHLEN bytes
+ * and m_data would have to be initialized differently.
+ */
+ KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) &&
+ m->m_ext.ext_type == EXT_PGS,
+ ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m));
+ KASSERT(m->m_len <= MLEN, ("m_len too large %p", m));
+
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ }
+
+ if (*refcnt != 1)
+ return (EBUSY);
+
+ /*
+ * Copy mbuf header and m_ext portion of 'm' to 'm_temp' to
+ * create a "fake" EXT_PGS mbuf that can be used with
+ * m_copydata() as well as the ext_free callback.
+ */
+ memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext));
+ m_temp.m_next = NULL;
+ m_temp.m_nextpkt = NULL;
+
+ /* Turn 'm' into a "normal" mbuf. */
+ m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP);
+ m->m_data = m->m_dat;
+
+ /* Copy data from template's ext_pgs. */
+ m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t));
+
+ /* Free the backing pages. */
+ m_temp.m_ext.ext_free(&m_temp);
+
+ /* Finally, free the ext_pgs struct. */
+ uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs);
+ return (0);
+}
+
+/*
+ * These next few routines are used to permit downgrading an unmapped
+ * mbuf to a chain of mapped mbufs. This is used when an interface
+ * doesn't support unmapped mbufs or if checksums need to be
+ * computed in software.
+ *
+ * Each unmapped mbuf is converted to a chain of mbufs. First, any
+ * TLS header data is stored in a regular mbuf. Second, each page of
+ * unmapped data is stored in an mbuf with an EXT_SFBUF external
+ * cluster. These mbufs use an sf_buf to provide a valid KVA for the
+ * associated physical page. They also hold a reference on the
+ * original EXT_PGS mbuf to ensure the physical page doesn't go away.
+ * Finally, any TLS trailer data is stored in a regular mbuf.
+ *
+ * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF
+ * mbufs. It frees the associated sf_buf and releases its reference
+ * on the original EXT_PGS mbuf.
+ *
+ * _mb_unmapped_to_ext() is a helper function that converts a single
+ * unmapped mbuf into a chain of mbufs.
+ *
+ * mb_unmapped_to_ext() is the public function that walks an mbuf
+ * chain converting any unmapped mbufs to mapped mbufs. It returns
+ * the new chain of mapped mbufs on success. On failure it frees
+ * the original mbuf chain and returns NULL.
+ */
+static void
+mb_unmapped_free_mext(struct mbuf *m)
+{
+ struct sf_buf *sf;
+ struct mbuf *old_m;
+
+ sf = m->m_ext.ext_arg1;
+ sf_buf_free(sf);
+
+ /* Drop the reference on the backing EXT_PGS mbuf. */
+ old_m = m->m_ext.ext_arg2;
+ mb_free_ext(old_m);
+}
+
+static struct mbuf *
+_mb_unmapped_to_ext(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ struct mbuf *m_new, *top, *prev, *mref;
+ struct sf_buf *sf;
+ vm_page_t pg;
+ int i, len, off, pglen, pgoff, seglen, segoff;
+ volatile u_int *refcnt;
+ u_int ref_inc = 0;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ len = m->m_len;
+ KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p",
+ __func__, m));
+
+ /* See if this is the mbuf that holds the embedded refcount. */
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ mref = m;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
+ }
+
+ /* Skip over any data removed from the front. */
+ off = mtod(m, vm_offset_t);
+
+ top = NULL;
+ if (ext_pgs->hdr_len != 0) {
+ if (off >= ext_pgs->hdr_len) {
+ off -= ext_pgs->hdr_len;
+ } else {
+ seglen = ext_pgs->hdr_len - off;
+ segoff = off;
+ seglen = min(seglen, len);
+ off = 0;
+ len -= seglen;
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ m_new->m_len = seglen;
+ prev = top = m_new;
+ memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff],
+ seglen);
+ }
+ }
+ pgoff = ext_pgs->first_pg_off;
+ for (i = 0; i < ext_pgs->npgs && len > 0; i++) {
+ pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+ if (off >= pglen) {
+ off -= pglen;
+ pgoff = 0;
+ continue;
+ }
+ seglen = pglen - off;
+ segoff = pgoff + off;
+ off = 0;
+ seglen = min(seglen, len);
+ len -= seglen;
+
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ if (top == NULL) {
+ top = prev = m_new;
+ } else {
+ prev->m_next = m_new;
+ prev = m_new;
+ }
+ sf = sf_buf_alloc(pg, SFB_NOWAIT);
+ if (sf == NULL)
+ goto fail;
+
+ ref_inc++;
+ m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE,
+ mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF);
+ m_new->m_data += segoff;
+ m_new->m_len = seglen;
+
+ pgoff = 0;
+ };
+ if (len != 0) {
+ KASSERT((off + len) <= ext_pgs->trail_len,
+ ("off + len > trail (%d + %d > %d)", off, len,
+ ext_pgs->trail_len));
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ if (top == NULL)
+ top = m_new;
+ else
+ prev->m_next = m_new;
+ m_new->m_len = len;
+ memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len);
+ }
+
+ if (ref_inc != 0) {
+ /*
+ * Obtain an additional reference on the old mbuf for
+ * each created EXT_SFBUF mbuf. They will be dropped
+ * in mb_unmapped_free_mext().
+ */
+ if (*refcnt == 1)
+ *refcnt += ref_inc;
+ else
+ atomic_add_int(refcnt, ref_inc);
+ }
+ m_free(m);
+ return (top);
+
+fail:
+ if (ref_inc != 0) {
+ /*
+ * Obtain an additional reference on the old mbuf for
+ * each created EXT_SFBUF mbuf. They will be
+ * immediately dropped when these mbufs are freed
+ * below.
+ */
+ if (*refcnt == 1)
+ *refcnt += ref_inc;
+ else
+ atomic_add_int(refcnt, ref_inc);
+ }
+ m_free(m);
+ m_freem(top);
+ return (NULL);
+}
+
+struct mbuf *
+mb_unmapped_to_ext(struct mbuf *top)
+{
+ struct mbuf *m, *next, *prev = NULL;
+
+ prev = NULL;
+ for (m = top; m != NULL; m = next) {
+ /* m might be freed, so cache the next pointer. */
+ next = m->m_next;
+ if (m->m_flags & M_NOMAP) {
+ if (prev != NULL) {
+ /*
+ * Remove 'm' from the new chain so
+ * that the 'top' chain terminates
+ * before 'm' in case 'top' is freed
+ * due to an error.
+ */
+ prev->m_next = NULL;
+ }
+ m = _mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ m_freem(top);
+ m_freem(next);
+ return (NULL);
+ }
+ if (prev == NULL) {
+ top = m;
+ } else {
+ prev->m_next = m;
+ }
+
+ /*
+ * Replaced one mbuf with a chain, so we must
+ * find the end of chain.
+ */
+ prev = m_last(m);
+ } else {
+ if (prev != NULL) {
+ prev->m_next = m;
+ }
+ prev = m;
+ }
+ }
+ return (top);
+}
+
+/*
+ * Allocate an empty EXT_PGS mbuf. The ext_free routine is
+ * responsible for freeing any pages backing this mbuf when it is
+ * freed.
+ */
+struct mbuf *
+mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free)
+{
+ struct mbuf *m;
+ struct mbuf_ext_pgs *ext_pgs;
+
+ if (pkthdr)
+ m = m_gethdr(how, MT_DATA);
+ else
+ m = m_get(how, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+ ext_pgs = uma_zalloc(zone_extpgs, how);
+ if (ext_pgs == NULL) {
+ m_free(m);
+ return (NULL);
+ }
+ ext_pgs->npgs = 0;
+ ext_pgs->nrdy = 0;
+ ext_pgs->first_pg_off = 0;
+ ext_pgs->last_pg_len = 0;
+ ext_pgs->hdr_len = 0;
+ ext_pgs->trail_len = 0;
+ ext_pgs->tls = NULL;
+ ext_pgs->so = NULL;
+ m->m_data = NULL;
+ m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP);
+ m->m_ext.ext_type = EXT_PGS;
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_count = 1;
+ m->m_ext.ext_pgs = ext_pgs;
+ m->m_ext.ext_size = 0;
+ m->m_ext.ext_free = ext_free;
+ return (m);
+}
+
+#ifdef INVARIANT_SUPPORT
+void
+mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
+{
+
+ /*
+ * NB: This expects a non-empty buffer (npgs > 0 and
+ * last_pg_len > 0).
+ */
+ KASSERT(ext_pgs->npgs > 0,
+ ("ext_pgs with no valid pages: %p", ext_pgs));
+ KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa),
+ ("ext_pgs with too many pages: %p", ext_pgs));
+ KASSERT(ext_pgs->nrdy <= ext_pgs->npgs,
+ ("ext_pgs with too many ready pages: %p", ext_pgs));
+ KASSERT(ext_pgs->first_pg_off < PAGE_SIZE,
+ ("ext_pgs with too large page offset: %p", ext_pgs));
+ KASSERT(ext_pgs->last_pg_len > 0,
+ ("ext_pgs with zero last page length: %p", ext_pgs));
+ KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE,
+ ("ext_pgs with too large last page length: %p", ext_pgs));
+ if (ext_pgs->npgs == 1) {
+ KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <=
+ PAGE_SIZE, ("ext_pgs with single page too large: %p",
+ ext_pgs));
+ }
+ KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr),
+ ("ext_pgs with too large header length: %p", ext_pgs));
+ KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail),
+ ("ext_pgs with too large header length: %p", ext_pgs));
+}
+#endif
+#endif /* __rtems__ */
+
+/*
* Clean up after mbufs with M_EXT storage attached to them if the
* reference count hits 1.
*/
@@ -865,7 +1275,8 @@ mb_free_ext(struct mbuf *m)
*/
if (m->m_flags & M_NOFREE) {
freembuf = 0;
- KASSERT(m->m_ext.ext_type == EXT_EXTREF,
+ KASSERT(m->m_ext.ext_type == EXT_EXTREF ||
+ m->m_ext.ext_type == EXT_RXRING,
("%s: no-free mbuf %p has wrong type", __func__, m));
} else
freembuf = 1;
@@ -896,6 +1307,27 @@ mb_free_ext(struct mbuf *m)
uma_zfree(zone_mbuf, mref);
break;
#ifndef __rtems__
+ case EXT_PGS: {
+#ifdef KERN_TLS
+ struct mbuf_ext_pgs *pgs;
+ struct ktls_session *tls;
+#endif
+
+ KASSERT(mref->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
+ mref->m_ext.ext_free(mref);
+#ifdef KERN_TLS
+ pgs = mref->m_ext.ext_pgs;
+ tls = pgs->tls;
+ if (tls != NULL &&
+ !refcount_release_if_not_last(&tls->refcount))
+ ktls_enqueue_to_free(pgs);
+ else
+#endif
+ uma_zfree(zone_extpgs, mref->m_ext.ext_pgs);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ }
case EXT_SFBUF:
#endif /* __rtems__ */
case EXT_NET_DRV:
@@ -911,6 +1343,10 @@ mb_free_ext(struct mbuf *m)
("%s: ext_free not set", __func__));
m->m_ext.ext_free(m);
break;
+ case EXT_RXRING:
+ KASSERT(m->m_ext.ext_free == NULL,
+ ("%s: ext_free is set", __func__));
+ break;
default:
KASSERT(m->m_ext.ext_type == 0,
("%s: unknown ext_type", __func__));
@@ -950,7 +1386,7 @@ m_clget(struct mbuf *m, int how)
* we might be able to loosen a few clusters up on the drain.
*/
if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
- zone_drain(zone_pack);
+ uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
uma_zalloc_arg(zone_clust, m, how);
}
MBUF_PROBE2(m__clget, m, how);
@@ -1051,8 +1487,7 @@ m_getjcl(int how, short type, int flags, int size)
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
* existing mbuf chain is provided, then we will append the new chain
- * to the existing one but still return the top of the newly allocated
- * chain.
+ * to the existing one and return a pointer to the provided mbuf.
*/
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
@@ -1165,3 +1600,24 @@ m_freem(struct mbuf *mb)
while (mb != NULL)
mb = m_free(mb);
}
+
+void
+m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
+{
+
+ if_ref(ifp);
+ mst->ifp = ifp;
+ refcount_init(&mst->refcount, 1);
+ counter_u64_add(snd_tag_count, 1);
+}
+
+void
+m_snd_tag_destroy(struct m_snd_tag *mst)
+{
+ struct ifnet *ifp;
+
+ ifp = mst->ifp;
+ ifp->if_snd_tag_free(mst);
+ if_rele(ifp);
+ counter_u64_add(snd_tag_count, -1);
+}
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index 52aa32fb..cd9e6285 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -46,8 +46,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_config.h>
#include <sys/param.h>
+#include <sys/boot.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -147,7 +149,7 @@ SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available");
#endif
-char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */
+char kernelname[MAXPATHLEN] = PATH_KERNEL; /* XXX bloat */
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE,
kernelname, sizeof kernelname, "Name of kernel file booted");
@@ -170,15 +172,8 @@ sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
char buf[256];
size_t len;
- /*-
- * This is one of the very few legitimate uses of read_random(9).
- * Use of arc4random(9) is not recommended as that will ignore
- * an unsafe (i.e. unseeded) random(4).
- *
- * If random(4) is not seeded, then this returns 0, so the
- * sysctl will return a zero-length buffer.
- */
- len = read_random(buf, MIN(req->oldlen, sizeof(buf)));
+ len = MIN(req->oldlen, sizeof(buf));
+ read_random(buf, len);
return (SYSCTL_OUT(req, buf, len));
}
@@ -189,37 +184,51 @@ SYSCTL_PROC(_kern, KERN_ARND, arandom,
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
+ u_long val, p;
- val = ctob(physmem);
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (physmem < p)
+ p = physmem;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
-
SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_physmem, "LU", "");
+ 0, 0, sysctl_hw_physmem, "LU",
+ "Amount of physical memory (in bytes)");
static int
sysctl_hw_realmem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
- val = ctob(realmem);
+ u_long val, p;
+
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (realmem < p)
+ p = realmem;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
SYSCTL_PROC(_hw, HW_REALMEM, realmem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_realmem, "LU", "");
+ 0, 0, sysctl_hw_realmem, "LU",
+ "Amount of memory (in bytes) reported by the firmware");
+
static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
+ u_long val, p, p1;
- val = ctob(physmem - vm_wire_count());
+ p1 = physmem - vm_wire_count();
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (p1 < p)
+ p = p1;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
-
SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_usermem, "LU", "");
+ 0, 0, sysctl_hw_usermem, "LU",
+ "Amount of memory (in bytes) which is not wired");
-SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
+SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0,
+ "Amount of physical memory (in pages)");
u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
@@ -501,6 +510,54 @@ sysctl_osreldate(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, KERN_OSRELDATE, osreldate,
CTLTYPE_INT | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
NULL, 0, sysctl_osreldate, "I", "Kernel release date");
+
+/*
+ * The build-id is copied from the ELF section .note.gnu.build-id. The linker
+ * script defines two variables to expose the beginning and end. LLVM
+ * currently uses a SHA-1 hash, but other formats can be supported by checking
+ * the length of the section.
+ */
+
+extern char __build_id_start[];
+extern char __build_id_end[];
+
+#define BUILD_ID_HEADER_LEN 0x10
+#define BUILD_ID_HASH_MAXLEN 0x14
+
+static int
+sysctl_build_id(SYSCTL_HANDLER_ARGS)
+{
+ uintptr_t sectionlen = (uintptr_t)(__build_id_end - __build_id_start);
+ int hashlen;
+ char buf[2*BUILD_ID_HASH_MAXLEN+1];
+
+ /*
+ * The ELF note section has a four byte length for the vendor name,
+ * four byte length for the value, and a four byte vendor specific
+ * type. The name for the build id is "GNU\0". We skip the first 16
+ * bytes to read the build hash. We will return the remaining bytes up
+ * to 20 (SHA-1) hash size. If the hash happens to be a custom number
+ * of bytes we will pad the value with zeros, as the section should be
+ * four byte aligned.
+ */
+ if (sectionlen <= BUILD_ID_HEADER_LEN ||
+ sectionlen > (BUILD_ID_HEADER_LEN + BUILD_ID_HASH_MAXLEN)) {
+ return (ENOENT);
+ }
+
+
+ hashlen = sectionlen - BUILD_ID_HEADER_LEN;
+ for (int i = 0; i < hashlen; i++) {
+ uint8_t c = __build_id_start[i+BUILD_ID_HEADER_LEN];
+ snprintf(&buf[2*i], 3, "%02x", c);
+ }
+
+ return (SYSCTL_OUT(req, buf, strlen(buf) + 1));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, build_id,
+ CTLTYPE_STRING | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_build_id, "A", "Operating system build-id");
#endif /* __rtems__ */
SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");
diff --git a/freebsd/sys/kern/kern_mtxpool.c b/freebsd/sys/kern/kern_mtxpool.c
index 7f6c4dce..bc47d826 100644
--- a/freebsd/sys/kern/kern_mtxpool.c
+++ b/freebsd/sys/kern/kern_mtxpool.c
@@ -64,14 +64,14 @@ static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool");
/* Pool sizes must be a power of two */
#ifndef MTX_POOL_SLEEP_SIZE
-#define MTX_POOL_SLEEP_SIZE 128
+#define MTX_POOL_SLEEP_SIZE 1024
#endif
struct mtxpool_header {
int mtxpool_size;
int mtxpool_mask;
int mtxpool_shift;
- int mtxpool_next;
+ int mtxpool_next __aligned(CACHE_LINE_SIZE);
};
struct mtx_pool {
diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c
index 2597f91d..7d24c248 100644
--- a/freebsd/sys/kern/kern_synch.c
+++ b/freebsd/sys/kern/kern_synch.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
@@ -366,6 +367,75 @@ pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
}
/*
+ * Potentially release the last reference for refcount. Check for
+ * unlikely conditions and signal the caller as to whether it was
+ * the final ref.
+ */
+bool
+refcount_release_last(volatile u_int *count, u_int n, u_int old)
+{
+ u_int waiter;
+
+ waiter = old & REFCOUNT_WAITER;
+ old = REFCOUNT_COUNT(old);
+ if (__predict_false(n > old || REFCOUNT_SATURATED(old))) {
+ /*
+ * Avoid multiple destructor invocations if underflow occurred.
+ * This is not perfect since the memory backing the containing
+ * object may already have been reallocated.
+ */
+ _refcount_update_saturated(count);
+ return (false);
+ }
+
+ /*
+ * Attempt to atomically clear the waiter bit. Wakeup waiters
+ * if we are successful.
+ */
+ if (waiter != 0 && atomic_cmpset_int(count, REFCOUNT_WAITER, 0))
+ wakeup(__DEVOLATILE(u_int *, count));
+
+ /*
+ * Last reference. Signal the user to call the destructor.
+ *
+ * Ensure that the destructor sees all updates. The fence_rel
+ * at the start of refcount_releasen synchronizes with this fence.
+ */
+ atomic_thread_fence_acq();
+ return (true);
+}
+
+/*
+ * Wait for a refcount wakeup. This does not guarantee that the ref is still
+ * zero on return and may be subject to transient wakeups. Callers wanting
+ * a precise answer should use refcount_wait().
+ */
+void
+refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
+{
+ void *wchan;
+ u_int old;
+
+ if (REFCOUNT_COUNT(*count) == 0)
+ return;
+ wchan = __DEVOLATILE(void *, count);
+ sleepq_lock(wchan);
+ old = *count;
+ for (;;) {
+ if (REFCOUNT_COUNT(old) == 0) {
+ sleepq_release(wchan);
+ return;
+ }
+ if (old & REFCOUNT_WAITER)
+ break;
+ if (atomic_fcmpset_int(count, &old, old | REFCOUNT_WAITER))
+ break;
+ }
+ sleepq_add(wchan, NULL, wmesg, 0, 0);
+ sleepq_wait(wchan, pri);
+}
+
+/*
* Make all threads sleeping on the specified identifier runnable.
*/
void
@@ -402,6 +472,19 @@ wakeup_one(void *ident)
kick_proc0();
}
+void
+wakeup_any(void *ident)
+{
+ int wakeup_swapper;
+
+ sleepq_lock(ident);
+ wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
+ 0, 0);
+ sleepq_release(ident);
+ if (wakeup_swapper)
+ kick_proc0();
+}
+
#ifndef __rtems__
static void
kdb_switch(void)
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index dc7c4c72..1135d7f3 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -43,6 +43,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_capsicum.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <sys/param.h>
@@ -50,11 +51,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
+#include <sys/kdb.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rmlock.h>
@@ -66,6 +69,11 @@ __FBSDID("$FreeBSD$");
#include <sys/ktrace.h>
#endif
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+#endif
+
#include <net/vnet.h>
#include <security/mac/mac_framework.h>
@@ -326,13 +334,6 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
}
#endif /* __rtems__ */
-static int
-sbuf_printf_drain(void *arg __unused, const char *data, int len)
-{
-
- return (printf("%.*s", len, data));
-}
-
/*
* Locate the path to a given oid. Returns the length of the resulting path,
* or -1 if the oid was not found. nodes must have room for CTL_MAXNAME
@@ -940,13 +941,18 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
* (be aware though, that the proper interface isn't as obvious as it
* may seem, there are various conflicting requirements.
*
- * {0,0} printf the entire MIB-tree.
- * {0,1,...} return the name of the "..." OID.
- * {0,2,...} return the next OID.
- * {0,3} return the OID of the name in "new"
- * {0,4,...} return the kind & format info for the "..." OID.
- * {0,5,...} return the description of the "..." OID.
- * {0,6,...} return the aggregation label of the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..."
+ * OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in
+ * "new"
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info
+ * for the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the
+ * "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of
+ * the "..." OID.
*/
#ifdef SYSCTL_DEBUG
@@ -1014,8 +1020,8 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
return (ENOENT);
}
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
- 0, 0, sysctl_sysctl_debug, "-", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
#endif
static int
@@ -1080,8 +1086,8 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_name, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
static int
sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
@@ -1167,8 +1173,8 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_next, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
@@ -1254,9 +1260,9 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
* capability mode.
*/
-SYSCTL_PROC(_sysctl, 3, name2oid,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
- | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
+ sysctl_sysctl_name2oid, "I", "");
static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
@@ -1284,8 +1290,8 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
}
-static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oidfmt, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
@@ -1309,8 +1315,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oiddescr, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
+ CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
static int
sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
@@ -1334,8 +1340,8 @@ sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 6, oidlabel,
- CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
/*
* Default "handler" functions.
@@ -1622,9 +1628,10 @@ sysctl_handle_string(SYSCTL_HANDLER_ARGS)
/*
* A zero-length buffer indicates a fixed size read-only
- * string:
+ * string. In ddb, don't worry about trying to make a malloced
+ * snapshot.
*/
- if (arg2 == 0) {
+ if (arg2 == 0 || kdb_active) {
arg2 = strlen((char *)arg1) + 1;
ro_string = 1;
}
@@ -1751,6 +1758,29 @@ sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
return (0);
}
+/*
+ * Sysctl handler that exposes the tv_sec field of the struct timeval
+ * pointed to by arg1 as an int number of seconds.  The value is passed
+ * through sysctl_handle_int(); when the request carries a new value it is
+ * written back to tv_sec.  Negative values are refused with EINVAL because
+ * the handler is meant for intervals.
+ */
+int
+sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
+{
+	struct timeval *interval = arg1;
+	int seconds = interval->tv_sec;
+	int rc;
+
+	rc = sysctl_handle_int(oidp, &seconds, 0, req);
+	if (rc != 0)
+		return (rc);
+
+	/* Read-only request: nothing to store. */
+	if (req->newptr == NULL)
+		return (0);
+
+	if (seconds < 0)
+		return (EINVAL);
+
+	interval->tv_sec = seconds;
+	return (0);
+}
/*
* Transfer functions to/from kernel space.
@@ -1853,8 +1883,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
size_t oidlen, plen;
int error;
- oid[0] = 0; /* sysctl internal magic */
- oid[1] = 3; /* name2oid */
+ oid[0] = CTL_SYSCTL;
+ oid[1] = CTL_SYSCTL_NAME2OID;
oidlen = sizeof(oid);
error = kernel_sysctl(td, oid, 2, oid, &oidlen,
@@ -2149,6 +2179,68 @@ sys___sysctl(struct thread *td, struct sysctl_args *uap)
return (error);
}
+/*
+ * Look up a sysctl by its string name and run the request on it.
+ *
+ * The name (namelen bytes, between 1 and MAXPATHLEN) is fetched from oname
+ * with copyin().  Names that fit are kept in a small on-stack buffer,
+ * longer ones in a temporary M_SYSCTL allocation.  The name is translated
+ * to a numeric OID through the CTL_SYSCTL/CTL_SYSCTL_NAME2OID node and the
+ * resulting OID is then handed to userland_sysctl() together with the
+ * caller's old/new buffers.
+ *
+ * Returns 0 or an errno value; *retval is filled in by the sysctl calls.
+ */
+int
+kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen,
+    void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval,
+    int flags, bool inkernel)
+{
+	int mib[CTL_MAXNAME];
+	char stackname[16];
+	char *kname;
+	size_t miblen;
+	bool onheap;
+	int rc;
+
+	if (namelen == 0 || namelen > MAXPATHLEN)
+		return (EINVAL);
+
+	/* Only fall back to malloc when the stack buffer is too small. */
+	onheap = namelen > sizeof(stackname);
+	kname = onheap ? malloc(namelen, M_SYSCTL, M_WAITOK) : stackname;
+
+	rc = copyin(oname, kname, namelen);
+	if (rc == 0) {
+		/* Translate the name; the OID is written back into mib. */
+		mib[0] = CTL_SYSCTL;
+		mib[1] = CTL_SYSCTL_NAME2OID;
+		miblen = sizeof(mib);
+		rc = kernel_sysctl(td, mib, 2, mib, &miblen, (void *)kname,
+		    namelen, retval, flags);
+	}
+	if (rc == 0) {
+		/* *retval holds the OID size in bytes at this point. */
+		rc = userland_sysctl(td, mib, *retval / sizeof(int), old,
+		    oldlenp, inkernel, new, newlen, retval, flags);
+	}
+
+	if (onheap)
+		free(kname, M_SYSCTL);
+	return (rc);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct __sysctlbyname_args {
+	const char *name;
+	size_t namelen;
+	void *old;
+	size_t *oldlenp;
+	void *new;
+	size_t newlen;
+};
+#endif
+/*
+ * System call entry point: forwards the arguments to
+ * kern___sysctlbyname() and, on success, copies the resulting length out
+ * to uap->oldlenp when the caller supplied one.
+ */
+int
+sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap)
+{
+	size_t outlen;
+	int rc;
+
+	rc = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
+	    uap->oldlenp, uap->new, uap->newlen, &outlen, 0, false);
+	if (rc == 0 && uap->oldlenp != NULL)
+		rc = copyout(&outlen, uap->oldlenp, sizeof(outlen));
+
+	return (rc);
+}
+
/*
* This is used from various compatibility syscalls too. That's why name
* must be in kernel space.
@@ -2254,3 +2346,528 @@ sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
sbuf_set_drain(s, sbuf_sysctl_drain, req);
return (s);
}
+
+#ifdef DDB
+
+/* The current OID the debugger is working with; set and reset by db_sysctl() */
+static struct sysctl_oid *g_ddb_oid;
+
+/* The current DB_SYSCTL_* flags specified by the user; set and reset by db_sysctl() */
+static int g_ddb_sysctl_flags;
+
+/* Set to 1 by sysctl_old_ddb() when it prints; cleared by db_sysctl() before each request */
+static int g_ddb_sysctl_printed;
+
+static const int ctl_sign[CTLTYPE+1] = { /* 1 for the signed integer types, 0 for everything else */
+ [CTLTYPE_INT] = 1,
+ [CTLTYPE_LONG] = 1,
+ [CTLTYPE_S8] = 1,
+ [CTLTYPE_S16] = 1,
+ [CTLTYPE_S32] = 1,
+ [CTLTYPE_S64] = 1,
+};
+
+static const int ctl_size[CTLTYPE+1] = { /* size in bytes of each integer type, 0 for non-integer types */
+ [CTLTYPE_INT] = sizeof(int),
+ [CTLTYPE_UINT] = sizeof(u_int),
+ [CTLTYPE_LONG] = sizeof(long),
+ [CTLTYPE_ULONG] = sizeof(u_long),
+ [CTLTYPE_S8] = sizeof(int8_t),
+ [CTLTYPE_S16] = sizeof(int16_t),
+ [CTLTYPE_S32] = sizeof(int32_t),
+ [CTLTYPE_S64] = sizeof(int64_t),
+ [CTLTYPE_U8] = sizeof(uint8_t),
+ [CTLTYPE_U16] = sizeof(uint16_t),
+ [CTLTYPE_U32] = sizeof(uint32_t),
+ [CTLTYPE_U64] = sizeof(uint64_t),
+};
+
+#define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */
+#define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */
+#define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */
+#define DB_SYSCTL_HEX 0x008 /* Compare with -x */
+
+#define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */
+
+static const char db_sysctl_modifs[] = { /* modifier letters; same order as db_sysctl_modif_values[] */
+ 'N', 'n', 'o', 'x',
+};
+
+static const int db_sysctl_modif_values[] = { /* flag selected by the letter at the same index above */
+ DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY,
+ DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX,
+};
+
+/* Handlers considered safe to print while recursing; consulted by db_oid_safe() */
+static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = {
+ sysctl_handle_bool,
+ sysctl_handle_8,
+ sysctl_handle_16,
+ sysctl_handle_32,
+ sysctl_handle_64,
+ sysctl_handle_int,
+ sysctl_handle_long,
+ sysctl_handle_string,
+ sysctl_handle_opaque,
+};
+
+/*
+ * Use in place of sysctl_old_kernel to print sysctl values.
+ *
+ * Compare to the output handling in show_var from sbin/sysctl/sysctl.c
+ *
+ * db_sysctl() installs this as the request's oldfunc, so the data a handler
+ * hands over is printed with db_printf() according to the type bits of
+ * g_ddb_oid and the flags in g_ddb_sysctl_flags:
+ *  - nodes and strings are printed as text;
+ *  - integer types are printed as a space separated list, in hex when the
+ *    hex flag is set, otherwise unsigned or signed per ctl_sign[];
+ *  - everything else is printed as format, length and a hex dump, which is
+ *    limited to 16 bytes followed by "..." unless the hex flag is set.
+ *
+ * A NULL ptr prints nothing and leaves g_ddb_sysctl_printed untouched.
+ * req->oldidx is advanced by len in every case.  Returns 0, or EOPNOTSUPP
+ * for a signed value whose format asks for Kelvin conversion.
+ */
+static int
+sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len)
+{
+	const size_t dumpmax = 16;	/* opaque bytes shown without hex flag */
+	const u_char *p;
+	const char *sep1;
+	size_t dumplen, i, intlen, slen;
+	uintmax_t umv;
+	intmax_t mv;
+	int sign, ctltype, hexlen, xflag, error;
+
+	/* Suppress false-positive GCC uninitialized variable warnings */
+	mv = 0;
+	umv = 0;
+
+	/* Keep the full length; len is consumed while printing integers. */
+	slen = len;
+	p = ptr;
+
+	if (ptr == NULL) {
+		error = 0;
+		goto out;
+	}
+
+	/* We are going to print */
+	g_ddb_sysctl_printed = 1;
+
+	xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX;
+
+	ctltype = (g_ddb_oid->oid_kind & CTLTYPE);
+	sign = ctl_sign[ctltype];
+	intlen = ctl_size[ctltype];
+
+	switch (ctltype) {
+	case CTLTYPE_NODE:
+	case CTLTYPE_STRING:
+		db_printf("%.*s", (int) len, (const char *) p);
+		error = 0;
+		goto out;
+
+	case CTLTYPE_INT:
+	case CTLTYPE_UINT:
+	case CTLTYPE_LONG:
+	case CTLTYPE_ULONG:
+	case CTLTYPE_S8:
+	case CTLTYPE_S16:
+	case CTLTYPE_S32:
+	case CTLTYPE_S64:
+	case CTLTYPE_U8:
+	case CTLTYPE_U16:
+	case CTLTYPE_U32:
+	case CTLTYPE_U64:
+		/* "0x" plus one hex digit per nibble of the integer type. */
+		hexlen = 2 + (intlen * CHAR_BIT + 3) / 4;
+		sep1 = "";
+		while (len >= intlen) {
+			/* Load both interpretations; one is picked below. */
+			switch (ctltype) {
+			case CTLTYPE_INT:
+			case CTLTYPE_UINT:
+				umv = *(const u_int *)p;
+				mv = *(const int *)p;
+				break;
+			case CTLTYPE_LONG:
+			case CTLTYPE_ULONG:
+				umv = *(const u_long *)p;
+				mv = *(const long *)p;
+				break;
+			case CTLTYPE_S8:
+			case CTLTYPE_U8:
+				umv = *(const uint8_t *)p;
+				mv = *(const int8_t *)p;
+				break;
+			case CTLTYPE_S16:
+			case CTLTYPE_U16:
+				umv = *(const uint16_t *)p;
+				mv = *(const int16_t *)p;
+				break;
+			case CTLTYPE_S32:
+			case CTLTYPE_U32:
+				umv = *(const uint32_t *)p;
+				mv = *(const int32_t *)p;
+				break;
+			case CTLTYPE_S64:
+			case CTLTYPE_U64:
+				umv = *(const uint64_t *)p;
+				mv = *(const int64_t *)p;
+				break;
+			}
+
+			db_printf("%s", sep1);
+			if (xflag)
+				db_printf("%#0*jx", hexlen, umv);
+			else if (!sign)
+				db_printf("%ju", umv);
+			else if (g_ddb_oid->oid_fmt[1] == 'K') {
+				/* Kelvins are currently unsupported. */
+				error = EOPNOTSUPP;
+				goto out;
+			} else
+				db_printf("%jd", mv);
+
+			sep1 = " ";
+			len -= intlen;
+			p += intlen;
+		}
+		error = 0;
+		goto out;
+
+	case CTLTYPE_OPAQUE:
+		/* TODO: Support struct functions. */
+
+		/* FALLTHROUGH */
+	default:
+		db_printf("Format:%s Length:%zu Dump:0x",
+		    g_ddb_oid->oid_fmt, len);
+		/* Dump everything in hex mode, else at most dumpmax bytes. */
+		dumplen = (xflag || len <= dumpmax) ? len : dumpmax;
+		for (i = 0; i < dumplen; i++)
+			db_printf("%02x", p[i]);
+		/* Mark the dump as truncated only if bytes were left out. */
+		if (dumplen < len)
+			db_printf("...");
+		error = 0;
+		goto out;
+	}
+
+out:
+	req->oldidx += slen;
+	return (error);
+}
+
+/*
+ * newfunc used for requests issued from the debugger.  A request without
+ * a new value succeeds trivially; any attempt to set a sysctl from ddb is
+ * refused with EPERM because that is currently unsupported.
+ */
+static int
+sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l)
+{
+
+	return (req->newptr ? EPERM : 0);
+}
+
+/*
+ * Issue a sysctl request whose oldfunc/newfunc are the DDB variants, so
+ * the handler's output is printed on the console rather than copied into
+ * a buffer.  oidp, and flags are published through the g_ddb_* globals for
+ * the duration of the call so that sysctl_old_ddb() can see them.
+ *
+ * If retval is non-NULL it receives the number of bytes the handler
+ * produced, clamped to the valid length when an old pointer was given.
+ * Returns the result of sysctl_root().
+ */
+static int
+db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen,
+    void *old, size_t *oldlenp, size_t *retval, int flags)
+{
+	struct sysctl_req ddbreq;
+	int rc;
+
+	/* Build a request that is routed through the DDB callbacks. */
+	bzero(&ddbreq, sizeof ddbreq);
+	ddbreq.td = kdb_thread;
+	ddbreq.oldfunc = sysctl_old_ddb;
+	ddbreq.newfunc = sysctl_new_ddb;
+	ddbreq.lock = REQ_UNWIRED;
+	if (oldlenp != NULL)
+		ddbreq.oldlen = *oldlenp;
+	ddbreq.validlen = ddbreq.oldlen;
+	if (old != NULL)
+		ddbreq.oldptr = old;
+
+	/* Publish the per-request state read by sysctl_old_ddb(). */
+	g_ddb_oid = oidp;
+	g_ddb_sysctl_flags = flags;
+	g_ddb_sysctl_printed = 0;
+
+	rc = sysctl_root(0, name, namelen, &ddbreq);
+
+	/* g_ddb_sysctl_printed is left for the caller to inspect. */
+	g_ddb_oid = NULL;
+	g_ddb_sysctl_flags = 0;
+
+	if (retval != NULL) {
+		*retval = (ddbreq.oldptr && ddbreq.oldidx > ddbreq.validlen) ?
+		    ddbreq.validlen : ddbreq.oldidx;
+	}
+	return (rc);
+}
+
+/*
+ * Show a sysctl's name
+ *
+ * Prefixes the nlen-element OID with {CTL_SYSCTL, CTL_SYSCTL_NAME}, looks
+ * the resulting node up and runs it through db_sysctl(), which prints the
+ * name on the console.  Failures are reported with db_error().
+ */
+static void
+db_show_oid_name(int *oid, size_t nlen)
+{
+	struct sysctl_oid *oidp;
+	int qoid[CTL_MAXNAME+2];
+	int error;
+
+	/* Named constants instead of the bare 0/1 magic numbers. */
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_NAME;
+	memcpy(qoid + 2, oid, nlen * sizeof(int));
+
+	error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL);
+	if (error)
+		db_error("sysctl name oid");
+
+	error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0);
+	if (error)
+		db_error("sysctl name");
+}
+
+/*
+ * Report whether an OID may be printed from ddb during a recursive walk:
+ * true exactly when its handler is one of the entries of
+ * db_safe_handlers[].
+ */
+static bool
+db_oid_safe(const struct sysctl_oid *oidp)
+{
+	bool safe = false;
+	unsigned int idx = 0;
+
+	while (!safe && idx < nitems(db_safe_handlers)) {
+		safe = (oidp->oid_handler == db_safe_handlers[idx]);
+		idx++;
+	}
+
+	return (safe);
+}
+
+/*
+ * Show a sysctl at a specific OID
+ * Compare to the input handling in show_var from sbin/sysctl/sysctl.c
+ *
+ * Opaque OIDs are silently skipped unless the hex or opaque flag is set.
+ * Otherwise the name and/or the value are printed as selected by flags,
+ * always followed by a newline.  With DB_SYSCTL_SAFE_ONLY the value of an
+ * OID that fails db_oid_safe() is replaced by a "Skipping" note.
+ * Returns 0 or the error of the sysctl request.
+ */
+static int
+db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags)
+{
+	size_t need = 0;
+	int rc = 0;
+
+	if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE &&
+	    (flags & (DB_SYSCTL_HEX | DB_SYSCTL_OPAQUE)) == 0)
+		return (0);
+
+	if ((flags & DB_SYSCTL_NAME_ONLY) != 0) {
+		db_show_oid_name(oid, nlen);
+	} else {
+		if ((flags & DB_SYSCTL_VALUE_ONLY) == 0) {
+			db_show_oid_name(oid, nlen);
+			db_printf(": ");
+		}
+
+		if ((flags & DB_SYSCTL_SAFE_ONLY) != 0 && !db_oid_safe(oidp)) {
+			db_printf("Skipping, unsafe to print while recursing.");
+		} else {
+			/* Try once, and ask about the size */
+			rc = db_sysctl(oidp, oid, nlen,
+			    NULL, NULL, &need, flags);
+
+			/*
+			 * Nothing was printed by the size query: retry,
+			 * lying about the size with a dummy old pointer.
+			 */
+			if (rc == 0 && !g_ddb_sysctl_printed)
+				rc = db_sysctl(oidp, oid, nlen,
+				    (void *) 1, &need, NULL, flags);
+		}
+	}
+
+	db_printf("\n");
+	return (rc);
+}
+
+/*
+ * Show all sysctls under a specific OID
+ * Compare to sysctl_all from sbin/sysctl/sysctl.c
+ *
+ * Repeatedly asks the CTL_SYSCTL/CTL_SYSCTL_NEXT node for the OID that
+ * follows the current one and prints it with db_show_oid(), restricted to
+ * DB_SYSCTL_SAFE_ONLY.  The walk ends when there is no next OID, when the
+ * next OID no longer starts with the len-element prefix in oid, or when
+ * the pager was quit.  Returns 0, or the error of a failed OID lookup.
+ */
+static int
+db_show_sysctl_all(int *oid, size_t len, int flags)
+{
+	struct sysctl_oid *oidp;
+	int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2];
+	size_t i, l1, l2;
+	int error;
+
+	name1[0] = CTL_SYSCTL;
+	name1[1] = CTL_SYSCTL_NEXT;
+	l1 = 2;
+	if (len) {
+		memcpy(name1 + 2, oid, len * sizeof(int));
+		l1 += len;
+	} else {
+		/* No prefix given: start the walk at top-level OID 1. */
+		name1[2] = 1;
+		l1++;
+	}
+	for (;;) {
+		l2 = sizeof(name2);
+		error = kernel_sysctl(kdb_thread, name1, l1,
+		    name2, &l2, NULL, 0, &l2, 0);
+		if (error != 0) {
+			if (error == ENOENT)
+				return (0);
+			else
+				db_error("sysctl(getnext)");
+		}
+
+		/* kernel_sysctl() reported the length in bytes. */
+		l2 /= sizeof(int);
+
+		/* Stop once we have left the requested subtree. */
+		if (l2 < len)
+			return (0);
+
+		for (i = 0; i < len; i++)
+			if (name2[i] != oid[i])
+				return (0);
+
+		/* Find the OID in question */
+		error = sysctl_find_oid(name2, l2, &oidp, NULL, NULL);
+		if (error)
+			return (error);
+
+		/*
+		 * The result is deliberately not checked, as before; the
+		 * walk continues with the next OID either way.
+		 */
+		(void)db_show_oid(oidp, name2, l2,
+		    flags | DB_SYSCTL_SAFE_ONLY);
+
+		if (db_pager_quit)
+			return (0);
+
+		memcpy(name1 + 2, name2, l2 * sizeof(int));
+		l1 = 2 + l2;
+	}
+}
+
+/*
+ * Show a sysctl by its user facing string
+ *
+ * The name is translated with name2oid().  A node is shown by walking its
+ * subtree with db_show_sysctl_all(), anything else directly with
+ * db_show_oid().  Returns 0 or the error of the lookup or display step.
+ */
+static int
+db_sysctlbyname(char *name, int flags)
+{
+	struct sysctl_oid *oidp;
+	int oid[CTL_MAXNAME];
+	int error, nlen;
+
+	error = name2oid(name, oid, &nlen, &oidp);
+	if (error) {
+		return (error);
+	}
+
+	if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
+		/* Report a failed subtree walk instead of dropping it. */
+		error = db_show_sysctl_all(oid, nlen, flags);
+	} else {
+		error = db_show_oid(oidp, oid, nlen, flags);
+	}
+
+	return (error);
+}
+
+/*
+ * Print the usage text of the ddb "sysctl" command.
+ */
+static void
+db_sysctl_cmd_usage(void)
+{
+	db_printf(
+	    " sysctl [/Nnox] <sysctl> \n"
+	    " \n"
+	    " <sysctl> The name of the sysctl to show. \n"
+	    " \n"
+	    " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n"
+	    " This will work for most sysctls, but should not be used \n"
+	    " with sysctls that are known to malloc. \n"
+	    " \n"
+	    " While recursing any \"unsafe\" sysctls will be skipped. \n"
+	    " Call sysctl directly on the sysctl to try printing the \n"
+	    " skipped sysctl. This is unsafe and may make the ddb \n"
+	    " session unusable. \n"
+	    " \n"
+	    " Arguments: \n"
+	    " /N Display only the name of the sysctl. \n"
+	    " /n Display only the value of the sysctl. \n"
+	    " /o Display opaque values. \n"
+	    " /x Display the sysctl in hex. \n"
+	    " \n"
+	    "For example: \n"
+	    "sysctl vm.v_free_min \n"
+	    "vm.v_free_min: 12669 \n"
+	);
+}
+
+/*
+ * Show a specific sysctl similar to sysctl(8).
+ *
+ * Parses an optional modifier introduced by '/' or '-' (letters from
+ * db_sysctl_modifs[]), then exactly one sysctl name, and displays it with
+ * db_sysctlbyname().  The rest of the input line is always flushed.  Parse
+ * errors, and any other EINVAL, additionally print the usage text.
+ */
+DB_FUNC(sysctl, db_sysctl_cmd, db_cmd_table, CS_OWN, NULL)
+{
+	char name[TOK_STRING_SIZE];
+	size_t i;
+	int error, t, flags;
+
+	/* Parse the modifiers */
+	t = db_read_token();
+	if (t == tSLASH || t == tMINUS) {
+		t = db_read_token();
+		if (t != tIDENT) {
+			db_printf("Bad modifier\n");
+			error = EINVAL;
+			goto out;
+		}
+		db_strcpy(modif, db_tok_string);
+	}
+	else {
+		db_unread_token(t);
+		modif[0] = '\0';
+	}
+
+	/* Translate each recognised modifier letter into its flag. */
+	flags = 0;
+	for (i = 0; i < nitems(db_sysctl_modifs); i++) {
+		if (strchr(modif, db_sysctl_modifs[i])) {
+			flags |= db_sysctl_modif_values[i];
+		}
+	}
+
+	/* Parse the sysctl names */
+	t = db_read_token();
+	if (t != tIDENT) {
+		db_printf("Need sysctl name\n");
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Copy the name into a temporary buffer */
+	db_strcpy(name, db_tok_string);
+
+	/* Ensure there is no trailing cruft */
+	t = db_read_token();
+	if (t != tEOL) {
+		db_printf("Unexpected sysctl argument\n");
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Report failures using our own copy of the name rather than the
+	 * lexer's token buffer.
+	 */
+	error = db_sysctlbyname(name, flags);
+	if (error == ENOENT)
+		db_printf("unknown oid: '%s'\n", name);
+	else if (error)
+		db_printf("%s: error: %d\n", name, error);
+
+out:
+	/* Ensure we eat all of our text */
+	db_flush_lex();
+
+	if (error == EINVAL) {
+		db_sysctl_cmd_usage();
+	}
+}
+
+#endif /* DDB */
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index 74b144cb..47eb9032 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -422,7 +422,9 @@ kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 ||
ats->tv_sec < 0)
return (EINVAL);
- if (!allow_insane_settime && ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60)
+ if (!allow_insane_settime &&
+ (ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60 ||
+ ats->tv_sec < utc_offset()))
return (EINVAL);
/* XXX Don't convert nsec->usec and back */
TIMESPEC_TO_TIMEVAL(&atv, ats);
@@ -673,8 +675,8 @@ sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
error = copyout(&atv, uap->tp, sizeof (atv));
}
if (error == 0 && uap->tzp != NULL) {
- rtz.tz_minuteswest = tz_minuteswest;
- rtz.tz_dsttime = tz_dsttime;
+ rtz.tz_minuteswest = 0;
+ rtz.tz_dsttime = 0;
error = copyout(&rtz, uap->tzp, sizeof (rtz));
}
return (error);
@@ -726,10 +728,6 @@ kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
return (EINVAL);
error = settime(td, tv);
}
- if (tzp && error == 0) {
- tz_minuteswest = tzp->tz_minuteswest;
- tz_dsttime = tzp->tz_dsttime;
- }
return (error);
}
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index 2f478afc..983abba2 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
+#include <sys/domainset.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
@@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$");
#ifdef DDB
#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
#include <machine/_inttypes.h>
#endif
@@ -154,12 +156,16 @@ u_int callwheelsize, callwheelmask;
struct cc_exec {
struct callout *cc_curr;
void (*cc_drain)(void *);
+#ifndef __rtems__
+ void *cc_last_func;
+ void *cc_last_arg;
+#endif /* __rtems__ */
#ifdef SMP
void (*ce_migration_func)(void *);
void *ce_migration_arg;
- int ce_migration_cpu;
sbintime_t ce_migration_time;
sbintime_t ce_migration_prec;
+ int ce_migration_cpu;
#endif
bool cc_cancel;
bool cc_waiting;
@@ -200,6 +206,8 @@ struct callout_cpu {
#ifndef __rtems__
#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
+#define cc_exec_last_func(cc, dir) cc->cc_exec_entity[dir].cc_last_func
+#define cc_exec_last_arg(cc, dir) cc->cc_exec_entity[dir].cc_last_arg
#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain
#else /* __rtems__ */
#define cc_exec_curr(cc, dir) cc->cc_exec_entity.cc_curr
@@ -426,8 +434,9 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
SLIST_INIT(&cc->cc_callfree);
cc->cc_inited = 1;
#ifndef __rtems__
- cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
- M_CALLOUT, M_WAITOK);
+ cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
+ callwheelsize, M_CALLOUT,
+ DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
#endif /* __rtems__ */
for (i = 0; i < callwheelsize; i++)
LIST_INIT(&cc->cc_callwheel[i]);
@@ -821,6 +830,10 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
c->c_iflags &= ~CALLOUT_PENDING;
cc_exec_curr(cc, direct) = c;
+#ifndef __rtems__
+ cc_exec_last_func(cc, direct) = c_func;
+ cc_exec_last_arg(cc, direct) = c_arg;
+#endif /* __rtems__ */
cc_exec_cancel(cc, direct) = false;
cc_exec_drain(cc, direct) = NULL;
CC_UNLOCK(cc);
@@ -1876,4 +1889,42 @@ DB_SHOW_COMMAND(callout, db_show_callout)
_show_callout((struct callout *)addr);
}
+
+/*
+ * Print the function (raw pointer plus symbol) and the argument recorded
+ * as the last callout of the given cpu, for the "direct" or non-direct
+ * execution slot.  dirstr is inserted after "last" in the output.
+ */
+static void
+_show_last_callout(int cpu, int direct, const char *dirstr)
+{
+	struct callout_cpu *cc = CC_CPU(cpu);
+	void *last_fn = cc_exec_last_func(cc, direct);
+	void *last_arg = cc_exec_last_arg(cc, direct);
+
+	db_printf("cpu %d last%s callout function: %p ", cpu, dirstr,
+	    last_fn);
+	db_printsym((db_expr_t)last_fn, DB_STGY_ANY);
+	db_printf("\ncpu %d last%s callout argument: %p\n", cpu, dirstr,
+	    last_arg);
+}
+
+/*
+ * ddb "show callout_last": print the last callout of one cpu when an
+ * address argument is given (it must name a present cpu), or of every
+ * present cpu from 0 to mp_maxid otherwise.
+ */
+DB_SHOW_COMMAND(callout_last, db_show_callout_last)
+{
+	int cpu, first, last;
+
+	if (!have_addr) {
+		first = 0;
+		last = mp_maxid;
+	} else if (addr < 0 || addr > mp_maxid || CPU_ABSENT(addr)) {
+		db_printf("no such cpu: %d\n", (int)addr);
+		return;
+	} else {
+		first = last = addr;
+	}
+
+	for (cpu = first; cpu <= last; cpu++) {
+		if (CPU_ABSENT(cpu))
+			continue;
+		_show_last_callout(cpu, 0, "");
+		_show_last_callout(cpu, 1, " direct");
+	}
+}
#endif /* DDB */
diff --git a/freebsd/sys/kern/kern_uuid.c b/freebsd/sys/kern/kern_uuid.c
index a2316b16..c2a5986a 100644
--- a/freebsd/sys/kern/kern_uuid.c
+++ b/freebsd/sys/kern/kern_uuid.c
@@ -301,7 +301,7 @@ sbuf_printf_uuid(struct sbuf *sb, struct uuid *uuid)
char buf[38];
snprintf_uuid(buf, sizeof(buf), uuid);
- return (sbuf_printf(sb, "%s", buf));
+ return (sbuf_cat(sb, buf));
}
/*
diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c
index 807a7f3c..8b073bf8 100644
--- a/freebsd/sys/kern/subr_blist.c
+++ b/freebsd/sys/kern/subr_blist.c
@@ -111,6 +111,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
+#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
@@ -122,19 +123,20 @@ __FBSDID("$FreeBSD$");
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
#define ummin(a,b) ((a) < (b) ? (a) : (b))
+#define imin(a,b) ((a) < (b) ? (a) : (b))
+#define KASSERT(a,b) assert(a)
#include <sys/blist.h>
-void panic(const char *ctl, ...);
-
#endif
/*
* static support functions
*/
-static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
-static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
- u_daddr_t radix);
+static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
+ int *count, int maxcount);
+static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
+ int maxcount, u_daddr_t radix);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
u_daddr_t radix);
@@ -194,30 +196,40 @@ bitrange(int n, int count)
/*
- * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
- * Assumes that the argument has only one bit set.
+ * Find the first bit set in a u_daddr_t.
*/
static inline int
-bitpos(u_daddr_t mask)
+generic_bitpos(u_daddr_t mask)
{
int hi, lo, mid;
+ lo = 0;
+ hi = BLIST_BMAP_RADIX;
+ while (lo + 1 < hi) {
+ mid = (lo + hi) >> 1;
+ if (mask & bitrange(0, mid))
+ hi = mid;
+ else
+ lo = mid;
+ }
+ return (lo);
+}
+
+static inline int
+bitpos(u_daddr_t mask)
+{
+
switch (sizeof(mask)) {
#ifdef HAVE_INLINE_FFSLL
case sizeof(long long):
return (ffsll(mask) - 1);
#endif
+#ifdef HAVE_INLINE_FFS
+ case sizeof(int):
+ return (ffs(mask) - 1);
+#endif
default:
- lo = 0;
- hi = BLIST_BMAP_RADIX;
- while (lo + 1 < hi) {
- mid = (lo + hi) >> 1;
- if ((mask >> mid) != 0)
- lo = mid;
- else
- hi = mid;
- }
- return (lo);
+ return (generic_bitpos(mask));
}
}
@@ -237,8 +249,7 @@ blist_create(daddr_t blocks, int flags)
blist_t bl;
u_daddr_t nodes, radix;
- if (blocks == 0)
- panic("invalid block count");
+ KASSERT(blocks > 0, ("invalid block count"));
/*
* Calculate the radix and node count used for scanning.
@@ -286,12 +297,14 @@ blist_destroy(blist_t bl)
* not be allocated.
*/
daddr_t
-blist_alloc(blist_t bl, daddr_t count)
+blist_alloc(blist_t bl, int *count, int maxcount)
{
- daddr_t blk;
+ daddr_t blk, cursor;
- if (count > BLIST_MAX_ALLOC)
- panic("allocation too large");
+ KASSERT(*count <= maxcount,
+ ("invalid parameters %d > %d", *count, maxcount));
+ KASSERT(*count <= BLIST_MAX_ALLOC,
+ ("minimum allocation too large: %d", *count));
/*
* This loop iterates at most twice. An allocation failure in the
@@ -299,18 +312,18 @@ blist_alloc(blist_t bl, daddr_t count)
* non-zero. When the cursor is zero, an allocation failure will
* stop further iterations.
*/
- for (;;) {
- blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
+ for (cursor = bl->bl_cursor;; cursor = 0) {
+ blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
bl->bl_radix);
if (blk != SWAPBLK_NONE) {
- bl->bl_avail -= count;
- bl->bl_cursor = blk + count;
+ bl->bl_avail -= *count;
+ bl->bl_cursor = blk + *count;
if (bl->bl_cursor == bl->bl_blocks)
bl->bl_cursor = 0;
return (blk);
- } else if (bl->bl_cursor == 0)
+ }
+ if (cursor == 0)
return (SWAPBLK_NONE);
- bl->bl_cursor = 0;
}
}
@@ -326,15 +339,15 @@ blist_avail(blist_t bl)
/*
* blist_free() - free up space in the block bitmap. Return the base
- * of a contiguous region. Panic if an inconsistancy is
- * found.
+ * of a contiguous region.
*/
void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
- if (blkno < 0 || blkno + count > bl->bl_blocks)
- panic("freeing invalid range");
+ KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
+ ("freeing invalid range: blkno %jx, count %d, blocks %jd",
+ (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail += count;
}
@@ -350,8 +363,9 @@ blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
daddr_t filled;
- if (blkno < 0 || blkno + count > bl->bl_blocks)
- panic("filling invalid range");
+ KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
+ ("filling invalid range: blkno %jx, count %d, blocks %jd",
+ (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail -= filled;
return (filled);
@@ -533,7 +547,8 @@ blist_stats(blist_t bl, struct sbuf *s)
struct gap_stats gstats;
struct gap_stats *stats = &gstats;
daddr_t i, nodes, radix;
- u_daddr_t bit, diff, mask;
+ u_daddr_t diff, mask;
+ int digit;
init_gap_stats(stats);
nodes = 0;
@@ -571,9 +586,9 @@ blist_stats(blist_t bl, struct sbuf *s)
if (gap_stats_counting(stats))
diff ^= 1;
while (diff != 0) {
- bit = diff & -diff;
- update_gap_stats(stats, i + bitpos(bit));
- diff ^= bit;
+ digit = bitpos(diff);
+ update_gap_stats(stats, i + digit);
+ diff ^= bitrange(digit, 1);
}
}
nodes += radix_to_skip(radix);
@@ -594,53 +609,104 @@ blist_stats(blist_t bl, struct sbuf *s)
*/
/*
- * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf.
+ * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
*
- * 'scan' is a leaf node, associated with a block containing 'blk'.
- * The next leaf node could be adjacent, or several nodes away if the
- * least common ancestor of 'scan' and its neighbor is several levels
- * up. Use 'blk' to determine how many meta-nodes lie between the
- * leaves. If the next leaf has enough initial bits set, clear them
- * and clear the bits in the meta nodes on the path up to the least
- * common ancestor to mark any subtrees made completely empty.
+ * 'scan' is a leaf node, and its first block is at address 'start'. The
+ * next leaf node could be adjacent, or several nodes away if the least
+ * common ancestor of 'scan' and its neighbor is several levels up. Use
+ * addresses to determine how many meta-nodes lie between the leaves. If the
+ * sequence of leaves starting with the next one has enough initial bits
+ * set, clear them and clear the bits in the meta nodes on the path up to
+ * the least common ancestor to mark any subtrees made completely empty.
*/
static int
-blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
+blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
{
- blmeta_t *next;
- daddr_t skip;
u_daddr_t radix;
- int digit;
+ daddr_t blk;
+ int avail, digit;
- next = scan + 1;
- blk += BLIST_BMAP_RADIX;
- radix = BLIST_BMAP_RADIX;
- while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 &&
- (next->bm_bitmap & 1) == 1) {
- next++;
- radix *= BLIST_META_RADIX;
- }
- if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) {
- /*
- * The next leaf doesn't have enough free blocks at the
- * beginning to complete the spanning allocation.
- */
- return (ENOMEM);
+ start += BLIST_BMAP_RADIX;
+ for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) {
+ /* Skip meta-nodes, as long as they promise more free blocks. */
+ radix = BLIST_BMAP_RADIX;
+ while (((++scan)->bm_bitmap & 1) == 1 &&
+ ((blk / radix) & BLIST_META_MASK) == 0)
+ radix *= BLIST_META_RADIX;
+ if (~scan->bm_bitmap != 0) {
+ /*
+ * Either there is no next leaf with any free blocks,
+ * or we've reached the next leaf and found that some
+ * of its blocks are not free. In the first case,
+ * bitpos() returns zero here.
+ */
+ avail = blk - start + bitpos(~scan->bm_bitmap);
+ if (avail < count || avail == 0) {
+ /*
+ * There isn't a next leaf with enough free
+ * blocks at its beginning to bother
+ * allocating.
+ */
+ return (avail);
+ }
+ maxcount = imin(avail, maxcount);
+ if (maxcount % BLIST_BMAP_RADIX == 0) {
+ /*
+ * There was no next leaf. Back scan up to
+ * last leaf.
+ */
+ --scan;
+ while (radix != BLIST_BMAP_RADIX) {
+ radix /= BLIST_META_RADIX;
+ --scan;
+ }
+ blk -= BLIST_BMAP_RADIX;
+ }
+ }
}
- /* Clear the first 'count' bits in the next leaf to allocate. */
- next->bm_bitmap &= (u_daddr_t)-1 << count;
-
+
/*
- * Update bitmaps of next-ancestors, up to least common ancestor.
+ * 'scan' is the last leaf that provides blocks. Clear from 1 to
+ * BLIST_BMAP_RADIX bits to represent the allocation of those last
+ * blocks.
*/
- skip = radix_to_skip(radix);
- while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) {
- (--next)->bm_bitmap ^= 1;
- radix /= BLIST_META_RADIX;
+ if (maxcount % BLIST_BMAP_RADIX != 0)
+ scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX);
+ else
+ scan->bm_bitmap = 0;
+
+ for (;;) {
+ /* Back up over meta-nodes, clearing bits if necessary. */
+ blk -= BLIST_BMAP_RADIX;
+ radix = BLIST_BMAP_RADIX;
+ while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
+ if ((scan--)->bm_bitmap == 0)
+ scan->bm_bitmap ^= 1;
+ radix *= BLIST_META_RADIX;
+ }
+ if ((scan--)->bm_bitmap == 0)
+ scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
+ (u_daddr_t)1 << digit;
+
+ if (blk == start)
+ break;
+ /* Clear all the bits of this leaf. */
+ scan->bm_bitmap = 0;
}
- if (next->bm_bitmap == 0)
- scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit;
- return (0);
+ return (maxcount);
+}
+
+/*
+ * Invert every bit of mask from its least-significant 1-bit upward, and
+ * leave the (zero) bits below it unchanged.  A non-zero result therefore
+ * has its least-significant 1-bit at the position of the lowest 0-bit of
+ * mask that lies above mask's least-significant 1-bit.  The result is zero
+ * when mask has no such 0-bit.
+ */
+static inline u_daddr_t
+flip_hibits(u_daddr_t mask)
+{
+
+	return (~mask & -mask);
+}
/*
@@ -651,16 +717,16 @@ blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* crosses a leaf boundary.
*/
static daddr_t
-blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
+blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
{
u_daddr_t cursor_mask, mask;
int count1, hi, lo, num_shifts, range1, range_ext;
range1 = 0;
- count1 = count - 1;
+ count1 = *count - 1;
num_shifts = fls(count1);
mask = scan->bm_bitmap;
- while ((-mask & ~mask) != 0 && num_shifts > 0) {
+ while (flip_hibits(mask) != 0 && num_shifts > 0) {
/*
* If bit i is set in mask, then bits in [i, i+range1] are set
* in scan->bm_bitmap. The value of range1 is equal to count1
@@ -712,40 +778,50 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
/*
* The least significant set bit in mask marks the start of the first
- * available range of sufficient size. Clear all the bits but that one,
- * and then find its position.
+ * available range of sufficient size. Find its position.
*/
- mask &= -mask;
lo = bitpos(mask);
- hi = lo + count;
- if (hi > BLIST_BMAP_RADIX) {
- /*
- * An allocation within this leaf is impossible, so a successful
- * allocation depends on the next leaf providing some of the blocks.
- */
- if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0)
+ /*
+ * Find how much space is available starting at that position.
+ */
+ if (flip_hibits(mask) != 0) {
+ /* Count the 1 bits starting at position lo. */
+ hi = bitpos(flip_hibits(mask)) + count1;
+ if (maxcount < hi - lo)
+ hi = lo + maxcount;
+ *count = hi - lo;
+ mask = bitrange(lo, *count);
+ } else if (maxcount <= BLIST_BMAP_RADIX - lo) {
+ /* All the blocks we can use are available here. */
+ hi = lo + maxcount;
+ *count = maxcount;
+ mask = bitrange(lo, *count);
+ } else {
+ /* Check next leaf for some of the blocks we want or need. */
+ count1 = *count - (BLIST_BMAP_RADIX - lo);
+ maxcount -= BLIST_BMAP_RADIX - lo;
+ hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
+ if (hi < count1)
/*
- * The hint cannot be updated, because the same
- * allocation request could be satisfied later, by this
- * leaf, if the state of the next leaf changes, and
- * without any changes to this leaf.
+ * The next leaf cannot supply enough blocks to reach
+ * the minimum required allocation. The hint cannot be
+ * updated, because the same allocation request could
+ * be satisfied later, by this leaf, if the state of
+ * the next leaf changes, and without any changes to
+ * this leaf.
*/
return (SWAPBLK_NONE);
+ *count = BLIST_BMAP_RADIX - lo + hi;
hi = BLIST_BMAP_RADIX;
}
- /* Set the bits of mask at position 'lo' and higher. */
- mask = -mask;
if (hi == BLIST_BMAP_RADIX) {
/*
* Update bighint. There is no allocation bigger than range1
* available in this leaf after this allocation completes.
*/
scan->bm_bighint = range1;
- } else {
- /* Clear the bits of mask at position 'hi' and higher. */
- mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
}
/* Clear the allocated bits from this leaf. */
scan->bm_bitmap &= ~mask;
@@ -761,15 +837,16 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* and we have a few optimizations strewn in as well.
*/
static daddr_t
-blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
+blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
+ int maxcount, u_daddr_t radix)
{
daddr_t blk, i, r, skip;
- u_daddr_t bit, mask;
+ u_daddr_t mask;
bool scan_from_start;
int digit;
if (radix == BLIST_BMAP_RADIX)
- return (blst_leaf_alloc(scan, cursor, count));
+ return (blst_leaf_alloc(scan, cursor, count, maxcount));
blk = cursor & -radix;
scan_from_start = (cursor == blk);
radix /= BLIST_META_RADIX;
@@ -796,23 +873,22 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
* Examine the nonempty subtree associated with each bit set in mask.
*/
do {
- bit = mask & -mask;
- digit = bitpos(bit);
+ digit = bitpos(mask);
i = 1 + digit * skip;
- if (count <= scan[i].bm_bighint) {
+ if (*count <= scan[i].bm_bighint) {
/*
* The allocation might fit beginning in the i'th subtree.
*/
r = blst_meta_alloc(&scan[i], cursor + digit * radix,
- count, radix);
+ count, maxcount, radix);
if (r != SWAPBLK_NONE) {
if (scan[i].bm_bitmap == 0)
- scan->bm_bitmap ^= bit;
+ scan->bm_bitmap ^= bitrange(digit, 1);
return (r);
}
}
cursor = blk;
- } while ((mask ^= bit) != 0);
+ } while ((mask ^= bitrange(digit, 1)) != 0);
/*
* We couldn't allocate count in this subtree. If the whole tree was
@@ -820,7 +896,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
*/
if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
scan[i].bm_bighint == BLIST_MAX_ALLOC))
- scan->bm_bighint = count - 1;
+ scan->bm_bighint = *count - 1;
return (SWAPBLK_NONE);
}
@@ -841,8 +917,9 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
* count n
*/
mask = bitrange(blk & BLIST_BMAP_MASK, count);
- if (scan->bm_bitmap & mask)
- panic("freeing free block");
+ KASSERT((scan->bm_bitmap & mask) == 0,
+ ("freeing free block: %jx, size %d, mask %jx",
+ (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
scan->bm_bitmap |= mask;
}
@@ -1006,7 +1083,7 @@ static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
{
daddr_t skip;
- u_daddr_t bit, mask;
+ u_daddr_t mask;
int digit;
if (radix == BLIST_BMAP_RADIX) {
@@ -1038,11 +1115,10 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
mask = scan->bm_bitmap;
/* Examine the nonempty subtree associated with each bit set in mask */
do {
- bit = mask & -mask;
- digit = bitpos(bit);
+ digit = bitpos(mask);
blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
radix, tab);
- } while ((mask ^= bit) != 0);
+ } while ((mask ^= bitrange(digit, 1)) != 0);
tab -= 4;
printf(
@@ -1079,7 +1155,7 @@ main(int ac, char **av)
for (;;) {
char buf[1024];
long long da = 0;
- long long count = 0;
+ int count = 0, maxcount = 0;
printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
(long long)size, (long long)bl->bl_radix);
@@ -1088,7 +1164,7 @@ main(int ac, char **av)
break;
switch(buf[0]) {
case 'r':
- if (sscanf(buf + 1, "%lld", &count) == 1) {
+ if (sscanf(buf + 1, "%d", &count) == 1) {
blist_resize(&bl, count, 1, M_WAITOK);
} else {
printf("?\n");
@@ -1104,22 +1180,23 @@ main(int ac, char **av)
sbuf_delete(s);
break;
case 'a':
- if (sscanf(buf + 1, "%lld", &count) == 1) {
- daddr_t blk = blist_alloc(bl, count);
- printf(" R=%08llx\n", (long long)blk);
+ if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
+ daddr_t blk = blist_alloc(bl, &count, maxcount);
+ printf(" R=%08llx, c=%08d\n",
+ (long long)blk, count);
} else {
printf("?\n");
}
break;
case 'f':
- if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
+ if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
blist_free(bl, da, count);
} else {
printf("?\n");
}
break;
case 'l':
- if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
+ if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
printf(" n=%jd\n",
(intmax_t)blist_fill(bl, da, count));
} else {
@@ -1131,31 +1208,24 @@ main(int ac, char **av)
puts(
"p -print\n"
"s -stats\n"
- "a %d -allocate\n"
+ "a %d %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"
"r %d -resize\n"
- "h/? -help"
+ "h/? -help\n"
+ "q -quit"
);
break;
+ case 'q':
+ break;
default:
printf("?\n");
break;
}
+ if (buf[0] == 'q')
+ break;
}
- return(0);
-}
-
-void
-panic(const char *ctl, ...)
-{
- va_list va;
-
- va_start(va, ctl);
- vfprintf(stderr, ctl, va);
- fprintf(stderr, "\n");
- va_end(va);
- exit(1);
+ return (0);
}
#endif
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index a87c02a5..244f1af3 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -2472,13 +2472,31 @@ device_print_prettyname(device_t dev)
int
device_printf(device_t dev, const char * fmt, ...)
{
+ char buf[128];
+ struct sbuf sb;
+ const char *name;
va_list ap;
- int retval;
+ size_t retval;
+
+ retval = 0;
+
+ sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
+ sbuf_set_drain(&sb, sbuf_printf_drain, &retval);
+
+ name = device_get_name(dev);
+
+ if (name == NULL)
+ sbuf_cat(&sb, "unknown: ");
+ else
+ sbuf_printf(&sb, "%s%d: ", name, device_get_unit(dev));
- retval = device_print_prettyname(dev);
va_start(ap, fmt);
- retval += vprintf(fmt, ap);
+ sbuf_vprintf(&sb, fmt, ap);
va_end(ap);
+
+ sbuf_finish(&sb);
+ sbuf_delete(&sb);
+
return (retval);
}
@@ -3050,6 +3068,10 @@ device_detach(device_t dev)
PDEBUG(("%s", DEVICENAME(dev)));
if (dev->state == DS_BUSY)
return (EBUSY);
+ if (dev->state == DS_ATTACHING) {
+ device_printf(dev, "device in attaching state! Deferring detach.\n");
+ return (EBUSY);
+ }
if (dev->state != DS_ATTACHED)
return (0);
@@ -3914,6 +3936,95 @@ bus_generic_resume(device_t dev)
return (0);
}
+
+/**
+ * @brief Helper function for implementing BUS_RESET_POST
+ *
+ * Bus can use this function to implement common operations of
+ * re-attaching or resuming the children after the bus itself was
+ * reset, and after restoring bus-unique state of children.
+ *
+ * @param dev The bus
+ * @param flags DEVF_RESET_*
+ */
+int
+bus_helper_reset_post(device_t dev, int flags)
+{
+ device_t child;
+ int error, error1;
+
+ error = 0;
+ TAILQ_FOREACH(child, &dev->children,link) {
+ BUS_RESET_POST(dev, child);
+ error1 = (flags & DEVF_RESET_DETACH) != 0 ?
+ device_probe_and_attach(child) :
+ BUS_RESUME_CHILD(dev, child);
+ if (error == 0 && error1 != 0)
+ error = error1;
+ }
+ return (error);
+}
+
+static void
+bus_helper_reset_prepare_rollback(device_t dev, device_t child, int flags)
+{
+
+ child = TAILQ_NEXT(child, link);
+ if (child == NULL)
+ return;
+ TAILQ_FOREACH_FROM(child, &dev->children,link) {
+ BUS_RESET_POST(dev, child);
+ if ((flags & DEVF_RESET_DETACH) != 0)
+ device_probe_and_attach(child);
+ else
+ BUS_RESUME_CHILD(dev, child);
+ }
+}
+
+/**
+ * @brief Helper function for implementing BUS_RESET_PREPARE
+ *
+ * Bus can use this function to implement common operations of
+ * detaching or suspending the children before the bus itself is
+ * reset, and then save bus-unique state of children that must
+ * persist across reset.
+ *
+ * @param dev The bus
+ * @param flags DEVF_RESET_*
+ */
+int
+bus_helper_reset_prepare(device_t dev, int flags)
+{
+ device_t child;
+ int error;
+
+ if (dev->state != DS_ATTACHED)
+ return (EBUSY);
+
+ TAILQ_FOREACH_REVERSE(child, &dev->children, device_list, link) {
+ if ((flags & DEVF_RESET_DETACH) != 0) {
+ error = device_get_state(child) == DS_ATTACHED ?
+ device_detach(child) : 0;
+ } else {
+ error = BUS_SUSPEND_CHILD(dev, child);
+ }
+ if (error == 0) {
+ error = BUS_RESET_PREPARE(dev, child);
+ if (error != 0) {
+ if ((flags & DEVF_RESET_DETACH) != 0)
+ device_probe_and_attach(child);
+ else
+ BUS_RESUME_CHILD(dev, child);
+ }
+ }
+ if (error != 0) {
+ bus_helper_reset_prepare_rollback(dev, child, flags);
+ return (error);
+ }
+ }
+ return (0);
+}
+
/**
* @brief Helper function for implementing BUS_PRINT_CHILD().
*
@@ -5613,6 +5724,7 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case DEV_CLEAR_DRIVER:
case DEV_RESCAN:
case DEV_DELETE:
+ case DEV_RESET:
error = priv_check(td, PRIV_DRIVER);
if (error == 0)
error = find_device(req, &dev);
@@ -5839,6 +5951,14 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
device_frozen = false;
}
break;
+ case DEV_RESET:
+ if ((req->dr_flags & ~(DEVF_RESET_DETACH)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ error = BUS_RESET_CHILD(device_get_parent(dev), dev,
+ req->dr_flags);
+ break;
}
#endif /* __rtems__ */
mtx_unlock(&Giant);
@@ -5864,8 +5984,9 @@ devctl2_init(void)
*/
static int obsolete_panic = 0;
SYSCTL_INT(_debug, OID_AUTO, obsolete_panic, CTLFLAG_RWTUN, &obsolete_panic, 0,
- "Bus debug level");
-/* 0 - don't panic, 1 - panic if already obsolete, 2 - panic if deprecated */
+ "Panic when obsolete features are used (0 = never, 1 = if osbolete, "
+ "2 = if deprecated)");
+
static void
gone_panic(int major, int running, const char *msg)
{
@@ -5890,7 +6011,7 @@ _gone_in(int major, const char *msg)
gone_panic(major, P_OSREL_MAJOR(__FreeBSD_version), msg);
if (P_OSREL_MAJOR(__FreeBSD_version) >= major)
printf("Obsolete code will removed soon: %s\n", msg);
- else if (P_OSREL_MAJOR(__FreeBSD_version) + 1 == major)
+ else
printf("Deprecated code (to be removed in FreeBSD %d): %s\n",
major, msg);
}
@@ -5903,7 +6024,7 @@ _gone_in_dev(device_t dev, int major, const char *msg)
if (P_OSREL_MAJOR(__FreeBSD_version) >= major)
device_printf(dev,
"Obsolete code will removed soon: %s\n", msg);
- else if (P_OSREL_MAJOR(__FreeBSD_version) + 1 == major)
+ else
device_printf(dev,
"Deprecated code (to be removed in FreeBSD %d): %s\n",
major, msg);
diff --git a/freebsd/sys/kern/subr_eventhandler.c b/freebsd/sys/kern/subr_eventhandler.c
index e07248bf..6d36653d 100644
--- a/freebsd/sys/kern/subr_eventhandler.c
+++ b/freebsd/sys/kern/subr_eventhandler.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c
index 3f80cd2c..af9b65d4 100644
--- a/freebsd/sys/kern/subr_gtaskqueue.c
+++ b/freebsd/sys/kern/subr_gtaskqueue.c
@@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
-#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
@@ -69,6 +68,8 @@ struct gtaskqueue_busy {
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
+typedef void (*gtaskqueue_enqueue_fn)(void *context);
+
struct gtaskqueue {
STAILQ_HEAD(, gtask) tq_queue;
gtaskqueue_enqueue_fn tq_enqueue;
@@ -697,7 +698,7 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
}
}
if (idx == -1)
- panic("taskqgroup_find: Failed to pick a qid.");
+ panic("%s: failed to pick a qid.", __func__);
return (idx);
}
@@ -733,36 +734,36 @@ SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int irq, const char *name)
+ void *uniq, device_t dev, struct resource *irq, const char *name)
{
#ifndef __rtems__
- cpuset_t mask;
- int qid, error;
+ int cpu, qid, error;
#else /* __rtems__ */
int qid;
#endif /* __rtems__ */
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
+#ifndef __rtems__
+ gtask->gt_dev = dev;
gtask->gt_irq = irq;
gtask->gt_cpu = -1;
+#endif /* __rtems__ */
mtx_lock(&qgroup->tqg_lock);
qid = taskqgroup_find(qgroup, uniq);
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
#ifndef __rtems__
- if (irq != -1 && tqg_smp_started) {
- gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
- CPU_ZERO(&mask);
- CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+ if (dev != NULL && irq != NULL && tqg_smp_started) {
+ cpu = qgroup->tqg_queue[qid].tgc_cpu;
+ gtask->gt_cpu = cpu;
mtx_unlock(&qgroup->tqg_lock);
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
} else
-#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
#endif /* __rtems__ */
mtx_unlock(&qgroup->tqg_lock);
}
@@ -771,7 +772,6 @@ static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
#ifndef __rtems__
- cpuset_t mask;
int qid, cpu, error;
#else /* __rtems__ */
int qid;
@@ -781,24 +781,18 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
qid = taskqgroup_find(qgroup, gtask->gt_uniq);
#ifndef __rtems__
cpu = qgroup->tqg_queue[qid].tgc_cpu;
- if (gtask->gt_irq != -1) {
+ if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
mtx_unlock(&qgroup->tqg_lock);
-
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
- error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
+ error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
mtx_lock(&qgroup->tqg_lock);
if (error)
- printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
-#else /* __rtems__ */
- BSD_ASSERT(gtask->gt_irq == -1);
#endif /* __rtems__ */
qgroup->tqg_queue[qid].tgc_cnt++;
-
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
- gt_list);
+ LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
mtx_unlock(&qgroup->tqg_lock);
@@ -806,10 +800,9 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int cpu, int irq, const char *name)
+ void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
{
#ifndef __rtems__
- cpuset_t mask;
int i, qid, error;
#else /* __rtems__ */
int i, qid;
@@ -818,8 +811,11 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
qid = -1;
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
+#ifndef __rtems__
+ gtask->gt_dev = dev;
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
+#endif /* __rtems__ */
mtx_lock(&qgroup->tqg_lock);
if (tqg_smp_started) {
for (i = 0; i < qgroup->tqg_cnt; i++)
@@ -843,30 +839,28 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
mtx_unlock(&qgroup->tqg_lock);
#ifndef __rtems__
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
- if (irq != -1 && tqg_smp_started) {
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (dev != NULL && irq != NULL && tqg_smp_started) {
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
+ BSD_ASSERT(irq == NULL);
#endif /* __rtems__ */
return (0);
}
+#ifndef __rtems__
static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
-#ifndef __rtems__
- cpuset_t mask;
- int i, qid, irq, cpu, error;
-#else /* __rtems__ */
- int i, qid, irq, cpu;
-#endif /* __rtems__ */
+ device_t dev;
+ struct resource *irq;
+ int cpu, error, i, qid;
qid = -1;
+ dev = gtask->gt_dev;
irq = gtask->gt_irq;
cpu = gtask->gt_cpu;
MPASS(tqg_smp_started);
@@ -887,20 +881,15 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
mtx_unlock(&qgroup->tqg_lock);
-#ifndef __rtems__
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
-
- if (irq != -1) {
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (dev != NULL && irq != NULL) {
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
-#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
-#endif /* __rtems__ */
return (0);
}
+#endif /* __rtems__ */
void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
@@ -913,7 +902,7 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
break;
if (i == qgroup->tqg_cnt)
- panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
+ panic("%s: task %s not in group", __func__, gtask->gt_name);
qgroup->tqg_queue[i].tgc_cnt--;
LIST_REMOVE(gtask, gt_list);
mtx_unlock(&qgroup->tqg_lock);
@@ -941,8 +930,7 @@ taskqgroup_binder(void *ctx)
thread_unlock(curthread);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__,
- error);
+ printf("%s: binding curthread failed: %d\n", __func__, error);
#else /* __rtems__ */
sc = rtems_task_set_affinity(RTEMS_SELF, sizeof(mask), &mask);
if (sc != RTEMS_SUCCESSFUL)
@@ -1053,10 +1041,14 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
while ((gtask = LIST_FIRST(&gtask_head))) {
LIST_REMOVE(gtask, gt_list);
+#ifndef __rtems__
if (gtask->gt_cpu == -1)
+#endif /* __rtems__ */
taskqgroup_attach_deferred(qgroup, gtask);
+#ifndef __rtems__
else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
taskqgroup_attach_deferred(qgroup, gtask);
+#endif /* __rtems__ */
}
#ifdef INVARIANTS
@@ -1115,15 +1107,16 @@ taskqgroup_destroy(struct taskqgroup *qgroup)
void
taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
- const char *name)
+ const char *name)
{
GROUPTASK_INIT(gtask, 0, fn, ctx);
- taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
+ taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
}
void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{
+
taskqgroup_detach(qgroup_config, gtask);
}
diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c
index a6a888d5..3736f64c 100644
--- a/freebsd/sys/kern/subr_kobj.c
+++ b/freebsd/sys/kern/subr_kobj.c
@@ -127,35 +127,40 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops)
cls->ops = ops;
}
-void
-kobj_class_compile(kobj_class_t cls)
+static int
+kobj_class_compile1(kobj_class_t cls, int mflags)
{
kobj_ops_t ops;
KOBJ_ASSERT(MA_NOTOWNED);
- /*
- * Allocate space for the compiled ops table.
- */
- ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT);
- if (!ops)
- panic("%s: out of memory", __func__);
+ ops = malloc(sizeof(struct kobj_ops), M_KOBJ, mflags);
+ if (ops == NULL)
+ return (ENOMEM);
- KOBJ_LOCK();
-
/*
* We may have lost a race for kobj_class_compile here - check
* to make sure someone else hasn't already compiled this
* class.
*/
+ KOBJ_LOCK();
if (cls->ops) {
KOBJ_UNLOCK();
free(ops, M_KOBJ);
- return;
+ return (0);
}
-
kobj_class_compile_common(cls, ops);
KOBJ_UNLOCK();
+ return (0);
+}
+
+void
+kobj_class_compile(kobj_class_t cls)
+{
+ int error;
+
+ error = kobj_class_compile1(cls, M_WAITOK);
+ KASSERT(error == 0, ("kobj_class_compile1 returned %d", error));
}
void
@@ -256,24 +261,6 @@ kobj_class_free(kobj_class_t cls)
free(ops, M_KOBJ);
}
-kobj_t
-kobj_create(kobj_class_t cls,
- struct malloc_type *mtype,
- int mflags)
-{
- kobj_t obj;
-
- /*
- * Allocate and initialise the new object.
- */
- obj = malloc(cls->size, mtype, mflags | M_ZERO);
- if (!obj)
- return NULL;
- kobj_init(obj, cls);
-
- return obj;
-}
-
static void
kobj_init_common(kobj_t obj, kobj_class_t cls)
{
@@ -282,30 +269,52 @@ kobj_init_common(kobj_t obj, kobj_class_t cls)
cls->refs++;
}
-void
-kobj_init(kobj_t obj, kobj_class_t cls)
+static int
+kobj_init1(kobj_t obj, kobj_class_t cls, int mflags)
{
- KOBJ_ASSERT(MA_NOTOWNED);
- retry:
- KOBJ_LOCK();
+ int error;
- /*
- * Consider compiling the class' method table.
- */
- if (!cls->ops) {
+ KOBJ_LOCK();
+ while (cls->ops == NULL) {
/*
* kobj_class_compile doesn't want the lock held
* because of the call to malloc - we drop the lock
* and re-try.
*/
KOBJ_UNLOCK();
- kobj_class_compile(cls);
- goto retry;
+ error = kobj_class_compile1(cls, mflags);
+ if (error != 0)
+ return (error);
+ KOBJ_LOCK();
}
-
kobj_init_common(obj, cls);
-
KOBJ_UNLOCK();
+ return (0);
+}
+
+kobj_t
+kobj_create(kobj_class_t cls, struct malloc_type *mtype, int mflags)
+{
+ kobj_t obj;
+
+ obj = malloc(cls->size, mtype, mflags | M_ZERO);
+ if (obj == NULL)
+ return (NULL);
+ if (kobj_init1(obj, cls, mflags) != 0) {
+ free(obj, mtype);
+ return (NULL);
+ }
+ return (obj);
+}
+
+void
+kobj_init(kobj_t obj, kobj_class_t cls)
+{
+ int error;
+
+ error = kobj_init1(obj, cls, M_NOWAIT);
+ if (error != 0)
+ panic("kobj_init1 failed: error %d", error);
}
void
diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c
index c2587cd0..53d99743 100644
--- a/freebsd/sys/kern/subr_lock.c
+++ b/freebsd/sys/kern/subr_lock.c
@@ -4,7 +4,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -258,7 +257,9 @@ struct lock_prof_cpu {
struct lock_prof_type lpc_types[2]; /* One for spin one for other. */
};
-struct lock_prof_cpu *lp_cpu[MAXCPU];
+DPCPU_DEFINE_STATIC(struct lock_prof_cpu, lp);
+#define LP_CPU_SELF (DPCPU_PTR(lp))
+#define LP_CPU(cpu) (DPCPU_ID_PTR((cpu), lp))
volatile int __read_mostly lock_prof_enable;
static volatile int lock_prof_resetting;
@@ -304,11 +305,9 @@ lock_prof_init(void *arg)
{
int cpu;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF,
- M_WAITOK | M_ZERO);
- lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]);
- lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]);
+ CPU_FOREACH(cpu) {
+ lock_prof_init_type(&LP_CPU(cpu)->lpc_types[0]);
+ lock_prof_init_type(&LP_CPU(cpu)->lpc_types[1]);
}
}
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
@@ -347,15 +346,15 @@ lock_prof_reset(void)
* before we zero the structures. Some items may still be linked
* into per-thread lists as well.
*/
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lpc = lp_cpu[cpu];
+ CPU_FOREACH(cpu) {
+ lpc = LP_CPU(cpu);
for (i = 0; i < LPROF_CACHE_SIZE; i++) {
LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link);
LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link);
}
}
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lpc = lp_cpu[cpu];
+ CPU_FOREACH(cpu) {
+ lpc = LP_CPU(cpu);
bzero(lpc, sizeof(*lpc));
lock_prof_init_type(&lpc->lpc_types[0]);
lock_prof_init_type(&lpc->lpc_types[1]);
@@ -395,10 +394,8 @@ lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
dst->class = match->class;
dst->name = match->name;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- if (lp_cpu[cpu] == NULL)
- continue;
- type = &lp_cpu[cpu]->lpc_types[spin];
+ CPU_FOREACH(cpu) {
+ type = &LP_CPU(cpu)->lpc_types[spin];
SLIST_FOREACH(l, &type->lpt_hash[hash], link) {
if (l->ticks == t)
continue;
@@ -416,7 +413,6 @@ lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
dst->cnt_contest_locking += l->cnt_contest_locking;
}
}
-
}
static void
@@ -455,11 +451,9 @@ dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
lock_prof_enable = 0;
quiesce_all_cpus("profstat", 0);
t = ticks;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- if (lp_cpu[cpu] == NULL)
- continue;
- lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t);
- lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t);
+ CPU_FOREACH(cpu) {
+ lock_prof_type_stats(&LP_CPU(cpu)->lpc_types[0], sb, 0, t);
+ lock_prof_type_stats(&LP_CPU(cpu)->lpc_types[1], sb, 1, t);
}
lock_prof_enable = enabled;
@@ -525,7 +519,7 @@ lock_profile_lookup(struct lock_object *lo, int spin, const char *file,
p = unknown;
hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line;
hash &= LPROF_HASH_MASK;
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
head = &type->lpt_hash[hash];
SLIST_FOREACH(lp, head, link) {
if (lp->line == line && lp->file == p &&
@@ -560,7 +554,7 @@ lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file,
if (l->lpo_obj == lo && l->lpo_file == file &&
l->lpo_line == line)
return (l);
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
l = LIST_FIRST(&type->lpt_lpoalloc);
if (l == NULL) {
lock_prof_rejected++;
@@ -696,7 +690,7 @@ lock_profile_release_lock(struct lock_object *lo)
lp->cnt_cur += l->lpo_cnt;
release:
LIST_REMOVE(l, lpo_link);
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link);
out:
critical_exit();
diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c
index 0ab77996..a3a06c78 100644
--- a/freebsd/sys/kern/subr_pcpu.c
+++ b/freebsd/sys/kern/subr_pcpu.c
@@ -136,24 +136,20 @@ SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, NULL);
/*
* UMA_PCPU_ZONE zones, that are available for all kernel
* consumers. Right now 64 bit zone is used for counter(9)
- * and pointer zone is used by flowtable.
+ * and int zone is used for mount point counters.
*/
+uma_zone_t pcpu_zone_int;
uma_zone_t pcpu_zone_64;
-uma_zone_t pcpu_zone_ptr;
static void
pcpu_zones_startup(void)
{
+ pcpu_zone_int = uma_zcreate("int pcpu", sizeof(int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
-
- if (sizeof(uint64_t) == sizeof(void *))
- pcpu_zone_ptr = pcpu_zone_64;
- else
- pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}
SYSINIT(pcpu_zones, SI_SUB_VM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c
index 2b45c13e..ed3c8498 100644
--- a/freebsd/sys/kern/subr_prf.c
+++ b/freebsd/sys/kern/subr_prf.c
@@ -70,6 +70,8 @@ __FBSDID("$FreeBSD$");
#include <sys/cons.h>
#endif /* __rtems__ */
#include <sys/uio.h>
+#else /* !_KERNEL */
+#include <errno.h>
#endif
#include <sys/ctype.h>
#include <sys/sbuf.h>
@@ -1300,3 +1302,46 @@ sbuf_putbuf(struct sbuf *sb)
printf("%s", sbuf_data(sb));
}
#endif /* __rtems__ */
+
+int
+sbuf_printf_drain(void *arg, const char *data, int len)
+{
+#ifndef __rtems__
+ size_t *retvalptr;
+ int r;
+#ifdef _KERNEL
+ char *dataptr;
+ char oldchr;
+
+ /*
+ * This is allowed as an extra byte is always reserved for the
+ * terminating NUL byte. Save and restore the byte because
+ * we might be flushing a record, and there may be valid
+ * data after the buffer.
+ */
+ oldchr = data[len];
+ dataptr = __DECONST(char *, data);
+ dataptr[len] = '\0';
+
+ prf_putbuf(dataptr, TOLOG | TOCONS, -1);
+ r = len;
+
+ dataptr[len] = oldchr;
+
+#else /* !_KERNEL */
+
+ r = printf("%.*s", len, data);
+ if (r < 0)
+ return (-errno);
+
+#endif
+
+ retvalptr = arg;
+ if (retvalptr != NULL)
+ *retvalptr += r;
+
+ return (r);
+#else /* __rtems__ */
+ return (printf("%.*s", len, data));
+#endif /* __rtems__ */
+}
diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c
index b51ed52c..42e6f8f0 100644
--- a/freebsd/sys/kern/subr_sbuf.c
+++ b/freebsd/sys/kern/subr_sbuf.c
@@ -58,11 +58,11 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
-#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK|M_ZERO)
+#define SBMALLOC(size, flags) malloc(size, M_SBUF, (flags) | M_ZERO)
#define SBFREE(buf) free(buf, M_SBUF)
#else /* _KERNEL */
#define KASSERT(e, m)
-#define SBMALLOC(size) calloc(1, size)
+#define SBMALLOC(size, flags) calloc(1, size)
#define SBFREE(buf) free(buf)
#endif /* _KERNEL */
@@ -72,6 +72,7 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_ISDYNAMIC(s) ((s)->s_flags & SBUF_DYNAMIC)
#define SBUF_ISDYNSTRUCT(s) ((s)->s_flags & SBUF_DYNSTRUCT)
#define SBUF_ISFINISHED(s) ((s)->s_flags & SBUF_FINISHED)
+#define SBUF_ISDRAINATEOL(s) ((s)->s_flags & SBUF_DRAINATEOL)
#define SBUF_HASROOM(s) ((s)->s_len < (s)->s_size - 1)
#define SBUF_FREESPACE(s) ((s)->s_size - ((s)->s_len + 1))
#define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND)
@@ -79,6 +80,8 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_NULINCLUDED(s) ((s)->s_flags & SBUF_INCLUDENUL)
#define SBUF_ISDRAINTOEOR(s) ((s)->s_flags & SBUF_DRAINTOEOR)
#define SBUF_DODRAINTOEOR(s) (SBUF_ISSECTION(s) && SBUF_ISDRAINTOEOR(s))
+#define SBUF_MALLOCFLAG(s) \
+ (((s)->s_flags & SBUF_NOWAIT) ? M_NOWAIT : M_WAITOK)
/*
* Set / clear flags
@@ -173,7 +176,7 @@ sbuf_extend(struct sbuf *s, int addlen)
if (!SBUF_CANEXTEND(s))
return (-1);
newsize = sbuf_extendsize(s->s_size + addlen);
- newbuf = SBMALLOC(newsize);
+ newbuf = SBMALLOC(newsize, SBUF_MALLOCFLAG(s));
if (newbuf == NULL)
return (-1);
memcpy(newbuf, s->s_buf, s->s_size);
@@ -187,39 +190,6 @@ sbuf_extend(struct sbuf *s, int addlen)
}
/*
- * Initialize the internals of an sbuf.
- * If buf is non-NULL, it points to a static or already-allocated string
- * big enough to hold at least length characters.
- */
-static struct sbuf *
-sbuf_newbuf(struct sbuf *s, char *buf, int length, int flags)
-{
-
- memset(s, 0, sizeof(*s));
- s->s_flags = flags;
- s->s_size = length;
- s->s_buf = buf;
-
- if ((s->s_flags & SBUF_AUTOEXTEND) == 0) {
- KASSERT(s->s_size >= SBUF_MINSIZE,
- ("attempt to create an sbuf smaller than %d bytes",
- SBUF_MINSIZE));
- }
-
- if (s->s_buf != NULL)
- return (s);
-
- if ((flags & SBUF_AUTOEXTEND) != 0)
- s->s_size = sbuf_extendsize(s->s_size);
-
- s->s_buf = SBMALLOC(s->s_size);
- if (s->s_buf == NULL)
- return (NULL);
- SBUF_SETFLAG(s, SBUF_DYNAMIC);
- return (s);
-}
-
-/*
* Initialize an sbuf.
* If buf is non-NULL, it points to a static or already-allocated string
* big enough to hold at least length characters.
@@ -232,19 +202,56 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags)
("attempt to create an sbuf of negative length (%d)", length));
KASSERT((flags & ~SBUF_USRFLAGMSK) == 0,
("%s called with invalid flags", __func__));
+ KASSERT((flags & SBUF_AUTOEXTEND) || length >= SBUF_MINSIZE,
+ ("sbuf buffer %d smaller than minimum %d bytes", length,
+ SBUF_MINSIZE));
flags &= SBUF_USRFLAGMSK;
- if (s != NULL)
- return (sbuf_newbuf(s, buf, length, flags));
- s = SBMALLOC(sizeof(*s));
- if (s == NULL)
- return (NULL);
- if (sbuf_newbuf(s, buf, length, flags) == NULL) {
- SBFREE(s);
- return (NULL);
+ /*
+ * Allocate 'DYNSTRUCT' sbuf from the heap, if NULL 's' was provided.
+ */
+ if (s == NULL) {
+ s = SBMALLOC(sizeof(*s),
+ (flags & SBUF_NOWAIT) ? M_NOWAIT : M_WAITOK);
+ if (s == NULL)
+ goto out;
+ SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
+ } else {
+ /*
+ * DYNSTRUCT SBMALLOC sbufs are allocated with M_ZERO, but
+ * user-provided sbuf objects must be initialized.
+ */
+ memset(s, 0, sizeof(*s));
+ }
+
+ s->s_flags |= flags;
+ s->s_size = length;
+ s->s_buf = buf;
+ /*
+ * Never-written sbufs do not need \n termination.
+ */
+ SBUF_SETFLAG(s, SBUF_DRAINATEOL);
+
+ /*
+ * Allocate DYNAMIC, i.e., heap data buffer backing the sbuf, if no
+ * buffer was provided.
+ */
+ if (s->s_buf == NULL) {
+ if (SBUF_CANEXTEND(s))
+ s->s_size = sbuf_extendsize(s->s_size);
+ s->s_buf = SBMALLOC(s->s_size, SBUF_MALLOCFLAG(s));
+ if (s->s_buf == NULL)
+ goto out;
+ SBUF_SETFLAG(s, SBUF_DYNAMIC);
+ }
+
+out:
+ if (s != NULL && s->s_buf == NULL) {
+ if (SBUF_ISDYNSTRUCT(s))
+ SBFREE(s);
+ s = NULL;
}
- SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
return (s);
}
@@ -310,6 +317,8 @@ sbuf_clear(struct sbuf *s)
assert_sbuf_integrity(s);
/* don't care if it's finished or not */
+ KASSERT(s->s_drain_func == NULL,
+ ("%s makes no sense on sbuf %p with drain", __func__, s));
SBUF_CLEARFLAG(s, SBUF_FINISHED);
s->s_error = 0;
@@ -344,6 +353,21 @@ sbuf_setpos(struct sbuf *s, ssize_t pos)
}
/*
+ * Drain into a counter. Counts amount of data without producing output.
+ * Useful for cases like sysctl, where user may first request only size.
+ * This avoids pointless allocation/freeing of large buffers.
+ */
+int
+sbuf_count_drain(void *arg, const char *data __unused, int len)
+{
+ size_t *sizep;
+
+ sizep = (size_t *)arg;
+ *sizep += len;
+ return (len);
+}
+
+/*
* Set up a drain function and argument on an sbuf to flush data to
* when the sbuf buffer overflows.
*/
@@ -369,6 +393,7 @@ sbuf_drain(struct sbuf *s)
KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s));
KASSERT(s->s_error == 0, ("Called %s with error on %p", __func__, s));
+
if (SBUF_DODRAINTOEOR(s) && s->s_rec_off == 0)
return (s->s_error = EDEADLK);
len = s->s_drain_func(s->s_drain_arg, s->s_buf,
@@ -385,8 +410,18 @@ sbuf_drain(struct sbuf *s)
* Fast path for the expected case where all the data was
* drained.
*/
- if (s->s_len == 0)
+ if (s->s_len == 0) {
+ /*
+ * When the s_buf is entirely drained, we need to remember if
+ * the last character was a '\n' or not for
+ * sbuf_nl_terminate().
+ */
+ if (s->s_buf[len - 1] == '\n')
+ SBUF_SETFLAG(s, SBUF_DRAINATEOL);
+ else
+ SBUF_CLEARFLAG(s, SBUF_DRAINATEOL);
return (0);
+ }
/*
* Move the remaining characters to the beginning of the
* string.
@@ -702,6 +737,38 @@ sbuf_putc(struct sbuf *s, int c)
}
/*
+ * Append a trailing newline to a non-empty sbuf, if one is not already
+ * present. Handles sbufs with drain functions correctly.
+ */
+int
+sbuf_nl_terminate(struct sbuf *s)
+{
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+
+ /*
+ * If the s_buf isn't empty, the last byte is simply s_buf[s_len - 1].
+ *
+ * If the s_buf is empty because a drain function drained it, we
+ * remember if the last byte was a \n with the SBUF_DRAINATEOL flag in
+ * sbuf_drain().
+ *
+ * In either case, we only append a \n if the previous character was
+ * something else.
+ */
+ if (s->s_len == 0) {
+ if (!SBUF_ISDRAINATEOL(s))
+ sbuf_put_byte(s, '\n');
+ } else if (s->s_buf[s->s_len - 1] != '\n')
+ sbuf_put_byte(s, '\n');
+
+ if (s->s_error != 0)
+ return (-1);
+ return (0);
+}
+
+/*
* Trim whitespace characters from end of an sbuf.
*/
int
diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c
index 57681cce..9665c02f 100644
--- a/freebsd/sys/kern/subr_sleepqueue.c
+++ b/freebsd/sys/kern/subr_sleepqueue.c
@@ -5,7 +5,6 @@
*
* Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
* Copyright (c) 2015 embedded brains GmbH <rtems@embedded-brains.de>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -132,7 +131,7 @@ CTASSERT(powerof2(SC_TABLESIZE));
* c - sleep queue chain lock
*/
struct sleepqueue {
- TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
+ struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
@@ -593,6 +592,19 @@ sleepq_catch_signals(void *wchan, int pri)
} else {
mtx_unlock(&ps->ps_mtx);
}
+
+ /*
+ * Do not go into sleep if this thread was the
+ * ptrace(2) attach leader. cursig() consumed
+ * SIGSTOP from PT_ATTACH, but we usually act
+ * on the signal by interrupting sleep, and
+ * should do that here as well.
+ */
+ if ((td->td_dbgflags & TDB_FSTP) != 0) {
+ if (ret == 0)
+ ret = EINTR;
+ td->td_dbgflags &= ~TDB_FSTP;
+ }
}
/*
* Lock the per-process spinlock prior to dropping the PROC_LOCK
@@ -1127,13 +1139,15 @@ sleepq_init(void *mem, int size, int flags)
}
/*
- * Find the highest priority thread sleeping on a wait channel and resume it.
+ * Find a thread sleeping on a wait channel and resume it.
*/
int
sleepq_signal(void *wchan, int flags, int pri, int queue)
{
+ struct sleepqueue_chain *sc;
struct sleepqueue *sq;
#ifndef __rtems__
+ struct threadqueue *head;
struct thread *td, *besttd;
#else /* __rtems__ */
struct thread *besttd;
@@ -1150,16 +1164,33 @@ sleepq_signal(void *wchan, int flags, int pri, int queue)
("%s: mismatch between sleep/wakeup and cv_*", __func__));
#ifndef __rtems__
- /*
- * Find the highest priority thread on the queue. If there is a
- * tie, use the thread that first appears in the queue as it has
- * been sleeping the longest since threads are always added to
- * the tail of sleep queues.
- */
- besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
- TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
- if (td->td_priority < besttd->td_priority)
+ head = &sq->sq_blocked[queue];
+ if (flags & SLEEPQ_UNFAIR) {
+ /*
+ * Find the most recently sleeping thread, but try to
+ * skip threads still in the process of a context switch to
+ * avoid spinning on the thread lock.
+ */
+ sc = SC_LOOKUP(wchan);
+ besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
+ while (besttd->td_lock != &sc->sc_lock) {
+ td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
+ if (td == NULL)
+ break;
besttd = td;
+ }
+ } else {
+ /*
+ * Find the highest priority thread on the queue. If there
+ * is a tie, use the thread that first appears in the queue
+ * as it has been sleeping the longest since threads are
+ * always added to the tail of sleep queues.
+ */
+ besttd = td = TAILQ_FIRST(head);
+ while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
+ if (td->td_priority < besttd->td_priority)
+ besttd = td;
+ }
}
#else /* __rtems__ */
besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index 39d9f939..67e62fc8 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -841,7 +841,7 @@ taskqueue_thread_enqueue(void *context)
tqp = context;
tq = *tqp;
- wakeup_one(tq);
+ wakeup_any(tq);
}
TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index cc208d6e..1bc4fa6b 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -772,7 +772,11 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
fp = NULL; /* fhold() was not called yet */
goto out;
}
- fhold(fp);
+ if (!fhold(fp)) {
+ error = EBADF;
+ fp = NULL;
+ goto out;
+ }
if (locked == LA_SLOCKED) {
FILEDESC_SUNLOCK(fdp);
locked = LA_UNLOCKED;
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index 050d63a4..d9b502f0 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -177,7 +177,6 @@ struct fileops pipeops = {
};
#else /* __rtems__ */
#define PIPE_NODIRECT
-#define PRIBIO (0)
static int rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path,
int oflag, mode_t mode);
@@ -433,9 +432,7 @@ void
pipe_dtor(struct pipe *dpipe)
{
struct pipe *peer;
- ino_t ino;
- ino = dpipe->pipe_ino;
peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
funsetown(&dpipe->pipe_sigio);
pipeclose(dpipe);
@@ -802,11 +799,9 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
/*
* Direct copy, bypassing a kernel buffer.
*/
- } else if ((size = rpipe->pipe_map.cnt) &&
- (rpipe->pipe_state & PIPE_DIRECTW)) {
+ } else if ((size = rpipe->pipe_map.cnt) != 0) {
if (size > uio->uio_resid)
size = (u_int) uio->uio_resid;
-
PIPE_UNLOCK(rpipe);
error = uiomove_fromphys(rpipe->pipe_map.ms,
rpipe->pipe_map.pos, size, uio);
@@ -817,7 +812,7 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
rpipe->pipe_map.pos += size;
rpipe->pipe_map.cnt -= size;
if (rpipe->pipe_map.cnt == 0) {
- rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
+ rpipe->pipe_state &= ~PIPE_WANTW;
wakeup(rpipe);
}
#endif
@@ -984,32 +979,33 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
u_int size;
int i;
- PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
- KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
- ("Clone attempt on non-direct write pipe!"));
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
+ ("%s: PIPE_DIRECTW set on %p", __func__, wpipe));
+ KASSERT(wpipe->pipe_map.cnt == 0,
+ ("%s: pipe map for %p contains residual data", __func__, wpipe));
if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
size = wpipe->pipe_buffer.size;
else
size = uio->uio_iov->iov_len;
- if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ wpipe->pipe_state |= PIPE_DIRECTW;
+ PIPE_UNLOCK(wpipe);
+ i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
(vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
- wpipe->pipe_map.ms, PIPENPAGES)) < 0)
+ wpipe->pipe_map.ms, PIPENPAGES);
+ PIPE_LOCK(wpipe);
+ if (i < 0) {
+ wpipe->pipe_state &= ~PIPE_DIRECTW;
return (EFAULT);
+ }
-/*
- * set up the control block
- */
wpipe->pipe_map.npages = i;
wpipe->pipe_map.pos =
((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
wpipe->pipe_map.cnt = size;
-/*
- * and update the uio data
- */
-
uio->uio_iov->iov_len -= size;
uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
if (uio->uio_iov->iov_len == 0)
@@ -1020,13 +1016,19 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
}
/*
- * unmap and unwire the process buffer
+ * Unwire the process buffer.
*/
static void
pipe_destroy_write_buffer(struct pipe *wpipe)
{
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
+ ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
+ KASSERT(wpipe->pipe_map.cnt == 0,
+ ("%s: pipe map for %p contains residual data", __func__, wpipe));
+
+ wpipe->pipe_state &= ~PIPE_DIRECTW;
vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
wpipe->pipe_map.npages = 0;
}
@@ -1045,13 +1047,16 @@ pipe_clone_write_buffer(struct pipe *wpipe)
int pos;
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
+ ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
+
size = wpipe->pipe_map.cnt;
pos = wpipe->pipe_map.pos;
+ wpipe->pipe_map.cnt = 0;
wpipe->pipe_buffer.in = size;
wpipe->pipe_buffer.out = 0;
wpipe->pipe_buffer.cnt = size;
- wpipe->pipe_state &= ~PIPE_DIRECTW;
PIPE_UNLOCK(wpipe);
iov.iov_base = wpipe->pipe_buffer.buffer;
@@ -1090,7 +1095,7 @@ retry:
pipeunlock(wpipe);
goto error1;
}
- while (wpipe->pipe_state & PIPE_DIRECTW) {
+ if (wpipe->pipe_state & PIPE_DIRECTW) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
wakeup(wpipe);
@@ -1105,7 +1110,6 @@ retry:
else
goto retry;
}
- wpipe->pipe_map.cnt = 0; /* transfer not ready yet */
if (wpipe->pipe_buffer.cnt > 0) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
@@ -1122,20 +1126,15 @@ retry:
goto retry;
}
- wpipe->pipe_state |= PIPE_DIRECTW;
-
- PIPE_UNLOCK(wpipe);
error = pipe_build_write_buffer(wpipe, uio);
- PIPE_LOCK(wpipe);
if (error) {
- wpipe->pipe_state &= ~PIPE_DIRECTW;
pipeunlock(wpipe);
goto error1;
}
- error = 0;
- while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
+ while (wpipe->pipe_map.cnt != 0) {
if (wpipe->pipe_state & PIPE_EOF) {
+ wpipe->pipe_map.cnt = 0;
pipe_destroy_write_buffer(wpipe);
pipeselwakeup(wpipe);
pipeunlock(wpipe);
@@ -1152,20 +1151,19 @@ retry:
error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
"pipdwt", 0);
pipelock(wpipe, 0);
+ if (error != 0)
+ break;
}
if (wpipe->pipe_state & PIPE_EOF)
error = EPIPE;
- if (wpipe->pipe_state & PIPE_DIRECTW) {
- /*
- * this bit of trickery substitutes a kernel buffer for
- * the process that might be going away.
- */
+ if (error == EINTR || error == ERESTART)
pipe_clone_write_buffer(wpipe);
- } else {
+ else
pipe_destroy_write_buffer(wpipe);
- }
pipeunlock(wpipe);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
+ ("pipe %p leaked PIPE_DIRECTW", wpipe));
return (error);
error1:
@@ -1290,7 +1288,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
* pipe buffer. We break out if a signal occurs or the
* reader goes away.
*/
- if (wpipe->pipe_state & PIPE_DIRECTW) {
+ if (wpipe->pipe_map.cnt != 0) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
wakeup(wpipe);
@@ -1586,7 +1584,7 @@ pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
PIPE_UNLOCK(mpipe);
return (0);
}
- if (mpipe->pipe_state & PIPE_DIRECTW)
+ if (mpipe->pipe_map.cnt != 0)
*(int *)data = mpipe->pipe_map.cnt;
else
*(int *)data = mpipe->pipe_buffer.cnt;
@@ -1663,8 +1661,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
#else /* __rtems__ */
if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && events & (POLLIN | POLLRDNORM))
#endif /* __rtems__ */
- if ((rpipe->pipe_state & PIPE_DIRECTW) ||
- (rpipe->pipe_buffer.cnt > 0))
+ if (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0)
revents |= events & (POLLIN | POLLRDNORM);
#ifndef __rtems__
@@ -1674,7 +1671,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
#endif /* __rtems__ */
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF) ||
- (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
+ ((wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
wpipe->pipe_buffer.size == 0)))
revents |= events & (POLLOUT | POLLWRNORM);
@@ -1683,7 +1680,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
(POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
#ifndef __rtems__
if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
- fp->f_seqcount == rpipe->pipe_wgen)
+ fp->f_pipegen == rpipe->pipe_wgen)
#else /* __rtems__ */
if (rpipe->pipe_state & PIPE_NAMED && rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && levents)
#endif /* __rtems__ */
@@ -1792,7 +1789,7 @@ pipe_stat(struct pipe *pipe, struct stat *ub)
#endif /* __rtems__ */
ub->st_mode = S_IFIFO;
ub->st_blksize = PAGE_SIZE;
- if (pipe->pipe_state & PIPE_DIRECTW)
+ if (pipe->pipe_map.cnt != 0)
ub->st_size = pipe->pipe_map.cnt;
else
ub->st_size = pipe->pipe_buffer.cnt;
@@ -2081,7 +2078,7 @@ filt_piperead(struct knote *kn, long hint)
PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
kn->kn_data = rpipe->pipe_buffer.cnt;
- if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
+ if (kn->kn_data == 0)
kn->kn_data = rpipe->pipe_map.cnt;
if ((rpipe->pipe_state & PIPE_EOF) ||
@@ -2099,15 +2096,19 @@ static int
filt_pipewrite(struct knote *kn, long hint)
{
struct pipe *wpipe;
-
+
+ /*
+ * If this end of the pipe is closed, the knote was removed from the
+ * knlist and the list lock (i.e., the pipe lock) is therefore not held.
+ */
wpipe = kn->kn_hook;
- PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
return (1);
}
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF;
if (wpipe->pipe_state & PIPE_DIRECTW)
diff --git a/freebsd/sys/kern/tty.c b/freebsd/sys/kern/tty.c
index 5d9c8a57..ee46a44f 100644
--- a/freebsd/sys/kern/tty.c
+++ b/freebsd/sys/kern/tty.c
@@ -95,7 +95,7 @@ static const char *dev_console_filename;
FLUSHO|NOKERNINFO|NOFLSH)
#define TTYSUP_CFLAG (CIGNORE|CSIZE|CSTOPB|CREAD|PARENB|PARODD|\
HUPCL|CLOCAL|CCTS_OFLOW|CRTS_IFLOW|CDTR_IFLOW|\
- CDSR_OFLOW|CCAR_OFLOW)
+ CDSR_OFLOW|CCAR_OFLOW|CNO_RTSDTR)
#define TTY_CALLOUT(tp,d) (dev2unit(d) & TTYUNIT_CALLOUT)
@@ -336,7 +336,8 @@ ttydev_open(struct cdev *dev, int oflags, int devtype __unused,
if (TTY_CALLOUT(tp, dev) || dev == dev_console)
tp->t_termios.c_cflag |= CLOCAL;
- ttydevsw_modem(tp, SER_DTR|SER_RTS, 0);
+ if ((tp->t_termios.c_cflag & CNO_RTSDTR) == 0)
+ ttydevsw_modem(tp, SER_DTR|SER_RTS, 0);
error = ttydevsw_open(tp);
if (error != 0)
@@ -1147,6 +1148,9 @@ tty_rel_free(struct tty *tp)
return;
}
+ /* Stop asynchronous I/O. */
+ funsetown(&tp->t_sigio);
+
/* TTY can be deallocated. */
dev = tp->t_dev;
tp->t_dev = NULL;
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index 185d14a0..2f1768da 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -51,7 +51,11 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>
+#include <sys/vmmeter.h>
#include <sys/sdt.h>
+#include <vm/vm.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_page.h>
SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
"struct mbuf *", "mbufinfo_t *",
@@ -204,7 +208,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
else
bcopy(&m->m_ext, &n->m_ext, m_ext_copylen);
n->m_flags |= M_EXT;
- n->m_flags |= m->m_flags & M_RDONLY;
+ n->m_flags |= m->m_flags & (M_RDONLY | M_NOMAP);
/* See if this is the mbuf that holds the embedded refcount. */
if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
@@ -248,7 +252,8 @@ m_demote(struct mbuf *m0, int all, int flags)
__func__, m, m0));
if (m->m_flags & M_PKTHDR)
m_demote_pkthdr(m);
- m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
+ m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE |
+ M_NOMAP | flags);
}
}
@@ -343,6 +348,9 @@ m_pkthdr_init(struct mbuf *m, int how)
#endif
m->m_data = m->m_pktdat;
bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+#ifdef NUMA
+ m->m_pkthdr.numa_domain = M_NODOM;
+#endif
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);
@@ -375,12 +383,17 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ from->m_pkthdr.snd_tag = NULL;
+ }
}
/*
@@ -409,10 +422,13 @@ m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
@@ -572,6 +588,32 @@ nospace:
return (NULL);
}
+#ifndef __rtems__
+static void
+m_copyfromunmapped(const struct mbuf *m, int off, int len, caddr_t cp)
+{
+ struct iovec iov;
+ struct uio uio;
+ int error;
+
+ KASSERT(off >= 0, ("m_copyfromunmapped: negative off %d", off));
+ KASSERT(len >= 0, ("m_copyfromunmapped: negative len %d", len));
+ KASSERT(off < m->m_len,
+ ("m_copyfromunmapped: len exceeds mbuf length"));
+ iov.iov_base = cp;
+ iov.iov_len = len;
+ uio.uio_resid = len;
+ uio.uio_iov = &iov;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_rw = UIO_READ;
+ error = m_unmappedtouio(m, off, &uio, len);
+ KASSERT(error == 0, ("m_unmappedtouio failed: off %d, len %d", off,
+ len));
+}
+#endif /* __rtems__ */
+
/*
* Copy data from an mbuf chain starting "off" bytes from the beginning,
* continuing for "len" bytes, into the indicated buffer.
@@ -593,7 +635,12 @@ m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
while (len > 0) {
KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
count = min(m->m_len - off, len);
- bcopy(mtod(m, caddr_t) + off, cp, count);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ m_copyfromunmapped(m, off, count, cp);
+ else
+#endif /* __rtems__ */
+ bcopy(mtod(m, caddr_t) + off, cp, count);
len -= count;
cp += count;
off = 0;
@@ -688,6 +735,7 @@ m_cat(struct mbuf *m, struct mbuf *n)
m = m->m_next;
while (n) {
if (!M_WRITABLE(m) ||
+ (n->m_flags & M_NOMAP) != 0 ||
M_TRAILINGSPACE(m) < n->m_len) {
/* just join the two chains */
m->m_next = n;
@@ -805,6 +853,9 @@ m_pullup(struct mbuf *n, int len)
int count;
int space;
+ KASSERT((n->m_flags & M_NOMAP) == 0,
+ ("%s: unmapped mbuf %p", __func__, n));
+
/*
* If first mbuf has no cluster, and has room for len bytes
* without shifting current data, pullup into it,
@@ -923,7 +974,12 @@ m_split(struct mbuf *m0, int len0, int wait)
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@@ -931,7 +987,12 @@ m_split(struct mbuf *m0, int len0, int wait)
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
@@ -1348,6 +1409,41 @@ nospace:
}
/*
+ * Return the number of fragments an mbuf will use. This is usually
+ * used as a proxy for the number of scatter/gather elements needed by
+ * a DMA engine to access an mbuf. In general mapped mbufs are
+ * assumed to be backed by physically contiguous buffers that only
+ * need a single fragment. Unmapped mbufs, on the other hand, can
+ * span disjoint physical pages.
+ */
+static int
+frags_per_mbuf(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ int frags;
+
+ if ((m->m_flags & M_NOMAP) == 0)
+ return (1);
+
+ /*
+ * The header and trailer are counted as a single fragment
+ * each when present.
+ *
+ * XXX: This overestimates the number of fragments by assuming
+ * all the backing physical pages are disjoint.
+ */
+ ext_pgs = m->m_ext.ext_pgs;
+ frags = 0;
+ if (ext_pgs->hdr_len != 0)
+ frags++;
+ frags += ext_pgs->npgs;
+ if (ext_pgs->trail_len != 0)
+ frags++;
+
+ return (frags);
+}
+
+/*
* Defragment an mbuf chain, returning at most maxfrags separate
* mbufs+clusters. If this is not possible NULL is returned and
* the original mbuf chain is left in its present (potentially
@@ -1367,7 +1463,7 @@ m_collapse(struct mbuf *m0, int how, int maxfrags)
*/
curfrags = 0;
for (m = m0; m != NULL; m = m->m_next)
- curfrags++;
+ curfrags += frags_per_mbuf(m);
/*
* First, try to collapse mbufs. Note that we always collapse
* towards the front so we don't need to deal with moving the
@@ -1382,12 +1478,13 @@ again:
break;
if (M_WRITABLE(m) &&
n->m_len < M_TRAILINGSPACE(m)) {
- bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
- n->m_len);
+ m_copydata(n, 0, n->m_len,
+ mtod(m, char *) + m->m_len);
m->m_len += n->m_len;
m->m_next = n->m_next;
+ curfrags -= frags_per_mbuf(n);
m_free(n);
- if (--curfrags <= maxfrags)
+ if (curfrags <= maxfrags)
return m0;
} else
m = n;
@@ -1404,15 +1501,18 @@ again:
m = m_getcl(how, MT_DATA, 0);
if (m == NULL)
goto bad;
- bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
- bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
- n2->m_len);
+ m_copydata(n, 0, n->m_len, mtod(m, char *));
+ m_copydata(n2, 0, n2->m_len,
+ mtod(m, char *) + n->m_len);
m->m_len = n->m_len + n2->m_len;
m->m_next = n2->m_next;
*prev = m;
+ curfrags += 1; /* For the new cluster */
+ curfrags -= frags_per_mbuf(n);
+ curfrags -= frags_per_mbuf(n2);
m_free(n);
m_free(n2);
- if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */
+ if (curfrags <= maxfrags)
return m0;
/*
* Still not there, try the normal collapse
@@ -1512,6 +1612,100 @@ nospace:
#endif
+#ifndef __rtems__
+/*
+ * Free pages from mbuf_ext_pgs, assuming they were allocated via
+ * vm_page_alloc() and aren't associated with any object. Complement
+ * to allocator from m_uiotombuf_nomap().
+ */
+void
+mb_free_mext_pgs(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ for (int i = 0; i < ext_pgs->npgs; i++) {
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ vm_page_unwire_noq(pg);
+ vm_page_free(pg);
+ }
+}
+
+static struct mbuf *
+m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
+{
+ struct mbuf *m, *mb, *prev;
+ struct mbuf_ext_pgs *pgs;
+ vm_page_t pg_array[MBUF_PEXT_MAX_PGS];
+ int error, length, i, needed;
+ ssize_t total;
+ int pflags = malloc2vm_flags(how) | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
+ VM_ALLOC_WIRED;
+
+ /*
+ * len can be zero or an arbitrary large value bound by
+ * the total data supplied by the uio.
+ */
+ if (len > 0)
+ total = MIN(uio->uio_resid, len);
+ else
+ total = uio->uio_resid;
+
+ if (maxseg == 0)
+ maxseg = MBUF_PEXT_MAX_PGS * PAGE_SIZE;
+
+ /*
+ * Allocate the pages
+ */
+ m = NULL;
+ while (total > 0) {
+ mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR),
+ mb_free_mext_pgs);
+ if (mb == NULL)
+ goto failed;
+ if (m == NULL)
+ m = mb;
+ else
+ prev->m_next = mb;
+ prev = mb;
+ pgs = mb->m_ext.ext_pgs;
+ needed = length = MIN(maxseg, total);
+ for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) {
+retry_page:
+ pg_array[i] = vm_page_alloc(NULL, 0, pflags);
+ if (pg_array[i] == NULL) {
+ if (how & M_NOWAIT) {
+ goto failed;
+ } else {
+ vm_wait(NULL);
+ goto retry_page;
+ }
+ }
+ pg_array[i]->flags &= ~PG_ZERO;
+ pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]);
+ pgs->npgs++;
+ }
+ pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1);
+ MBUF_EXT_PGS_ASSERT_SANITY(pgs);
+ total -= length;
+ error = uiomove_fromphys(pg_array, 0, length, uio);
+ if (error != 0)
+ goto failed;
+ mb->m_len = length;
+ mb->m_ext.ext_size += PAGE_SIZE * pgs->npgs;
+ if (flags & M_PKTHDR)
+ m->m_pkthdr.len += length;
+ }
+ return (m);
+
+failed:
+ m_freem(m);
+ return (NULL);
+}
+#endif /* __rtems__ */
+
/*
* Copy the contents of uio into a properly sized mbuf chain.
*/
@@ -1523,6 +1717,11 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
ssize_t total;
int progress = 0;
+#ifndef __rtems__
+ if (flags & M_NOMAP)
+ return (m_uiotombuf_nomap(uio, how, len, align, flags));
+#endif /* __rtems__ */
+
/*
* len can be zero or an arbitrary large value bound by
* the total data supplied by the uio.
@@ -1569,6 +1768,62 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
}
/*
+ * Copy data from an unmapped mbuf into a uio limited by len if set.
+ */
+int
+m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+ int error, i, off, pglen, pgoff, seglen, segoff;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ error = 0;
+
+ /* Skip over any data removed from the front. */
+ off = mtod(m, vm_offset_t);
+
+ off += m_off;
+ if (ext_pgs->hdr_len != 0) {
+ if (off >= ext_pgs->hdr_len) {
+ off -= ext_pgs->hdr_len;
+ } else {
+ seglen = ext_pgs->hdr_len - off;
+ segoff = off;
+ seglen = min(seglen, len);
+ off = 0;
+ len -= seglen;
+ error = uiomove(&ext_pgs->hdr[segoff], seglen, uio);
+ }
+ }
+ pgoff = ext_pgs->first_pg_off;
+ for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) {
+ pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+ if (off >= pglen) {
+ off -= pglen;
+ pgoff = 0;
+ continue;
+ }
+ seglen = pglen - off;
+ segoff = pgoff + off;
+ off = 0;
+ seglen = min(seglen, len);
+ len -= seglen;
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ error = uiomove_fromphys(&pg, segoff, seglen, uio);
+ pgoff = 0;
+ };
+ if (len != 0 && error == 0) {
+ KASSERT((off + len) <= ext_pgs->trail_len,
+ ("off + len > trail (%d + %d > %d, m_off = %d)", off, len,
+ ext_pgs->trail_len, m_off));
+ error = uiomove(&ext_pgs->trail[off], len, uio);
+ }
+ return (error);
+}
+
+/*
* Copy an mbuf chain into a uio limited by len if set.
*/
int
@@ -1586,7 +1841,12 @@ m_mbuftouio(struct uio *uio, const struct mbuf *m, int len)
for (; m != NULL; m = m->m_next) {
length = min(m->m_len, total - progress);
- error = uiomove(mtod(m, void *), length, uio);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ error = m_unmappedtouio(m, 0, uio, length);
+ else
+#endif /* __rtems__ */
+ error = uiomove(mtod(m, void *), length, uio);
if (error)
return (error);
diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c
index 7dd2840c..6f98b0a2 100644
--- a/freebsd/sys/kern/uipc_mbuf2.c
+++ b/freebsd/sys/kern/uipc_mbuf2.c
@@ -218,7 +218,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
goto ok;
}
if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen
- && writable) {
+ && writable && n->m_next->m_len >= tlen) {
n->m_next->m_data -= hlen;
n->m_next->m_len += hlen;
bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen);
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index 0830206a..2305b333 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -36,11 +36,13 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_param.h>
#include <sys/param.h>
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -91,28 +93,135 @@ sbm_clrprotoflags(struct mbuf *m, int flags)
}
/*
- * Mark ready "count" mbufs starting with "m".
+ * Compress M_NOTREADY mbufs after they have been readied by sbready().
+ *
+ * sbcompress() skips M_NOTREADY mbufs since the data is not available to
+ * be copied at the time of sbcompress(). This function combines small
+ * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first
+ * mbuf sbready() marked ready, and 'end' is the first mbuf still not
+ * ready.
+ */
+static void
+sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
+{
+ struct mbuf *m, *n;
+ int ext_size;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ if ((sb->sb_flags & SB_NOCOALESCE) != 0)
+ return;
+
+ for (m = m0; m != end; m = m->m_next) {
+ MPASS((m->m_flags & M_NOTREADY) == 0);
+
+ /* Compress small unmapped mbufs into plain mbufs. */
+ if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN &&
+ !mbuf_has_tls_session(m)) {
+ MPASS(m->m_flags & M_EXT);
+ ext_size = m->m_ext.ext_size;
+ if (mb_unmapped_compress(m) == 0) {
+ sb->sb_mbcnt -= ext_size;
+ sb->sb_ccnt -= 1;
+ }
+ }
+
+ /*
+ * NB: In sbcompress(), 'n' is the last mbuf in the
+ * socket buffer and 'm' is the new mbuf being copied
+ * into the trailing space of 'n'. Here, the roles
+ * are reversed and 'n' is the next mbuf after 'm'
+ * that is being copied into the trailing space of
+ * 'm'.
+ */
+ n = m->m_next;
+ while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
+ M_WRITABLE(m) &&
+ (m->m_flags & M_NOMAP) == 0 &&
+ !mbuf_has_tls_session(n) &&
+ !mbuf_has_tls_session(m) &&
+ n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
+ n->m_len <= M_TRAILINGSPACE(m) &&
+ m->m_type == n->m_type) {
+ KASSERT(sb->sb_lastrecord != n,
+ ("%s: merging start of record (%p) into previous mbuf (%p)",
+ __func__, n, m));
+ m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
+ m->m_len += n->m_len;
+ m->m_next = n->m_next;
+ m->m_flags |= n->m_flags & M_EOR;
+ if (sb->sb_mbtail == n)
+ sb->sb_mbtail = m;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+ if (n->m_flags & M_EXT) {
+ sb->sb_mbcnt -= n->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+ m_free(n);
+ n = m->m_next;
+ }
+ }
+ SBLASTRECORDCHK(sb);
+ SBLASTMBUFCHK(sb);
+}
+
+/*
+ * Mark ready "count" units of I/O starting with "m". Most mbufs
+ * count as a single unit of I/O except for EXT_PGS-backed mbufs which
+ * can be backed by multiple pages.
*/
int
-sbready(struct sockbuf *sb, struct mbuf *m, int count)
+sbready(struct sockbuf *sb, struct mbuf *m0, int count)
{
+ struct mbuf *m;
u_int blocker;
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
+ KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
+ m = m0;
blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
- for (int i = 0; i < count; i++, m = m->m_next) {
+ while (count > 0) {
KASSERT(m->m_flags & M_NOTREADY,
("%s: m %p !M_NOTREADY", __func__, m));
+#ifndef __rtems__
+ if ((m->m_flags & M_EXT) != 0 &&
+ m->m_ext.ext_type == EXT_PGS) {
+ if (count < m->m_ext.ext_pgs->nrdy) {
+ m->m_ext.ext_pgs->nrdy -= count;
+ count = 0;
+ break;
+ }
+ count -= m->m_ext.ext_pgs->nrdy;
+ m->m_ext.ext_pgs->nrdy = 0;
+ } else
+#endif /* __rtems__ */
+ count--;
+
m->m_flags &= ~(M_NOTREADY | blocker);
if (blocker)
sb->sb_acc += m->m_len;
+ m = m->m_next;
+ }
+
+ /*
+ * If the first mbuf is still not fully ready because only
+ * some of its backing pages were readied, no further progress
+ * can be made.
+ */
+ if (m0 == m) {
+ MPASS(m->m_flags & M_NOTREADY);
+ return (EINPROGRESS);
}
- if (!blocker)
+ if (!blocker) {
+ sbready_compress(sb, m0, m);
return (EINPROGRESS);
+ }
/* This one was blocking all the queue. */
for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
@@ -123,6 +232,7 @@ sbready(struct sockbuf *sb, struct mbuf *m, int count)
}
sb->sb_fnrdy = m;
+ sbready_compress(sb, m0, m);
return (0);
}
@@ -571,6 +681,11 @@ sbdestroy(struct sockbuf *sb, struct socket *so)
{
sbrelease_internal(sb, so);
+#ifdef KERN_TLS
+ if (sb->sb_tls_info != NULL)
+ ktls_free(sb->sb_tls_info);
+ sb->sb_tls_info = NULL;
+#endif
}
/*
@@ -734,6 +849,11 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
SBLASTMBUFCHK(sb);
+#ifdef KERN_TLS
+ if (sb->sb_tls_info != NULL)
+ ktls_seq(sb, m);
+#endif
+
/* Remove all packet headers and mbuf tags to get a pure data chain. */
m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
@@ -1036,12 +1156,13 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
M_WRITABLE(n) &&
((sb->sb_flags & SB_NOCOALESCE) == 0) &&
!(m->m_flags & M_NOTREADY) &&
- !(n->m_flags & M_NOTREADY) &&
+ !(n->m_flags & (M_NOTREADY | M_NOMAP)) &&
+ !mbuf_has_tls_session(m) &&
+ !mbuf_has_tls_session(n) &&
m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
m->m_len <= M_TRAILINGSPACE(n) &&
n->m_type == m->m_type) {
- bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
- (unsigned)m->m_len);
+ m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
n->m_len += m->m_len;
sb->sb_ccc += m->m_len;
if (sb->sb_fnrdy == NULL)
@@ -1052,6 +1173,10 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
m = m_free(m);
continue;
}
+ if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) &&
+ (m->m_flags & M_NOTREADY) == 0 &&
+ !mbuf_has_tls_session(m))
+ (void)mb_unmapped_compress(m);
if (n)
n->m_next = m;
else
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 380c97dd..c01535c4 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
@@ -125,6 +126,7 @@ __FBSDID("$FreeBSD$");
#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/khelp.h>
+#include <sys/ktls.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
#include <sys/poll.h>
@@ -143,6 +145,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/syslog.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <net/vnet.h>
@@ -911,6 +914,8 @@ solisten_wakeup(struct socket *sol)
}
SOLISTEN_UNLOCK(sol);
wakeup_one(&sol->sol_comp);
+ if ((sol->so_state & SS_ASYNC) && sol->so_sigio != NULL)
+ pgsigio(&sol->so_sigio, SIGIO, 0);
}
/*
@@ -1067,7 +1072,7 @@ sofree(struct socket *so)
*
* We used to do a lot of socket buffer and socket locking here, as
* well as invoke sorflush() and perform wakeups. The direct call to
- * dom_dispose() and sbrelease_internal() are an inlining of what was
+ * dom_dispose() and sbdestroy() are an inlining of what was
* necessary from sorflush().
*
* Notice that the socket buffer and kqueue state are torn down
@@ -1154,9 +1159,9 @@ drop:
so->so_state |= SS_NOFDREF;
sorele(so);
if (listening) {
- struct socket *sp;
+ struct socket *sp, *tsp;
- TAILQ_FOREACH(sp, &lqueue, so_list) {
+ TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) {
SOCK_LOCK(sp);
if (sp->so_count == 0) {
SOCK_UNLOCK(sp);
@@ -1197,7 +1202,6 @@ soabort(struct socket *so)
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
@@ -1468,7 +1472,15 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
ssize_t resid;
int clen = 0, error, dontroute;
int atomic = sosendallatonce(so) || top;
-
+ int pru_flag;
+#ifdef KERN_TLS
+ struct ktls_session *tls;
+ int tls_enq_cnt, tls_pruflag;
+ uint8_t tls_rtype;
+
+ tls = NULL;
+ tls_rtype = TLS_RLTYPE_APP;
+#endif
if (uio != NULL)
resid = uio->uio_resid;
else
@@ -1502,6 +1514,28 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
if (error)
goto out;
+#ifdef KERN_TLS
+ tls_pruflag = 0;
+ tls = ktls_hold(so->so_snd.sb_tls_info);
+ if (tls != NULL) {
+ if (tls->sw_encrypt != NULL)
+ tls_pruflag = PRUS_NOTREADY;
+
+ if (control != NULL) {
+ struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+
+ if (clen >= sizeof(*cm) &&
+ cm->cmsg_type == TLS_SET_RECORD_TYPE) {
+ tls_rtype = *((uint8_t *)CMSG_DATA(cm));
+ clen = 0;
+ m_freem(control);
+ control = NULL;
+ atomic = 1;
+ }
+ }
+ }
+#endif
+
restart:
do {
SOCKBUF_LOCK(&so->so_snd);
@@ -1551,7 +1585,8 @@ restart:
}
if (space < resid + clen &&
(atomic || space < so->so_snd.sb_lowat || space < clen)) {
- if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
SOCKBUF_UNLOCK(&so->so_snd);
error = EWOULDBLOCK;
goto release;
@@ -1578,10 +1613,27 @@ restart:
* is a workaround to prevent protocol send
* methods to panic.
*/
- top = m_uiotombuf(uio, M_WAITOK, space,
- (atomic ? max_hdr : 0),
- (atomic ? M_PKTHDR : 0) |
- ((flags & MSG_EOR) ? M_EOR : 0));
+#ifdef KERN_TLS
+ if (tls != NULL) {
+ top = m_uiotombuf(uio, M_WAITOK, space,
+ tls->params.max_frame_len,
+ M_NOMAP |
+ ((flags & MSG_EOR) ? M_EOR : 0));
+ if (top != NULL) {
+ error = ktls_frame(top, tls,
+ &tls_enq_cnt, tls_rtype);
+ if (error) {
+ m_freem(top);
+ goto release;
+ }
+ }
+ tls_rtype = TLS_RLTYPE_APP;
+ } else
+#endif
+ top = m_uiotombuf(uio, M_WAITOK, space,
+ (atomic ? max_hdr : 0),
+ (atomic ? M_PKTHDR : 0) |
+ ((flags & MSG_EOR) ? M_EOR : 0));
if (top == NULL) {
error = EFAULT; /* only possible error */
goto release;
@@ -1605,8 +1657,8 @@ restart:
* this.
*/
VNET_SO_ASSERT(so);
- error = (*so->so_proto->pr_usrreqs->pru_send)(so,
- (flags & MSG_OOB) ? PRUS_OOB :
+
+ pru_flag = (flags & MSG_OOB) ? PRUS_OOB :
/*
* If the user set MSG_EOF, the protocol understands
* this flag and nothing left to send then use
@@ -1618,13 +1670,37 @@ restart:
PRUS_EOF :
/* If there is more to send set PRUS_MORETOCOME. */
(flags & MSG_MORETOCOME) ||
- (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
- top, addr, control, td);
+ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
+
+#ifdef KERN_TLS
+ pru_flag |= tls_pruflag;
+#endif
+
+ error = (*so->so_proto->pr_usrreqs->pru_send)(so,
+ pru_flag, top, addr, control, td);
+
if (dontroute) {
SOCK_LOCK(so);
so->so_options &= ~SO_DONTROUTE;
SOCK_UNLOCK(so);
}
+
+#ifdef KERN_TLS
+ if (tls != NULL && tls->sw_encrypt != NULL) {
+ /*
+ * Note that error is intentionally
+ * ignored.
+ *
+ * Like sendfile(), we rely on the
+ * completion routine (pru_ready())
+ * to free the mbufs in the event that
+ * pru_send() encountered an error and
+ * did not append them to the sockbuf.
+ */
+ soref(so);
+ ktls_enqueue(top, so, tls_enq_cnt);
+ }
+#endif
clen = 0;
control = NULL;
top = NULL;
@@ -1636,6 +1712,10 @@ restart:
release:
sbunlock(&so->so_snd);
out:
+#ifdef KERN_TLS
+ if (tls != NULL)
+ ktls_free(tls);
+#endif
if (top != NULL)
m_freem(top);
if (control != NULL)
@@ -2011,7 +2091,13 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
- error = uiomove(mtod(m, char *) + moff, (int)len, uio);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ error = m_unmappedtouio(m, moff, uio, (int)len);
+ else
+#endif /* __rtems__ */
+ error = uiomove(mtod(m, char *) + moff,
+ (int)len, uio);
SOCKBUF_LOCK(&so->so_rcv);
if (error) {
/*
@@ -2225,7 +2311,7 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
/* Prevent other readers from entering the socket. */
error = sblock(sb, SBLOCKWAIT(flags));
if (error)
- goto out;
+ return (error);
SOCKBUF_LOCK(sb);
/* Easy one, no space to copyout anything. */
@@ -2793,12 +2879,10 @@ sosetopt(struct socket *so, struct sockopt *sopt)
CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto->pr_ctloutput != NULL) {
+ if (so->so_proto->pr_ctloutput != NULL)
error = (*so->so_proto->pr_ctloutput)(so, sopt);
- CURVNET_RESTORE();
- return (error);
- }
- error = ENOPROTOOPT;
+ else
+ error = ENOPROTOOPT;
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
@@ -2811,7 +2895,12 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
if (error)
goto bad;
-
+ if (l.l_linger < 0 ||
+ l.l_linger > USHRT_MAX ||
+ l.l_linger > (INT_MAX / hz)) {
+ error = EDOM;
+ goto bad;
+ }
SOCK_LOCK(so);
so->so_linger = l.l_linger;
if (l.l_onoff)
@@ -4162,6 +4251,9 @@ void
so_linger_set(struct socket *so, int val)
{
+ KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz),
+ ("%s: val %d out of range", __func__, val));
+
so->so_linger = val;
}
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 529268a9..39e96abe 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
+#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/unpcb.h>
@@ -451,7 +452,8 @@ accept1(td, s, uname, anamelen, flags)
if (error == 0 && uname != NULL) {
#ifdef COMPAT_OLDSOCK
- if (flags & ACCEPT4_COMPAT)
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT) &&
+ (flags & ACCEPT4_COMPAT) != 0)
((struct osockaddr *)name)->sa_family =
name->sa_family;
#endif
@@ -968,7 +970,8 @@ sendit(struct thread *td, int s, struct msghdr *mp, int flags)
if (mp->msg_control) {
if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
- && mp->msg_flags != MSG_COMPAT
+ && (mp->msg_flags != MSG_COMPAT ||
+ !SV_PROC_FLAG(td->td_proc, SV_AOUT))
#endif
) {
error = EINVAL;
@@ -979,7 +982,8 @@ sendit(struct thread *td, int s, struct msghdr *mp, int flags)
if (error != 0)
goto bad;
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags == MSG_COMPAT) {
+ if (mp->msg_flags == MSG_COMPAT &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
struct cmsghdr *cm;
M_PREPEND(control, sizeof(*cm), M_WAITOK);
@@ -1120,7 +1124,8 @@ sys_sendto(struct thread *td, struct sendto_args *uap)
msg.msg_iovlen = 1;
msg.msg_control = 0;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags = 0;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags = 0;
#endif
aiov.iov_base = __DECONST(void *, uap->buf);
aiov.iov_len = uap->len;
@@ -1239,7 +1244,8 @@ sys_sendmsg(struct thread *td, struct sendmsg_args *uap)
return (error);
msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags = 0;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags = 0;
#endif
error = sendit(td, uap->s, &msg, uap->flags);
free(iov, M_IOV);
@@ -1356,7 +1362,8 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
/* save sa_len before it is destroyed by MSG_COMPAT */
len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags & MSG_COMPAT)
+ if ((mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)fromsa)->sa_family =
fromsa->sa_family;
#endif
@@ -1379,7 +1386,8 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
* If we receive rights, trim the cmsghdr; anything else
* is tossed.
*/
- if (control && mp->msg_flags & MSG_COMPAT) {
+ if (control && (mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
if (mtod(control, struct cmsghdr *)->cmsg_level !=
SOL_SOCKET ||
mtod(control, struct cmsghdr *)->cmsg_type !=
@@ -1438,7 +1446,8 @@ recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
if (namelenp != NULL) {
error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags & MSG_COMPAT)
+ if ((mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT))
error = 0; /* old recvfrom didn't check */
#endif
}
@@ -1581,7 +1590,8 @@ sys_recvmsg(struct thread *td, struct recvmsg_args *uap)
return (error);
msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags &= ~MSG_COMPAT;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags &= ~MSG_COMPAT;
#endif
uiov = msg.msg_iov;
msg.msg_iov = iov;
@@ -1863,7 +1873,7 @@ getsockname1(struct thread *td, struct getsockname_args *uap, int compat)
if (len != 0) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
error = copyout(sa, uap->asa, (u_int)len);
@@ -1978,7 +1988,7 @@ getpeername1(struct thread *td, struct getpeername_args *uap, int compat)
if (len != 0) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
error = copyout(sa, uap->asa, (u_int)len);
@@ -2083,7 +2093,8 @@ sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
if (buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
- if (type == MT_SONAME && buflen <= 112)
+ if (type == MT_SONAME && buflen <= 112 &&
+ SV_CURPROC_FLAG(SV_AOUT))
buflen = MLEN; /* unix domain compat. hack */
else
#endif
@@ -2101,7 +2112,8 @@ sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
sa = mtod(m, struct sockaddr *);
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
- if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX &&
+ SV_CURPROC_FLAG(SV_AOUT))
sa->sa_family = sa->sa_len;
#endif
sa->sa_len = buflen;
@@ -2129,7 +2141,8 @@ getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len)
free(sa, M_SONAME);
} else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
- if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX &&
+ SV_CURPROC_FLAG(SV_AOUT))
sa->sa_family = sa->sa_len;
#endif
sa->sa_len = len;
@@ -2180,8 +2193,10 @@ m_dispose_extcontrolm(struct mbuf *m)
fd = *fds++;
error = fget(td, fd, &cap_no_rights,
&fp);
- if (error == 0)
+ if (error == 0) {
fdclose(td, fp, fd);
+ fdrop(fp, td);
+ }
}
}
clen -= datalen;
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index 6b34dcb8..39f28b4b 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -1032,7 +1032,7 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0) {
- cru2x(td->td_ucred, &unp->unp_peercred);
+ cru2xt(td, &unp->unp_peercred);
solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
@@ -1837,7 +1837,7 @@ void
unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
struct unpcb *server_unp, struct unpcb *listen_unp)
{
- cru2x(td->td_ucred, &client_unp->unp_peercred);
+ cru2xt(td, &client_unp->unp_peercred);
client_unp->unp_flags |= UNP_HAVEPC;
memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
@@ -2306,30 +2306,53 @@ unp_init(void)
}
#ifndef __rtems__
+static void
+unp_internalize_cleanup_rights(struct mbuf *control)
+{
+ struct cmsghdr *cp;
+ struct mbuf *m;
+ void *data;
+ socklen_t datalen;
+
+ for (m = control; m != NULL; m = m->m_next) {
+ cp = mtod(m, struct cmsghdr *);
+ if (cp->cmsg_level != SOL_SOCKET ||
+ cp->cmsg_type != SCM_RIGHTS)
+ continue;
+ data = CMSG_DATA(cp);
+ datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
+ unp_freerights(data, datalen / sizeof(struct filedesc *));
+ }
+}
+
static int
unp_internalize(struct mbuf **controlp, struct thread *td)
{
- struct mbuf *control = *controlp;
- struct proc *p = td->td_proc;
- struct filedesc *fdesc = p->p_fd;
+ struct mbuf *control, **initial_controlp;
+ struct proc *p;
+ struct filedesc *fdesc;
struct bintime *bt;
- struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+ struct cmsghdr *cm;
struct cmsgcred *cmcred;
struct filedescent *fde, **fdep, *fdev;
struct file *fp;
struct timeval *tv;
struct timespec *ts;
- int i, *fdp;
void *data;
- socklen_t clen = control->m_len, datalen;
- int error, oldfds;
+ socklen_t clen, datalen;
+ int i, j, error, *fdp, oldfds;
u_int newlen;
UNP_LINK_UNLOCK_ASSERT();
+ p = td->td_proc;
+ fdesc = p->p_fd;
error = 0;
+ control = *controlp;
+ clen = control->m_len;
*controlp = NULL;
- while (cm != NULL) {
+ initial_controlp = controlp;
+ for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
|| cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
error = EINVAL;
@@ -2400,6 +2423,19 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
goto out;
}
fdp = data;
+ for (i = 0; i < oldfds; i++, fdp++) {
+ if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
+ fdp = data;
+ for (j = 0; j < i; j++, fdp++) {
+ fdrop(fdesc->fd_ofiles[*fdp].
+ fde_file, td);
+ }
+ FILEDESC_SUNLOCK(fdesc);
+ error = EBADF;
+ goto out;
+ }
+ }
+ fdp = data;
fdep = (struct filedescent **)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
@@ -2480,6 +2516,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
}
out:
+ if (error != 0 && initial_controlp != NULL)
+ unp_internalize_cleanup_rights(*initial_controlp);
m_freem(control);
return (error);
}
@@ -2601,7 +2639,6 @@ unp_internalize_fp(struct file *fp)
unp->unp_file = fp;
unp->unp_msgcount++;
}
- fhold(fp);
unp_rights++;
UNP_LINK_WUNLOCK();
}
@@ -2762,10 +2799,10 @@ unp_gc(__unused void *arg, int pending)
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
f = unp->unp_file;
if (unp->unp_msgcount == 0 || f == NULL ||
- f->f_count != unp->unp_msgcount)
+ f->f_count != unp->unp_msgcount ||
+ !fhold(f))
continue;
unref[total++] = f;
- fhold(f);
KASSERT(total <= unp_unreachable,
("unp_gc: incorrect unreachable count."));
}
@@ -2942,8 +2979,8 @@ db_print_xucred(int indent, struct xucred *xu)
int comma, i;
db_print_indent(indent);
- db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n",
- xu->cr_version, xu->cr_uid, xu->cr_ngroups);
+ db_printf("cr_version: %u cr_uid: %u cr_pid: %d cr_ngroups: %d\n",
+ xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
db_print_indent(indent);
db_printf("cr_groups: ");
comma = 0;
diff --git a/freebsd/sys/libkern/crc32.c b/freebsd/sys/libkern/gsb_crc32.c
index f1f11e3b..0eba1206 100644
--- a/freebsd/sys/libkern/crc32.c
+++ b/freebsd/sys/libkern/gsb_crc32.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
#include <sys/libkern.h>
#include <sys/systm.h>
+#include <sys/gsb_crc32.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/md_var.h>
diff --git a/freebsd/sys/mips/include/machine/cpufunc.h b/freebsd/sys/mips/include/machine/cpufunc.h
index 7cfc548c..a91d0cc4 100644
--- a/freebsd/sys/mips/include/machine/cpufunc.h
+++ b/freebsd/sys/mips/include/machine/cpufunc.h
@@ -371,27 +371,19 @@ get_intr_mask(void)
return (mips_rd_status() & MIPS_SR_INT_MASK);
}
-#if defined(__GNUC__) && !defined(__mips_o32)
-#define mips3_ld(a) (*(const volatile uint64_t *)(a))
-#define mips3_sd(a, v) (*(volatile uint64_t *)(a) = (v))
-#else
-uint64_t mips3_ld(volatile uint64_t *va);
-void mips3_sd(volatile uint64_t *, uint64_t);
-#endif /* __GNUC__ */
-
#endif /* _KERNEL */
#define readb(va) (*(volatile uint8_t *) (va))
#define readw(va) (*(volatile uint16_t *) (va))
#define readl(va) (*(volatile uint32_t *) (va))
-#if defined(__GNUC__) && !defined(__mips_o32)
+#if !defined(__mips_o32)
#define readq(a) (*(volatile uint64_t *)(a))
#endif
#define writeb(va, d) (*(volatile uint8_t *) (va) = (d))
#define writew(va, d) (*(volatile uint16_t *) (va) = (d))
#define writel(va, d) (*(volatile uint32_t *) (va) = (d))
-#if defined(__GNUC__) && !defined(__mips_o32)
+#if !defined(__mips_o32)
#define writeq(va, d) (*(volatile uint64_t *) (va) = (d))
#endif
diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c
index 015e35bf..7c99f8a8 100644
--- a/freebsd/sys/net/altq/altq_cbq.c
+++ b/freebsd/sys/net/altq/altq_cbq.c
@@ -225,12 +225,11 @@ cbq_pfattach(struct pf_altq *a)
}
int
-cbq_add_altq(struct pf_altq *a)
+cbq_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
cbq_state_t *cbqp;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c
index 4a55cdbe..375fc382 100644
--- a/freebsd/sys/net/altq/altq_codel.c
+++ b/freebsd/sys/net/altq/altq_codel.c
@@ -91,13 +91,12 @@ codel_pfattach(struct pf_altq *a)
}
int
-codel_add_altq(struct pf_altq *a)
+codel_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct codel_if *cif;
- struct ifnet *ifp;
struct codel_opts *opts;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c
index a1bc3fdb..5b7646e2 100644
--- a/freebsd/sys/net/altq/altq_fairq.c
+++ b/freebsd/sys/net/altq/altq_fairq.c
@@ -150,12 +150,11 @@ fairq_pfattach(struct pf_altq *a)
}
int
-fairq_add_altq(struct pf_altq *a)
+fairq_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct fairq_if *pif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c
index 202915a8..024055e3 100644
--- a/freebsd/sys/net/altq/altq_hfsc.c
+++ b/freebsd/sys/net/altq/altq_hfsc.c
@@ -161,12 +161,11 @@ hfsc_pfattach(struct pf_altq *a)
}
int
-hfsc_add_altq(struct pf_altq *a)
+hfsc_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
struct hfsc_if *hif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
@@ -508,6 +507,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
goto err_ret;
}
}
+ cl->cl_slot = i;
if (flags & HFCF_DEFAULTCLASS)
hif->hif_defaultclass = cl;
@@ -560,7 +560,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
static int
hfsc_class_destroy(struct hfsc_class *cl)
{
- int i, s;
+ int s;
if (cl == NULL)
return (0);
@@ -591,12 +591,7 @@ hfsc_class_destroy(struct hfsc_class *cl)
ASSERT(p != NULL);
}
- for (i = 0; i < HFSC_MAX_CLASSES; i++)
- if (cl->cl_hif->hif_class_tbl[i] == cl) {
- cl->cl_hif->hif_class_tbl[i] = NULL;
- break;
- }
-
+ cl->cl_hif->hif_class_tbl[cl->cl_slot] = NULL;
cl->cl_hif->hif_classes--;
IFQ_UNLOCK(cl->cl_hif->hif_ifq);
splx(s);
diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h
index fa4aa811..c43c6671 100644
--- a/freebsd/sys/net/altq/altq_hfsc.h
+++ b/freebsd/sys/net/altq/altq_hfsc.h
@@ -214,6 +214,7 @@ struct runtime_sc {
struct hfsc_class {
u_int cl_id; /* class id (just for debug) */
+ u_int cl_slot; /* slot in hif class table */
u_int32_t cl_handle; /* class handle */
struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
int cl_flags; /* misc flags */
diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c
index 5e77aef2..0090d8fa 100644
--- a/freebsd/sys/net/altq/altq_priq.c
+++ b/freebsd/sys/net/altq/altq_priq.c
@@ -97,12 +97,11 @@ priq_pfattach(struct pf_altq *a)
}
int
-priq_add_altq(struct pf_altq *a)
+priq_add_altq(struct ifnet * ifp, struct pf_altq *a)
{
struct priq_if *pif;
- struct ifnet *ifp;
- if ((ifp = ifunit(a->ifname)) == NULL)
+ if (ifp == NULL)
return (EINVAL);
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
diff --git a/freebsd/sys/net/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c
index 61aaec59..151bdf10 100644
--- a/freebsd/sys/net/altq/altq_subr.c
+++ b/freebsd/sys/net/altq/altq_subr.c
@@ -412,11 +412,11 @@ tbr_timeout(arg)
{
VNET_ITERATOR_DECL(vnet_iter);
struct ifnet *ifp;
- int active, s;
+ struct epoch_tracker et;
+ int active;
active = 0;
- s = splnet();
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@@ -433,8 +433,7 @@ tbr_timeout(arg)
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
- IFNET_RUNLOCK_NOSLEEP();
- splx(s);
+ NET_EPOCH_EXIT(et);
if (active > 0)
CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
else
@@ -523,7 +522,7 @@ altq_pfdetach(struct pf_altq *a)
* malloc with WAITOK, also it is not yet clear which lock to use.
*/
int
-altq_add(struct pf_altq *a)
+altq_add(struct ifnet *ifp, struct pf_altq *a)
{
int error = 0;
@@ -538,27 +537,27 @@ altq_add(struct pf_altq *a)
switch (a->scheduler) {
#ifdef ALTQ_CBQ
case ALTQT_CBQ:
- error = cbq_add_altq(a);
+ error = cbq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_PRIQ
case ALTQT_PRIQ:
- error = priq_add_altq(a);
+ error = priq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_HFSC
case ALTQT_HFSC:
- error = hfsc_add_altq(a);
+ error = hfsc_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_FAIRQ
case ALTQT_FAIRQ:
- error = fairq_add_altq(a);
+ error = fairq_add_altq(ifp, a);
break;
#endif
#ifdef ALTQ_CODEL
case ALTQT_CODEL:
- error = codel_add_altq(a);
+ error = codel_add_altq(ifp, a);
break;
#endif
default:
diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h
index 47326a03..f711e093 100644
--- a/freebsd/sys/net/altq/altq_var.h
+++ b/freebsd/sys/net/altq/altq_var.h
@@ -199,40 +199,40 @@ int tbr_set(struct ifaltq *, struct tb_profile *);
int altq_pfattach(struct pf_altq *);
int altq_pfdetach(struct pf_altq *);
-int altq_add(struct pf_altq *);
+int altq_add(struct ifnet *, struct pf_altq *);
int altq_remove(struct pf_altq *);
int altq_add_queue(struct pf_altq *);
int altq_remove_queue(struct pf_altq *);
int altq_getqstats(struct pf_altq *, void *, int *, int);
int cbq_pfattach(struct pf_altq *);
-int cbq_add_altq(struct pf_altq *);
+int cbq_add_altq(struct ifnet *, struct pf_altq *);
int cbq_remove_altq(struct pf_altq *);
int cbq_add_queue(struct pf_altq *);
int cbq_remove_queue(struct pf_altq *);
int cbq_getqstats(struct pf_altq *, void *, int *, int);
int codel_pfattach(struct pf_altq *);
-int codel_add_altq(struct pf_altq *);
+int codel_add_altq(struct ifnet *, struct pf_altq *);
int codel_remove_altq(struct pf_altq *);
int codel_getqstats(struct pf_altq *, void *, int *, int);
int priq_pfattach(struct pf_altq *);
-int priq_add_altq(struct pf_altq *);
+int priq_add_altq(struct ifnet *, struct pf_altq *);
int priq_remove_altq(struct pf_altq *);
int priq_add_queue(struct pf_altq *);
int priq_remove_queue(struct pf_altq *);
int priq_getqstats(struct pf_altq *, void *, int *, int);
int hfsc_pfattach(struct pf_altq *);
-int hfsc_add_altq(struct pf_altq *);
+int hfsc_add_altq(struct ifnet *, struct pf_altq *);
int hfsc_remove_altq(struct pf_altq *);
int hfsc_add_queue(struct pf_altq *);
int hfsc_remove_queue(struct pf_altq *);
int hfsc_getqstats(struct pf_altq *, void *, int *, int);
int fairq_pfattach(struct pf_altq *);
-int fairq_add_altq(struct pf_altq *);
+int fairq_add_altq(struct ifnet *, struct pf_altq *);
int fairq_remove_altq(struct pf_altq *);
int fairq_add_queue(struct pf_altq *);
int fairq_remove_queue(struct pf_altq *);
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index edee632b..101ac4e0 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -5,6 +5,7 @@
*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
@@ -45,16 +46,16 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_netgraph.h>
-#include <sys/types.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
-#include <sys/systm.h>
#include <sys/conf.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
@@ -64,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/sysent.h>
+#include <sys/systm.h>
#include <sys/event.h>
#include <sys/file.h>
@@ -99,14 +101,16 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
#ifdef __rtems__
#include <rtems/imfs.h>
+#undef devfs_get_cdevpriv
#define devfs_get_cdevpriv(x) 0
+#undef devtoname
#define devtoname(x) "bpf"
#endif /* __rtems__ */
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
static struct bpf_if_ext dead_bpf_if = {
- .bif_dlist = LIST_HEAD_INITIALIZER()
+ .bif_dlist = CK_LIST_HEAD_INITIALIZER()
};
struct bpf_if {
@@ -115,19 +119,22 @@ struct bpf_if {
struct bpf_if_ext bif_ext; /* public members */
u_int bif_dlt; /* link layer type */
u_int bif_hdrlen; /* length of link header */
+ struct bpfd_list bif_wlist; /* writer-only list */
struct ifnet *bif_ifp; /* corresponding interface */
- struct rwlock bif_lock; /* interface lock */
- LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
- int bif_flags; /* Interface flags */
struct bpf_if **bif_bpf; /* Pointer to pointer to us */
+ volatile u_int bif_refcnt;
+ struct epoch_context epoch_ctx;
};
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
-#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
-#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
-#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
-#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
+struct bpf_program_buffer {
+ struct epoch_context epoch_ctx;
+#ifdef BPF_JITTER
+ bpf_jit_filter *func;
+#endif
+ void *buffer[0];
+};
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
@@ -187,18 +194,24 @@ struct bpf_dltlist32 {
#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
/*
* bpf_iflist is a list of BPF interface structures, each corresponding to a
- * specific DLT. The same network interface might have several BPF interface
+ * specific DLT. The same network interface might have several BPF interface
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
+CK_LIST_HEAD(bpf_iflist, bpf_if);
+static struct bpf_iflist bpf_iflist;
static struct sx bpf_sx; /* bpf global lock */
static int bpf_bpfd_cnt;
+static void bpfif_ref(struct bpf_if *);
+static void bpfif_rele(struct bpf_if *);
+
+static void bpfd_ref(struct bpf_d *);
+static void bpfd_rele(struct bpf_d *);
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
static void bpf_detachd(struct bpf_d *);
-static void bpf_detachd_locked(struct bpf_d *);
-static void bpf_freed(struct bpf_d *);
+static void bpf_detachd_locked(struct bpf_d *, bool);
+static void bpfd_free(epoch_context_t);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
@@ -261,37 +274,106 @@ static struct filterops bpfread_filtops = {
.f_event = filt_bpfread,
};
-eventhandler_tag bpf_ifdetach_cookie = NULL;
-
/*
- * LOCKING MODEL USED BY BPF:
+ * LOCKING MODEL USED BY BPF
+ *
* Locks:
- * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
- * some global counters and every bpf_if reference.
- * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
- * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
- * used by bpf_mtap code.
+ * 1) global lock (BPF_LOCK). Sx, protects some global counters and every
+ * bpf_iflist change, and serializes ioctl access to bpf descriptors.
+ * 2) Descriptor lock. Mutex, used to protect BPF buffers and various
+ * structure fields used by bpf_*tap* code.
+ *
+ * Lock order: global lock, then descriptor lock.
*
- * Lock order:
+ * There are several possible consumers:
*
- * Global lock, interface lock, descriptor lock
+ * 1. The kernel registers interface pointer with bpfattach().
+ * Each call allocates new bpf_if structure, references ifnet pointer
+ * and links bpf_if into bpf_iflist chain. This is protected with global
+ * lock.
*
- * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
- * working model. In many places (like bpf_detachd) we start with BPF descriptor
- * (and we need to at least rlock it to get reliable interface pointer). This
- * gives us potential LOR. As a result, we use global lock to protect from bpf_if
- * change in every such place.
+ * 2. A userland application uses ioctl() calls on a bpf_d descriptor.
+ * All such calls are serialized with the global lock. BPF filters can
+ * be changed, but the old filter will be freed using epoch_call().
+ * Thus it should be safe for bpf_tap/bpf_mtap* code to access the
+ * filter pointers, even if a change happens during bpf_tap execution.
+ * Destruction of a bpf_d descriptor is also done using epoch_call().
*
- * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
- * 3) descriptor main wlock.
- * Reading bd_bif can be protected by any of these locks, typically global lock.
+ * 3. A userland application can write packets into a bpf_d descriptor.
+ * There we need to be sure that ifnet won't disappear during bpfwrite().
*
- * Changing read/write BPF filter is protected by the same three locks,
- * the same applies for reading.
+ * 4. The kernel invokes bpf_tap/bpf_mtap* functions. The access to
+ * bif_dlist is protected with net_epoch_preempt section. So, it should
+ * be safe to make access to bpf_d descriptor inside the section.
*
- * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ * 5. The kernel invokes bpfdetach() on interface destruction. All lists
+ * are modified with the global lock held and the actual free() is done
+ * using epoch_call().
*/
+static void
+bpfif_free(epoch_context_t ctx)
+{
+ struct bpf_if *bp;
+
+ bp = __containerof(ctx, struct bpf_if, epoch_ctx);
+ if_rele(bp->bif_ifp);
+ free(bp, M_BPF);
+}
+
+static void
+bpfif_ref(struct bpf_if *bp)
+{
+
+ refcount_acquire(&bp->bif_refcnt);
+}
+
+static void
+bpfif_rele(struct bpf_if *bp)
+{
+
+ if (!refcount_release(&bp->bif_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &bp->epoch_ctx, bpfif_free);
+}
+
+static void
+bpfd_ref(struct bpf_d *d)
+{
+
+ refcount_acquire(&d->bd_refcnt);
+}
+
+static void
+bpfd_rele(struct bpf_d *d)
+{
+
+ if (!refcount_release(&d->bd_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &d->epoch_ctx, bpfd_free);
+}
+
+static struct bpf_program_buffer*
+bpf_program_buffer_alloc(size_t size, int flags)
+{
+
+ return (malloc(sizeof(struct bpf_program_buffer) + size,
+ M_BPF, flags));
+}
+
+static void
+bpf_program_buffer_free(epoch_context_t ctx)
+{
+ struct bpf_program_buffer *ptr;
+
+ ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
+#ifdef BPF_JITTER
+ if (ptr->func != NULL)
+ bpf_destroy_jit_filter(ptr->func);
+#endif
+ free(ptr, M_BPF);
+}
+
/*
* Wrapper functions for various buffering methods. If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
@@ -673,7 +755,8 @@ bad:
}
/*
- * Attach file to the bpf interface, i.e. make d listen on bp.
+ * Attach descriptor to the bpf interface, i.e. make d listen on bp,
+ * then reset its buffers and counters with reset_d().
*/
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
@@ -689,7 +772,7 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
/*
* Point d at bp, and add d to the interface's list.
* Since there are many applications using BPF for
@@ -698,26 +781,27 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* some filter is configured.
*/
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /*
+ * Hold a reference to bpf_if while descriptor uses this interface.
+ */
+ bpfif_ref(bp);
d->bd_bif = bp;
-
if (op_w != 0) {
/* Add to writers-only list */
- LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
/*
* We decrement bd_writer on every filter set operation.
* First BIOCSETF is done by pcap_open_live() to set up
- * snap length. After that appliation usually sets its own filter
+ * snap length. After that application usually sets its own
+ * filter.
*/
d->bd_writer = 2;
} else
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ reset_d(d);
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt++;
CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
@@ -731,7 +815,8 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* Check if we need to upgrade our descriptor @d from write-only mode.
*/
static int
-bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
+bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
+ int flen)
{
int is_snap, need_upgrade;
@@ -751,7 +836,8 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
* do not consider upgrading immediately
*/
- if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
+ if (cmd == BIOCSETF && flen == 1 &&
+ fcode[0].code == (BPF_RET | BPF_K))
is_snap = 1;
else
is_snap = 0;
@@ -789,88 +875,45 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
}
/*
- * Add d to the list of active bp filters.
- * Requires bpf_attachd() to be called before.
- */
-static void
-bpf_upgraded(struct bpf_d *d)
-{
- struct bpf_if *bp;
-
- BPF_LOCK_ASSERT();
-
- bp = d->bd_bif;
-
- /*
- * Filter can be set several times without specifying interface.
- * Mark d as reader and exit.
- */
- if (bp == NULL) {
- BPFD_LOCK(d);
- d->bd_writer = 0;
- BPFD_UNLOCK(d);
- return;
- }
-
- BPFIF_WLOCK(bp);
- BPFD_LOCK(d);
-
- /* Remove from writers-only list */
- LIST_REMOVE(d, bd_next);
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
- /* Mark d as reader */
- d->bd_writer = 0;
-
- BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
- CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
-
- EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
-}
-
-/*
* Detach a file from its interface.
*/
static void
bpf_detachd(struct bpf_d *d)
{
BPF_LOCK();
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
BPF_UNLOCK();
}
static void
-bpf_detachd_locked(struct bpf_d *d)
+bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
{
- int error;
struct bpf_if *bp;
struct ifnet *ifp;
-
- CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+ int error;
BPF_LOCK_ASSERT();
+ CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
/* Check if descriptor is attached */
if ((bp = d->bd_bif) == NULL)
return;
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /* Remove d from the interface's descriptor list. */
+ CK_LIST_REMOVE(d, bd_next);
/* Save bd_writer value */
error = d->bd_writer;
-
- /*
- * Remove d from the interface's descriptor list.
- */
- LIST_REMOVE(d, bd_next);
-
ifp = bp->bif_ifp;
d->bd_bif = NULL;
+ if (detached_ifp) {
+ /*
+ * Notify descriptor as it's detached, so that any
+ * sleepers wake up and get ENXIO.
+ */
+ bpf_wakeup(d);
+ }
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt--;
/* Call event handler iff d is attached */
@@ -879,9 +922,9 @@ bpf_detachd_locked(struct bpf_d *d)
/*
* Check if this descriptor had requested promiscuous mode.
- * If so, turn it off.
+ * If so and ifnet is not detached, turn it off.
*/
- if (d->bd_promisc) {
+ if (d->bd_promisc && !detached_ifp) {
d->bd_promisc = 0;
CURVNET_SET(ifp->if_vnet);
error = ifpromisc(ifp, 0);
@@ -897,6 +940,7 @@ bpf_detachd_locked(struct bpf_d *d)
"bpf_detach: ifpromisc failed (%d)\n", error);
}
}
+ bpfif_rele(bp);
}
/*
@@ -921,8 +965,7 @@ bpf_dtor(void *data)
seldrain(&d->bd_sel);
knlist_destroy(&d->bd_sel.si_note);
callout_drain(&d->bd_callout);
- bpf_freed(d);
- free(d, M_BPF);
+ bpfd_rele(d);
}
/*
@@ -975,6 +1018,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
+ d->bd_refcnt = 1;
BPF_PID_REFRESH(d, td);
#ifdef MAC
mac_bpfdesc_init(d);
@@ -1162,7 +1206,8 @@ bpf_timed_out(void *arg)
BPFD_LOCK_ASSERT(d);
- if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+ if (callout_pending(&d->bd_callout) ||
+ !callout_active(&d->bd_callout))
return;
if (d->bd_state == BPF_WAITING) {
d->bd_state = BPF_TIMED_OUT;
@@ -1192,49 +1237,73 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
#endif /* __rtems__ */
{
+ struct route ro;
+ struct sockaddr dst;
+ struct epoch_tracker et;
+ struct bpf_if *bp;
#ifndef __rtems__
struct bpf_d *d;
#endif /* __rtems__ */
struct ifnet *ifp;
struct mbuf *m, *mc;
- struct sockaddr dst;
- struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
if (error != 0)
return (error);
+ NET_EPOCH_ENTER(et);
+ BPFD_LOCK(d);
BPF_PID_REFRESH_CUR(d);
counter_u64_add(d->bd_wcount, 1);
- /* XXX: locking required */
- if (d->bd_bif == NULL) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENXIO);
+ if ((bp = d->bd_bif) == NULL) {
+ error = ENXIO;
+ goto out_locked;
}
- ifp = d->bd_bif->bif_ifp;
-
+ ifp = bp->bif_ifp;
if ((ifp->if_flags & IFF_UP) == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENETDOWN);
+ error = ENETDOWN;
+ goto out_locked;
}
- if (uio->uio_resid == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (0);
- }
+ if (uio->uio_resid == 0)
+ goto out_locked;
bzero(&dst, sizeof(dst));
m = NULL;
hlen = 0;
- /* XXX: bpf_movein() can sleep */
- error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
+
+ /*
+ * Take extra reference, unlock d and exit from epoch section,
+ * since bpf_movein() can sleep.
+ */
+ bpfd_ref(d);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
+
+ error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
&m, &dst, &hlen, d);
- if (error) {
+
+ if (error != 0) {
counter_u64_add(d->bd_wdcount, 1);
+ bpfd_rele(d);
return (error);
}
+
+ BPFD_LOCK(d);
+ /*
+ * Check that descriptor is still attached to the interface.
+ * It can be detached by bpfdetach() meanwhile. To avoid access to a
+ * detached ifnet, free mbuf and return ENXIO.
+ */
+ if (d->bd_bif == NULL) {
+ counter_u64_add(d->bd_wdcount, 1);
+ BPFD_UNLOCK(d);
+ bpfd_rele(d);
+ m_freem(m);
+ return (ENXIO);
+ }
counter_u64_add(d->bd_wfcount, 1);
if (d->bd_hdrcmplt)
dst.sa_family = pseudo_AF_HDRCMPLT;
@@ -1255,11 +1324,9 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
CURVNET_SET(ifp->if_vnet);
#ifdef MAC
- BPFD_LOCK(d);
mac_bpfdesc_create_mbuf(d, m);
if (mc != NULL)
mac_bpfdesc_create_mbuf(d, mc);
- BPFD_UNLOCK(d);
#endif
bzero(&ro, sizeof(ro));
@@ -1269,6 +1336,9 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
ro.ro_flags = RT_HAS_HEADER;
}
+ /* Avoid possible recursion on BPFD_LOCK(). */
+ NET_EPOCH_ENTER(et);
+ BPFD_UNLOCK(d);
error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
counter_u64_add(d->bd_wdcount, 1);
@@ -1279,8 +1349,15 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
else
m_freem(mc);
}
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+ bpfd_rele(d);
+ return (error);
+out_locked:
+ counter_u64_add(d->bd_wdcount, 1);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
return (error);
}
@@ -1916,16 +1993,11 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
}
/*
- * Set d's packet filter program to fp. If this file already has a filter,
- * free it and replace it. Returns EINVAL for bogus requests.
- *
- * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
- * since reading d->bd_bif can't be protected by d or interface lock due to
- * lock order.
- *
- * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
- * interface read lock to read all filers.
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. Returns EINVAL for bogus requests.
*
+ * Note we use global lock here to serialize bpf_setf() and bpf_setif()
+ * calls.
*/
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
@@ -1934,13 +2006,14 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
struct bpf_program fp_swab;
struct bpf_program32 *fp32;
#endif
- struct bpf_insn *fcode, *old;
+ struct bpf_program_buffer *fcode;
+ struct bpf_insn *filter;
#ifdef BPF_JITTER
- bpf_jit_filter *jfunc, *ofunc;
+ bpf_jit_filter *jfunc;
#endif
size_t size;
u_int flen;
- int need_upgrade;
+ bool track_event;
#ifdef COMPAT_FREEBSD32
switch (cmd) {
@@ -1949,7 +2022,8 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
case BIOCSETFNR32:
fp32 = (struct bpf_program32 *)fp;
fp_swab.bf_len = fp32->bf_len;
- fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
+ fp_swab.bf_insns =
+ (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
fp = &fp_swab;
switch (cmd) {
case BIOCSETF32:
@@ -1963,12 +2037,10 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
}
#endif
- fcode = NULL;
+ filter = NULL;
#ifdef BPF_JITTER
- jfunc = ofunc = NULL;
+ jfunc = NULL;
#endif
- need_upgrade = 0;
-
/*
* Check new filter validness before acquiring any locks.
* Allocate memory for new filter, if needed.
@@ -1978,10 +2050,11 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
return (EINVAL);
size = flen * sizeof(*fp->bf_insns);
if (size > 0) {
- /* We're setting up new filter. Copy and check actual data. */
- fcode = malloc(size, M_BPF, M_WAITOK);
- if (copyin(fp->bf_insns, fcode, size) != 0 ||
- !bpf_validate(fcode, flen)) {
+ /* We're setting up new filter. Copy and check actual data. */
+ fcode = bpf_program_buffer_alloc(size, M_WAITOK);
+ filter = (struct bpf_insn *)fcode->buffer;
+ if (copyin(fp->bf_insns, filter, size) != 0 ||
+ !bpf_validate(filter, flen)) {
free(fcode, M_BPF);
return (EINVAL);
}
@@ -1991,49 +2064,72 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
* Filter is copied inside fcode and is
* perfectly valid.
*/
- jfunc = bpf_jitter(fcode, flen);
+ jfunc = bpf_jitter(filter, flen);
}
#endif
}
- BPF_LOCK();
+ track_event = false;
+ fcode = NULL;
- /*
- * Set up new filter.
- * Protect filter change by interface lock.
- * Additionally, we are protected by global lock here.
- */
- if (d->bd_bif != NULL)
- BPFIF_WLOCK(d->bd_bif);
+ BPF_LOCK();
BPFD_LOCK(d);
+ /* Set up new filter. */
if (cmd == BIOCSETWF) {
- old = d->bd_wfilter;
- d->bd_wfilter = fcode;
+ if (d->bd_wfilter != NULL) {
+ fcode = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ fcode->func = NULL;
+#endif
+ }
+ d->bd_wfilter = filter;
} else {
- old = d->bd_rfilter;
- d->bd_rfilter = fcode;
+ if (d->bd_rfilter != NULL) {
+ fcode = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ fcode->func = d->bd_bfilter;
+#endif
+ }
+ d->bd_rfilter = filter;
#ifdef BPF_JITTER
- ofunc = d->bd_bfilter;
d->bd_bfilter = jfunc;
#endif
if (cmd == BIOCSETF)
reset_d(d);
- need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
+ if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
+ /*
+ * Filter can be set several times without
+ * specifying interface. In this case just mark d
+ * as reader.
+ */
+ d->bd_writer = 0;
+ if (d->bd_bif != NULL) {
+ /*
+ * Remove descriptor from writers-only list
+ * and add it to active readers list.
+ */
+ CK_LIST_REMOVE(d, bd_next);
+ CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
+ d, bd_next);
+ CTR2(KTR_NET,
+ "%s: upgrade required by pid %d",
+ __func__, d->bd_pid);
+ track_event = true;
+ }
+ }
}
BPFD_UNLOCK(d);
- if (d->bd_bif != NULL)
- BPFIF_WUNLOCK(d->bd_bif);
- if (old != NULL)
- free(old, M_BPF);
-#ifdef BPF_JITTER
- if (ofunc != NULL)
- bpf_destroy_jit_filter(ofunc);
-#endif
- /* Move d to active readers list. */
- if (need_upgrade != 0)
- bpf_upgraded(d);
+ if (fcode != NULL)
+ epoch_call(net_epoch_preempt, &fcode->epoch_ctx,
+ bpf_program_buffer_free);
+
+ if (track_event)
+ EVENTHANDLER_INVOKE(bpf_track,
+ d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
BPF_UNLOCK();
return (0);
@@ -2057,15 +2153,6 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
return (ENXIO);
bp = theywant->if_bpf;
-
- /* Check if interface is not being detached from BPF */
- BPFIF_RLOCK(bp);
- if (bp->bif_flags & BPFIF_FLAG_DYING) {
- BPFIF_RUNLOCK(bp);
- return (ENXIO);
- }
- BPFIF_RUNLOCK(bp);
-
/*
* At this point, we expect the buffer is already allocated. If not,
* return an error.
@@ -2084,9 +2171,11 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
}
if (bp != d->bd_bif)
bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
+ else {
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ }
return (0);
}
@@ -2253,6 +2342,7 @@ bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2262,24 +2352,14 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
int gottime;
gottime = BPF_TSTAMP_NONE;
-
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- /*
- * We are not using any locks for d here because:
- * 1) any filter change is protected by interface
- * write lock
- * 2) destroying/detaching d is protected by interface
- * write lock, too
- */
-
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
counter_u64_add(d->bd_rcount, 1);
/*
- * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
- * way for the caller to indiciate to us whether this packet
- * is inbound or outbound. In the bpf_mtap() routines, we use
- * the interface pointers on the mbuf to figure it out.
+ * NB: We don't call BPF_CHECK_DIRECTION() here since there
+ * is no way for the caller to indicate to us whether this
+ * packet is inbound or outbound. In the bpf_mtap() routines,
+ * we use the interface pointers on the mbuf to figure it out.
*/
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -2293,10 +2373,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
* Filter matches. Let's to acquire write lock.
*/
BPFD_LOCK(d);
-
counter_u64_add(d->bd_fcount, 1);
if (gottime < bpf_ts_quality(d->bd_tstamp))
- gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
+ gottime = bpf_gettime(&bt, d->bd_tstamp,
+ NULL);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
@@ -2305,7 +2385,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#define BPF_CHECK_DIRECTION(d, r, i) \
@@ -2319,6 +2399,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2328,7 +2409,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
int gottime;
/* Skip outgoing duplicate packets. */
- if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
m->m_flags &= ~M_PROMISC;
return;
}
@@ -2336,17 +2417,17 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
pktlen = m_length(m, NULL);
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
/* XXX We cannot handle multiple mbufs. */
if (bf != NULL && m->m_next == NULL)
- slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
+ slen = (*(bf->func))(mtod(m, u_char *), pktlen,
+ pktlen);
else
#endif
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
@@ -2364,7 +2445,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -2374,6 +2455,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct mbuf mb;
struct bpf_d *d;
@@ -2392,6 +2474,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
* Note that we cut corners here; we only setup what's
* absolutely needed--this mbuf should never go anywhere else.
*/
+ mb.m_flags = 0;
mb.m_next = m;
mb.m_data = data;
mb.m_len = dlen;
@@ -2399,9 +2482,8 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
@@ -2420,11 +2502,10 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#undef BPF_CHECK_DIRECTION
-
#undef BPF_TSTAMP_NONE
#undef BPF_TSTAMP_FAST
#undef BPF_TSTAMP_NORMAL
@@ -2514,6 +2595,11 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
int tstype;
BPFD_LOCK_ASSERT(d);
+ if (d->bd_bif == NULL) {
+ /* Descriptor was detached in concurrent thread */
+ counter_u64_add(d->bd_dcount, 1);
+ return;
+ }
/*
* Detect whether user space has released a buffer back to us, and if
@@ -2643,26 +2729,36 @@ copy:
* Called on close.
*/
static void
-bpf_freed(struct bpf_d *d)
+bpfd_free(epoch_context_t ctx)
{
+ struct bpf_d *d;
+ struct bpf_program_buffer *p;
/*
* We don't need to lock out interrupts since this descriptor has
* been detached from its interface and it yet hasn't been marked
* free.
*/
+ d = __containerof(ctx, struct bpf_d, epoch_ctx);
bpf_free(d);
if (d->bd_rfilter != NULL) {
- free((caddr_t)d->bd_rfilter, M_BPF);
+ p = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
- if (d->bd_bfilter != NULL)
- bpf_destroy_jit_filter(d->bd_bfilter);
+ p->func = d->bd_bfilter;
#endif
+ bpf_program_buffer_free(&p->epoch_ctx);
+ }
+ if (d->bd_wfilter != NULL) {
+ p = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ p->func = NULL;
+#endif
+ bpf_program_buffer_free(&p->epoch_ctx);
}
- if (d->bd_wfilter != NULL)
- free((caddr_t)d->bd_wfilter, M_BPF);
- mtx_destroy(&d->bd_lock);
+ mtx_destroy(&d->bd_lock);
counter_u64_free(d->bd_rcount);
counter_u64_free(d->bd_dcount);
counter_u64_free(d->bd_fcount);
@@ -2670,7 +2766,7 @@ bpf_freed(struct bpf_d *d)
counter_u64_free(d->bd_wfcount);
counter_u64_free(d->bd_wdcount);
counter_u64_free(d->bd_zcopy);
-
+ free(d, M_BPF);
}
/*
@@ -2691,29 +2787,33 @@ bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
* headers are not yet supporrted).
*/
void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
+ struct bpf_if **driverp)
{
struct bpf_if *bp;
- bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
- if (bp == NULL)
- panic("bpfattach");
+ KASSERT(*driverp == NULL,
+ ("bpfattach2: driverp already initialized"));
- LIST_INIT(&bp->bif_dlist);
- LIST_INIT(&bp->bif_wlist);
+ bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
+
+ CK_LIST_INIT(&bp->bif_dlist);
+ CK_LIST_INIT(&bp->bif_wlist);
bp->bif_ifp = ifp;
bp->bif_dlt = dlt;
- rw_init(&bp->bif_lock, "bpf interface lock");
- KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+ bp->bif_hdrlen = hdrlen;
bp->bif_bpf = driverp;
+ bp->bif_refcnt = 1;
*driverp = bp;
-
+ /*
+ * Reference ifnet pointer, so it won't be freed until
+ * we release it.
+ */
+ if_ref(ifp);
BPF_LOCK();
- LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
+ CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
BPF_UNLOCK();
- bp->bif_hdrlen = hdrlen;
-
if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n");
}
@@ -2752,98 +2852,32 @@ bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp, *bp_temp;
- struct bpf_d *d;
- int ndetached;
-
- ndetached = 0;
+ struct bpf_if *bp, *bp_temp;
+ struct bpf_d *d;
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
if (ifp != bp->bif_ifp)
continue;
- LIST_REMOVE(bp, bif_next);
- /* Add to to-be-freed list */
- LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
-
- ndetached++;
- /*
- * Delay freeing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->bif_flags |= BPFIF_FLAG_DYING;
+ CK_LIST_REMOVE(bp, bif_next);
*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
- BPFIF_WUNLOCK(bp);
- CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+ CTR4(KTR_NET,
+ "%s: sheduling free for encap %d (%p) for if %p",
__func__, bp->bif_dlt, bp, ifp);
- /* Free common descriptors */
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach common descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach writer-only descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
- }
- BPF_UNLOCK();
-
-#ifdef INVARIANTS
- if (ndetached == 0)
- printf("bpfdetach: %s was not attached\n", ifp->if_xname);
-#endif
-}
-
-/*
- * Interface departure handler.
- * Note departure event does not guarantee interface is going down.
- * Interface renaming is currently done via departure/arrival event set.
- *
- * Departure handled is called after all routes pointing to
- * given interface are removed and interface is in down state
- * restricting any packets to be sent/received. We assume it is now safe
- * to free data allocated by BPF.
- */
-static void
-bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct bpf_if *bp, *bp_temp;
- int nmatched = 0;
-
- /* Ignore ifnet renaming. */
- if (ifp->if_flags & IFF_RENAMING)
- return;
-
- BPF_LOCK();
- /*
- * Find matching entries in free list.
- * Nothing should be found if bpfdetach() was not called.
- */
- LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
- if (ifp != bp->bif_ifp)
- continue;
-
- CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
- __func__, bp, ifp);
-
- LIST_REMOVE(bp, bif_next);
-
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
-
- nmatched++;
+ bpfif_rele(bp);
}
BPF_UNLOCK();
}
@@ -2862,9 +2896,8 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
-again:
n1 = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp)
n1++;
}
@@ -2874,24 +2907,16 @@ again:
}
if (n1 > bfl->bfl_len)
return (ENOMEM);
- BPF_UNLOCK();
+
lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (n >= n1) {
- free(lst, M_TEMP);
- goto again;
- }
- lst[n] = bp->bif_dlt;
- n++;
+ lst[n++] = bp->bif_dlt;
}
- BPF_UNLOCK();
error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
free(lst, M_TEMP);
- BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2907,33 +2932,34 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
struct bpf_if *bp;
BPF_LOCK_ASSERT();
+ MPASS(d->bd_bif != NULL);
+ /*
+ * It is safe to check bd_bif without BPFD_LOCK, it can not be
+ * changed while we hold global lock.
+ */
if (d->bd_bif->bif_dlt == dlt)
return (0);
- ifp = d->bd_bif->bif_ifp;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ ifp = d->bd_bif->bif_ifp;
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
break;
}
+ if (bp == NULL)
+ return (EINVAL);
- if (bp != NULL) {
- opromisc = d->bd_promisc;
- bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
- if (opromisc) {
- error = ifpromisc(bp->bif_ifp, 1);
- if (error)
- if_printf(bp->bif_ifp,
- "bpf_setdlt: ifpromisc failed (%d)\n",
- error);
- else
- d->bd_promisc = 1;
- }
+ opromisc = d->bd_promisc;
+ bpf_attachd(d, bp);
+ if (opromisc) {
+ error = ifpromisc(bp->bif_ifp, 1);
+ if (error)
+ if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
+ __func__, error);
+ else
+ d->bd_promisc = 1;
}
- return (bp == NULL ? EINVAL : 0);
+ return (0);
}
#ifdef __rtems__
static struct bpf_d *
@@ -2973,7 +2999,7 @@ bpf_imfs_readv(rtems_libio_t *iop, const struct iovec *iov, int iovcnt, ssize_t
struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = RTEMS_DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -3014,7 +3040,7 @@ bpf_imfs_writev(rtems_libio_t *iop, const struct iovec *iov, int iovcnt, ssize_t
struct bpf_d *d = bpf_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = RTEMS_DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -3042,7 +3068,7 @@ static ssize_t
bpf_imfs_write(rtems_libio_t *iop, const void *buffer, size_t count)
{
struct iovec iov = {
- .iov_base = buffer,
+ .iov_base = RTEMS_DECONST(void *, buffer),
.iov_len = count
};
@@ -3115,24 +3141,23 @@ bpf_drvinit(void *unused)
#endif /* __rtems__ */
sx_init(&bpf_sx, "bpf global lock");
- LIST_INIT(&bpf_iflist);
- LIST_INIT(&bpf_freelist);
+ CK_LIST_INIT(&bpf_iflist);
#ifndef __rtems__
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
/* For compatibility */
make_dev_alias(dev, "bpf0");
-#else /* __rtems__ */
- rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
- BSD_ASSERT(rv == 0);
- rv = symlink("/dev/bpf", "/dev/bpf0");
- BSD_ASSERT(rv == 0);
-#endif /* __rtems__ */
/* Register interface departure handler */
bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, bpf_ifdetach, NULL,
EVENTHANDLER_PRI_ANY);
+#else /* __rtems__ */
+ rv = IMFS_make_generic_node("/dev/bpf", mode, &bpf_imfs_control, NULL);
+ BSD_ASSERT(rv == 0);
+ rv = symlink("/dev/bpf", "/dev/bpf0");
+ BSD_ASSERT(rv == 0);
+#endif /* __rtems__ */
}
/*
@@ -3147,19 +3172,19 @@ bpf_zero_counters(void)
struct bpf_d *bd;
BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
- BPFD_LOCK(bd);
+ /*
+ * We are protected by the global lock here; interfaces and
+ * descriptors cannot be deleted while we hold it.
+ */
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
counter_u64_zero(bd->bd_rcount);
counter_u64_zero(bd->bd_dcount);
counter_u64_zero(bd->bd_fcount);
counter_u64_zero(bd->bd_wcount);
counter_u64_zero(bd->bd_wfcount);
counter_u64_zero(bd->bd_zcopy);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
}
@@ -3171,10 +3196,9 @@ static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{
+ BPF_LOCK_ASSERT();
bzero(d, sizeof(*d));
- BPFD_LOCK_ASSERT(bd);
d->bd_structsize = sizeof(*d);
- /* XXX: reading should be protected by global lock */
d->bd_immediate = bd->bd_immediate;
d->bd_promisc = bd->bd_promisc;
d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -3251,22 +3275,16 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
return (ENOMEM);
}
index = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
/* Send writers-only first */
- LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
@@ -3346,10 +3364,10 @@ bpf_show_bpf_if(struct bpf_if *bpf_if)
/* bif_ext.bif_dlist */
BPF_DB_PRINTF("%#x", bif_dlt);
BPF_DB_PRINTF("%u", bif_hdrlen);
- BPF_DB_PRINTF("%p", bif_ifp);
- /* bif_lock */
/* bif_wlist */
- BPF_DB_PRINTF("%#x", bif_flags);
+ BPF_DB_PRINTF("%p", bif_ifp);
+ BPF_DB_PRINTF("%p", bif_bpf);
+ BPF_DB_PRINTF("%u", bif_refcnt);
}
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index d8eb7ff4..55b03b54 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -42,6 +42,10 @@
#ifndef _NET_BPF_H_
#define _NET_BPF_H_
+#include <sys/_eventhandler.h>
+#include <sys/ck.h>
+#include <net/dlt.h>
+
#if defined(__rtems__) && !defined(__FreeBSD__)
#define __FreeBSD__ 1
#endif /* defined(__rtems__) && !defined(__FreeBSD__) */
@@ -236,9 +240,6 @@ struct bpf_zbuf_header {
u_int _bzh_pad[5];
};
-/* Pull in data-link level type codes. */
-#include <net/dlt.h>
-
/*
* The instruction encodings.
*
@@ -412,10 +413,11 @@ SYSCTL_DECL(_net_bpf);
* bpf_peers_present() calls.
*/
struct bpf_if;
+CK_LIST_HEAD(bpfd_list, bpf_d);
struct bpf_if_ext {
- LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
- LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+ CK_LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ struct bpfd_list bif_dlist; /* descriptor list */
};
void bpf_bufheld(struct bpf_d *d);
@@ -439,7 +441,7 @@ bpf_peers_present(struct bpf_if *bpf)
struct bpf_if_ext *ext;
ext = (struct bpf_if_ext *)bpf;
- if (!LIST_EMPTY(&ext->bif_dlist))
+ if (!CK_LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
@@ -467,12 +469,10 @@ bpf_peers_present(struct bpf_if *bpf)
*/
#define BPF_MEMWORDS 16
-#ifdef _SYS_EVENTHANDLER_H_
/* BPF attach/detach events */
struct ifnet;
typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
int /* 1 =>'s attach */);
EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
#endif /* _NET_BPF_H_ */
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index 7a182a61..daa9e267 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -71,8 +71,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <sys/param.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/kernel.h>
@@ -119,19 +121,10 @@ bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
{
const struct mbuf *m;
u_char *dst;
- u_int count;
m = (struct mbuf *)src;
dst = (u_char *)buf + offset;
- while (len > 0) {
- if (m == NULL)
- panic("bpf_mcopy");
- count = min(m->m_len, len);
- bcopy(mtod(m, void *), dst, count);
- m = m->m_next;
- dst += count;
- len -= count;
- }
+ m_copydata(m, 0, len, dst);
}
/*
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index 2ce9204b..c28a74f9 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -43,9 +43,10 @@
#include <sys/callout.h>
#include <sys/selinfo.h>
-#include <sys/queue.h>
+#include <sys/ck.h>
#include <sys/conf.h>
#include <sys/counter.h>
+#include <sys/epoch.h>
#include <net/if.h>
/*
@@ -53,7 +54,7 @@
*/
struct zbuf;
struct bpf_d {
- LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
+ CK_LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
/*
* Buffer slots: two memory buffers store the incoming packets.
* The model has three slots. Sbuf is always occupied.
@@ -106,6 +107,9 @@ struct bpf_d {
counter_u64_t bd_wdcount; /* number of packets dropped during a write */
counter_u64_t bd_zcopy; /* number of zero copy operations */
u_char bd_compat32; /* 32-bit stream on LP64 system */
+
+ volatile u_int bd_refcnt;
+ struct epoch_context epoch_ctx;
};
/* Values for bd_state */
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index 49e772b3..424f4d69 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -2024,6 +2024,7 @@ bstp_same_bridgeid(uint64_t id1, uint64_t id2)
void
bstp_reinit(struct bstp_state *bs)
{
+ struct epoch_tracker et;
struct bstp_port *bp;
struct ifnet *ifp, *mif;
u_char *e_addr;
@@ -2044,7 +2045,7 @@ bstp_reinit(struct bstp_state *bs)
* from is part of this bridge, so we can have more than one independent
* bridge in the same STP domain.
*/
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_type != IFT_ETHER)
continue; /* Not Ethernet */
@@ -2064,7 +2065,7 @@ bstp_reinit(struct bstp_state *bs)
continue;
}
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
if (mif == NULL)
goto disablestp;
@@ -2275,4 +2276,7 @@ bstp_destroy(struct bstp_port *bp)
taskqueue_drain(taskqueue_swi, &bp->bp_statetask);
taskqueue_drain(taskqueue_swi, &bp->bp_rtagetask);
taskqueue_drain(taskqueue_swi, &bp->bp_mediatask);
+
+ if (bp->bp_bs->bs_root_port == bp)
+ bstp_assign_roles(bp->bp_bs);
}
diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h
index fa75c1df..7ceb9b80 100644
--- a/freebsd/sys/net/ethernet.h
+++ b/freebsd/sys/net/ethernet.h
@@ -401,6 +401,8 @@ struct ether_vlan_header {
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
+
struct ifnet;
struct mbuf;
struct route;
@@ -422,12 +424,11 @@ void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
struct mbuf *ether_vlanencap(struct mbuf *, uint16_t);
bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
uint16_t vid, uint8_t pcp);
+void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr);
-#ifdef _SYS_EVENTHANDLER_H_
/* new ethernet interface attached event */
typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t);
-#endif
#else /* _KERNEL */
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 9a70d6a1..46076a23 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -34,6 +34,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_ratelimit.h>
#include <sys/param.h>
@@ -837,7 +838,9 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
struct lacp_softc *lsc = LACP_SOFTC(sc);
struct lacp_portmap *pm;
struct lacp_port *lp;
+ struct lacp_port **map;
uint32_t hash;
+ int count;
if (__predict_false(lsc->lsc_suppress_distributing)) {
LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
@@ -850,13 +853,31 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
+#ifdef NUMA
+ if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
+ pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
+ count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+ if (count > 0) {
+ map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+ } else {
+ /* No ports on this domain; use global hash. */
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
+ } else
+#endif
+ {
+ map = pm->pm_map;
+ count = pm->pm_count;
+ }
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
- hash %= pm->pm_count;
- lp = pm->pm_map[hash];
+
+ hash %= count;
+ lp = map[hash];
KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
("aggregated port is not distributing"));
@@ -864,7 +885,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (lp->lp_lagg);
}
-#ifdef RATELIMIT
+#if defined(RATELIMIT) || defined(KERN_TLS)
struct lagg_port *
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
{
@@ -1046,6 +1067,10 @@ lacp_update_portmap(struct lacp_softc *lsc)
uint64_t speed;
u_int newmap;
int i;
+#ifdef NUMA
+ int count;
+ uint8_t domain;
+#endif
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
@@ -1056,9 +1081,25 @@ lacp_update_portmap(struct lacp_softc *lsc)
if (la != NULL && la->la_nports > 0) {
p->pm_count = la->la_nports;
i = 0;
- TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
+ TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
p->pm_map[i++] = lp;
+#ifdef NUMA
+ domain = lp->lp_ifp->if_numa_domain;
+ if (domain >= MAXMEMDOM)
+ continue;
+ count = p->pm_numa[domain].count;
+ p->pm_numa[domain].map[count] = lp;
+ p->pm_numa[domain].count++;
+#endif
+ }
KASSERT(i == p->pm_count, ("Invalid port count"));
+
+#ifdef NUMA
+ for (i = 0; i < MAXMEMDOM; i++) {
+ if (p->pm_numa[i].count != 0)
+ p->pm_num_dom++;
+ }
+#endif
speed = lacp_aggregator_bandwidth(la);
}
sc->sc_ifp->if_baudrate = speed;
diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h
index 5ae48ceb..b6a0860f 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.h
+++ b/freebsd/sys/net/ieee8023ad_lacp.h
@@ -197,8 +197,15 @@ enum lacp_mux_state {
#define LACP_MAX_PORTS 32
+struct lacp_numa {
+ int count;
+ struct lacp_port *map[LACP_MAX_PORTS];
+};
+
struct lacp_portmap {
int pm_count;
+ int pm_num_dom;
+ struct lacp_numa pm_numa[MAXMEMDOM];
struct lacp_port *pm_map[LACP_MAX_PORTS];
};
@@ -286,7 +293,7 @@ struct lacp_softc {
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-#ifdef RATELIMIT
+#if defined(RATELIMIT) || defined(KERN_TLS)
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
#endif
void lacp_attach(struct lagg_softc *);
diff --git a/freebsd/sys/net/ieee_oui.h b/freebsd/sys/net/ieee_oui.h
new file mode 100644
index 00000000..068328d8
--- /dev/null
+++ b/freebsd/sys/net/ieee_oui.h
@@ -0,0 +1,85 @@
+/* -
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * Author: George V. Neville-Neil
+ *
+ */
+
+/* Organizationally Unique Identifier assigned by IEEE 14 Nov 2013 */
+#define OUI_FREEBSD_BASE 0x589cfc000000
+#define OUI_FREEBSD(nic) (OUI_FREEBSD_BASE | (nic))
+
+/*
+ * OUIs are most often used to uniquely identify network interfaces
+ * and occupy the first 3 bytes of both destination and source MAC
+ * addresses. The following allocations exist so that various
+ * software systems associated with FreeBSD can have unique IDs in the
+ * absence of hardware. The use of OUIs for this purpose is not fully
+ * fleshed out but is now in common use in virtualization technology.
+ *
+ * Allocations from this range are expected to be made using COMMON
+ * SENSE by developers. Do NOT take a large range just because
+ * they're currently wide open. Take the smallest useful range for
+ * your system. We have (2^24 - 2) available addresses (see Reserved
+ * Values below) but that is far from infinite.
+ *
+ * In the event of a conflict, arbitration of allocation in this file
+ * is subject to core@ approval.
+ *
+ * Applications are differentiated based on the high order bit(s) of
+ * the remaining three bytes. Our first allocation has all 0s, the
+ * next allocation has the highest bit set. Allocating in this way
+ * gives us 254 allocations of 64K addresses. Address blocks can be
+ * concatenated if necessary.
+ *
+ * Reserved Values: 0x000000 and 0xffffff are reserved and MUST NOT BE
+ * allocated for any reason.
+ */
+
+/* Allocate 20 bits to bhyve */
+#define OUI_FREEBSD_BHYVE_LOW OUI_FREEBSD(0x000001)
+#define OUI_FREEBSD_BHYVE_HIGH OUI_FREEBSD(0x0fffff)
+
+/*
+ * Allocate 16 bits for a pool to give to various interfaces that need a
+ * generated address, but don't quite need to slice off a whole section of
+ * the OUI (e.g. cloned interfaces, one-off NICs of various vendors).
+ *
+ * ether_gen_addr should be used to generate an address from this pool.
+ */
+#define OUI_FREEBSD_GENERATED_MASK 0x10ffff
+#define OUI_FREEBSD_GENERATED_LOW OUI_FREEBSD(0x100000)
+#define OUI_FREEBSD_GENERATED_HIGH OUI_FREEBSD(OUI_FREEBSD_GENERATED_MASK)
+
+/* Allocate 16 bits for emulated NVMe devices */
+#define OUI_FREEBSD_NVME_MASK 0x20ffff
+#define OUI_FREEBSD_NVME_LOW OUI_FREEBSD(0x200000)
+#define OUI_FREEBSD_NVME_HIGH OUI_FREEBSD(OUI_FREEBSD_NVME_MASK)
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 9d233444..c1fd928e 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -38,9 +38,10 @@
#include <rtems/bsd/local/opt_inet.h>
#include <sys/param.h>
-#include <sys/types.h>
#include <sys/conf.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
+#include <sys/domainset.h>
#include <sys/sbuf.h>
#include <sys/bus.h>
#include <sys/epoch.h>
@@ -175,14 +176,14 @@ struct ifmediareq32 {
#define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
#define _CASE_IOC_IFGROUPREQ_32(cmd) \
- case _IOC_NEWTYPE((cmd), struct ifgroupreq32):
+ _IOC_NEWTYPE((cmd), struct ifgroupreq32): case
#else /* !COMPAT_FREEBSD32 */
#define _CASE_IOC_IFGROUPREQ_32(cmd)
#endif /* !COMPAT_FREEBSD32 */
#define CASE_IOC_IFGROUPREQ(cmd) \
_CASE_IOC_IFGROUPREQ_32(cmd) \
- case (cmd)
+ (cmd)
union ifreq_union {
struct ifreq ifr;
@@ -270,7 +271,6 @@ static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
-static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
@@ -358,16 +358,17 @@ ifnet_byindex(u_short idx)
struct ifnet *
ifnet_byindex_ref(u_short idx)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
ifp = ifnet_byindex_locked(idx);
if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (NULL);
}
if_ref(ifp);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -431,14 +432,15 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp)
struct ifaddr *
ifaddr_byindex(u_short idx)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
struct ifaddr *ifa = NULL;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
ifp = ifnet_byindex_locked(idx);
if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
ifa_ref(ifa);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifa);
}
@@ -531,13 +533,23 @@ if_grow(void)
* registered for the passed type.
*/
struct ifnet *
-if_alloc(u_char type)
+if_alloc_domain(u_char type, int numa_domain)
{
struct ifnet *ifp;
u_short idx;
void *old;
- ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
+#ifndef __rtems__
+ KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large"));
+ if (numa_domain == IF_NODOM)
+#endif /* __rtems__ */
+ ifp = malloc(sizeof(struct ifnet), M_IFNET,
+ M_WAITOK | M_ZERO);
+#ifndef __rtems__
+ else
+ ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET,
+ DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO);
+#endif /* __rtems__ */
restart:
IFNET_WLOCK();
idx = ifindex_alloc(&old);
@@ -552,6 +564,9 @@ if_alloc(u_char type)
ifp->if_index = idx;
ifp->if_type = type;
ifp->if_alloctype = type;
+#ifndef __rtems__
+ ifp->if_numa_domain = numa_domain;
+#endif /* __rtems__ */
#ifdef VIMAGE
ifp->if_vnet = curvnet;
#endif
@@ -585,6 +600,22 @@ if_alloc(u_char type)
return (ifp);
}
+struct ifnet *
+if_alloc_dev(u_char type, device_t dev)
+{
+ int numa_domain;
+
+ if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0)
+ return (if_alloc_domain(type, IF_NODOM));
+ return (if_alloc_domain(type, numa_domain));
+}
+
+struct ifnet *
+if_alloc(u_char type)
+{
+
+ return (if_alloc_domain(type, IF_NODOM));
+}
/*
* Do the actual work of freeing a struct ifnet, and layer 2 common
* structure. This call is made when the last reference to an
@@ -613,7 +644,14 @@ if_free_internal(struct ifnet *ifp)
free(ifp->if_description, M_IFDESCR);
free(ifp->if_hw_addr, M_IFADDR);
- free(ifp, M_IFNET);
+#ifndef __rtems__
+ if (ifp->if_numa_domain == IF_NODOM)
+#endif /* __rtems__ */
+ free(ifp, M_IFNET);
+#ifndef __rtems__
+ else
+ free_domain(ifp, M_IFNET);
+#endif /* __rtems__ */
}
static void
@@ -840,7 +878,6 @@ if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
sdl->sdl_type = ifp->if_type;
ifp->if_addr = ifa;
ifa->ifa_ifp = ifp;
- ifa->ifa_rtrequest = link_rtrequest;
ifa->ifa_addr = (struct sockaddr *)sdl;
sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
ifa->ifa_netmask = (struct sockaddr *)sdl;
@@ -976,12 +1013,14 @@ if_purgeaddrs(struct ifnet *ifp)
struct ifaddr *ifa;
while (1) {
- NET_EPOCH_ENTER();
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
break;
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (ifa == NULL)
break;
@@ -1107,6 +1146,15 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
curvnet->vnet_ifcnt--;
#endif
epoch_wait_preempt(net_epoch_preempt);
+
+ /*
+ * Ensure all pending EPOCH(9) callbacks have been executed. This
+ * avoids problems with late destruction of multicast options,
+ * which leads to leave-group calls, which in turn access the
+ * ifnet structure they belong to:
+ */
+ epoch_drain_callbacks(net_epoch_preempt);
+
/*
* In any case (destroy or vmove) detach us from the groups
* and remove/wait for pending events on the taskq.
@@ -1618,38 +1666,39 @@ ifgr_groups_get(void *ifgrp)
static int
if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
+ struct epoch_tracker et;
int len, error;
struct ifg_list *ifgl;
struct ifg_req ifgrq, *ifgp;
if (ifgr->ifgr_len == 0) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
ifgr->ifgr_len += sizeof(struct ifg_req);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
len = ifgr->ifgr_len;
ifgp = ifgr_groups_get(ifgr);
/* XXX: wire */
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
if (len < sizeof(ifgrq)) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (EINVAL);
}
bzero(&ifgrq, sizeof ifgrq);
strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
sizeof(ifgrq.ifgrq_group));
if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (error);
}
len -= sizeof(ifgrq);
ifgp++;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -1869,6 +1918,7 @@ static int
ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
struct sockaddr *ia)
{
+ struct epoch_tracker et;
int error;
struct rt_addrinfo info;
struct sockaddr_dl null_sdl;
@@ -1879,6 +1929,16 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
bzero(&info, sizeof(info));
if (cmd != RTM_DELETE)
info.rti_ifp = V_loif;
+ if (cmd == RTM_ADD) {
+ /* explicitly specify (loopback) ifa */
+ if (info.rti_ifp != NULL) {
+ NET_EPOCH_ENTER(et);
+ info.rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
+ if (info.rti_ifa != NULL)
+ ifa_ref(info.rti_ifa);
+ NET_EPOCH_EXIT(et);
+ }
+ }
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
@@ -1963,11 +2023,12 @@ done:
int
ifa_ifwithaddr_check(const struct sockaddr *addr)
{
+ struct epoch_tracker et;
int rc;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
rc = (ifa_ifwithaddr(addr) != NULL);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (rc);
}
@@ -2057,9 +2118,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
/*
* Scan through each interface, looking for ones that have addresses
- * in this address family and the requested fib. Maintain a reference
- * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
- * kept it stable when we move onto the next interface.
+ * in this address family and the requested fib.
*/
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
@@ -2188,38 +2247,6 @@ ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
((*carp_master_p)(next) && !(*carp_master_p)(cur))));
}
-#include <net/if_llatbl.h>
-
-/*
- * Default action when installing a route with a Link Level gateway.
- * Lookup an appropriate real ifa to point to.
- * This should be moved to /sys/net/link.c eventually.
- */
-static void
-link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
- struct ifaddr *ifa, *oifa;
- struct sockaddr *dst;
- struct ifnet *ifp;
-
- if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
- ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
- return;
- NET_EPOCH_ENTER();
- ifa = ifaof_ifpforaddr(dst, ifp);
- if (ifa) {
- oifa = rt->rt_ifa;
- if (oifa != ifa) {
- ifa_free(oifa);
- ifa_ref(ifa);
- }
- rt->rt_ifa = ifa;
- if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
- ifa->ifa_rtrequest(cmd, rt, info);
- }
- NET_EPOCH_EXIT();
-}
-
struct sockaddr_dl *
link_alloc_sdl(size_t size, int flags)
{
@@ -2418,9 +2445,10 @@ if_qflush(struct ifnet *ifp)
struct ifnet *
ifunit_ref(const char *name)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
!(ifp->if_flags & IFF_DYING))
@@ -2428,21 +2456,22 @@ ifunit_ref(const char *name)
}
if (ifp != NULL)
if_ref(ifp);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
struct ifnet *
ifunit(const char *name)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
break;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -2706,6 +2735,8 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
if (strlen(new_name) == IFNAMSIZ-1)
return (EINVAL);
}
+ if (strcmp(new_name, ifp->if_xname) == 0)
+ break;
if (ifunit(new_name) != NULL)
return (EEXIST);
@@ -2830,6 +2861,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (EINVAL);
if (cmd == SIOCADDMULTI) {
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
/*
@@ -2839,9 +2871,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
* lose a race while we check if the membership
* already exists.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ifma = if_findmulti(ifp, &ifr->ifr_addr);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ifma != NULL)
error = EADDRINUSE;
else
@@ -2878,6 +2910,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
case SIOCGIFGENERIC:
case SIOCGIFRSSKEY:
case SIOCGIFRSSHASH:
+ case SIOCGIFDOWNREASON:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
error = (*ifp->if_ioctl)(ifp, cmd, data);
@@ -2895,7 +2928,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
error = if_gethwaddr(ifp, ifr);
break;
- CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
error = priv_check(td, PRIV_NET_ADDIFGROUP);
if (error)
return (error);
@@ -2904,12 +2937,12 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
break;
- CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
return (error);
break;
- CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
+ case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
error = priv_check(td, PRIV_NET_DELIFGROUP);
if (error)
return (error);
@@ -3080,7 +3113,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
error = if_clone_list((struct if_clonereq *)data);
goto out_noref;
- CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
+ case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
error = if_getgroupmembers((struct ifgroupreq *)data);
goto out_noref;
@@ -3280,6 +3313,7 @@ again:
IFNET_RLOCK();
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ struct epoch_tracker et;
int addrs;
/*
@@ -3296,7 +3330,7 @@ again:
}
addrs = 0;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa = ifa->ifa_addr;
@@ -3324,7 +3358,7 @@ again:
if (sbuf_error(sb) == 0)
valid_len = sbuf_len(sb);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (addrs == 0) {
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
@@ -3631,15 +3665,16 @@ if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
struct ifmultiaddr *ifma;
int lastref;
#ifdef INVARIANTS
+ struct epoch_tracker et;
struct ifnet *oifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
if (ifp != oifp)
ifp = NULL;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
#endif
@@ -3705,15 +3740,16 @@ if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
if (ifp == NULL) {
printf("%s: ifma_ifp seems to be detached\n", __func__);
} else {
+ struct epoch_tracker et;
struct ifnet *oifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
if (ifp == oifp)
break;
if (ifp != oifp)
ifp = NULL;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
}
#endif
/*
@@ -3837,10 +3873,11 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
struct sockaddr_dl *sdl;
struct ifaddr *ifa;
struct ifreq ifr;
+ struct epoch_tracker et;
int rc;
rc = 0;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifp->if_addr;
if (ifa == NULL) {
rc = EINVAL;
@@ -3874,7 +3911,7 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
* to re-init it in order to reprogram its
* address filter.
*/
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if ((ifp->if_flags & IFF_UP) != 0) {
if (ifp->if_ioctl) {
ifp->if_flags &= ~IFF_UP;
@@ -3890,7 +3927,7 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
EVENTHANDLER_INVOKE(iflladdr_event, ifp);
return (0);
out:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (rc);
}
@@ -4305,6 +4342,8 @@ if_getsoftc(if_t ifp)
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
index 070dbafe..f4c3bec2 100644
--- a/freebsd/sys/net/if_arp.h
+++ b/freebsd/sys/net/if_arp.h
@@ -105,8 +105,9 @@ struct arpstat {
uint64_t rxrequests; /* # of ARP requests received by this host. */
uint64_t rxreplies; /* # of ARP replies received by this host. */
uint64_t received; /* # of ARP packets received by this host. */
+ uint64_t txerrors; /* # of ARP requests failed to send. */
- uint64_t arp_spares[4]; /* For either the upper or lower half. */
+ uint64_t arp_spares[3]; /* For either the upper or lower half. */
/* Abnormal event and error counting: */
uint64_t dropped; /* # of packets dropped waiting for a reply. */
uint64_t timeouts; /* # of times with entries removed */
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index aa56be48..18e0e7bf 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -228,7 +228,7 @@ struct bridge_softc {
struct bstp_state sc_stp; /* STP state */
uint32_t sc_brtexceeded; /* # of cache drops */
struct ifnet *sc_ifaddr; /* member mac copied from */
- u_char sc_defaddr[6]; /* Default MAC address */
+ struct ether_addr sc_defaddr; /* Default MAC address */
};
VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx);
@@ -237,7 +237,8 @@ static eventhandler_tag bridge_detach_cookie;
int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
-uma_zone_t bridge_rtnode_zone;
+VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone);
+#define V_bridge_rtnode_zone VNET(bridge_rtnode_zone)
static int bridge_clone_create(struct if_clone *, int, caddr_t);
static void bridge_clone_destroy(struct ifnet *);
@@ -529,6 +530,9 @@ static void
vnet_bridge_init(const void *unused __unused)
{
+ V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
+ sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
BRIDGE_LIST_LOCK_INIT();
LIST_INIT(&V_bridge_list);
V_bridge_cloner = if_clone_simple(bridge_name,
@@ -544,6 +548,7 @@ vnet_bridge_uninit(const void *unused __unused)
if_clone_detach(V_bridge_cloner);
V_bridge_cloner = NULL;
BRIDGE_LIST_LOCK_DESTROY();
+ uma_zdestroy(V_bridge_rtnode_zone);
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_bridge_uninit, NULL);
@@ -554,9 +559,6 @@ bridge_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
- sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
bridge_dn_p = bridge_dummynet;
bridge_detach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, bridge_ifdetach, NULL,
@@ -565,7 +567,6 @@ bridge_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
bridge_detach_cookie);
- uma_zdestroy(bridge_rtnode_zone);
bridge_dn_p = NULL;
break;
default:
@@ -672,16 +673,14 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
getcredhostid(curthread->td_ucred, &hostid);
do {
if (fb || hostid == 0) {
- arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
- sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
- sc->sc_defaddr[0] |= 2; /* set the LAA bit */
+ ether_gen_addr(ifp, &sc->sc_defaddr);
} else {
- sc->sc_defaddr[0] = 0x2;
- sc->sc_defaddr[1] = (hostid >> 24) & 0xff;
- sc->sc_defaddr[2] = (hostid >> 16) & 0xff;
- sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff;
- sc->sc_defaddr[4] = hostid & 0xff;
- sc->sc_defaddr[5] = ifp->if_dunit & 0xff;
+ sc->sc_defaddr.octet[0] = 0x2;
+ sc->sc_defaddr.octet[1] = (hostid >> 24) & 0xff;
+ sc->sc_defaddr.octet[2] = (hostid >> 16) & 0xff;
+ sc->sc_defaddr.octet[3] = (hostid >> 8 ) & 0xff;
+ sc->sc_defaddr.octet[4] = hostid & 0xff;
+ sc->sc_defaddr.octet[5] = ifp->if_dunit & 0xff;
}
fb = 1;
@@ -689,7 +688,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
BRIDGE_LIST_LOCK();
LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
bifp = sc2->sc_ifp;
- if (memcmp(sc->sc_defaddr,
+ if (memcmp(sc->sc_defaddr.octet,
IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
retry = 1;
break;
@@ -699,7 +698,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
} while (retry == 1);
bstp_attach(&sc->sc_stp, &bridge_ops);
- ether_ifattach(ifp, sc->sc_defaddr);
+ ether_ifattach(ifp, sc->sc_defaddr.octet);
/* Now undo some of the damage... */
ifp->if_baudrate = 0;
ifp->if_type = IFT_BRIDGE;
@@ -734,6 +733,9 @@ bridge_clone_destroy(struct ifnet *ifp)
bridge_delete_span(sc, bif);
}
+ /* Tear down the routing table. */
+ bridge_rtable_fini(sc);
+
BRIDGE_UNLOCK(sc);
callout_drain(&sc->sc_brcallout);
@@ -746,9 +748,6 @@ bridge_clone_destroy(struct ifnet *ifp)
ether_ifdetach(ifp);
if_free(ifp);
- /* Tear down the routing table. */
- bridge_rtable_fini(sc);
-
BRIDGE_LOCK_DESTROY(sc);
free(sc, M_DEVBUF);
}
@@ -927,7 +926,7 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
{
struct ifnet *ifp = bif->bif_ifp;
struct ifreq ifr;
- int error;
+ int error, mask, stuck;
BRIDGE_UNLOCK_ASSERT(sc);
@@ -940,10 +939,12 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
if_printf(sc->sc_ifp,
"error setting capabilities on %s: %d\n",
ifp->if_xname, error);
- if ((ifp->if_capenable & ~set) != 0)
+ mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP;
+ stuck = ifp->if_capenable & mask & ~set;
+ if (stuck != 0)
if_printf(sc->sc_ifp,
"can't disable some capabilities on %s: 0x%x\n",
- ifp->if_xname, ifp->if_capenable & ~set);
+ ifp->if_xname, stuck);
}
}
@@ -1018,7 +1019,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
*/
if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
if (LIST_EMPTY(&sc->sc_iflist)) {
- bcopy(sc->sc_defaddr,
+ bcopy(&sc->sc_defaddr,
IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = NULL;
} else {
@@ -1189,7 +1190,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
* the default randomly generated one.
*/
if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
- !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
+ !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) {
bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = ifs;
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
@@ -1972,9 +1973,9 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
return;
}
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
) {
if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
@@ -2001,7 +2002,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
struct rtentry *rt)
{
struct ether_header *eh;
- struct ifnet *dst_if;
+ struct ifnet *bifp, *dst_if;
struct bridge_softc *sc;
uint16_t vlan;
@@ -2016,13 +2017,14 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
vlan = VLANTAGOF(m);
BRIDGE_LOCK(sc);
+ bifp = sc->sc_ifp;
/*
* If bridge is down, but the original output interface is up,
* go ahead and send out that interface. Otherwise, the packet
* is dropped below.
*/
- if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
dst_if = ifp;
goto sendunicast;
}
@@ -2035,6 +2037,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
dst_if = NULL;
else
dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
+ /* Tap any traffic not passing back out the originating interface */
+ if (dst_if != ifp)
+ ETHER_BPF_MTAP(bifp, m);
if (dst_if == NULL) {
struct bridge_iflist *bif;
struct mbuf *mc;
@@ -2072,7 +2077,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
} else {
mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
+ if_inc_counter(bifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2232,9 +2237,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
ETHER_BPF_MTAP(ifp, m);
/* run the packet filter */
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_IN(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_IN(V_inet6_pfil_head)
#endif
) {
BRIDGE_UNLOCK(sc);
@@ -2272,9 +2277,9 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
BRIDGE_UNLOCK(sc);
- if (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
) {
if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
@@ -2411,7 +2416,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
#ifdef INET6
# define OR_PFIL_HOOKED_INET6 \
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_IN(V_inet6_pfil_head)
#else
# define OR_PFIL_HOOKED_INET6
#endif
@@ -2423,22 +2428,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \
OR_CARP_CHECK_WE_ARE_DST((iface)) \
) { \
- if ((iface)->if_type == IFT_BRIDGE) { \
- ETHER_BPF_MTAP(iface, m); \
- if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \
- if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
- /* Filter on the physical interface. */ \
- if (V_pfil_local_phys && \
- (PFIL_HOOKED(&V_inet_pfil_hook) \
- OR_PFIL_HOOKED_INET6)) { \
- if (bridge_pfil(&m, NULL, ifp, \
- PFIL_IN) != 0 || m == NULL) { \
- BRIDGE_UNLOCK(sc); \
- return (NULL); \
- } \
- eh = mtod(m, struct ether_header *); \
- } \
- } \
if (bif->bif_flags & IFBIF_LEARNING) { \
error = bridge_rtupdate(sc, eh->ether_shost, \
vlan, bif, 0, IFBAF_DYNAMIC); \
@@ -2449,6 +2438,26 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
} \
} \
m->m_pkthdr.rcvif = iface; \
+ if ((iface) == ifp) { \
+ /* Skip bridge processing... src == dest */ \
+ BRIDGE_UNLOCK(sc); \
+ return (m); \
+ } \
+ /* It's passing over or to the bridge, locally. */ \
+ ETHER_BPF_MTAP(bifp, m); \
+ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); \
+ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
+ /* Filter on the physical interface. */ \
+ if (V_pfil_local_phys && (PFIL_HOOKED_IN(V_inet_pfil_head) \
+ OR_PFIL_HOOKED_INET6)) { \
+ if (bridge_pfil(&m, NULL, ifp, \
+ PFIL_IN) != 0 || m == NULL) { \
+ BRIDGE_UNLOCK(sc); \
+ return (NULL); \
+ } \
+ } \
+ if ((iface) != bifp) \
+ ETHER_BPF_MTAP(iface, m); \
BRIDGE_UNLOCK(sc); \
return (m); \
} \
@@ -2519,9 +2528,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
}
/* Filter on the bridge interface before broadcasting */
- if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
)) {
if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
@@ -2566,9 +2575,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
* pointer so we do not redundantly filter on the bridge for
* each interface we broadcast on.
*/
- if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook)
+ if (runfilt && (PFIL_HOOKED_OUT(V_inet_pfil_head)
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ || PFIL_HOOKED_OUT(V_inet6_pfil_head)
#endif
)) {
if (used == 0) {
@@ -2671,7 +2680,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
* initialize the expiration time and Ethernet
* address.
*/
- brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO);
+ brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
if (brt == NULL)
return (ENOMEM);
@@ -2684,7 +2693,7 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
brt->brt_vlan = vlan;
if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
- uma_zfree(bridge_rtnode_zone, brt);
+ uma_zfree(V_bridge_rtnode_zone, brt);
return (error);
}
brt->brt_dst = bif;
@@ -2768,11 +2777,14 @@ bridge_timer(void *arg)
BRIDGE_LOCK_ASSERT(sc);
+ /* Destruction of rtnodes requires a proper vnet context */
+ CURVNET_SET(sc->sc_ifp->if_vnet);
bridge_rtage(sc);
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
callout_reset(&sc->sc_brcallout,
bridge_rtable_prune_period * hz, bridge_timer, sc);
+ CURVNET_RESTORE();
}
/*
@@ -3030,7 +3042,7 @@ bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
LIST_REMOVE(brt, brt_list);
sc->sc_brtcnt--;
brt->brt_dst->bif_addrcnt--;
- uma_zfree(bridge_rtnode_zone, brt);
+ uma_zfree(V_bridge_rtnode_zone, brt);
}
/*
@@ -3044,6 +3056,7 @@ bridge_rtable_expire(struct ifnet *ifp, int age)
struct bridge_softc *sc = ifp->if_bridge;
struct bridge_rtnode *brt;
+ CURVNET_SET(ifp->if_vnet);
BRIDGE_LOCK(sc);
/*
@@ -3062,6 +3075,7 @@ bridge_rtable_expire(struct ifnet *ifp, int age)
}
}
BRIDGE_UNLOCK(sc);
+ CURVNET_RESTORE();
}
/*
@@ -3103,6 +3117,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
struct ip *ip;
struct llc llc1;
u_int16_t ether_type;
+ pfil_return_t rv;
snap = 0;
error = -1; /* Default error if not error == 0 */
@@ -3174,14 +3189,14 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
}
/* Run the packet through pfil before stripping link headers */
- if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 &&
- dir == PFIL_OUT && ifp != NULL) {
-
- error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, 0,
- NULL);
-
- if (*mp == NULL || error != 0) /* packet consumed by filter */
- return (error);
+ if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+ switch (pfil_run_hooks(V_link_pfil_head, mp, ifp, dir, NULL)) {
+ case PFIL_DROPPED:
+ return (EPERM);
+ case PFIL_CONSUMED:
+ return (0);
+ }
}
/* Strip off the Ethernet header and keep a copy. */
@@ -3219,6 +3234,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
/*
* Run the packet through pfil
*/
+ rv = PFIL_PASS;
switch (ether_type) {
case ETHERTYPE_IP:
/*
@@ -3228,25 +3244,19 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
* Keep the order:
* in_if -> bridge_if -> out_if
*/
- if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_member && ifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_member && ifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, ifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
- error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
/* check if we need to fragment the packet */
@@ -3282,35 +3292,33 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
break;
#ifdef INET6
case ETHERTYPE_IPV6:
- if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_member && ifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
- dir, 0, NULL);
-
- if (*mp == NULL || error != 0) /* filter may consume */
+ if (V_pfil_member && ifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, ifp, dir, NULL)) !=
+ PFIL_PASS)
break;
- if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
- dir, 0, NULL);
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
+ pfil_run_hooks(V_inet6_pfil_head, mp, bifp, dir, NULL)) !=
+ PFIL_PASS)
+ break;
break;
#endif
+ }
+
+ switch (rv) {
+ case PFIL_CONSUMED:
+ return (0);
+ case PFIL_DROPPED:
+ return (EPERM);
default:
- error = 0;
break;
}
- if (*mp == NULL)
- return (error);
- if (error != 0)
- goto bad;
-
error = -1;
/*
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
index 5dceacf6..30d604f3 100644
--- a/freebsd/sys/net/if_clone.h
+++ b/freebsd/sys/net/if_clone.h
@@ -37,6 +37,8 @@
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
+
#define IFC_NOGROUP 0x1
struct if_clone;
@@ -65,11 +67,9 @@ const char *ifc_name(struct if_clone *);
void ifc_flags_set(struct if_clone *, int flags);
int ifc_flags_get(struct if_clone *);
-#ifdef _SYS_EVENTHANDLER_H_
/* Interface clone event. */
typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
-#endif
/* The below interfaces used only by net/if.c. */
void vnet_if_clone_init(void);
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
index 552be13f..ff73ceaf 100644
--- a/freebsd/sys/net/if_dead.c
+++ b/freebsd/sys/net/if_dead.c
@@ -128,6 +128,23 @@ ifdead_snd_tag_free(struct m_snd_tag *pmt)
{
}
+static void
+ifdead_ratelimit_query(struct ifnet *ifp __unused,
+ struct if_ratelimit_query_results *q)
+{
+ /*
+ * Rate-limit query handler installed by if_dead(). A dead
+ * interface does not support rate limiting, so report an
+ * empty result: no rate table, the RT_NOSUPPORT flag, and
+ * zero flows and rates. It is unclear why an interface
+ * would carry a flag claiming support and still get here.
+ */
+ q->rate_table = NULL;
+ q->flags = RT_NOSUPPORT;
+ q->max_flows = 0;
+ q->number_of_rates = 0;
+}
+
void
if_dead(struct ifnet *ifp)
{
@@ -144,4 +161,5 @@ if_dead(struct ifnet *ifp)
ifp->if_snd_tag_modify = ifdead_snd_tag_modify;
ifp->if_snd_tag_query = ifdead_snd_tag_query;
ifp->if_snd_tag_free = ifdead_snd_tag_free;
+ ifp->if_ratelimit_query = ifdead_ratelimit_query;
}
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index ebfbf5cb..9e7fcc53 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -287,24 +287,24 @@ enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data,
switch (hhook_id) {
#ifdef INET
case AF_INET:
- ph = &V_inet_pfil_hook;
+ ph = V_inet_pfil_head;
break;
#endif
#ifdef INET6
case AF_INET6:
- ph = &V_inet6_pfil_hook;
+ ph = V_inet6_pfil_head;
break;
#endif
default:
ph = NULL;
}
- if (ph == NULL || !PFIL_HOOKED(ph))
+ if (ph == NULL || (pdir == PFIL_OUT && !PFIL_HOOKED_OUT(ph)) ||
+ (pdir == PFIL_IN && !PFIL_HOOKED_IN(ph)))
return (0);
/* Make a packet looks like it was received on enc(4) */
rcvif = (*ctx->mp)->m_pkthdr.rcvif;
(*ctx->mp)->m_pkthdr.rcvif = ifp;
- if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, 0, ctx->inp) != 0 ||
- *ctx->mp == NULL) {
+ if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != PFIL_PASS) {
*ctx->mp = NULL; /* consumed by filter */
return (EACCES);
}
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 96ed309a..6c5c2ccb 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -44,11 +44,13 @@
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
+#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/random.h>
#include <sys/socket.h>
@@ -56,6 +58,7 @@
#include <sys/sysctl.h>
#include <sys/uuid.h>
+#include <net/ieee_oui.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
@@ -87,12 +90,14 @@
#endif
#include <security/mac/mac_framework.h>
+#include <crypto/sha1.h>
+
#ifdef CTASSERT
CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
-VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */
+VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
@@ -459,7 +464,6 @@ ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
- int error;
uint8_t pcp;
pcp = ifp->if_pcp;
@@ -467,27 +471,27 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
!ether_set_pcp(&m, ifp, pcp))
return (0);
- if (PFIL_HOOKED(&V_link_pfil_hook)) {
- error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
- PFIL_OUT, 0, NULL);
- if (error != 0)
+ if (PFIL_HOOKED_OUT(V_link_pfil_head))
+ switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT,
+ NULL)) {
+ case PFIL_DROPPED:
return (EACCES);
-
- if (m == NULL)
+ case PFIL_CONSUMED:
return (0);
- }
+ }
#ifdef EXPERIMENTAL
#if defined(INET6) && defined(INET)
/* draft-ietf-6man-ipv6only-flag */
- /* Catch ETHERTYPE_IP, and ETHERTYPE_ARP if we are v6-only. */
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY) != 0) {
+ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
struct ether_header *eh;
eh = mtod(m, struct ether_header *);
switch (ntohs(eh->ether_type)) {
case ETHERTYPE_IP:
case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
m_freem(m);
return (EAFNOSUPPORT);
/* NOTREACHED */
@@ -538,6 +542,25 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
etype = ntohs(eh->ether_type);
random_harvest_queue_ether(m, sizeof(*m));
+#ifdef EXPERIMENTAL
+#if defined(INET6) && defined(INET)
+ /* draft-ietf-6man-ipv6only-flag */
+ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
+
+ switch (etype) {
+ case ETHERTYPE_IP:
+ case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
+ m_freem(m);
+ return;
+ /* NOTREACHED */
+ break;
+ };
+ }
+#endif
+#endif
+
CURVNET_SET_QUIET(ifp->if_vnet);
if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
@@ -739,14 +762,14 @@ SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
static void
vnet_ether_init(__unused void *arg)
{
- int i;
+ struct pfil_head_args args;
+
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = PFIL_IN | PFIL_OUT;
+ args.pa_type = PFIL_TYPE_ETHERNET;
+ args.pa_headname = PFIL_ETHER_NAME;
+ V_link_pfil_head = pfil_head_register(&args);
- /* Initialize packet filter hooks. */
- V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
- V_link_pfil_hook.ph_af = AF_LINK;
- if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
- printf("%s: WARNING: unable to register pfil link hook, "
- "error %d\n", __func__, i);
#ifdef VIMAGE
netisr_register_vnet(&ether_nh);
#endif
@@ -758,11 +781,8 @@ VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
static void
vnet_ether_pfil_destroy(__unused void *arg)
{
- int i;
- if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
- printf("%s: WARNING: unable to unregister pfil link hook, "
- "error %d\n", __func__, i);
+ pfil_head_unregister(V_link_pfil_head);
}
VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
vnet_ether_pfil_destroy, NULL);
@@ -798,6 +818,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* We will rely on rcvif being set properly in the deferred context,
* so assert it is correct here.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
"rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -820,10 +841,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
/* Do not grab PROMISC frames in case we are re-entered. */
- if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
- i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, 0,
- NULL);
-
+ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) {
+ i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL);
if (i != 0 || m == NULL)
return;
}
@@ -1390,5 +1409,38 @@ ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
return (true);
}
+/*
+ * Allocate an address from the FreeBSD Foundation OUI. This uses a
+ * cryptographic hash function on the containing jail's UUID and the interface
+ * name to attempt to provide a unique but stable address. Pseudo-interfaces
+ * which require a MAC address should use this function to allocate
+ * non-locally-administered addresses.
+ *
+ * ifp supplies the interface name (if_xname) that is hashed; hwaddr receives
+ * the ETHER_ADDR_LEN generated octets, most significant octet first.
+ */
+void
+ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
+{
+#define ETHER_GEN_ADDR_BUFSIZ (HOSTUUIDLEN + IFNAMSIZ + 2)
+ SHA1_CTX ctx;
+ char buf[ETHER_GEN_ADDR_BUFSIZ];
+ char uuid[HOSTUUIDLEN + 1];
+ uint64_t addr;
+ int i, sz;
+ char digest[SHA1_RESULTLEN];
+
+ /* Hash "<host uuid>-<interface name>" with SHA-1. */
+ getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
+ sz = snprintf(buf, ETHER_GEN_ADDR_BUFSIZ, "%s-%s", uuid, ifp->if_xname);
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, buf, sz);
+ SHA1Final(digest, &ctx);
+
+ /*
+ * Build the low bits from the first three digest bytes. Each byte
+ * is converted through uint8_t first: plain char may be signed, and
+ * a sign-extended byte would set every higher bit of the OR (and
+ * left-shifting a negative int is undefined behaviour).
+ */
+ addr = (((uint32_t)(uint8_t)digest[0] << 16) |
+ ((uint32_t)(uint8_t)digest[1] << 8) |
+ (uint32_t)(uint8_t)digest[2]) & OUI_FREEBSD_GENERATED_MASK;
+ addr = OUI_FREEBSD(addr);
+ /* Store the low ETHER_ADDR_LEN bytes of addr, highest byte first. */
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ hwaddr->octet[i] = (addr >> ((ETHER_ADDR_LEN - i - 1) * 8)) &
+ 0xFF;
+ }
+}
+
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(ether, 1);
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index 4fbc105e..5aeb8266 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <sys/param.h>
#include <sys/kernel.h>
@@ -51,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
@@ -67,19 +69,27 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <netinet/in.h>
+#include <netinet/in_pcb.h>
#ifdef INET
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#ifdef RSS
+#include <netinet/in_rss.h>
+#endif
#endif
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
+#ifdef RSS
+#include <netinet6/in6_rss.h>
+#endif
#endif
#include <netinet/ip_encap.h>
+#include <netinet/udp.h>
#include <net/bpf.h>
#include <net/if_gre.h>
@@ -153,6 +163,7 @@ vnet_gre_uninit(const void *unused __unused)
#ifdef INET6
in6_gre_uninit();
#endif
+ /* XXX: epoch_call drain */
}
VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gre_uninit, NULL);
@@ -272,6 +283,7 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
case GRESKEY:
case GRESOPTS:
+ case GRESPORT:
if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
@@ -287,23 +299,45 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
if (sc->gre_options == opt)
break;
+ } else if (cmd == GRESPORT) {
+ if (opt != 0 && (opt < V_ipport_hifirstauto ||
+ opt > V_ipport_hilastauto)) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->gre_port == opt)
+ break;
+ if ((sc->gre_options & GRE_UDPENCAP) == 0) {
+ /*
+ * UDP encapsulation is not enabled, thus
+ * there is no need to reattach softc.
+ */
+ sc->gre_port = opt;
+ break;
+ }
}
switch (sc->gre_family) {
#ifdef INET
case AF_INET:
- in_gre_setopts(sc, cmd, opt);
+ error = in_gre_setopts(sc, cmd, opt);
break;
#endif
#ifdef INET6
case AF_INET6:
- in6_gre_setopts(sc, cmd, opt);
+ error = in6_gre_setopts(sc, cmd, opt);
break;
#endif
default:
+ /*
+ * Tunnel is not yet configured.
+ * We can just change any parameters.
+ */
if (cmd == GRESKEY)
sc->gre_key = opt;
- else
+ if (cmd == GRESOPTS)
sc->gre_options = opt;
+ if (cmd == GRESPORT)
+ sc->gre_port = opt;
break;
}
/*
@@ -319,6 +353,10 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
sizeof(sc->gre_options));
break;
+ case GREGPORT:
+ error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
+ sizeof(sc->gre_port));
+ break;
default:
error = EINVAL;
break;
@@ -343,6 +381,7 @@ end:
static void
gre_delete_tunnel(struct gre_softc *sc)
{
+ struct gre_socket *gs;
sx_assert(&gre_ioctl_sx, SA_XLOCKED);
if (sc->gre_family != 0) {
@@ -352,6 +391,16 @@ gre_delete_tunnel(struct gre_softc *sc)
free(sc->gre_hdr, M_GRE);
sc->gre_family = 0;
}
+ /*
+ * If this Tunnel was the last one that could use UDP socket,
+ * we should unlink socket from hash table and close it.
+ */
+ if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
+ CK_LIST_REMOVE(gs, chain);
+ soclose(gs->so);
+ epoch_call(net_epoch_preempt, &gs->epoch_ctx, gre_sofree);
+ sc->gre_so = NULL;
+ }
GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
}
@@ -378,7 +427,38 @@ gre_hashdestroy(struct gre_list *hash)
}
void
-gre_updatehdr(struct gre_softc *sc, struct grehdr *gh)
+gre_sofree(epoch_context_t ctx) /* epoch callback scheduled by gre_delete_tunnel() via epoch_call() */
+{
+ struct gre_socket *gs;
+
+ gs = __containerof(ctx, struct gre_socket, epoch_ctx); /* ctx is the epoch_ctx member embedded in the gre_socket */
+ free(gs, M_GRE); /* release the gre_socket itself */
+}
+
+static __inline uint16_t /* 16-bit sum of sum and a with the carry folded back in */
+gre_cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+
+ res = sum + a; /* truncated to 16 bits, so it wraps when the true sum exceeds 0xFFFF */
+ return (res + (res < a)); /* res < a exactly when the addition wrapped: add the lost carry as 1 */
+}
+
+void /* fill in the fields of udp from the tunnel configuration in sc */
+gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED); /* caller must hold gre_ioctl_sx exclusively */
+ MPASS(sc->gre_options & GRE_UDPENCAP); /* only valid when UDP encapsulation is enabled */
+
+ udp->uh_dport = htons(GRE_UDPPORT); /* fixed destination port, network byte order */
+ udp->uh_sport = htons(sc->gre_port); /* configured source port; 0 when none is set */
+ udp->uh_sum = csum; /* caller-supplied checksum value; presumably a partial sum that gre_transmit() completes per packet -- confirm */
+ udp->uh_ulen = 0; /* length left as zero here; gre_transmit() sets uh_ulen per packet */
+}
+
+void
+gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
{
uint32_t *opts;
uint16_t flags;
@@ -545,6 +625,52 @@ gre_setseqn(struct grehdr *gh, uint32_t seq)
*opts = htonl(seq);
}
+static uint32_t /* value derived from the source/destination addresses of the IP header at the front of m, or 0 */
+gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
+{
+ uint32_t flowid;
+
+ if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
+ return (0); /* nothing to compute: UDP encapsulation is off or gre_port is set */
+#ifndef RSS
+ switch (af) { /* without RSS: plain XOR of the two addresses */
+#ifdef INET
+ case AF_INET:
+ flowid = mtod(m, struct ip *)->ip_src.s_addr ^
+ mtod(m, struct ip *)->ip_dst.s_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^ /* only the last 32-bit word of each address is used */
+ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
+ break;
+#endif
+ default:
+ flowid = 0; /* any other address family gets no flow id */
+ }
+#else /* RSS */
+ switch (af) { /* with RSS: use the RSS 2-tuple hash helpers instead */
+#ifdef INET
+ case AF_INET:
+ flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
+ mtod(m, struct ip *)->ip_dst);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ flowid = rss_hash_ip6_2tuple(
+ &mtod(m, struct ip6_hdr *)->ip6_src,
+ &mtod(m, struct ip6_hdr *)->ip6_dst);
+ break;
+#endif
+ default:
+ flowid = 0; /* any other address family gets no flow id */
+ }
+#endif
+ return (flowid);
+}
+
#define MTAG_GRE 1307983903
static int
gre_transmit(struct ifnet *ifp, struct mbuf *m)
@@ -552,7 +678,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
GRE_RLOCK_TRACKER;
struct gre_softc *sc;
struct grehdr *gh;
- uint32_t af;
+ struct udphdr *uh;
+ uint32_t af, flowid;
int error, len;
uint16_t proto;
@@ -579,6 +706,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
af = m->m_pkthdr.csum_data;
BPF_MTAP2(ifp, &af, sizeof(af), m);
m->m_flags &= ~(M_BCAST|M_MCAST);
+ flowid = gre_flowid(sc, m, af);
M_SETFIB(m, sc->gre_fibnum);
M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
if (m == NULL) {
@@ -620,6 +748,19 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
error = ENETDOWN;
goto drop;
}
+ if (sc->gre_options & GRE_UDPENCAP) {
+ uh = (struct udphdr *)mtodo(m, len);
+ uh->uh_sport |= htons(V_ipport_hifirstauto) |
+ (flowid >> 16) | (flowid & 0xFFFF);
+ uh->uh_sport = htons(ntohs(uh->uh_sport) %
+ V_ipport_hilastauto);
+ uh->uh_ulen = htons(m->m_pkthdr.len - len);
+ uh->uh_sum = gre_cksum_add(uh->uh_sum,
+ htons(m->m_pkthdr.len - len + IPPROTO_UDP));
+ m->m_pkthdr.csum_flags = sc->gre_csumflags;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ len += sizeof(struct udphdr);
+ }
gh = (struct grehdr *)mtodo(m, len);
gh->gre_proto = proto;
if (sc->gre_options & GRE_ENABLE_SEQ)
@@ -637,7 +778,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m)
#endif
#ifdef INET6
case AF_INET6:
- error = in6_gre_output(m, af, sc->gre_hlen);
+ error = in6_gre_output(m, af, sc->gre_hlen, flowid);
break;
#endif
default:
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 4b93321a..de3c5979 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -53,14 +53,35 @@ struct greip {
struct ip gi_ip;
struct grehdr gi_gre;
} __packed;
-#endif
+
+struct greudp { /* IPv4 header followed by UDP and GRE headers, laid out back to back */
+ struct ip gi_ip; /* outer IPv4 header */
+ struct udphdr gi_udp; /* UDP header placed between the IP and GRE headers */
+ struct grehdr gi_gre; /* GRE header */
+} __packed;
+#endif /* INET */
#ifdef INET6
struct greip6 {
struct ip6_hdr gi6_ip6;
struct grehdr gi6_gre;
} __packed;
-#endif
+
+struct greudp6 { /* IPv6 header followed by UDP and GRE headers, laid out back to back */
+ struct ip6_hdr gi6_ip6; /* outer IPv6 header */
+ struct udphdr gi6_udp; /* UDP header placed between the IPv6 and GRE headers */
+ struct grehdr gi6_gre; /* GRE header */
+} __packed;
+#endif /* INET6 */
+
+CK_LIST_HEAD(gre_list, gre_softc);
+CK_LIST_HEAD(gre_sockets, gre_socket);
+struct gre_socket { /* a UDP socket referenced from gre_softc.gre_so */
+ struct socket *so; /* the socket; closed with soclose() in gre_delete_tunnel() */
+ struct gre_list list; /* gre_softc list; gre_delete_tunnel() closes the socket once this is empty */
+ CK_LIST_ENTRY(gre_socket) chain; /* linkage removed with CK_LIST_REMOVE() when the socket is torn down */
+ struct epoch_context epoch_ctx; /* passed to epoch_call() so gre_sofree() can free this structure later */
+};
struct gre_softc {
struct ifnet *gre_ifp;
@@ -69,22 +90,26 @@ struct gre_softc {
uint32_t gre_oseq;
uint32_t gre_key;
uint32_t gre_options;
+ uint32_t gre_csumflags;
+ uint32_t gre_port;
u_int gre_fibnum;
u_int gre_hlen; /* header size */
union {
void *hdr;
#ifdef INET
- struct greip *gihdr;
+ struct greip *iphdr;
+ struct greudp *udphdr;
#endif
#ifdef INET6
- struct greip6 *gi6hdr;
+ struct greip6 *ip6hdr;
+ struct greudp6 *udp6hdr;
#endif
} gre_uhdr;
+ struct gre_socket *gre_so;
CK_LIST_ENTRY(gre_softc) chain;
CK_LIST_ENTRY(gre_softc) srchash;
};
-CK_LIST_HEAD(gre_list, gre_softc);
MALLOC_DECLARE(M_GRE);
#ifndef GRE_HASH_SIZE
@@ -98,28 +123,35 @@ MALLOC_DECLARE(M_GRE);
#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define gre_hdr gre_uhdr.hdr
-#define gre_gihdr gre_uhdr.gihdr
-#define gre_gi6hdr gre_uhdr.gi6hdr
-#define gre_oip gre_gihdr->gi_ip
-#define gre_oip6 gre_gi6hdr->gi6_ip6
+#define gre_iphdr gre_uhdr.iphdr
+#define gre_ip6hdr gre_uhdr.ip6hdr
+#define gre_udphdr gre_uhdr.udphdr
+#define gre_udp6hdr gre_uhdr.udp6hdr
+
+#define gre_oip gre_iphdr->gi_ip
+#define gre_udp gre_udphdr->gi_udp
+#define gre_oip6 gre_ip6hdr->gi6_ip6
+#define gre_udp6 gre_udp6hdr->gi6_udp
struct gre_list *gre_hashinit(void);
void gre_hashdestroy(struct gre_list *);
int gre_input(struct mbuf *, int, int, void *);
-void gre_updatehdr(struct gre_softc *, struct grehdr *);
+void gre_update_hdr(struct gre_softc *, struct grehdr *);
+void gre_update_udphdr(struct gre_softc *, struct udphdr *, uint16_t);
+void gre_sofree(epoch_context_t);
void in_gre_init(void);
void in_gre_uninit(void);
-void in_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in_gre_setopts(struct gre_softc *, u_long, uint32_t);
int in_gre_ioctl(struct gre_softc *, u_long, caddr_t);
int in_gre_output(struct mbuf *, int, int);
void in6_gre_init(void);
void in6_gre_uninit(void);
-void in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
+int in6_gre_setopts(struct gre_softc *, u_long, uint32_t);
int in6_gre_ioctl(struct gre_softc *, u_long, caddr_t);
-int in6_gre_output(struct mbuf *, int, int);
+int in6_gre_output(struct mbuf *, int, int, uint32_t);
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
@@ -139,9 +171,15 @@ int in6_gre_output(struct mbuf *, int, int);
#define GRESKEY _IOW('i', 108, struct ifreq)
#define GREGOPTS _IOWR('i', 109, struct ifreq)
#define GRESOPTS _IOW('i', 110, struct ifreq)
+#define GREGPORT _IOWR('i', 111, struct ifreq)
+#define GRESPORT _IOW('i', 112, struct ifreq)
+
+/* GRE-in-UDP encapsulation destination port as defined in RFC8086 */
+#define GRE_UDPPORT 4754
#define GRE_ENABLE_CSUM 0x0001
#define GRE_ENABLE_SEQ 0x0002
-#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
+#define GRE_UDPENCAP 0x0004
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ|GRE_UDPENCAP)
#endif /* _NET_IF_GRE_H_ */
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 85099115..b82313eb 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -25,6 +25,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_ratelimit.h>
#include <sys/param.h>
@@ -97,6 +98,11 @@ static struct {
{0, NULL}
};
+struct lagg_snd_tag { /* lagg-level send tag that wraps a tag allocated on a member port */
+ struct m_snd_tag com; /* embedded tag handed back to callers; initialised against the lagg ifnet */
+ struct m_snd_tag *tag; /* tag filled in by the selected port's if_snd_tag_alloc method */
+};
+
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@@ -113,6 +119,7 @@ static void lagg_clone_destroy(struct ifnet *);
VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
#define V_lagg_cloner VNET(lagg_cloner)
static const char laggname[] = "lagg";
+static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
static void lagg_capabilities(struct lagg_softc *);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
@@ -131,10 +138,17 @@ static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void lagg_init(void *);
static void lagg_stop(struct lagg_softc *);
static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static int lagg_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int lagg_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void lagg_snd_tag_free(struct m_snd_tag *);
+static void lagg_ratelimit_query(struct ifnet *,
+ struct if_ratelimit_query_results *);
#endif
static int lagg_setmulti(struct lagg_port *);
static int lagg_clrmulti(struct lagg_port *);
@@ -264,6 +278,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
&VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for using numa */
+VNET_DEFINE_STATIC(int, def_use_numa) = 1;
+#define V_def_use_numa VNET(def_use_numa)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_numa), 0,
+ "Use numa to steer flows");
+
/* Default value for flowid shift */
VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
#define V_def_flowid_shift VNET(def_flowid_shift)
@@ -480,10 +501,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
if (ifp == NULL) {
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
return (ENOSPC);
}
LAGG_SX_INIT(sc);
@@ -491,6 +512,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ if (V_def_use_numa)
+ sc->sc_opts |= LAGG_OPT_USE_NUMA;
sc->flowid_shift = V_def_flowid_shift;
/* Hash all layers by default */
@@ -514,12 +537,14 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_ioctl = lagg_ioctl;
ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT;
-#else
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
+ ifp->if_snd_tag_modify = lagg_snd_tag_modify;
+ ifp->if_snd_tag_query = lagg_snd_tag_query;
+ ifp->if_snd_tag_free = lagg_snd_tag_free;
+ ifp->if_ratelimit_query = lagg_ratelimit_query;
#endif
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
* Attach as an ordinary ethernet device, children will be attached
@@ -572,7 +597,7 @@ lagg_clone_destroy(struct ifnet *ifp)
LAGG_LIST_UNLOCK();
LAGG_SX_DESTROY(sc);
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
}
static void
@@ -686,7 +711,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifr.ifr_mtu = oldmtu;
}
- lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO);
+ lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
lp->lp_softc = sc;
/* Check if port is a stacked lagg */
@@ -694,7 +719,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -705,7 +730,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -753,7 +778,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
* is predictable and `ifconfig laggN create ...` command
* will lead to the same result each time.
*/
- LAGG_RLOCK();
CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
if (tlp->lp_ifp->if_index < ifp->if_index && (
CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
@@ -761,7 +785,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifp->if_index))
break;
}
- LAGG_RUNLOCK();
if (tlp != NULL)
CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
else
@@ -816,7 +839,7 @@ lagg_port_destroy_cb(epoch_context_t ec)
ifp = lp->lp_ifp;
if_rele(ifp);
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
}
static int
@@ -1250,6 +1273,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
switch (ro->ro_opts) {
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_USE_NUMA:
+ case -LAGG_OPT_USE_NUMA:
case LAGG_OPT_FLOWIDSHIFT:
valid = 1;
lacp = 0;
@@ -1528,49 +1553,142 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-#ifdef RATELIMIT
-static int
-lagg_snd_tag_alloc(struct ifnet *ifp,
- union if_snd_tag_alloc_params *params,
- struct m_snd_tag **ppmt)
+#if defined(KERN_TLS) || defined(RATELIMIT)
+static inline struct lagg_snd_tag * /* returns the lagg_snd_tag that embeds mst */
+mst_to_lst(struct m_snd_tag *mst)
{
- struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+ return (__containerof(mst, struct lagg_snd_tag, com)); /* only meaningful when mst is the 'com' member of a struct lagg_snd_tag */
+}
+
+/*
+ * Look up the port used by a specific flow. This only works for lagg
+ * protocols with deterministic port mappings (e.g. not roundrobin).
+ * In addition protocols which use a hash to map flows to ports must
+ * be configured to use the mbuf flowid rather than hashing packet
+ * contents.
+ */
+static struct lagg_port * /* port selected for the flow, or NULL when no port can be determined */
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+{
+ struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
+ sc = ifp->if_softc;
+
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
- lp = lagg_link_active(sc, sc->sc_primary);
- break;
+ return (lagg_link_active(sc, sc->sc_primary)); /* start the active-link search from the primary port */
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- p = params->hdr.flowid >> sc->flowid_shift;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL); /* flowid use is disabled or the flow has no hash type */
+ p = flowid >> sc->flowid_shift; /* shifted flowid; reduced modulo sc_count just below to index lb_ports */
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
- lp = lagg_link_active(sc, lp);
- break;
+ return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
- break;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL); /* same flowid requirement as the loadbalance case */
+ return (lacp_select_tx_port_by_hash(sc, flowid));
default:
- return (EOPNOTSUPP);
+ return (NULL); /* any other protocol: no port is reported */
}
- if (lp == NULL)
+}
+
+static int /* 0 on success with *ppmt set; EOPNOTSUPP, ENOMEM or the port's error otherwise */
+lagg_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct lagg_snd_tag *lst;
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lp_ifp;
+ int error;
+
+ sc = ifp->if_softc; /* NOTE(review): sc is not read again in the visible lines of this function */
+
+ LAGG_RLOCK();
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
+ if (lp == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
- ifp = lp->lp_ifp;
- if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
- (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ }
+ if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
+ }
+ lp_ifp = lp->lp_ifp;
+ if_ref(lp_ifp); /* taken before LAGG_RUNLOCK(); matched by if_rele() on every path below */
+ LAGG_RUNLOCK();
+
+ lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
+ if (lst == NULL) {
+ if_rele(lp_ifp);
+ return (ENOMEM);
+ }
+
+ error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag); /* the port's method fills in the wrapped tag */
+ if_rele(lp_ifp);
+ if (error) {
+ free(lst, M_LAGG);
+ return (error);
+ }
+
+ m_snd_tag_init(&lst->com, ifp); /* the embedded tag is initialised against the lagg ifnet, not the port */
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ *ppmt = &lst->com; /* callers get the embedded tag; mst_to_lst() maps it back to lst */
+ return (0);
+}
+
+static int /* returns whatever the wrapped tag's ifnet returns from if_snd_tag_modify */
+lagg_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params)); /* NOTE(review): method pointer is called without a NULL check - confirm ports always set it */
+}
+
+static int /* returns whatever the wrapped tag's ifnet returns from if_snd_tag_query */
+lagg_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_query(lst->tag, params)); /* NOTE(review): method pointer is called without a NULL check - confirm ports always set it */
+}
+
+static void /* tears down a tag created by lagg_snd_tag_alloc() */
+lagg_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ m_snd_tag_rele(lst->tag); /* release the wrapped port tag first */
+ free(lst, M_LAGG); /* then free the wrapper, which was allocated from M_LAGG */
+}
+
+static void /* fills *q to describe lagg as an indirect interface; ifp is unused */
+lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
+{
+ /*
+ * For lagg, we have an indirect
+ * interface. The caller needs to
+ * get a ratelimit tag on the actual
+ * interface the flow will go on.
+ */
+ q->rate_table = NULL; /* no rate table is reported for the lagg itself */
+ q->flags = RT_IS_INDIRECT;
+ q->max_flows = 0;
+ q->number_of_rates = 0;
}
#endif
@@ -1588,7 +1706,7 @@ lagg_setmulti(struct lagg_port *lp)
CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
+ mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
if (mc == NULL) {
IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
@@ -1619,7 +1737,7 @@ lagg_clrmulti(struct lagg_port *lp)
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
if (mc->mc_ifma && lp->lp_detaching == 0)
if_delmulti_ifma(mc->mc_ifma);
- free(mc, M_DEVBUF);
+ free(mc, M_LAGG);
}
return (0);
}
@@ -1696,6 +1814,10 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@@ -1848,12 +1970,20 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
struct lagg_port *lp_next, *rval = NULL;
- struct epoch_tracker net_et;
/*
* Search a port which reports an active link state.
*/
+#ifdef INVARIANTS
+ /*
+ * This is called with either LAGG_RLOCK() held or
+ * LAGG_XLOCK(sc) held.
+ */
+ if (!in_epoch(net_epoch_preempt))
+ LAGG_XLOCK_ASSERT(sc);
+#endif
+
if (lp == NULL)
goto search;
if (LAGG_PORTACTIVE(lp)) {
@@ -1866,15 +1996,12 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
goto found;
}
- search:
- epoch_enter_preempt(net_epoch_preempt, &net_et);
+search:
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) {
- epoch_exit_preempt(net_epoch_preempt, &net_et);
return (lp_next);
}
}
- epoch_exit_preempt(net_epoch_preempt, &net_et);
found:
return (rval);
}
@@ -1883,6 +2010,21 @@ int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { /* the mbuf carries a send tag */
+ struct lagg_snd_tag *lst;
+ struct m_snd_tag *mst;
+
+ mst = m->m_pkthdr.snd_tag;
+ lst = mst_to_lst(mst); /* presumably the attached tag is always a lagg tag here - verify against callers */
+ if (lst->tag->ifp != ifp) { /* wrapped tag belongs to a different ifnet than the one being transmitted on */
+ m_freem(m);
+ return (EAGAIN);
+ }
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag); /* swap the lagg tag on the mbuf for the wrapped port tag */
+ m_snd_tag_rele(mst); /* release the lagg-level tag that was attached to the mbuf */
+ }
+#endif
return (ifp->if_transmit)(ifp, m);
}
@@ -1956,7 +2098,7 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp, *last = NULL;
struct mbuf *m0;
- LAGG_RLOCK();
+ LAGG_RLOCK_ASSERT();
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (!LAGG_PORTACTIVE(lp))
continue;
@@ -1977,7 +2119,6 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
}
last = lp;
}
- LAGG_RUNLOCK();
if (last == NULL) {
m_freem(m);
@@ -2063,7 +2204,7 @@ lagg_lb_attach(struct lagg_softc *sc)
struct lagg_lb *lb;
LAGG_XLOCK_ASSERT(sc);
- lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
+ lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
@@ -2078,7 +2219,7 @@ lagg_lb_detach(struct lagg_softc *sc)
lb = (struct lagg_lb *)sc->sc_psc;
if (lb != NULL)
- free(lb, M_DEVBUF);
+ free(lb, M_LAGG);
}
static int
@@ -2090,7 +2231,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
rv = 0;
bzero(&lb->lb_ports, sizeof(lb->lb_ports));
- LAGG_RLOCK();
+ LAGG_XLOCK_ASSERT(sc);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (lp_next == lp)
continue;
@@ -2103,7 +2244,6 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
lb->lb_ports[i++] = lp_next;
}
- LAGG_RUNLOCK();
return (rv);
}
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index f1e2d8f4..2c566c0d 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -143,6 +143,7 @@ struct lagg_reqopts {
#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */
/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */
+#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */
#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
@@ -158,8 +159,9 @@ struct lagg_reqopts {
#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
-#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
- "\006LACP_TXTEST\007LACP_RXTEST"
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \
+ "\005LACP_STRICT\006LACP_TXTEST" \
+ "\007LACP_RXTEST"
#ifdef _KERNEL
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index b220d7aa..e79b9ba9 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
@@ -92,6 +93,7 @@ static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
static int
lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
{
+ struct epoch_tracker et;
int error;
LLTABLE_LIST_LOCK_ASSERT();
@@ -100,10 +102,10 @@ lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
return (0);
error = 0;
- IF_AFDATA_RLOCK(llt->llt_ifp);
+ NET_EPOCH_ENTER(et);
error = lltable_foreach_lle(llt,
(llt_foreach_cb_t *)llt->llt_dump_entry, wr);
- IF_AFDATA_RUNLOCK(llt->llt_ifp);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -455,11 +457,12 @@ struct llentry *
llentry_alloc(struct ifnet *ifp, struct lltable *lt,
struct sockaddr_storage *dst)
{
+ struct epoch_tracker et;
struct llentry *la, *la_tmp;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (la != NULL) {
LLE_ADDREF(la);
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index 74301284..7bf57bdb 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
+#include <sys/_eventhandler.h>
#include <sys/_rwlock.h>
#include <netinet/in.h>
#include <sys/epoch.h>
@@ -267,7 +268,6 @@ llentry_mark_used(struct llentry *lle)
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
-#include <sys/eventhandler.h>
enum {
LLENTRY_RESOLVED,
LLENTRY_TIMEDOUT,
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index f5b78dec..9aec7cd1 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -1062,15 +1062,13 @@ sppp_detach(struct ifnet *ifp)
KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized"));
/* Stop keepalive handler. */
- if (!callout_drain(&sp->keepalive_callout))
- callout_stop(&sp->keepalive_callout);
+ callout_drain(&sp->keepalive_callout);
for (i = 0; i < IDX_COUNT; i++) {
- if (!callout_drain(&sp->ch[i]))
- callout_stop(&sp->ch[i]);
+ callout_drain(&sp->ch[i]);
}
- if (!callout_drain(&sp->pap_my_to_ch))
- callout_stop(&sp->pap_my_to_ch);
+ callout_drain(&sp->pap_my_to_ch);
+
mtx_destroy(&sp->pp_cpq.ifq_mtx);
mtx_destroy(&sp->pp_fastq.ifq_mtx);
mtx_destroy(&sp->mtx);
@@ -4339,16 +4337,12 @@ sppp_chap_tld(struct sppp *sp)
static void
sppp_chap_scr(struct sppp *sp)
{
- u_long *ch, seed;
+ u_long *ch;
u_char clen;
/* Compute random challenge. */
ch = (u_long *)sp->myauth.challenge;
- read_random(&seed, sizeof seed);
- ch[0] = seed ^ random();
- ch[1] = seed ^ random();
- ch[2] = seed ^ random();
- ch[3] = seed ^ random();
+ arc4random_buf(ch, 4 * sizeof(*ch));
clen = AUTHKEYLEN;
sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP];
@@ -4809,7 +4803,7 @@ sppp_keepalive(void *dummy)
sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ,
++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]);
else if (sp->pp_phase >= PHASE_AUTHENTICATE) {
- long nmagic = htonl (sp->lcp.magic);
+ uint32_t nmagic = htonl(sp->lcp.magic);
sp->lcp.echoid = ++sp->pp_seq[IDX_LCP];
sppp_cp_send (sp, PPP_LCP, ECHO_REQ,
sp->lcp.echoid, 4, &nmagic);
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index 3ba9f8c0..7185fb8d 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -730,6 +730,7 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
ifp->if_flags |= IFF_UP;
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
break;
case SIOCADDMULTI:
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
deleted file mode 100644
index dbf3e599..00000000
--- a/freebsd/sys/net/if_tap.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-
-#include <sys/param.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/jail.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/poll.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/sysctl.h>
-#include <sys/systm.h>
-#include <sys/ttycom.h>
-#include <sys/uio.h>
-#include <sys/queue.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-
-#include <net/if_tapvar.h>
-#include <net/if_tap.h>
-
-#define CDEV_NAME "tap"
-#define TAPDEBUG if (tapdebug) printf
-
-static const char tapname[] = "tap";
-static const char vmnetname[] = "vmnet";
-#define TAPMAXUNIT 0x7fff
-#define VMNET_DEV_MASK CLONE_FLAG0
-
-/* module */
-static int tapmodevent(module_t, int, void *);
-
-/* device */
-static void tapclone(void *, struct ucred *, char *, int,
- struct cdev **);
-static void tapcreate(struct cdev *);
-
-/* network interface */
-static void tapifstart(struct ifnet *);
-static int tapifioctl(struct ifnet *, u_long, caddr_t);
-static void tapifinit(void *);
-
-static int tap_clone_create(struct if_clone *, int, caddr_t);
-static void tap_clone_destroy(struct ifnet *);
-static struct if_clone *tap_cloner;
-static int vmnet_clone_create(struct if_clone *, int, caddr_t);
-static void vmnet_clone_destroy(struct ifnet *);
-static struct if_clone *vmnet_cloner;
-
-/* character device */
-static d_open_t tapopen;
-static d_close_t tapclose;
-static d_read_t tapread;
-static d_write_t tapwrite;
-static d_ioctl_t tapioctl;
-static d_poll_t tappoll;
-static d_kqfilter_t tapkqfilter;
-
-/* kqueue(2) */
-static int tapkqread(struct knote *, long);
-static int tapkqwrite(struct knote *, long);
-static void tapkqdetach(struct knote *);
-
-static struct filterops tap_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqread,
-};
-
-static struct filterops tap_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqwrite,
-};
-
-static struct cdevsw tap_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tapopen,
- .d_close = tapclose,
- .d_read = tapread,
- .d_write = tapwrite,
- .d_ioctl = tapioctl,
- .d_poll = tappoll,
- .d_name = CDEV_NAME,
- .d_kqfilter = tapkqfilter,
-};
-
-/*
- * All global variables in if_tap.c are locked with tapmtx, with the
- * exception of tapdebug, which is accessed unlocked; tapclones is
- * static at runtime.
- */
-static struct mtx tapmtx;
-static int tapdebug = 0; /* debug flag */
-static int tapuopen = 0; /* allow user open() */
-static int tapuponopen = 0; /* IFF_UP on open() */
-static int tapdclone = 1; /* enable devfs cloning */
-static SLIST_HEAD(, tap_softc) taphead; /* first device */
-static struct clonedevs *tapclones;
-
-MALLOC_DECLARE(M_TAP);
-MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
-SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
- "Ethernet tunnel software network interface");
-SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
- "Allow user to open /dev/tap (based on node permissions)");
-SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
- "Bring interface up when /dev/tap is opened");
-SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
- "Enable legacy devfs interface creation");
-SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-DEV_MODULE(if_tap, tapmodevent, NULL);
-
-static int
-tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
- "%s%d", tapname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-/* vmnet devices are tap devices in disguise */
-static int
-vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
- GID_WHEEL, 0600, "%s%d", vmnetname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-static void
-tap_destroy(struct tap_softc *tp)
-{
- struct ifnet *ifp = tp->tap_ifp;
-
- CURVNET_SET(ifp->if_vnet);
- destroy_dev(tp->tap_dev);
- seldrain(&tp->tap_rsel);
- knlist_clear(&tp->tap_rsel.si_note, 0);
- knlist_destroy(&tp->tap_rsel.si_note);
- ether_ifdetach(ifp);
- if_free(ifp);
-
- mtx_destroy(&tp->tap_mtx);
- free(tp, M_TAP);
- CURVNET_RESTORE();
-}
-
-static void
-tap_clone_destroy(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- mtx_lock(&tapmtx);
- SLIST_REMOVE(&taphead, tp, tap_softc, tap_next);
- mtx_unlock(&tapmtx);
- tap_destroy(tp);
-}
-
-/* vmnet devices are tap devices in disguise */
-static void
-vmnet_clone_destroy(struct ifnet *ifp)
-{
- tap_clone_destroy(ifp);
-}
-
-/*
- * tapmodevent
- *
- * module event handler
- */
-static int
-tapmodevent(module_t mod, int type, void *data)
-{
- static eventhandler_tag eh_tag = NULL;
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
-
- switch (type) {
- case MOD_LOAD:
-
- /* intitialize device */
-
- mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
- SLIST_INIT(&taphead);
-
- clone_setup(&tapclones);
- eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
- if (eh_tag == NULL) {
- clone_cleanup(&tapclones);
- mtx_destroy(&tapmtx);
- return (ENOMEM);
- }
- tap_cloner = if_clone_simple(tapname, tap_clone_create,
- tap_clone_destroy, 0);
- vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
- vmnet_clone_destroy, 0);
- return (0);
-
- case MOD_UNLOAD:
- /*
- * The EBUSY algorithm here can't quite atomically
- * guarantee that this is race-free since we have to
- * release the tap mtx to deregister the clone handler.
- */
- mtx_lock(&tapmtx);
- SLIST_FOREACH(tp, &taphead, tap_next) {
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- mtx_unlock(&tapmtx);
- return (EBUSY);
- }
- mtx_unlock(&tp->tap_mtx);
- }
- mtx_unlock(&tapmtx);
-
- EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(tap_cloner);
- if_clone_detach(vmnet_cloner);
- drain_dev_clone_events();
-
- mtx_lock(&tapmtx);
- while ((tp = SLIST_FIRST(&taphead)) != NULL) {
- SLIST_REMOVE_HEAD(&taphead, tap_next);
- mtx_unlock(&tapmtx);
-
- ifp = tp->tap_ifp;
-
- TAPDEBUG("detaching %s\n", ifp->if_xname);
-
- tap_destroy(tp);
- mtx_lock(&tapmtx);
- }
- mtx_unlock(&tapmtx);
- clone_cleanup(&tapclones);
-
- mtx_destroy(&tapmtx);
-
- break;
-
- default:
- return (EOPNOTSUPP);
- }
-
- return (0);
-} /* tapmodevent */
-
-
-/*
- * DEVFS handler
- *
- * We need to support two kind of devices - tap and vmnet
- */
-static void
-tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int i, unit, append_unit;
- int extra;
-
- if (*dev != NULL)
- return;
-
- if (!tapdclone ||
- (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE) != 0))
- return;
-
- unit = 0;
- append_unit = 0;
- extra = 0;
-
- /* We're interested in only tap/vmnet devices. */
- if (strcmp(name, tapname) == 0) {
- unit = -1;
- } else if (strcmp(name, vmnetname) == 0) {
- unit = -1;
- extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
- if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
- return;
- } else {
- extra = VMNET_DEV_MASK;
- }
- }
-
- if (unit == -1)
- append_unit = 1;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
- if (i) {
- if (append_unit) {
- /*
- * We were passed 'tun' or 'tap', with no unit specified
- * so we'll need to append it now.
- */
- namelen = snprintf(devname, sizeof(devname), "%s%d", name,
- unit);
- name = devname;
- }
-
- *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra,
- cred, UID_ROOT, GID_WHEEL, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-} /* tapclone */
-
-
-/*
- * tapcreate
- *
- * to create interface
- */
-static void
-tapcreate(struct cdev *dev)
-{
- struct ifnet *ifp = NULL;
- struct tap_softc *tp = NULL;
- unsigned short macaddr_hi;
- uint32_t macaddr_mid;
- int unit;
- const char *name = NULL;
- u_char eaddr[6];
-
- /* allocate driver storage and create device */
- tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
- mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
- mtx_lock(&tapmtx);
- SLIST_INSERT_HEAD(&taphead, tp, tap_next);
- mtx_unlock(&tapmtx);
-
- unit = dev2unit(dev);
-
- /* select device: tap or vmnet */
- if (unit & VMNET_DEV_MASK) {
- name = vmnetname;
- tp->tap_flags |= TAP_VMNET;
- } else
- name = tapname;
-
- unit &= TAPMAXUNIT;
-
- TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev));
-
- /* generate fake MAC address: 00 bd xx xx xx unit_no */
- macaddr_hi = htons(0x00bd);
- macaddr_mid = (uint32_t) ticks;
- bcopy(&macaddr_hi, eaddr, sizeof(short));
- bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
- eaddr[5] = (u_char)unit;
-
- /* fill the rest and attach interface */
- ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- panic("%s%d: can not if_alloc()", name, unit);
- ifp->if_softc = tp;
- if_initname(ifp, name, unit);
- ifp->if_init = tapifinit;
- ifp->if_start = tapifstart;
- ifp->if_ioctl = tapifioctl;
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- dev->si_drv1 = tp;
- tp->tap_dev = dev;
-
- ether_ifattach(ifp, eaddr);
-
- mtx_lock(&tp->tap_mtx);
- tp->tap_flags |= TAP_INITED;
- mtx_unlock(&tp->tap_mtx);
-
- knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx);
-
- TAPDEBUG("interface %s is created. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-} /* tapcreate */
-
-
-/*
- * tapopen
- *
- * to open tunnel. must be superuser
- */
-static int
-tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
- int error;
-
- if (tapuopen == 0) {
- error = priv_check(td, PRIV_NET_TAP);
- if (error)
- return (error);
- }
-
- if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
- return (ENXIO);
-
- tp = dev->si_drv1;
-
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- return (EBUSY);
- }
-
- bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
-#ifndef __rtems__
- tp->tap_pid = td->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tap_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
- tp->tap_flags |= TAP_OPEN;
- ifp = tp->tap_ifp;
-
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- if (tapuponopen)
- ifp->if_flags |= IFF_UP;
- if_link_state_change(ifp, LINK_STATE_UP);
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapopen */
-
-
-/*
- * tapclose
- *
- * close the device - mark i/f down & delete routing info
- */
-static int
-tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct ifaddr *ifa;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- /* junk all pending output */
- mtx_lock(&tp->tap_mtx);
- CURVNET_SET(ifp->if_vnet);
- IF_DRAIN(&ifp->if_snd);
-
- /*
- * Do not bring the interface down, and do not anything with
- * interface, if we are in VMnet mode. Just close the device.
- */
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) {
- mtx_unlock(&tp->tap_mtx);
- if_down(ifp);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tap_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tap_mtx);
- }
- }
-
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tap_sigio);
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
-
- tp->tap_flags &= ~TAP_OPEN;
- tp->tap_pid = 0;
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is closed. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapclose */
-
-
-/*
- * tapifinit
- *
- * network interface initialization function
- */
-static void
-tapifinit(void *xtp)
-{
- struct tap_softc *tp = (struct tap_softc *)xtp;
- struct ifnet *ifp = tp->tap_ifp;
-
- TAPDEBUG("initializing %s\n", ifp->if_xname);
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-
- /* attempt to start output */
- tapifstart(ifp);
-} /* tapifinit */
-
-
-/*
- * tapifioctl
- *
- * Process an ioctl request on network interface
- */
-static int
-tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct tap_softc *tp = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *)data;
- struct ifstat *ifs = NULL;
- struct ifmediareq *ifmr = NULL;
- int dummy, error = 0;
-
- switch (cmd) {
- case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
-
- case SIOCGIFMEDIA:
- ifmr = (struct ifmediareq *)data;
- dummy = ifmr->ifm_count;
- ifmr->ifm_count = 1;
- ifmr->ifm_status = IFM_AVALID;
- ifmr->ifm_active = IFM_ETHER;
- if (tp->tap_flags & TAP_OPEN)
- ifmr->ifm_status |= IFM_ACTIVE;
- ifmr->ifm_current = ifmr->ifm_active;
- if (dummy >= 1) {
- int media = IFM_ETHER;
- error = copyout(&media, ifmr->ifm_ulist,
- sizeof(int));
- }
- break;
-
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- break;
-
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_pid != 0)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tap_pid);
- else
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- error = ether_ioctl(ifp, cmd, data);
- break;
- }
-
- return (error);
-} /* tapifioctl */
-
-
-/*
- * tapifstart
- *
- * queue packets from higher level ready to put out
- */
-static void
-tapifstart(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- TAPDEBUG("%s starting\n", ifp->if_xname);
-
- /*
- * do not junk pending output if we are in VMnet mode.
- * XXX: can this do any harm because of queue overflow?
- */
-
- mtx_lock(&tp->tap_mtx);
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- ((tp->tap_flags & TAP_READY) != TAP_READY)) {
- struct mbuf *m;
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
- tp->tap_flags);
-
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m != NULL) {
- m_freem(m);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- } else
- break;
- }
- mtx_unlock(&tp->tap_mtx);
-
- return;
- }
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- if (tp->tap_flags & TAP_RWAIT) {
- tp->tap_flags &= ~TAP_RWAIT;
- wakeup(tp);
- }
-
- if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
- mtx_unlock(&tp->tap_mtx);
- pgsigio(&tp->tap_sigio, SIGIO, 0);
- mtx_lock(&tp->tap_mtx);
- }
-
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
- }
-
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-} /* tapifstart */
-
-
-/*
- * tapioctl
- *
- * the cdevsw interface is now pretty minimal
- */
-static int
-tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
-{
- struct ifreq ifr;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct tapinfo *tapp = NULL;
- int f;
- int error;
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- int ival;
-#endif
-
- switch (cmd) {
- case TAPSIFINFO:
- tapp = (struct tapinfo *)data;
- if (ifp->if_type != tapp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_mtu != tapp->mtu) {
- strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
- ifr.ifr_mtu = tapp->mtu;
- CURVNET_SET(ifp->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, ifp,
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- ifp->if_baudrate = tapp->baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPGIFINFO:
- tapp = (struct tapinfo *)data;
- mtx_lock(&tp->tap_mtx);
- tapp->mtu = ifp->if_mtu;
- tapp->type = ifp->if_type;
- tapp->baudrate = ifp->if_baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPSDEBUG:
- tapdebug = *(int *)data;
- break;
-
- case TAPGDEBUG:
- *(int *)data = tapdebug;
- break;
-
- case TAPGIFNAME: {
- struct ifreq *ifr = (struct ifreq *) data;
-
- strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
- } break;
-
- case FIONBIO:
- break;
-
- case FIOASYNC:
- mtx_lock(&tp->tap_mtx);
- if (*(int *)data)
- tp->tap_flags |= TAP_ASYNC;
- else
- tp->tap_flags &= ~TAP_ASYNC;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- struct mbuf *mb;
-
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, mb);
- for (*(int *)data = 0; mb != NULL;
- mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&ifp->if_snd);
- } else
- *(int *)data = 0;
- break;
-
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tap_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tap_sigio);
- return (0);
-
- /* this is deprecated, FIOSETOWN should be used instead */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tap_sigio));
-
- /* this is deprecated, FIOGETOWN should be used instead */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tap_sigio);
- return (0);
-
- /* VMware/VMnet port ioctl's */
-
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- case _IO('V', 0):
- ival = IOCPARM_IVAL(data);
- data = (caddr_t)&ival;
- /* FALLTHROUGH */
-#endif
- case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
- f = *(int *)data;
- f &= 0x0fff;
- f &= ~IFF_CANTCHANGE;
- f |= IFF_UP;
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCGIFADDR: /* get MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCSIFADDR: /* set MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- return (ENOTTY);
- }
- return (0);
-} /* tapioctl */
-
-
-/*
- * tapread
- *
- * the cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read
- */
-static int
-tapread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m = NULL;
- int error = 0, len;
-
- TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- mtx_lock(&tp->tap_mtx);
- if ((tp->tap_flags & TAP_READY) != TAP_READY) {
- mtx_unlock(&tp->tap_mtx);
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
- ifp->if_xname, dev2unit(dev), tp->tap_flags);
-
- return (EHOSTDOWN);
- }
-
- tp->tap_flags &= ~TAP_RWAIT;
-
- /* sleep until we get a packet */
- do {
- IF_DEQUEUE(&ifp->if_snd, m);
-
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tap_mtx);
- return (EWOULDBLOCK);
- }
-
- tp->tap_flags |= TAP_RWAIT;
- error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1),
- "taprd", 0);
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tap_mtx);
-
- /* feed packet to bpf */
- BPF_MTAP(ifp, m);
-
- /* xfer packet to user space */
- while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
- len = min(uio->uio_resid, m->m_len);
- if (len == 0)
- break;
-
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m != NULL) {
- TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- m_freem(m);
- }
-
- return (error);
-} /* tapread */
-
-
-/*
- * tapwrite
- *
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tapwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct ether_header *eh;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m;
-
- TAPDEBUG("%s writing, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (uio->uio_resid == 0)
- return (0);
-
- if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
- TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n",
- ifp->if_xname, uio->uio_resid, dev2unit(dev));
-
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
- M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-
- /*
- * Only pass a unicast frame to ether_input(), if it would actually
- * have been received by non-virtual hardware.
- */
- if (m->m_len < sizeof(struct ether_header)) {
- m_freem(m);
- return (0);
- }
- eh = mtod(m, struct ether_header *);
-
- if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
- !ETHER_IS_MULTICAST(eh->ether_dhost) &&
- bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
- m_freem(m);
- return (0);
- }
-
- /* Pass packet up to parent. */
- CURVNET_SET(ifp->if_vnet);
- (*ifp->if_input)(ifp, m);
- CURVNET_RESTORE();
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */
-
- return (0);
-} /* tapwrite */
-
-
-/*
- * tappoll
- *
- * the poll interface, this is only useful on reads
- * really. the write detect always returns true, write never blocks
- * anyway, it either accepts the packet or drops it
- */
-static int
-tappoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- int revents = 0;
-
- TAPDEBUG("%s polling, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- TAPDEBUG("%s have data in queue. len = %d, " \
- "minor = %#x\n", ifp->if_xname,
- ifp->if_snd.ifq_len, dev2unit(dev));
-
- revents |= (events & (POLLIN | POLLRDNORM));
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- selrecord(td, &tp->tap_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- if (events & (POLLOUT | POLLWRNORM))
- revents |= (events & (POLLOUT | POLLWRNORM));
-
- return (revents);
-} /* tappoll */
-
-
-/*
- * tap_kqfilter
- *
- * support for kevent() system call
- */
-static int
-tapkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- switch (kn->kn_filter) {
- case EVFILT_READ:
- TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_write_filterops;
- break;
-
- default:
- TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return (EINVAL);
- /* NOT REACHED */
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tap_rsel.si_note, kn, 0);
-
- return (0);
-} /* tapkqfilter */
-
-
-/*
- * tap_kqread
- *
- * Return true if there is data in the interface queue
- */
-static int
-tapkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tap_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tap_dev;
- struct ifnet *ifp = tp->tap_ifp;
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-} /* tapkqread */
-
-
-/*
- * tap_kqwrite
- *
- * Always can write. Return the MTU in kn->data
- */
-static int
-tapkqwrite(struct knote *kn, long hint)
-{
- struct tap_softc *tp = kn->kn_hook;
- struct ifnet *ifp = tp->tap_ifp;
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-} /* tapkqwrite */
-
-
-static void
-tapkqdetach(struct knote *kn)
-{
- struct tap_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tap_rsel.si_note, kn, 0);
-} /* tapkqdetach */
-
diff --git a/freebsd/sys/net/if_tap.h b/freebsd/sys/net/if_tap.h
index 34f44b38..9718cee4 100644
--- a/freebsd/sys/net/if_tap.h
+++ b/freebsd/sys/net/if_tap.h
@@ -40,24 +40,22 @@
#ifndef _NET_IF_TAP_H_
#define _NET_IF_TAP_H_
-/* refer to if_tapvar.h for the softc stuff */
+#include <net/if_tun.h>
/* maximum receive packet size (hard limit) */
#define TAPMRU 16384
-struct tapinfo {
- int baudrate; /* linespeed */
- short mtu; /* maximum transmission unit */
- u_char type; /* ethernet, tokenring, etc. */
- u_char dummy; /* place holder */
-};
+#define tapinfo tuninfo
-/* ioctl's for get/set debug */
-#define TAPSDEBUG _IOW('t', 90, int)
-#define TAPGDEBUG _IOR('t', 89, int)
-#define TAPSIFINFO _IOW('t', 91, struct tapinfo)
-#define TAPGIFINFO _IOR('t', 92, struct tapinfo)
-#define TAPGIFNAME _IOR('t', 93, struct ifreq)
+/*
+ * ioctl's for get/set debug; these are aliases of TUN* ioctls, see net/if_tun.h
+ * for details.
+ */
+#define TAPSDEBUG TUNSDEBUG
+#define TAPGDEBUG TUNGDEBUG
+#define TAPSIFINFO TUNSIFINFO
+#define TAPGIFINFO TUNGIFINFO
+#define TAPGIFNAME TUNGIFNAME
/* VMware ioctl's */
#define VMIO_SIOCSIFFLAGS _IOWINT('V', 0)
diff --git a/freebsd/sys/net/if_tapvar.h b/freebsd/sys/net/if_tapvar.h
deleted file mode 100644
index f5cf9f3e..00000000
--- a/freebsd/sys/net/if_tapvar.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
- * All rights reserved.
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
- */
-
-#ifndef _NET_IF_TAPVAR_H_
-#define _NET_IF_TAPVAR_H_
-
-/*
- * tap_mtx locks tap_flags, tap_pid. tap_next locked with global tapmtx.
- * Other fields locked by owning subsystems.
- */
-struct tap_softc {
- struct ifnet *tap_ifp;
- u_short tap_flags; /* misc flags */
-#define TAP_OPEN (1 << 0)
-#define TAP_INITED (1 << 1)
-#define TAP_RWAIT (1 << 2)
-#define TAP_ASYNC (1 << 3)
-#define TAP_READY (TAP_OPEN|TAP_INITED)
-#define TAP_VMNET (1 << 4)
-
- u_int8_t ether_addr[ETHER_ADDR_LEN]; /* ether addr of the remote side */
-
- pid_t tap_pid; /* PID of process to open */
- struct sigio *tap_sigio; /* information for async I/O */
- struct selinfo tap_rsel; /* read select */
-
- SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */
- struct cdev *tap_dev;
- struct mtx tap_mtx; /* per-softc mutex */
-};
-
-#endif /* !_NET_IF_TAPVAR_H_ */
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
deleted file mode 100644
index 44441773..00000000
--- a/freebsd/sys/net/if_tun.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
-
-/*-
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- *
- * This source may be freely distributed, however I would be interested
- * in any changes that are made.
- *
- * This driver takes packets off the IP i/f and hands them up to a
- * user process to have its wicked way with. This driver has it's
- * roots in a similar driver written by Phil Cockcroft (formerly) at
- * UCL. This driver is based much more on read/write/poll mode of
- * operation though.
- *
- * $FreeBSD$
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <sys/param.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/jail.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-#include <sys/ttycom.h>
-#include <sys/poll.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/conf.h>
-#include <sys/uio.h>
-#include <sys/malloc.h>
-#include <sys/random.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/vnet.h>
-#ifdef INET
-#include <netinet/in.h>
-#endif
-#include <net/bpf.h>
-#include <net/if_tun.h>
-
-#include <sys/queue.h>
-#include <sys/condvar.h>
-
-#include <security/mac/mac_framework.h>
-
-/*
- * tun_list is protected by global tunmtx. Other mutable fields are
- * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
- * static for the duration of a tunnel interface.
- */
-struct tun_softc {
- TAILQ_ENTRY(tun_softc) tun_list;
- struct cdev *tun_dev;
- u_short tun_flags; /* misc flags */
-#define TUN_OPEN 0x0001
-#define TUN_INITED 0x0002
-#define TUN_RCOLL 0x0004
-#define TUN_IASET 0x0008
-#define TUN_DSTADDR 0x0010
-#define TUN_LMODE 0x0020
-#define TUN_RWAIT 0x0040
-#define TUN_ASYNC 0x0080
-#define TUN_IFHEAD 0x0100
-
-#define TUN_READY (TUN_OPEN | TUN_INITED)
-
- /*
- * XXXRW: tun_pid is used to exclusively lock /dev/tun. Is this
- * actually needed? Can we just return EBUSY if already open?
- * Problem is that this involved inherent races when a tun device
- * is handed off from one process to another, as opposed to just
- * being slightly stale informationally.
- */
- pid_t tun_pid; /* owning pid */
- struct ifnet *tun_ifp; /* the interface */
- struct sigio *tun_sigio; /* information for async I/O */
- struct selinfo tun_rsel; /* read select */
- struct mtx tun_mtx; /* protect mutable softc fields */
- struct cv tun_cv; /* protect against ref'd dev destroy */
-};
-#define TUN2IFP(sc) ((sc)->tun_ifp)
-
-#define TUNDEBUG if (tundebug) if_printf
-
-/*
- * All mutable global variables in if_tun are locked using tunmtx, with
- * the exception of tundebug, which is used unlocked, and tunclones,
- * which is static after setup.
- */
-static struct mtx tunmtx;
-static const char tunname[] = "tun";
-static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
-static int tundebug = 0;
-static int tundclone = 1;
-static struct clonedevs *tunclones;
-static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
-SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
- "IP tunnel software network interface.");
-SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
- "Enable legacy devfs interface creation.");
-
-static void tunclone(void *arg, struct ucred *cred, char *name,
- int namelen, struct cdev **dev);
-static void tuncreate(const char *name, struct cdev *dev);
-static int tunifioctl(struct ifnet *, u_long, caddr_t);
-static void tuninit(struct ifnet *);
-static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *,
- const struct sockaddr *, struct route *ro);
-static void tunstart(struct ifnet *);
-
-static int tun_clone_create(struct if_clone *, int, caddr_t);
-static void tun_clone_destroy(struct ifnet *);
-static struct if_clone *tun_cloner;
-
-static d_open_t tunopen;
-static d_close_t tunclose;
-static d_read_t tunread;
-static d_write_t tunwrite;
-static d_ioctl_t tunioctl;
-static d_poll_t tunpoll;
-static d_kqfilter_t tunkqfilter;
-
-static int tunkqread(struct knote *, long);
-static int tunkqwrite(struct knote *, long);
-static void tunkqdetach(struct knote *);
-
-static struct filterops tun_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqread,
-};
-
-static struct filterops tun_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqwrite,
-};
-
-static struct cdevsw tun_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tunopen,
- .d_close = tunclose,
- .d_read = tunread,
- .d_write = tunwrite,
- .d_ioctl = tunioctl,
- .d_poll = tunpoll,
- .d_kqfilter = tunkqfilter,
- .d_name = tunname,
-};
-
-static int
-tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
- if (i) {
- /* No preexisting struct cdev *, create one */
- dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
- }
- tuncreate(tunname, dev);
-
- return (0);
-}
-
-static void
-tunclone(void *arg, struct ucred *cred, char *name, int namelen,
- struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int u, i, append_unit;
-
- if (*dev != NULL)
- return;
-
- /*
- * If tun cloning is enabled, only the superuser can create an
- * interface.
- */
- if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE) != 0)
- return;
-
- if (strcmp(name, tunname) == 0) {
- u = -1;
- } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
- return; /* Don't recognise the name */
- if (u != -1 && u > IF_MAXUNIT)
- return; /* Unit number too high */
-
- if (u == -1)
- append_unit = 1;
- else
- append_unit = 0;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
- if (i) {
- if (append_unit) {
- namelen = snprintf(devname, sizeof(devname), "%s%d",
- name, u);
- name = devname;
- }
- /* No preexisting struct cdev *, create one */
- *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
- UID_UUCP, GID_DIALER, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-}
-
-static void
-tun_destroy(struct tun_softc *tp)
-{
- struct cdev *dev;
-
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & TUN_OPEN) != 0)
- cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
- else
- mtx_unlock(&tp->tun_mtx);
-
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
- dev = tp->tun_dev;
- bpfdetach(TUN2IFP(tp));
- if_detach(TUN2IFP(tp));
- if_free(TUN2IFP(tp));
- destroy_dev(dev);
- seldrain(&tp->tun_rsel);
- knlist_clear(&tp->tun_rsel.si_note, 0);
- knlist_destroy(&tp->tun_rsel.si_note);
- mtx_destroy(&tp->tun_mtx);
- cv_destroy(&tp->tun_cv);
- free(tp, M_TUN);
- CURVNET_RESTORE();
-}
-
-static void
-tun_clone_destroy(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-
- mtx_lock(&tunmtx);
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
-}
-
-static int
-tunmodevent(module_t mod, int type, void *data)
-{
- static eventhandler_tag tag;
- struct tun_softc *tp;
-
- switch (type) {
- case MOD_LOAD:
- mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
- clone_setup(&tunclones);
- tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
- if (tag == NULL)
- return (ENOMEM);
- tun_cloner = if_clone_simple(tunname, tun_clone_create,
- tun_clone_destroy, 0);
- break;
- case MOD_UNLOAD:
- if_clone_detach(tun_cloner);
- EVENTHANDLER_DEREGISTER(dev_clone, tag);
- drain_dev_clone_events();
-
- mtx_lock(&tunmtx);
- while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
- mtx_lock(&tunmtx);
- }
- mtx_unlock(&tunmtx);
- clone_cleanup(&tunclones);
- mtx_destroy(&tunmtx);
- break;
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t tun_mod = {
- "if_tun",
- tunmodevent,
- 0
-};
-
-DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(if_tun, 1);
-
-static void
-tunstart(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
- struct mbuf *m;
-
- TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m == NULL) {
- IFQ_UNLOCK(&ifp->if_snd);
- return;
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_RWAIT) {
- tp->tun_flags &= ~TUN_RWAIT;
- wakeup(tp);
- }
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
- mtx_unlock(&tp->tun_mtx);
- pgsigio(&tp->tun_sigio, SIGIO, 0);
- } else
- mtx_unlock(&tp->tun_mtx);
-}
-
-/* XXX: should return an error code so it can fail. */
-static void
-tuncreate(const char *name, struct cdev *dev)
-{
- struct tun_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
- mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
- cv_init(&sc->tun_cv, "tun_condvar");
- sc->tun_flags = TUN_INITED;
- sc->tun_dev = dev;
- mtx_lock(&tunmtx);
- TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
- mtx_unlock(&tunmtx);
-
- ifp = sc->tun_ifp = if_alloc(IFT_PPP);
- if (ifp == NULL)
- panic("%s%d: failed to if_alloc() interface.\n",
- name, dev2unit(dev));
- if_initname(ifp, name, dev2unit(dev));
- ifp->if_mtu = TUNMTU;
- ifp->if_ioctl = tunifioctl;
- ifp->if_output = tunoutput;
- ifp->if_start = tunstart;
- ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
- ifp->if_softc = sc;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = 0;
- IFQ_SET_READY(&ifp->if_snd);
- knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- if_attach(ifp);
- bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
- dev->si_drv1 = sc;
- TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-}
-
-static int
-tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct ifnet *ifp;
- struct tun_softc *tp;
-
- /*
- * XXXRW: Non-atomic test and set of dev->si_drv1 requires
- * synchronization.
- */
- tp = dev->si_drv1;
- if (!tp) {
- tuncreate(tunname, dev);
- tp = dev->si_drv1;
- }
-
- /*
- * XXXRW: This use of tun_pid is subject to error due to the
- * fact that a reference to the tunnel can live beyond the
- * death of the process that created it. Can we replace this
- * with a simple busy flag?
- */
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) {
-#else /* __rtems__ */
- if (tp->tun_pid != 0 && tp->tun_pid != BSD_DEFAULT_PID) {
-#endif /* __rtems__ */
- mtx_unlock(&tp->tun_mtx);
- return (EBUSY);
- }
-#ifndef __rtems__
- tp->tun_pid = td->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tun_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
-
- tp->tun_flags |= TUN_OPEN;
- ifp = TUN2IFP(tp);
- if_link_state_change(ifp, LINK_STATE_UP);
- TUNDEBUG(ifp, "open\n");
- mtx_unlock(&tp->tun_mtx);
-
- return (0);
-}
-
-/*
- * tunclose - close the device - mark i/f down & delete
- * routing info
- */
-static int
-tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct tun_softc *tp;
- struct ifnet *ifp;
-
- tp = dev->si_drv1;
- ifp = TUN2IFP(tp);
-
- mtx_lock(&tp->tun_mtx);
- tp->tun_flags &= ~TUN_OPEN;
- tp->tun_pid = 0;
-
- /*
- * junk all pending output
- */
- CURVNET_SET(ifp->if_vnet);
- IFQ_PURGE(&ifp->if_snd);
-
- if (ifp->if_flags & IFF_UP) {
- mtx_unlock(&tp->tun_mtx);
- if_down(ifp);
- mtx_lock(&tp->tun_mtx);
- }
-
- /* Delete all addresses and routes which reference this interface. */
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- struct ifaddr *ifa;
-
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tun_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- /* deal w/IPv4 PtP destination; unlocked read */
- if (ifa->ifa_addr->sa_family == AF_INET) {
- rtinit(ifa, (int)RTM_DELETE,
- tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
- } else {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tun_mtx);
- }
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tun_sigio);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- TUNDEBUG (ifp, "closed\n");
-
- cv_broadcast(&tp->tun_cv);
- mtx_unlock(&tp->tun_mtx);
- return (0);
-}
-
-static void
-tuninit(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-#ifdef INET
- struct ifaddr *ifa;
-#endif
-
- TUNDEBUG(ifp, "tuninit\n");
-
- mtx_lock(&tp->tun_mtx);
- ifp->if_flags |= IFF_UP;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- getmicrotime(&ifp->if_lastchange);
-
-#ifdef INET
- if_addr_rlock(ifp);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family == AF_INET) {
- struct sockaddr_in *si;
-
- si = (struct sockaddr_in *)ifa->ifa_addr;
- if (si->sin_addr.s_addr)
- tp->tun_flags |= TUN_IASET;
-
- si = (struct sockaddr_in *)ifa->ifa_dstaddr;
- if (si && si->sin_addr.s_addr)
- tp->tun_flags |= TUN_DSTADDR;
- }
- }
- if_addr_runlock(ifp);
-#endif
- mtx_unlock(&tp->tun_mtx);
-}
-
-/*
- * Process an ioctl request.
- */
-static int
-tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct tun_softc *tp = ifp->if_softc;
- struct ifstat *ifs;
- int error = 0;
-
- switch(cmd) {
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_pid)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tun_pid);
- else
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tun_mtx);
- break;
- case SIOCSIFADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "address set\n");
- break;
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- TUNDEBUG(ifp, "mtu set\n");
- break;
- case SIOCSIFFLAGS:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
- default:
- error = EINVAL;
- }
- return (error);
-}
-
-/*
- * tunoutput - queue packets from higher level ready to put out.
- */
-static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
- struct route *ro)
-{
- struct tun_softc *tp = ifp->if_softc;
- u_short cached_tun_flags;
- int error;
- u_int32_t af;
-
- TUNDEBUG (ifp, "tunoutput\n");
-
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m0);
- if (error) {
- m_freem(m0);
- return (error);
- }
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- cached_tun_flags = tp->tun_flags;
- mtx_unlock(&tp->tun_mtx);
- if ((cached_tun_flags & TUN_READY) != TUN_READY) {
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP) {
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC)
- bcopy(dst->sa_data, &af, sizeof(af));
- else
- af = dst->sa_family;
-
- if (bpf_peers_present(ifp->if_bpf))
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
-
- /* prepend sockaddr? this may abort if the mbuf allocation fails */
- if (cached_tun_flags & TUN_LMODE) {
- /* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else {
- bcopy(dst, m0->m_data, dst->sa_len);
- }
- }
-
- if (cached_tun_flags & TUN_IFHEAD) {
- /* Prepend the address family */
- M_PREPEND(m0, 4, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else
- *(u_int32_t *)m0->m_data = htonl(af);
- } else {
-#ifdef INET
- if (af != AF_INET)
-#endif
- {
- m_freem(m0);
- return (EAFNOSUPPORT);
- }
- }
-
- error = (ifp->if_transmit)(ifp, m0);
- if (error)
- return (ENOBUFS);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- return (0);
-}
-
-/*
- * the cdevsw interface is now pretty minimal.
- */
-static int
-tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
- struct thread *td)
-{
- struct ifreq ifr;
- struct tun_softc *tp = dev->si_drv1;
- struct tuninfo *tunp;
- int error;
-
- switch (cmd) {
- case TUNSIFINFO:
- tunp = (struct tuninfo *)data;
- if (TUN2IFP(tp)->if_type != tunp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tun_mtx);
- if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
- strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
- ifr.ifr_mtu = tunp->mtu;
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- TUN2IFP(tp)->if_baudrate = tunp->baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFINFO:
- tunp = (struct tuninfo *)data;
- mtx_lock(&tp->tun_mtx);
- tunp->mtu = TUN2IFP(tp)->if_mtu;
- tunp->type = TUN2IFP(tp)->if_type;
- tunp->baudrate = TUN2IFP(tp)->if_baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSDEBUG:
- tundebug = *(int *)data;
- break;
- case TUNGDEBUG:
- *(int *)data = tundebug;
- break;
- case TUNSLMODE:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_LMODE;
- tp->tun_flags &= ~TUN_IFHEAD;
- } else
- tp->tun_flags &= ~TUN_LMODE;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFHEAD:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_IFHEAD;
- tp->tun_flags &= ~TUN_LMODE;
- } else
- tp->tun_flags &= ~TUN_IFHEAD;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFHEAD:
- mtx_lock(&tp->tun_mtx);
- *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFMODE:
- /* deny this if UP */
- if (TUN2IFP(tp)->if_flags & IFF_UP)
- return(EBUSY);
-
- switch (*(int *)data & ~IFF_MULTICAST) {
- case IFF_POINTOPOINT:
- case IFF_BROADCAST:
- mtx_lock(&tp->tun_mtx);
- TUN2IFP(tp)->if_flags &=
- ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
- TUN2IFP(tp)->if_flags |= *(int *)data;
- mtx_unlock(&tp->tun_mtx);
- break;
- default:
- return(EINVAL);
- }
- break;
- case TUNSIFPID:
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- tp->tun_pid = curthread->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tun_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
- mtx_unlock(&tp->tun_mtx);
- break;
- case FIONBIO:
- break;
- case FIOASYNC:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data)
- tp->tun_flags |= TUN_ASYNC;
- else
- tp->tun_flags &= ~TUN_ASYNC;
- mtx_unlock(&tp->tun_mtx);
- break;
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
- struct mbuf *mb;
- IFQ_LOCK(&TUN2IFP(tp)->if_snd);
- IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
- for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
- } else
- *(int *)data = 0;
- break;
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tun_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tun_sigio);
- return (0);
-
- /* This is deprecated, FIOSETOWN should be used instead. */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tun_sigio));
-
- /* This is deprecated, FIOGETOWN should be used instead. */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tun_sigio);
- return (0);
-
- default:
- return (ENOTTY);
- }
- return (0);
-}
-
-/*
- * The cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read.
- */
-static int
-tunread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- int error=0, len;
-
- TUNDEBUG (ifp, "read\n");
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & TUN_READY) != TUN_READY) {
- mtx_unlock(&tp->tun_mtx);
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- return (EHOSTDOWN);
- }
-
- tp->tun_flags &= ~TUN_RWAIT;
-
- do {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tun_mtx);
- return (EWOULDBLOCK);
- }
- tp->tun_flags |= TUN_RWAIT;
- error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
- "tunread", 0);
- if (error != 0) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tun_mtx);
-
- while (m && uio->uio_resid > 0 && error == 0) {
- len = min(uio->uio_resid, m->m_len);
- if (len != 0)
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m) {
- TUNDEBUG(ifp, "Dropping mbuf\n");
- m_freem(m);
- }
- return (error);
-}
-
-/*
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tunwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- uint32_t family, mru;
- int isr;
-
- TUNDEBUG(ifp, "tunwrite\n");
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP)
- /* ignore silently */
- return (0);
-
- if (uio->uio_resid == 0)
- return (0);
-
- mru = TUNMRU;
- if (tp->tun_flags & TUN_IFHEAD)
- mru += sizeof(family);
- if (uio->uio_resid < 0 || uio->uio_resid > mru) {
- TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-#ifdef MAC
- mac_ifnet_create_mbuf(ifp, m);
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_IFHEAD) {
- mtx_unlock(&tp->tun_mtx);
- if (m->m_len < sizeof(family) &&
- (m = m_pullup(m, sizeof(family))) == NULL)
- return (ENOBUFS);
- family = ntohl(*mtod(m, u_int32_t *));
- m_adj(m, sizeof(family));
- } else {
- mtx_unlock(&tp->tun_mtx);
- family = AF_INET;
- }
-
- BPF_MTAP2(ifp, &family, sizeof(family), m);
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- isr = NETISR_IP;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
- if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
- CURVNET_SET(ifp->if_vnet);
- M_SETFIB(m, ifp->if_fib);
- netisr_dispatch(isr, m);
- CURVNET_RESTORE();
- return (0);
-}
-
-/*
- * tunpoll - the poll interface, this is only useful on reads
- * really. The write detect always returns true, write never blocks
- * anyway, it either accepts the packet or drops it.
- */
-static int
-tunpoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- int revents = 0;
- struct mbuf *m;
-
- TUNDEBUG(ifp, "tunpoll\n");
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m != NULL) {
- TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
- revents |= events & (POLLIN | POLLRDNORM);
- } else {
- TUNDEBUG(ifp, "tunpoll waiting\n");
- selrecord(td, &tp->tun_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
- if (events & (POLLOUT | POLLWRNORM))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- return (revents);
-}
-
-/*
- * tunkqfilter - support for the kevent() system call.
- */
-static int
-tunkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
-
- switch(kn->kn_filter) {
- case EVFILT_READ:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_write_filterops;
- break;
-
- default:
- TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return(EINVAL);
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tun_rsel.si_note, kn, 0);
-
- return (0);
-}
-
-/*
- * Return true of there is data in the interface queue.
- */
-static int
-tunkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tun_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tun_dev;
- struct ifnet *ifp = TUN2IFP(tp);
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TUNDEBUG(ifp,
- "%s have data in the queue. Len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TUNDEBUG(ifp,
- "%s waiting for data, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-}
-
-/*
- * Always can write, always return MTU in kn->data.
- */
-static int
-tunkqwrite(struct knote *kn, long hint)
-{
- struct tun_softc *tp = kn->kn_hook;
- struct ifnet *ifp = TUN2IFP(tp);
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-}
-
-static void
-tunkqdetach(struct knote *kn)
-{
- struct tun_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tun_rsel.si_note, kn, 0);
-}
diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h
index 1ea375f7..a44c87bd 100644
--- a/freebsd/sys/net/if_tun.h
+++ b/freebsd/sys/net/if_tun.h
@@ -40,6 +40,7 @@ struct tuninfo {
#define TUNSIFINFO _IOW('t', 91, struct tuninfo)
#define TUNGIFINFO _IOR('t', 92, struct tuninfo)
#define TUNSLMODE _IOW('t', 93, int)
+#define TUNGIFNAME _IOR('t', 93, struct ifreq) /* reuses number 93 of TUNSLMODE; differs in direction (_IOR) and argument type */
#define TUNSIFMODE _IOW('t', 94, int)
#define TUNSIFPID _IO('t', 95)
#define TUNSIFHEAD _IOW('t', 96, int)
diff --git a/freebsd/sys/net/if_tuntap.c b/freebsd/sys/net/if_tuntap.c
new file mode 100644
index 00000000..3516d82b
--- /dev/null
+++ b/freebsd/sys/net/if_tuntap.c
@@ -0,0 +1,1734 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ *
+ * This source may be freely distributed, however I would be interested
+ * in any changes that are made.
+ *
+ * This driver takes packets off the IP i/f and hands them up to a
+ * user process to have its wicked way with. This driver has it's
+ * roots in a similar driver written by Phil Cockcroft (formerly) at
+ * UCL. This driver is based much more on read/write/poll mode of
+ * operation though.
+ *
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/jail.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/eventhandler.h>
+#include <sys/fcntl.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/ttycom.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/signalvar.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/random.h>
+#include <sys/ctype.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/vnet.h>
+#ifdef INET
+#include <netinet/in.h>
+#endif
+#include <net/bpf.h>
+#include <net/if_tap.h>
+#include <net/if_tun.h>
+
+#include <sys/queue.h>
+#include <sys/condvar.h>
+#include <security/mac/mac_framework.h>
+
+struct tuntap_driver;
+
+/*
+ * tun_list is protected by global tunmtx. Other mutable fields are
+ * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
+ * static for the duration of a tunnel interface.
+ */
+struct tuntap_softc {
+ TAILQ_ENTRY(tuntap_softc) tun_list;
+ struct cdev *tun_dev;
+ u_short tun_flags; /* misc flags */
+#define TUN_OPEN 0x0001
+#define TUN_INITED 0x0002
+#define TUN_IASET 0x0008
+#define TUN_DSTADDR 0x0010
+#define TUN_LMODE 0x0020
+#define TUN_RWAIT 0x0040
+#define TUN_ASYNC 0x0080
+#define TUN_IFHEAD 0x0100
+#define TUN_DYING 0x0200
+#define TUN_L2 0x0400
+#define TUN_VMNET 0x0800
+
+#define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET)
+#define TUN_READY (TUN_OPEN | TUN_INITED)
+
+#ifndef __rtems__
+ pid_t tun_pid; /* owning pid */
+#endif /* __rtems__ */
+ struct ifnet *tun_ifp; /* the interface */
+ struct sigio *tun_sigio; /* async I/O info */
+ struct tuntap_driver *tun_drv; /* appropriate driver */
+ struct selinfo tun_rsel; /* read select */
+ struct mtx tun_mtx; /* softc field mutex */
+ struct cv tun_cv; /* for ref'd dev destroy */
+ struct ether_addr tun_ether; /* remote address */
+};
+#define TUN2IFP(sc) ((sc)->tun_ifp)
+
+#define TUNDEBUG if (tundebug) if_printf
+
+#define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx)
+#define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx)
+
+#define TUN_VMIO_FLAG_MASK 0x0fff
+
+/*
+ * All mutable global variables in if_tun are locked using tunmtx, with
+ * the exception of tundebug, which is used unlocked, and the drivers' *clones,
+ * which are static after setup.
+ */
+static struct mtx tunmtx;
+static eventhandler_tag tag;
+static const char tunname[] = "tun";
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
+static int tundebug = 0;
+static int tundclone = 1;
+static int tap_allow_uopen = 0; /* allow user open() */
+static int tapuponopen = 0; /* IFF_UP on open() */
+static int tapdclone = 1; /* enable devfs cloning */
+
+static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
+SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
+
+static struct sx tun_ioctl_sx;
+SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
+
+SYSCTL_DECL(_net_link);
+/* tun */
+static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+ "IP tunnel software network interface");
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
+ "Enable legacy devfs interface creation");
+
+/* tap */
+static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
+ "Ethernet tunnel software network interface");
+SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
+ "Allow user to open /dev/tap (based on node permissions)");
+SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
+ "Bring interface up when /dev/tap is opened");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
+ "Enable legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
+
+static int tuntap_name2info(const char *name, int *unit, int *flags);
+static void tunclone(void *arg, struct ucred *cred, char *name,
+ int namelen, struct cdev **dev);
+static void tuncreate(struct cdev *dev, struct tuntap_driver *);
+static int tunifioctl(struct ifnet *, u_long, caddr_t);
+static void tuninit(struct ifnet *);
+static void tunifinit(void *xtp);
+static int tuntapmodevent(module_t, int, void *);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
+static void tunstart(struct ifnet *);
+static void tunstart_l2(struct ifnet *);
+
+static int tun_clone_match(struct if_clone *ifc, const char *name);
+static int tap_clone_match(struct if_clone *ifc, const char *name);
+static int vmnet_clone_match(struct if_clone *ifc, const char *name);
+static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int tun_clone_destroy(struct if_clone *, struct ifnet *);
+
+static d_open_t tunopen;
+static d_close_t tunclose;
+static d_read_t tunread;
+static d_write_t tunwrite;
+static d_ioctl_t tunioctl;
+static d_poll_t tunpoll;
+static d_kqfilter_t tunkqfilter;
+
+static int tunkqread(struct knote *, long);
+static int tunkqwrite(struct knote *, long);
+static void tunkqdetach(struct knote *);
+
+static struct filterops tun_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqread,
+};
+
+static struct filterops tun_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqwrite,
+};
+
+static struct tuntap_driver {
+ struct cdevsw cdevsw;
+ int ident_flags;
+ struct unrhdr *unrhdr;
+ struct clonedevs *clones;
+ ifc_match_t *clone_match_fn;
+ ifc_create_t *clone_create_fn;
+ ifc_destroy_t *clone_destroy_fn;
+} tuntap_drivers[] = {
+ {
+ .ident_flags = 0,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tunname,
+ },
+ .clone_match_fn = tun_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tapname,
+ },
+ .clone_match_fn = tap_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2 | TUN_VMNET,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_close = tunclose,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = vmnetname,
+ },
+ .clone_match_fn = vmnet_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+};
+
+struct tuntap_driver_cloner {
+ SLIST_ENTRY(tuntap_driver_cloner) link;
+ struct tuntap_driver *drv;
+ struct if_clone *cloner;
+};
+
+VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
+ SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);
+
+#define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
+
+/*
+ * Sets unit and/or flags (NULL outputs are skipped) given the device name.
+ * Returns 0, EINVAL (NULL name) or ENXIO (no match). Needs correct vnet context.
+ */
+static int
+tuntap_name2info(const char *name, int *outunit, int *outflags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ char *dname;
+ int flags, unit;
+ bool found;
+
+ if (name == NULL)
+ return (EINVAL);
+
+ /*
+ * Needed for dev_stdclone, but dev_stdclone will not modify, it just
+ * wants to be able to pass back a char * through the second param. We
+ * will always set that as NULL here, so we'll fake it.
+ */
+ dname = __DECONST(char *, name);
+ found = false;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+
+ if (strcmp(name, drv->cdevsw.d_name) == 0) { /* bare driver name, e.g. no unit suffix */
+ found = true;
+ unit = -1; /* -1 means "no unit requested"; callers pick one */
+ flags = drv->ident_flags;
+ break;
+ }
+
+ if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { /* NOTE(review): presumably parses "<d_name><N>" into unit -- confirm */
+ found = true;
+ flags = drv->ident_flags;
+ break;
+ }
+ }
+
+ if (!found) /* unit and flags are only assigned on the found paths above */
+ return (ENXIO);
+
+ if (outunit != NULL)
+ *outunit = unit;
+ if (outflags != NULL)
+ *outflags = flags;
+ return (0);
+}
+
+/*
+ * Get driver information from a set of flags specified. Masks the identifying
+ * part of the flags and compares it against every driver on the per-vnet
+ * cloner list; NULL if none matches. Must be called with correct vnet context.
+ */
+static struct tuntap_driver *
+tuntap_driver_from_flags(int tun_flags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+ if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags) /* exact match on the TUN_L2/TUN_VMNET bits */
+ return (drv);
+ }
+
+ return (NULL);
+}
+
+
+
+static int
+tun_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) { /* name belongs to one of the tuntap drivers */
+ if ((tunflags & TUN_L2) == 0) /* no TUN_L2 bit: the plain tun driver */
+ return (1);
+ }
+
+ return (0); /* unknown name, or a tap/vmnet name */
+}
+
+static int
+tap_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) { /* name belongs to one of the tuntap drivers */
+ if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) /* TUN_L2 without TUN_VMNET: the tap driver */
+ return (1);
+ }
+
+ return (0); /* unknown name, or a tun/vmnet name */
+}
+
+static int
+vmnet_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) { /* name belongs to one of the tuntap drivers */
+ if ((tunflags & TUN_VMNET) != 0) /* TUN_VMNET set: the vmnet driver */
+ return (1);
+ }
+
+ return (0); /* unknown name, or a tun/tap name */
+}
+
+static int
+tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ struct tuntap_driver *drv;
+ struct cdev *dev;
+ int err, i, tunflags, unit;
+
+ tunflags = 0;
+ /* The name tells us what to create: the driver flags and the unit (-1 = any) */
+ err = tuntap_name2info(name, &unit, &tunflags);
+ if (err != 0)
+ return (err);
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ return (ENXIO);
+
+ if (unit != -1) {
+ /* If this unit number is still available that's okay. */
+ if (alloc_unr_specific(drv->unrhdr, unit) == -1)
+ return (EEXIST);
+ } else {
+ unit = alloc_unr(drv->unrhdr); /* NOTE(review): result is not checked -- confirm alloc_unr cannot fail (-1) here */
+ }
+
+ snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); /* rewrites caller's name; bounded by IFNAMSIZ, not len */
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
+ if (i) {
+ /* No preexisting struct cdev *, create one */
+ dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600,
+ "%s%d", drv->cdevsw.d_name, unit);
+ }
+
+ tuncreate(dev, drv); /* returns void, so failures there cannot be reported from here */
+
+ return (0);
+}
+
+static void
+tunclone(void *arg, struct ucred *cred, char *name, int namelen,
+ struct cdev **dev)
+{
+ char devname[SPECNAMELEN + 1];
+ struct tuntap_driver *drv;
+ int append_unit, i, u, tunflags;
+ bool mayclone;
+
+ if (*dev != NULL) /* a device is already set: nothing to do */
+ return;
+
+ tunflags = 0;
+ CURVNET_SET(CRED_TO_VNET(cred));
+ if (tuntap_name2info(name, &u, &tunflags) != 0)
+ goto out; /* Not recognized */
+
+ if (u != -1 && u > IF_MAXUNIT)
+ goto out; /* Unit number too high */
+
+ mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
+ if ((tunflags & TUN_L2) != 0) {
+ /* tap/vmnet allow user open with a sysctl */
+ mayclone = (mayclone || tap_allow_uopen) && tapdclone;
+ } else {
+ mayclone = mayclone && tundclone;
+ }
+
+ /*
+ * Cloning requires the driver's devfs_cloning sysctl to be enabled and, for
+ * tun, PRIV_NET_IFCREATE; tap/vmnet also accept the user_open sysctl instead.
+ */
+ if (!mayclone)
+ goto out;
+
+ if (u == -1) /* bare driver name: the allocated unit gets appended below */
+ append_unit = 1;
+ else
+ append_unit = 0;
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ goto out;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
+ if (i) {
+ if (append_unit) {
+ namelen = snprintf(devname, sizeof(devname), "%s%d",
+ name, u);
+ name = devname;
+ }
+ /* No preexisting struct cdev *, create one */
+ *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred,
+ UID_UUCP, GID_DIALER, 0600, "%s", name);
+ }
+
+ if_clone_create(name, namelen, NULL); /* called whether or not the cdev pre-existed; return value is ignored */
+out:
+ CURVNET_RESTORE();
+}
+
+static void
+tun_destroy(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK(tp);
+ tp->tun_flags |= TUN_DYING; /* mark the softc as going away before waiting */
+ if ((tp->tun_flags & TUN_OPEN) != 0)
+ cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); /* still open: wait on tun_cv; NOTE(review): presumably returns with tun_mtx released -- confirm */
+ else
+ TUN_UNLOCK(tp);
+
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+
+ destroy_dev(tp->tun_dev);
+ seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
+ knlist_destroy(&tp->tun_rsel.si_note);
+ if ((tp->tun_flags & TUN_L2) != 0) { /* tap/vmnet interfaces detach through the ethernet layer */
+ ether_ifdetach(TUN2IFP(tp));
+ } else {
+ bpfdetach(TUN2IFP(tp));
+ if_detach(TUN2IFP(tp));
+ }
+ sx_xlock(&tun_ioctl_sx);
+ TUN2IFP(tp)->if_softc = NULL; /* cleared under tun_ioctl_sx */
+ sx_xunlock(&tun_ioctl_sx);
+ free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); /* hand the unit number back to the driver's allocator */
+ if_free(TUN2IFP(tp));
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
+ free(tp, M_TUN);
+ CURVNET_RESTORE();
+}
+
+static int
+tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ mtx_lock(&tunmtx);
+ TAILQ_REMOVE(&tunhead, tp, tun_list); /* unlink from the global list under tunmtx */
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp); /* teardown runs without tunmtx held */
+
+ return (0); /* always reports success */
+}
+
+static void
+vnet_tun_init(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ int i;
+
+ for (i = 0; i < nitems(tuntap_drivers); ++i) { /* one cloner entry per driver (tun, tap, vmnet) */
+ drv = &tuntap_drivers[i];
+ drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO); /* freed in vnet_tun_uninit(), which is compiled out under __rtems__ */
+
+ drvc->drv = drv;
+ drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
+ drv->clone_match_fn, drv->clone_create_fn,
+ drv->clone_destroy_fn);
+ SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
+ }; /* NOTE(review): the ';' after the brace is a harmless empty statement */
+}
+VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_init, NULL);
+
+#ifndef __rtems__
+static void
+vnet_tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver_cloner *drvc;
+
+ while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) { /* pop entries until the list is empty */
+ drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
+ SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
+
+ if_clone_detach(drvc->cloner);
+ free(drvc, M_TUN); /* allocated in vnet_tun_init() */
+ }
+}
+VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_uninit, NULL);
+
+static void
+tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ int i;
+
+ EVENTHANDLER_DEREGISTER(dev_clone, tag); /* tag was registered in tuntapmodevent() on MOD_LOAD */
+ drain_dev_clone_events();
+
+ mtx_lock(&tunmtx);
+ while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx); /* tun_destroy() is called without tunmtx held */
+ tun_destroy(tp);
+ mtx_lock(&tunmtx);
+ }
+ mtx_unlock(&tunmtx);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) { /* undo the per-driver setup done on MOD_LOAD */
+ drv = &tuntap_drivers[i];
+ delete_unrhdr(drv->unrhdr);
+ clone_cleanup(&drv->clones);
+ }
+ mtx_destroy(&tunmtx);
+}
+SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
+#endif /* __rtems__ */
+
+static int
+tuntapmodevent(module_t mod, int type, void *data)
+{
+ struct tuntap_driver *drv;
+ int i;
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) { /* per-driver clone list and unit allocator */
+ drv = &tuntap_drivers[i];
+ clone_setup(&drv->clones);
+ drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
+ }
+ tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
+ if (tag == NULL)
+ return (ENOMEM); /* NOTE(review): returns without undoing the setup above -- confirm cleanup happens elsewhere */
+ break;
+ case MOD_UNLOAD:
+ /* See tun_uninit, so it's done after the vnet_sysuninit() */
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t tuntap_mod = {
+ "if_tuntap",
+ tuntapmodevent,
+ 0
+};
+
+DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tuntap, 1);
+MODULE_VERSION(if_tun, 1);
+MODULE_VERSION(if_tap, 1);
+
+static void
+tunstart(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+ struct mbuf *m;
+
+ TUNDEBUG(ifp, "starting\n");
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) { /* with ALTQ on, return early when nothing is queued */
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m == NULL) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return;
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+
+ TUN_LOCK(tp);
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp); /* NOTE(review): presumably pairs with a sleep on tp in the read path -- confirm */
+ }
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
+ TUN_UNLOCK(tp); /* SIGIO is posted after tun_mtx is dropped */
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ } else
+ TUN_UNLOCK(tp);
+}
+
+/*
+ * tunstart_l2
+ *
+ * drops queued output when not ready (non-VMnet), else notifies waiting readers
+ */
+static void
+tunstart_l2(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ TUNDEBUG(ifp, "starting\n");
+
+ /*
+ * do not junk pending output if we are in VMnet mode.
+ * XXX: can this do any harm because of queue overflow?
+ */
+
+ TUN_LOCK(tp);
+ if (((tp->tun_flags & TUN_VMNET) == 0) &&
+ ((tp->tun_flags & TUN_READY) != TUN_READY)) {
+ struct mbuf *m;
+
+ /* tun_flags is read under TUN_LOCK (taken above). */
+ TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
+
+ for (;;) { /* drain if_snd, counting each freed packet as an output error */
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ } else
+ break;
+ }
+ TUN_UNLOCK(tp);
+
+ return;
+ }
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* cleared again before returning */
+
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+
+ if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
+ TUN_UNLOCK(tp); /* tun_mtx is dropped around pgsigio() and retaken */
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ TUN_LOCK(tp);
+ }
+
+ selwakeuppri(&tp->tun_rsel, PZERO+1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
+ }
+
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ TUN_UNLOCK(tp);
+} /* tunstart_l2 */
+
+
+/*
+ * Create the softc and network interface backing character device 'dev'
+ * for driver 'drv'.
+ *
+ * The softc is allocated zeroed with M_WAITOK, linked onto the global
+ * tunhead list under tunmtx, and given an ifnet whose type, flags, MTU and
+ * start routine depend on whether the driver's identity flags include
+ * TUN_L2. The finished softc is published through dev->si_drv1 and marked
+ * TUN_INITED. A failed if_alloc() panics.
+ */
+/* XXX: should return an error code so it can fail. */
+static void
+tuncreate(struct cdev *dev, struct tuntap_driver *drv)
+{
+ struct tuntap_softc *sc;
+ struct ifnet *ifp;
+ struct ether_addr eaddr;
+ int iflags;
+ u_char type;
+
+ sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
+ mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
+ cv_init(&sc->tun_cv, "tun_condvar");
+ sc->tun_flags = drv->ident_flags;
+ sc->tun_dev = dev;
+ sc->tun_drv = drv;
+ /* Make the softc visible on the global list. */
+ mtx_lock(&tunmtx);
+ TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
+ mtx_unlock(&tunmtx);
+
+ /* L2 devices look like Ethernet; the rest are point-to-point. */
+ iflags = IFF_MULTICAST;
+ if ((sc->tun_flags & TUN_L2) != 0) {
+ type = IFT_ETHER;
+ iflags |= IFF_BROADCAST | IFF_SIMPLEX;
+ } else {
+ type = IFT_PPP;
+ iflags |= IFF_POINTOPOINT;
+ }
+ ifp = sc->tun_ifp = if_alloc(type);
+ if (ifp == NULL)
+ panic("%s%d: failed to if_alloc() interface.\n",
+ drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_softc = sc;
+ if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_ioctl = tunifioctl;
+ ifp->if_flags = iflags;
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ /* The knote list shares the softc mutex. */
+ knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+ ifp->if_capenable |= IFCAP_LINKSTATE;
+
+ if ((sc->tun_flags & TUN_L2) != 0) {
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_init = tunifinit;
+ ifp->if_start = tunstart_l2;
+
+ /* Attach as Ethernet using a generated link-level address. */
+ ether_gen_addr(ifp, &eaddr);
+ ether_ifattach(ifp, eaddr.octet);
+ } else {
+ ifp->if_mtu = TUNMTU;
+ ifp->if_start = tunstart;
+ ifp->if_output = tunoutput;
+
+ ifp->if_snd.ifq_drv_maxlen = 0;
+ IFQ_SET_READY(&ifp->if_snd);
+
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ }
+ dev->si_drv1 = sc;
+
+ TUN_LOCK(sc);
+ sc->tun_flags |= TUN_INITED;
+ TUN_UNLOCK(sc);
+
+ TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+}
+
+/*
+ * tunopen - cdevsw open entry point.
+ *
+ * Derives the driver flags from the device name, applies the tap privilege
+ * check for L2 devices unless tap_allow_uopen is set, creates the softc and
+ * interface on first open, and marks the device TUN_OPEN with link state
+ * up.
+ *
+ * Returns 0 on success; the error from tuntap_name2info() or priv_check();
+ * ENXIO when no driver matches the flags; EBUSY when the device is already
+ * open or is dying.
+ */
+static int
+tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct ifnet *ifp;
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ int error, tunflags;
+
+ tunflags = 0;
+ CURVNET_SET(TD_TO_VNET(td));
+ error = tuntap_name2info(dev->si_name, NULL, &tunflags);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error); /* Shouldn't happen */
+ }
+
+ if ((tunflags & TUN_L2) != 0) {
+ /* Restrict? */
+ if (tap_allow_uopen == 0) {
+ error = priv_check(td, PRIV_NET_TAP);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error);
+ }
+ }
+ }
+
+ /*
+ * XXXRW: Non-atomic test and set of dev->si_drv1 requires
+ * synchronization.
+ */
+ tp = dev->si_drv1;
+ if (!tp) {
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL) {
+ CURVNET_RESTORE();
+ return (ENXIO);
+ }
+ /* tuncreate() publishes the new softc through si_drv1. */
+ tuncreate(dev, drv);
+ tp = dev->si_drv1;
+ }
+
+ TUN_LOCK(tp);
+ /* Only one opener at a time, and none once teardown has begun. */
+ if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+
+ ifp = TUN2IFP(tp);
+
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ /* Seed the stored Ethernet address from the interface's own. */
+ bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ if (tapuponopen)
+ ifp->if_flags |= IFF_UP;
+ }
+
+#ifndef __rtems__
+ /* Remember the opening process. */
+ tp->tun_pid = td->td_proc->p_pid;
+#endif /* __rtems__ */
+ tp->tun_flags |= TUN_OPEN;
+
+ if_link_state_change(ifp, LINK_STATE_UP);
+ TUNDEBUG(ifp, "open\n");
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (0);
+}
+
+/*
+ * tunclose - cdevsw close entry point: mark the interface down and delete
+ * routing info.
+ *
+ * Pending output is discarded. Unless the device is in VMnet mode (or is an
+ * L2 device with IFF_LINK0 set), the interface is brought down and, if it
+ * was running, its addresses and the routes referencing them are removed.
+ * Finally the link state is set down, the SIGIO owner is cleared, waiters
+ * are woken and TUN_OPEN is cleared.
+ *
+ * Always returns 0.
+ */
+static int
+tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
+{
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ bool l2tun;
+
+ tp = dev->si_drv1;
+ ifp = TUN2IFP(tp);
+
+ TUN_LOCK(tp);
+#ifndef __rtems__
+ /*
+ * Simply close the device if this isn't the controlling process. This
+ * may happen if, for instance, the tunnel has been handed off to
+ * another process. The original controller should be able to close it
+ * without putting us into an inconsistent state.
+ */
+ if (td->td_proc->p_pid != tp->tun_pid) {
+ TUN_UNLOCK(tp);
+ return (0);
+ }
+#endif /* __rtems__ */
+
+ /*
+ * junk all pending output
+ */
+ CURVNET_SET(ifp->if_vnet);
+
+ l2tun = false;
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ l2tun = true;
+ IF_DRAIN(&ifp->if_snd);
+ } else {
+ IFQ_PURGE(&ifp->if_snd);
+ }
+
+ /* For vmnet, we won't do most of the address/route bits */
+ if ((tp->tun_flags & TUN_VMNET) != 0 ||
+ (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
+ goto out;
+
+ /* if_down() is called with the softc lock dropped. */
+ if (ifp->if_flags & IFF_UP) {
+ TUN_UNLOCK(tp);
+ if_down(ifp);
+ TUN_LOCK(tp);
+ }
+
+ /* Delete all addresses and routes which reference this interface. */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ struct ifaddr *ifa;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ /* The route and address work below runs without the softc lock. */
+ TUN_UNLOCK(tp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ /* deal w/IPv4 PtP destination; unlocked read */
+ if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
+ rtinit(ifa, (int)RTM_DELETE,
+ tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
+ } else {
+ rtinit(ifa, (int)RTM_DELETE, 0);
+ }
+ }
+ if_purgeaddrs(ifp);
+ TUN_LOCK(tp);
+ }
+
+out:
+ /* Reached with the softc lock held on every path. */
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ CURVNET_RESTORE();
+
+ funsetown(&tp->tun_sigio);
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ TUNDEBUG (ifp, "closed\n");
+ tp->tun_flags &= ~TUN_OPEN;
+#ifndef __rtems__
+ tp->tun_pid = 0;
+#endif /* __rtems__ */
+
+ /* Wake anything waiting on tun_cv now that TUN_OPEN is clear. */
+ cv_broadcast(&tp->tun_cv);
+ TUN_UNLOCK(tp);
+ return (0);
+}
+
+/*
+ * Mark the interface running and finish per-mode initialization.
+ *
+ * For a non-L2 device the interface is also flagged IFF_UP, its last-change
+ * timestamp is refreshed and, with INET, the address list is scanned to
+ * record whether a non-zero IPv4 address (TUN_IASET) and a non-zero IPv4
+ * destination address (TUN_DSTADDR) are configured. For an L2 device
+ * IFF_DRV_OACTIVE is cleared and output is kicked via tunstart_l2().
+ */
+static void
+tuninit(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa;
+#endif
+
+ TUNDEBUG(ifp, "tuninit\n");
+
+ TUN_LOCK(tp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if ((tp->tun_flags & TUN_L2) == 0) {
+ ifp->if_flags |= IFF_UP;
+ getmicrotime(&ifp->if_lastchange);
+#ifdef INET
+ if_addr_rlock(ifp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ struct sockaddr_in *si;
+
+ si = (struct sockaddr_in *)ifa->ifa_addr;
+ if (si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_IASET;
+
+ /* ifa_dstaddr may be NULL, hence the extra check. */
+ si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+ if (si && si->sin_addr.s_addr)
+ tp->tun_flags |= TUN_DSTADDR;
+ }
+ }
+ if_addr_runlock(ifp);
+#endif
+ TUN_UNLOCK(tp);
+ } else {
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ /* tunstart_l2() takes the softc lock itself, so drop it first. */
+ TUN_UNLOCK(tp);
+ /* attempt to start output */
+ tunstart_l2(ifp);
+ }
+
+}
+
+/*
+ * if_init adapter, used only for the l2 tunnel: the opaque argument is the
+ * softc, whose interface is handed on to tuninit().
+ */
+static void
+tunifinit(void *xtp)
+{
+	struct tuntap_softc *tp = xtp;
+
+	tuninit(tp->tun_ifp);
+}
+
+/*
+ * Process an ioctl request issued against the network interface (as
+ * opposed to the character device, see tunioctl).
+ *
+ * The whole request runs under the exclusive tun_ioctl_sx lock. Returns 0
+ * on success; ENXIO when the interface has no softc; EINVAL for
+ * SIOCGIFMEDIA or an unknown command on a non-L2 device; otherwise the
+ * result of ether_ioctl() or copyout().
+ */
+static int
+tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct tuntap_softc *tp;
+ struct ifstat *ifs;
+ struct ifmediareq *ifmr;
+ int dummy, error = 0;
+ bool l2tun;
+
+ ifmr = NULL;
+ sx_xlock(&tun_ioctl_sx);
+ tp = ifp->if_softc;
+ if (tp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ switch(cmd) {
+ case SIOCGIFSTATUS:
+ /* Report the owning PID, or an empty string when there is none. */
+ ifs = (struct ifstat *)data;
+ TUN_LOCK(tp);
+#ifndef __rtems__
+ if (tp->tun_pid)
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
+ "\tOpened by PID %d\n", tp->tun_pid);
+ else
+#endif /* __rtems__ */
+ ifs->ascii[0] = '\0';
+ TUN_UNLOCK(tp);
+ break;
+ case SIOCSIFADDR:
+ if (l2tun)
+ error = ether_ioctl(ifp, cmd, data);
+ else
+ tuninit(ifp);
+ if (error == 0)
+ TUNDEBUG(ifp, "address set\n");
+ break;
+ case SIOCSIFMTU:
+ /* The requested MTU is stored as given. */
+ ifp->if_mtu = ifr->ifr_mtu;
+ TUNDEBUG(ifp, "mtu set\n");
+ break;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ /* Accepted without any driver action. */
+ break;
+ case SIOCGIFMEDIA:
+ if (!l2tun) {
+ error = EINVAL;
+ break;
+ }
+
+ ifmr = (struct ifmediareq *)data;
+ /* 'dummy' holds the caller-supplied ifm_count. */
+ dummy = ifmr->ifm_count;
+ ifmr->ifm_count = 1;
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER;
+ /* The media is reported active only while the device is open. */
+ if (tp->tun_flags & TUN_OPEN)
+ ifmr->ifm_status |= IFM_ACTIVE;
+ ifmr->ifm_current = ifmr->ifm_active;
+ if (dummy >= 1) {
+ int media = IFM_ETHER;
+ error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
+ }
+ break;
+ default:
+ if (l2tun) {
+ error = ether_ioctl(ifp, cmd, data);
+ } else {
+ error = EINVAL;
+ }
+ }
+bad:
+ sx_xunlock(&tun_ioctl_sx);
+ return (error);
+}
+
+/*
+ * tunoutput - queue packets from higher level ready to put out.
+ *
+ * if_output handler installed for the non-L2 (tun) interface. The packet is
+ * dropped with EHOSTDOWN unless the device is TUN_READY and the interface is
+ * up. Depending on the device mode, the destination sockaddr (TUN_LMODE)
+ * and/or the address family in network byte order (TUN_IFHEAD) is prepended
+ * before the packet is handed to if_transmit. Without TUN_IFHEAD only
+ * AF_INET is accepted (and only when INET is compiled in).
+ *
+ * The mbuf is consumed on every path. Returns 0 on success, the MAC check
+ * error, EHOSTDOWN, EAFNOSUPPORT, or ENOBUFS when a prepend or the transmit
+ * fails.
+ */
+static int
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
+    struct route *ro)
+{
+	struct tuntap_softc *tp = ifp->if_softc;
+	u_short cached_tun_flags;
+	int error;
+	u_int32_t af;
+
+	TUNDEBUG (ifp, "tunoutput\n");
+
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m0);
+	if (error) {
+		m_freem(m0);
+		return (error);
+	}
+#endif
+
+	/* Could be unlocked read? */
+	TUN_LOCK(tp);
+	cached_tun_flags = tp->tun_flags;
+	TUN_UNLOCK(tp);
+	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
+		/*
+		 * Log the snapshot the decision was based on rather than
+		 * re-reading tun_flags without the lock.
+		 */
+		TUNDEBUG (ifp, "not ready 0%o\n", cached_tun_flags);
+		m_freem (m0);
+		return (EHOSTDOWN);
+	}
+
+	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
+		m_freem (m0);
+		return (EHOSTDOWN);
+	}
+
+	/* BPF writes need to be handled specially. */
+	if (dst->sa_family == AF_UNSPEC)
+		bcopy(dst->sa_data, &af, sizeof(af));
+	else
+		af = dst->sa_family;
+
+	if (bpf_peers_present(ifp->if_bpf))
+		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
+
+	/* prepend sockaddr? this may abort if the mbuf allocation fails */
+	if (cached_tun_flags & TUN_LMODE) {
+		/* allocate space for sockaddr */
+		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
+
+		/* if allocation failed drop packet */
+		if (m0 == NULL) {
+			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return (ENOBUFS);
+		} else {
+			bcopy(dst, m0->m_data, dst->sa_len);
+		}
+	}
+
+	if (cached_tun_flags & TUN_IFHEAD) {
+		/* Prepend the address family */
+		M_PREPEND(m0, 4, M_NOWAIT);
+
+		/* if allocation failed drop packet */
+		if (m0 == NULL) {
+			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return (ENOBUFS);
+		} else
+			*(u_int32_t *)m0->m_data = htonl(af);
+	} else {
+		/* Without the header only IPv4 can be identified. */
+#ifdef INET
+		if (af != AF_INET)
+#endif
+		{
+			m_freem(m0);
+			return (EAFNOSUPPORT);
+		}
+	}
+
+	/* Any transmit failure is reported to the caller as ENOBUFS. */
+	error = (ifp->if_transmit)(ifp, m0);
+	if (error)
+		return (ENOBUFS);
+	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+	return (0);
+}
+
+/*
+ * the cdevsw interface is now pretty minimal.
+ *
+ * tunioctl - ioctl entry point of the character device. Commands are tried
+ * in two stages: first the mode-specific set (tap commands for an L2
+ * device, tun commands otherwise), then the set common to both. Commands
+ * handled in the first stage return directly; anything unhandled falls
+ * through to the common switch.
+ *
+ * Returns 0 on success, ENOTTY for an unknown command, or a command
+ * specific error (EBUSY, EINVAL, EPROTOTYPE, or the result of ifhwioctl()
+ * or fsetown()).
+ */
+static int
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+ struct thread *td)
+{
+ struct ifreq ifr, *ifrp;
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct tuninfo *tunp;
+ int error, iflags;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ int ival;
+#endif
+ bool l2tun;
+
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ if (l2tun) {
+ /* tap specific ioctls */
+ switch(cmd) {
+ /* VMware/VMnet port ioctl's */
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ case _IO('V', 0):
+ /* Old form passes the value itself; repoint data at a copy. */
+ ival = IOCPARM_IVAL(data);
+ data = (caddr_t)&ival;
+ /* FALLTHROUGH */
+#endif
+ case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
+ /* Keep only settable bits from the request and force IFF_UP. */
+ iflags = *(int *)data;
+ iflags &= TUN_VMIO_FLAG_MASK;
+ iflags &= ~IFF_CANTCHANGE;
+ iflags |= IFF_UP;
+
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags = iflags |
+ (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCGIFADDR: /* get MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(&tp->tun_ether.octet, data,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCSIFADDR: /* set MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(data, &tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ }
+
+ /* Fall through to the common ioctls if unhandled */
+ } else {
+ switch (cmd) {
+ case TUNSLMODE:
+ /* Enabling TUN_LMODE clears TUN_IFHEAD. */
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_LMODE;
+ tp->tun_flags &= ~TUN_IFHEAD;
+ } else
+ tp->tun_flags &= ~TUN_LMODE;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFHEAD:
+ /* Enabling TUN_IFHEAD clears TUN_LMODE. */
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_IFHEAD;
+ tp->tun_flags &= ~TUN_LMODE;
+ } else
+ tp->tun_flags &= ~TUN_IFHEAD;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNGIFHEAD:
+ TUN_LOCK(tp);
+ *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFMODE:
+ /* deny this if UP */
+ if (TUN2IFP(tp)->if_flags & IFF_UP)
+ return (EBUSY);
+
+ /* Exactly one of the two modes, optionally with IFF_MULTICAST. */
+ switch (*(int *)data & ~IFF_MULTICAST) {
+ case IFF_POINTOPOINT:
+ case IFF_BROADCAST:
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags &=
+ ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
+ TUN2IFP(tp)->if_flags |= *(int *)data;
+ TUN_UNLOCK(tp);
+
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+ case TUNSIFPID:
+#ifndef __rtems__
+ /* Record the calling process as the owner. */
+ TUN_LOCK(tp);
+ tp->tun_pid = curthread->td_proc->p_pid;
+ TUN_UNLOCK(tp);
+#endif /* __rtems__ */
+
+ return (0);
+ }
+ /* Fall through to the common ioctls if unhandled */
+ }
+
+ switch (cmd) {
+ case TUNGIFNAME:
+ ifrp = (struct ifreq *)data;
+ strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
+
+ return (0);
+ case TUNSIFINFO:
+ tunp = (struct tuninfo *)data;
+ /* The interface type cannot be changed through this call. */
+ if (TUN2IFP(tp)->if_type != tunp->type)
+ return (EPROTOTYPE);
+ TUN_LOCK(tp);
+ /* An MTU change is routed through the SIOCSIFMTU handler. */
+ if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+ strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
+ ifr.ifr_mtu = tunp->mtu;
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+ error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
+ (caddr_t)&ifr, td);
+ CURVNET_RESTORE();
+ if (error) {
+ TUN_UNLOCK(tp);
+ return (error);
+ }
+ }
+ TUN2IFP(tp)->if_baudrate = tunp->baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNGIFINFO:
+ tunp = (struct tuninfo *)data;
+ TUN_LOCK(tp);
+ tunp->mtu = TUN2IFP(tp)->if_mtu;
+ tunp->type = TUN2IFP(tp)->if_type;
+ tunp->baudrate = TUN2IFP(tp)->if_baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNSDEBUG:
+ /* tundebug is a single variable, not a per-device setting. */
+ tundebug = *(int *)data;
+ break;
+ case TUNGDEBUG:
+ *(int *)data = tundebug;
+ break;
+ case FIONBIO:
+ /* Accepted; no state is kept here. */
+ break;
+ case FIOASYNC:
+ TUN_LOCK(tp);
+ if (*(int *)data)
+ tp->tun_flags |= TUN_ASYNC;
+ else
+ tp->tun_flags &= ~TUN_ASYNC;
+ TUN_UNLOCK(tp);
+ break;
+ case FIONREAD:
+ /* Report the byte count of the packet at the head of the queue. */
+ if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
+ struct mbuf *mb;
+ IFQ_LOCK(&TUN2IFP(tp)->if_snd);
+ IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
+ *(int *)data += mb->m_len;
+ IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
+ } else
+ *(int *)data = 0;
+ break;
+ case FIOSETOWN:
+ return (fsetown(*(int *)data, &tp->tun_sigio));
+
+ case FIOGETOWN:
+ *(int *)data = fgetown(&tp->tun_sigio);
+ return (0);
+
+ /* This is deprecated, FIOSETOWN should be used instead. */
+ case TIOCSPGRP:
+ /* The value is negated before being passed to fsetown(). */
+ return (fsetown(-(*(int *)data), &tp->tun_sigio));
+
+ /* This is deprecated, FIOGETOWN should be used instead. */
+ case TIOCGPGRP:
+ *(int *)data = -fgetown(&tp->tun_sigio);
+ return (0);
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+
+/*
+ * The cdevsw read interface - reads a packet at a time, or at
+ * least as much of a packet as can be read.
+ *
+ * One packet is dequeued from the interface send queue and copied to the
+ * caller's buffer; whatever does not fit is dropped. With an empty queue
+ * the call sleeps until woken (TUN_RWAIT), unless O_NONBLOCK is set.
+ *
+ * Returns 0 on success, EHOSTDOWN when the device is not TUN_READY,
+ * EWOULDBLOCK for a non-blocking read on an empty queue, or the error from
+ * mtx_sleep() or uiomove().
+ */
+static int
+tunread(struct cdev *dev, struct uio *uio, int flag)
+{
+	struct tuntap_softc *tp = dev->si_drv1;
+	struct ifnet *ifp = TUN2IFP(tp);
+	struct mbuf *m;
+	int error=0, len;
+	bool l2tun;
+
+	TUNDEBUG (ifp, "read\n");
+	TUN_LOCK(tp);
+	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
+		/* Log before unlocking so tun_flags is read under the lock. */
+		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+		TUN_UNLOCK(tp);
+		return (EHOSTDOWN);
+	}
+
+	tp->tun_flags &= ~TUN_RWAIT;
+
+	for (;;) {
+		IFQ_DEQUEUE(&ifp->if_snd, m);
+		if (m != NULL)
+			break;
+		if (flag & O_NONBLOCK) {
+			TUN_UNLOCK(tp);
+			return (EWOULDBLOCK);
+		}
+		/* Ask the start routines for a wakeup, then sleep. */
+		tp->tun_flags |= TUN_RWAIT;
+		error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+		    "tunread", 0);
+		if (error != 0) {
+			TUN_UNLOCK(tp);
+			return (error);
+		}
+	}
+	/* Sample the mode while the lock is still held. */
+	l2tun = (tp->tun_flags & TUN_L2) != 0;
+	TUN_UNLOCK(tp);
+
+	if (l2tun)
+		BPF_MTAP(ifp, m);
+
+	/* Copy out mbuf by mbuf, freeing each one as it is consumed. */
+	while (m && uio->uio_resid > 0 && error == 0) {
+		len = min(uio->uio_resid, m->m_len);
+		if (len != 0)
+			error = uiomove(mtod(m, void *), len, uio);
+		m = m_free(m);
+	}
+
+	/* Anything left did not fit in the caller's buffer (or copy failed). */
+	if (m) {
+		TUNDEBUG(ifp, "Dropping mbuf\n");
+		m_freem(m);
+	}
+	return (error);
+}
+
+/*
+ * Inject a frame written to an L2 (tap) device into the network stack.
+ *
+ * Frames whose first mbuf is shorter than an Ethernet header are dropped,
+ * as are unicast frames not addressed to the interface unless it is in
+ * promiscuous mode. Accepted frames are passed to the interface's if_input
+ * routine and counted as input packets. The mbuf is always consumed.
+ *
+ * Always returns 0; dropped frames are not reported to the writer.
+ */
+static int
+tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
+{
+	struct ether_header *eh;
+	struct ifnet *ifp;
+
+	ifp = TUN2IFP(tp);
+
+	/*
+	 * Only pass a unicast frame to ether_input(), if it would
+	 * actually have been received by non-virtual hardware.
+	 */
+	if (m->m_len < sizeof(struct ether_header)) {
+		m_freem(m);
+		return (0);
+	}
+
+	/* The length check above guarantees a full header is present. */
+	eh = mtod(m, struct ether_header *);
+
+	if ((ifp->if_flags & IFF_PROMISC) == 0 &&
+	    !ETHER_IS_MULTICAST(eh->ether_dhost) &&
+	    bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
+		m_freem(m);
+		return (0);
+	}
+
+	/* Pass packet up to parent. */
+	CURVNET_SET(ifp->if_vnet);
+	(*ifp->if_input)(ifp, m);
+	CURVNET_RESTORE();
+	/* ibytes are counted in parent */
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	return (0);
+}
+
+/*
+ * Inject a packet written to a non-L2 (tun) device into the network stack.
+ *
+ * In TUN_IFHEAD mode the first four bytes carry the address family in
+ * network byte order and are stripped; otherwise AF_INET is assumed. The
+ * packet is tapped to BPF and dispatched to the matching netisr.
+ *
+ * Returns 0 on success, ENOBUFS if the header could not be pulled up, or
+ * EAFNOSUPPORT (freeing the mbuf) for an unhandled family.
+ */
+static int
+tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
+{
+	struct ifnet *ifp;
+	int family, isr;
+	bool ifhead;
+
+	ifp = TUN2IFP(tp);
+
+	/* Could be unlocked read? */
+	TUN_LOCK(tp);
+	ifhead = (tp->tun_flags & TUN_IFHEAD) != 0;
+	TUN_UNLOCK(tp);
+
+	if (ifhead) {
+		if (m->m_len < sizeof(family)) {
+			m = m_pullup(m, sizeof(family));
+			if (m == NULL)
+				return (ENOBUFS);
+		}
+		family = ntohl(*mtod(m, u_int32_t *));
+		m_adj(m, sizeof(family));
+	} else {
+		family = AF_INET;
+	}
+
+	BPF_MTAP2(ifp, &family, sizeof(family), m);
+
+	switch (family) {
+#ifdef INET
+	case AF_INET:
+		isr = NETISR_IP;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		isr = NETISR_IPV6;
+		break;
+#endif
+	default:
+		m_freem(m);
+		return (EAFNOSUPPORT);
+	}
+
+	random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
+	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+
+	CURVNET_SET(ifp->if_vnet);
+	M_SETFIB(m, ifp->if_fib);
+	netisr_dispatch(isr, m);
+	CURVNET_RESTORE();
+
+	return (0);
+}
+
+/*
+ * the cdevsw write interface - an atomic write is a packet - or else!
+ *
+ * The user buffer becomes one mbuf chain that is handed to tunwrite_l2()
+ * or tunwrite_l3() depending on the device mode. Writes to an interface
+ * that is not up, and empty writes, are silently accepted.
+ *
+ * Returns 0, EIO when the write size is negative or exceeds the mode's
+ * maximum receive unit, ENOBUFS when no mbuf could be allocated, or the
+ * result of the mode-specific handler.
+ */
+static int
+tunwrite(struct cdev *dev, struct uio *uio, int flag)
+{
+	struct tuntap_softc *tp;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	uint32_t mru;
+	int align;
+	bool l2tun;
+
+	tp = dev->si_drv1;
+	ifp = TUN2IFP(tp);
+	TUNDEBUG(ifp, "tunwrite\n");
+
+	/* ignore silently */
+	if ((ifp->if_flags & IFF_UP) == 0)
+		return (0);
+
+	if (uio->uio_resid == 0)
+		return (0);
+
+	/* Pick the size limit and payload alignment for this mode. */
+	l2tun = (tp->tun_flags & TUN_L2) != 0;
+	if (l2tun) {
+		mru = TAPMRU;
+		align = ETHER_ALIGN;
+	} else {
+		mru = TUNMRU;
+		align = 0;
+		if ((tp->tun_flags & TUN_IFHEAD) != 0)
+			mru += sizeof(uint32_t);	/* family */
+	}
+
+	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
+		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
+		return (EIO);
+	}
+
+	m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	m->m_pkthdr.rcvif = ifp;
+#ifdef MAC
+	mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+	return (l2tun ? tunwrite_l2(tp, m) : tunwrite_l3(tp, m));
+}
+
+/*
+ * tunpoll - the poll interface, this is only useful on reads
+ * really. Write readiness is always reported, since a write never
+ * blocks: it either accepts the packet or drops it. Read readiness is
+ * reported when the interface send queue is non-empty; otherwise the
+ * caller is recorded on tun_rsel for a later wakeup.
+ */
+static int
+tunpoll(struct cdev *dev, int events, struct thread *td)
+{
+	struct tuntap_softc *tp = dev->si_drv1;
+	struct ifnet *ifp = TUN2IFP(tp);
+	int rdevents, revents;
+
+	TUNDEBUG(ifp, "tunpoll\n");
+
+	revents = events & (POLLOUT | POLLWRNORM);
+
+	rdevents = events & (POLLIN | POLLRDNORM);
+	if (rdevents != 0) {
+		IFQ_LOCK(&ifp->if_snd);
+		if (IFQ_IS_EMPTY(&ifp->if_snd)) {
+			TUNDEBUG(ifp, "tunpoll waiting\n");
+			selrecord(td, &tp->tun_rsel);
+		} else {
+			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
+			revents |= rdevents;
+		}
+		IFQ_UNLOCK(&ifp->if_snd);
+	}
+
+	return (revents);
+}
+
+/*
+ * tunkqfilter - support for the kevent() system call.
+ *
+ * Selects the filter operations for EVFILT_READ or EVFILT_WRITE, stores the
+ * softc in the knote's hook and adds the knote to the device's knote list.
+ * Returns 0 on success or EINVAL for any other filter.
+ */
+static int
+tunkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_write_filterops;
+ break;
+
+ default:
+ TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ return(EINVAL);
+ }
+
+ /* Read and write knotes alike live on the tun_rsel knote list. */
+ kn->kn_hook = tp;
+ knlist_add(&tp->tun_rsel.si_note, kn, 0);
+
+ return (0);
+}
+
+/*
+ * Read filter: return true if there is data in the interface queue.
+ * The current queue length is stored in kn_data either way.
+ */
+static int
+tunkqread(struct knote *kn, long hint)
+{
+	struct tuntap_softc *tp = kn->kn_hook;
+	struct cdev *dev = tp->tun_dev;
+	struct ifnet *ifp = TUN2IFP(tp);
+
+	kn->kn_data = ifp->if_snd.ifq_len;
+	if (kn->kn_data <= 0) {
+		TUNDEBUG(ifp,
+		    "%s waiting for data, minor = %#x\n", ifp->if_xname,
+		    dev2unit(dev));
+		return (0);
+	}
+
+	TUNDEBUG(ifp,
+	    "%s have data in the queue. Len = %d, minor = %#x\n",
+	    ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
+	return (1);
+}
+
+/*
+ * Write filter: the device can always be written, so this always
+ * returns true and reports the interface MTU in kn_data.
+ */
+static int
+tunkqwrite(struct knote *kn, long hint)
+{
+	struct tuntap_softc *tp;
+
+	tp = kn->kn_hook;
+	kn->kn_data = TUN2IFP(tp)->if_mtu;
+
+	return (1);
+}
+
+/*
+ * Detach a knote: remove it from the knote list of the softc stored in the
+ * knote's hook by tunkqfilter().
+ */
+static void
+tunkqdetach(struct knote *kn)
+{
+ struct tuntap_softc *tp = kn->kn_hook;
+
+ knlist_remove(&tp->tun_rsel.si_note, kn, 0);
+}
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index d23928e5..700296fa 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -73,6 +73,7 @@ struct netmap_adapter;
struct netdump_methods;
#ifdef _KERNEL
+#include <sys/_eventhandler.h>
#include <sys/mbuf.h> /* ifqueue only? */
#include <sys/buf_ring.h>
#include <net/vnet.h>
@@ -95,8 +96,9 @@ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr);
CK_STAILQ_HEAD(ifgrouphead, ifg_group);
#ifdef _KERNEL
-VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */
-#define V_link_pfil_hook VNET(link_pfil_hook)
+VNET_DECLARE(struct pfil_head *, link_pfil_head);
+#define V_link_pfil_head VNET(link_pfil_head)
+#define PFIL_ETHER_NAME "ethernet"
#define HHOOK_IPSEC_INET 0
#define HHOOK_IPSEC_INET6 1
@@ -193,11 +195,13 @@ struct if_encap_req {
* m_snd_tag" comes from the network driver and it is free to allocate
* as much additional space as it wants for its own use.
*/
+struct ktls_session;
struct m_snd_tag;
#define IF_SND_TAG_TYPE_RATE_LIMIT 0
#define IF_SND_TAG_TYPE_UNLIMITED 1
-#define IF_SND_TAG_TYPE_MAX 2
+#define IF_SND_TAG_TYPE_TLS 2
+#define IF_SND_TAG_TYPE_MAX 3
struct if_snd_tag_alloc_header {
uint32_t type; /* send tag type, see IF_SND_TAG_XXX */
@@ -208,6 +212,14 @@ struct if_snd_tag_alloc_header {
struct if_snd_tag_alloc_rate_limit {
struct if_snd_tag_alloc_header hdr;
uint64_t max_rate; /* in bytes/s */
+ uint32_t flags; /* M_NOWAIT or M_WAITOK */
+ uint32_t reserved; /* alignment */
+};
+
+struct if_snd_tag_alloc_tls {
+ struct if_snd_tag_alloc_header hdr;
+ struct inpcb *inp;
+ const struct ktls_session *tls;
};
struct if_snd_tag_rate_limit_params {
@@ -215,13 +227,14 @@ struct if_snd_tag_rate_limit_params {
uint32_t queue_level; /* 0 (empty) .. 65535 (full) */
#define IF_SND_QUEUE_LEVEL_MIN 0
#define IF_SND_QUEUE_LEVEL_MAX 65535
- uint32_t reserved; /* padding */
+ uint32_t flags; /* M_NOWAIT or M_WAITOK */
};
union if_snd_tag_alloc_params {
struct if_snd_tag_alloc_header hdr;
struct if_snd_tag_alloc_rate_limit rate_limit;
struct if_snd_tag_alloc_rate_limit unlimited;
+ struct if_snd_tag_alloc_tls tls;
};
union if_snd_tag_modify_params {
@@ -234,11 +247,37 @@ union if_snd_tag_query_params {
struct if_snd_tag_rate_limit_params unlimited;
};
+/* Query return flags */
+#define RT_NOSUPPORT 0x00000000 /* Not supported */
+#define RT_IS_INDIRECT 0x00000001 /*
+ * Interface like a lagg, select
+ * the actual interface for
+ * capabilities.
+ */
+#define RT_IS_SELECTABLE 0x00000002 /*
+ * No rate table, you select
+ * rates and the first
+ * number_of_rates are created.
+ */
+#define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */
+#define RT_IS_UNUSABLE 0x00000008 /* It is not usable for this */
+
+struct if_ratelimit_query_results {
+ const uint64_t *rate_table; /* Pointer to table if present */
+ uint32_t flags; /* Flags indicating results */
+ uint32_t max_flows; /* Max flows using, 0=unlimited */
+ uint32_t number_of_rates; /* How many unique rates can be created */
+ uint32_t min_segment_burst; /* The amount the adapter bursts at each send */
+};
+
typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
struct m_snd_tag **);
typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
+typedef void (if_ratelimit_query_t)(struct ifnet *,
+ struct if_ratelimit_query_results *);
+
/*
* Structure defining a network interface.
@@ -250,7 +289,9 @@ struct ifnet {
CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */
/* protected by if_addr_lock */
u_char if_alloctype; /* if_type at time of allocation */
-
+#ifndef __rtems__
+ uint8_t if_numa_domain; /* NUMA domain of device */
+#endif /* __rtems__ */
/* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
void *if_llsoftc; /* link layer softc */
@@ -379,6 +420,7 @@ struct ifnet {
if_snd_tag_modify_t *if_snd_tag_modify;
if_snd_tag_query_t *if_snd_tag_query;
if_snd_tag_free_t *if_snd_tag_free;
+ if_ratelimit_query_t *if_ratelimit_query;
/* Ethernet PCP */
uint8_t if_pcp;
@@ -416,24 +458,21 @@ struct rtems_ifinputreq {
/* for compatibility with other BSDs */
#define if_name(ifp) ((ifp)->if_xname)
+#define IF_NODOM 255
/*
* Locks for address lists on the network interface.
*/
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock)
-#define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et);
-#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et);
#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock)
#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock)
#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED)
-#define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et)
-#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et))
-#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et)
-#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et))
-#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
-
+#define NET_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et))
+#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
+#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
/*
* Function variations on locking macros intended to be used by loadable
@@ -446,7 +485,6 @@ void if_maddr_rlock(if_t ifp); /* if_multiaddrs */
void if_maddr_runlock(if_t ifp); /* if_multiaddrs */
#ifdef _KERNEL
-#ifdef _SYS_EVENTHANDLER_H_
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
@@ -474,7 +512,6 @@ EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event);
EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
/*
* interface groups
@@ -513,16 +550,13 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF)
#define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock)
-#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et)
#define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp)
#define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp)
#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock)
#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock)
#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock))
-#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt));
#define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED)
@@ -606,16 +640,13 @@ extern struct sx ifnet_sxlock;
* write, but also whether it was acquired with sleep support or not.
*/
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
-#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt))
#define IFNET_WLOCK_ASSERT() do { \
sx_assert(&ifnet_sxlock, SA_XLOCKED); \
rw_assert(&ifnet_rwlock, RA_WLOCKED); \
} while (0)
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
-#define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
-#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et)
/*
* Look up an ifnet given its index; the _ref variant also acquires a
@@ -654,6 +685,8 @@ int if_delgroup(struct ifnet *, const char *);
int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
int if_allmulti(struct ifnet *, int);
struct ifnet* if_alloc(u_char);
+struct ifnet* if_alloc_dev(u_char, device_t dev);
+struct ifnet* if_alloc_domain(u_char, int numa_domain);
void if_attach(struct ifnet *);
void if_dead(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 893bb2cf..10a8a3bf 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -48,6 +48,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/local/opt_ratelimit.h>
@@ -105,6 +106,20 @@ struct ifvlantrunk {
int refcnt;
};
+#if defined(KERN_TLS) || defined(RATELIMIT)
+struct vlan_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
+static inline struct vlan_snd_tag *
+mst_to_vst(struct m_snd_tag *mst)
+{
+
+ return (__containerof(mst, struct vlan_snd_tag, com));
+}
+#endif
+
/*
* This macro provides a facility to iterate over every vlan on a trunk with
* the assumption that none will be added/removed during iteration.
@@ -158,7 +173,7 @@ struct vlan_mc_entry {
struct epoch_context mc_epoch_ctx;
};
-struct ifvlan {
+struct ifvlan {
struct ifvlantrunk *ifv_trunk;
struct ifnet *ifv_ifp;
#define TRUNK(ifv) ((ifv)->ifv_trunk)
@@ -166,28 +181,19 @@ struct ifvlan {
void *ifv_cookie;
int ifv_pflags; /* special flags we have set on parent */
int ifv_capenable;
- struct ifv_linkmib {
- int ifvm_encaplen; /* encapsulation length */
- int ifvm_mtufudge; /* MTU fudged by this much */
- int ifvm_mintu; /* min transmission unit */
- uint16_t ifvm_proto; /* encapsulation ethertype */
- uint16_t ifvm_tag; /* tag to apply on packets leaving if */
- uint16_t ifvm_vid; /* VLAN ID */
- uint8_t ifvm_pcp; /* Priority Code Point (PCP). */
- } ifv_mib;
+ int ifv_encaplen; /* encapsulation length */
+ int ifv_mtufudge; /* MTU fudged by this much */
+ int ifv_mintu; /* min transmission unit */
+ uint16_t ifv_proto; /* encapsulation ethertype */
+ uint16_t ifv_tag; /* tag to apply on packets leaving if */
+ uint16_t ifv_vid; /* VLAN ID */
+ uint8_t ifv_pcp; /* Priority Code Point (PCP). */
struct task lladdr_task;
CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
CK_SLIST_ENTRY(ifvlan) ifv_list;
#endif
};
-#define ifv_proto ifv_mib.ifvm_proto
-#define ifv_tag ifv_mib.ifvm_tag
-#define ifv_vid ifv_mib.ifvm_vid
-#define ifv_pcp ifv_mib.ifvm_pcp
-#define ifv_encaplen ifv_mib.ifvm_encaplen
-#define ifv_mtufudge ifv_mib.ifvm_mtufudge
-#define ifv_mintu ifv_mib.ifvm_mintu
/* Special flags we should propagate to parent. */
static struct {
@@ -235,10 +241,6 @@ static struct sx _VLAN_SX_ID;
#define VLAN_LOCKING_DESTROY() \
sx_destroy(&_VLAN_SX_ID)
-#define VLAN_RLOCK() NET_EPOCH_ENTER();
-#define VLAN_RUNLOCK() NET_EPOCH_EXIT();
-#define VLAN_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
-
#define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID)
#define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID)
#define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID)
@@ -254,11 +256,8 @@ static struct sx _VLAN_SX_ID;
*/
#define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF)
#define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock)
-#define TRUNK_RLOCK(trunk) NET_EPOCH_ENTER()
#define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock)
-#define TRUNK_RUNLOCK(trunk) NET_EPOCH_EXIT();
#define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock)
-#define TRUNK_RLOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt))
#define TRUNK_LOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(trunk)->lock))
#define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED);
@@ -282,9 +281,14 @@ static void trunk_destroy(struct ifvlantrunk *trunk);
static void vlan_init(void *foo);
static void vlan_input(struct ifnet *ifp, struct mbuf *m);
static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int vlan_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *, struct m_snd_tag **);
+static int vlan_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int vlan_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void vlan_snd_tag_free(struct m_snd_tag *);
#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -292,6 +296,8 @@ static int vlan_setflag(struct ifnet *ifp, int flag, int status,
static int vlan_setflags(struct ifnet *ifp, int status);
static int vlan_setmulti(struct ifnet *ifp);
static int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
+static int vlan_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro);
static void vlan_unconfig(struct ifnet *ifp);
static void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
@@ -474,7 +480,7 @@ vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
struct ifvlan *ifv;
- TRUNK_RLOCK_ASSERT(trunk);
+ NET_EPOCH_ASSERT();
CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
if (ifv->ifv_vid == vid)
@@ -619,16 +625,17 @@ vlan_setmulti(struct ifnet *ifp)
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
struct ifnet *ifv_ifp;
struct ifvlantrunk *trunk;
struct sockaddr_dl *sdl;
- /* Need the rmlock since this is run on taskqueue_swi. */
- VLAN_RLOCK();
+ /* Need the epoch since this is run on taskqueue_swi. */
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
@@ -654,7 +661,7 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task);
}
TRUNK_WUNLOCK(trunk);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
}
/*
@@ -700,17 +707,18 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
static struct ifnet *
vlan_trunkdev(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
if (ifp->if_type != IFT_L2VLAN)
return (NULL);
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
ifv = ifp->if_softc;
ifp = NULL;
if (ifv->ifv_trunk)
ifp = PARENT(ifv);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -782,20 +790,21 @@ vlan_setcookie(struct ifnet *ifp, void *cookie)
static struct ifnet *
vlan_devat(struct ifnet *ifp, uint16_t vid)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (NULL);
}
ifp = NULL;
ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (ifp);
}
@@ -1055,17 +1064,16 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
strlcpy(ifp->if_xname, name, IFNAMSIZ);
ifp->if_dname = vlanname;
ifp->if_dunit = unit;
- /* NB: flags are not set here */
- ifp->if_linkmib = &ifv->ifv_mib;
- ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
- /* NB: mtu is not set here */
ifp->if_init = vlan_init;
ifp->if_transmit = vlan_transmit;
ifp->if_qflush = vlan_qflush;
ifp->if_ioctl = vlan_ioctl;
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+ ifp->if_snd_tag_modify = vlan_snd_tag_modify;
+ ifp->if_snd_tag_query = vlan_snd_tag_query;
+ ifp->if_snd_tag_free = vlan_snd_tag_free;
#endif
ifp->if_flags = VLAN_IFFLAGS;
ether_ifattach(ifp, eaddr);
@@ -1135,15 +1143,16 @@ vlan_init(void *foo __unused)
static int
vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct ifvlan *ifv;
struct ifnet *p;
int error, len, mcast;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
ifv = ifp->if_softc;
if (TRUNK(ifv) == NULL) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (ENETDOWN);
}
@@ -1153,20 +1162,40 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
BPF_MTAP(ifp, m);
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct vlan_snd_tag *vst;
+ struct m_snd_tag *mst;
+
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+ mst = m->m_pkthdr.snd_tag;
+ vst = mst_to_vst(mst);
+ if (vst->tag->ifp != p) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (EAGAIN);
+ }
+
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
+
/*
* Do not run parent's if_transmit() if the parent is not up,
* or parent's driver will cause a system crash.
*/
if (!UP_AND_RUNNING(p)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (ENETDOWN);
}
if (!ether_8021q_frame(&m, ifp, p, ifv->ifv_vid, ifv->ifv_pcp)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -1180,10 +1209,31 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast);
} else
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return (error);
}
+static int
+vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ struct epoch_tracker et;
+ struct ifvlan *ifv;
+ struct ifnet *p;
+
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (TRUNK(ifv) == NULL) {
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (ENETDOWN);
+ }
+ p = PARENT(ifv);
+ NET_EPOCH_EXIT(et);
+ return p->if_output(ifp, m, dst, ro);
+}
+
+
/*
* The ifp->if_qflush entry point for vlan(4) is a no-op.
*/
@@ -1195,15 +1245,16 @@ vlan_qflush(struct ifnet *ifp __unused)
static void
vlan_input(struct ifnet *ifp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
struct m_tag *mtag;
uint16_t vid, tag;
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1226,7 +1277,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < sizeof(*evl) &&
(m = m_pullup(m, sizeof(*evl))) == NULL) {
if_printf(ifp, "cannot pullup VLAN header\n");
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
evl = mtod(m, struct ether_vlan_header *);
@@ -1249,7 +1300,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
__func__, ifp->if_xname, ifp->if_type);
#endif
if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1259,7 +1310,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
m_freem(m);
return;
@@ -1279,7 +1330,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
sizeof(uint8_t), M_NOWAIT);
if (mtag == NULL) {
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return;
}
@@ -1290,7 +1341,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
m->m_pkthdr.rcvif = ifv->ifv_ifp;
if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
/* Pass it back through the parent's input routine. */
(*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m);
@@ -1316,6 +1367,7 @@ vlan_lladdr_fn(void *arg, int pending __unused)
static int
vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifnet *ifp;
int error = 0;
@@ -1397,7 +1449,6 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
*/
ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
ifp->if_baudrate = p->if_baudrate;
- ifp->if_output = p->if_output;
ifp->if_input = p->if_input;
ifp->if_resolvemulti = p->if_resolvemulti;
ifp->if_addrlen = p->if_addrlen;
@@ -1405,6 +1456,12 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_pcp = ifv->ifv_pcp;
/*
+ * We wrap the parent's if_output using vlan_output to ensure that it
+ * can't become stale.
+ */
+ ifp->if_output = vlan_output;
+
+ /*
* Copy only a selected subset of flags from the parent.
* Other flags are none of our business.
*/
@@ -1415,9 +1472,9 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
ifp->if_link_state = p->if_link_state;
- TRUNK_RLOCK(TRUNK(ifv));
+ NET_EPOCH_ENTER(et);
vlan_capabilities(ifv);
- TRUNK_RUNLOCK(TRUNK(ifv));
+ NET_EPOCH_EXIT(et);
/*
* Set up our interface address to reflect the underlying
@@ -1589,14 +1646,15 @@ vlan_setflags(struct ifnet *ifp, int status)
static void
vlan_link_state(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
/* Called from a taskqueue_swi task, so we cannot sleep. */
- VLAN_RLOCK();
+ NET_EPOCH_ENTER(et);
trunk = ifp->if_vlantrunk;
if (trunk == NULL) {
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
return;
}
@@ -1607,7 +1665,7 @@ vlan_link_state(struct ifnet *ifp)
trunk->parent->if_link_state);
}
TRUNK_WUNLOCK(trunk);
- VLAN_RUNLOCK();
+ NET_EPOCH_EXIT(et);
}
static void
@@ -1620,7 +1678,7 @@ vlan_capabilities(struct ifvlan *ifv)
u_long hwa = 0;
VLAN_SXLOCK_ASSERT();
- TRUNK_RLOCK_ASSERT(TRUNK(ifv));
+ NET_EPOCH_ASSERT();
p = PARENT(ifv);
ifp = ifv->ifv_ifp;
@@ -1704,6 +1762,30 @@ vlan_capabilities(struct ifvlan *ifv)
ena |= (mena & IFCAP_TXRTLMT);
#endif
+ /*
+ * If the parent interface supports unmapped mbufs, so does
+ * the VLAN interface. Note that this should be fine even for
+ * interfaces that don't support hardware tagging as headers
+ * are prepended in normal mbufs to unmapped mbufs holding
+ * payload data.
+ */
+ cap |= (p->if_capabilities & IFCAP_NOMAP);
+ ena |= (mena & IFCAP_NOMAP);
+
+ /*
+ * If the parent interface can offload encryption and segmentation
+ * of TLS records over TCP, propagate its capability to the VLAN
+ * interface.
+ *
+ * All TLS drivers in the tree today can deal with VLANs. If
+ * this ever changes, then a new IFCAP_VLAN_TXTLS can be
+ * defined.
+ */
+ if (p->if_capabilities & IFCAP_TXTLS)
+ cap |= p->if_capabilities & IFCAP_TXTLS;
+ if (p->if_capenable & IFCAP_TXTLS)
+ ena |= mena & IFCAP_TXTLS;
+
ifp->if_capabilities = cap;
ifp->if_capenable = ena;
ifp->if_hwassist = hwa;
@@ -1712,6 +1794,7 @@ vlan_capabilities(struct ifvlan *ifv)
static void
vlan_trunk_capabilities(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
@@ -1721,11 +1804,11 @@ vlan_trunk_capabilities(struct ifnet *ifp)
VLAN_SUNLOCK();
return;
}
- TRUNK_RLOCK(trunk);
+ NET_EPOCH_ENTER(et);
VLAN_FOREACH(ifv, trunk) {
vlan_capabilities(ifv);
}
- TRUNK_RUNLOCK(trunk);
+ NET_EPOCH_EXIT(et);
VLAN_SUNLOCK();
}
@@ -1917,9 +2000,11 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifv->ifv_capenable = ifr->ifr_reqcap;
trunk = TRUNK(ifv);
if (trunk != NULL) {
- TRUNK_RLOCK(trunk);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
vlan_capabilities(ifv);
- TRUNK_RUNLOCK(trunk);
+ NET_EPOCH_EXIT(et);
}
VLAN_SUNLOCK();
break;
@@ -1932,18 +2017,77 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int
vlan_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
+ struct epoch_tracker et;
+ struct vlan_snd_tag *vst;
+ struct ifvlan *ifv;
+ struct ifnet *parent;
+ int error;
- /* get trunk device */
- ifp = vlan_trunkdev(ifp);
- if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (ifv->ifv_trunk != NULL)
+ parent = PARENT(ifv);
+ else
+ parent = NULL;
+ if (parent == NULL || parent->if_snd_tag_alloc == NULL) {
+ NET_EPOCH_EXIT(et);
return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ }
+ if_ref(parent);
+ NET_EPOCH_EXIT(et);
+
+ vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT);
+ if (vst == NULL) {
+ if_rele(parent);
+ return (ENOMEM);
+ }
+
+ error = parent->if_snd_tag_alloc(parent, params, &vst->tag);
+ if_rele(parent);
+ if (error) {
+ free(vst, M_VLAN);
+ return (error);
+ }
+
+ m_snd_tag_init(&vst->com, ifp);
+
+ *ppmt = &vst->com;
+ return (0);
+}
+
+static int
+vlan_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_modify(vst->tag, params));
+}
+
+static int
+vlan_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_query(vst->tag, params));
+}
+
+static void
+vlan_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ m_snd_tag_rele(vst->tag);
+ free(vst, M_VLAN);
}
#endif
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index 0b66ec0a..28b0fa73 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -150,13 +150,13 @@ extern int (*vlan_pcp_p)(struct ifnet *, uint16_t *);
extern int (*vlan_setcookie_p)(struct ifnet *, void *);
extern void *(*vlan_cookie_p)(struct ifnet *);
-#ifdef _SYS_EVENTHANDLER_H_
+#include <sys/_eventhandler.h>
+
/* VLAN state change events */
typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
-#endif /* _SYS_EVENTHANDLER_H_ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h
index 8c2be41b..cda00c4c 100644
--- a/freebsd/sys/net/iflib.h
+++ b/freebsd/sys/net/iflib.h
@@ -69,6 +69,9 @@ typedef struct if_rxd_frag {
uint16_t irf_len;
} *if_rxd_frag_t;
+/* bnxt supports 64 with hardware LRO enabled */
+#define IFLIB_MAX_RX_SEGS 64
+
typedef struct if_rxd_info {
/* set by iflib */
uint16_t iri_qsidx; /* qset index */
@@ -76,7 +79,7 @@ typedef struct if_rxd_info {
/* XXX redundant with the new irf_len field */
uint16_t iri_len; /* packet length */
qidx_t iri_cidx; /* consumer index of cq */
- struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
+ if_t iri_ifp; /* driver may have >1 iface per softc */
/* updated by driver */
if_rxd_frag_t iri_frags;
@@ -129,12 +132,12 @@ typedef struct if_pkt_info {
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t ipi_tcp_sum; /* tcp csum */
+ uint32_t __spare0__;
} *if_pkt_info_t;
typedef struct if_irq {
struct resource *ii_res;
- int ii_rid;
+ int __spare0__;
void *ii_tag;
} *if_irq_t;
@@ -163,7 +166,7 @@ typedef struct pci_vendor_info {
uint32_t pvi_subdevice_id;
uint32_t pvi_rev_id;
uint32_t pvi_class_mask;
- caddr_t pvi_name;
+ const char *pvi_name;
} pci_vendor_info_t;
#define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name}
@@ -191,9 +194,8 @@ typedef struct if_softc_ctx {
int isc_vectors;
int isc_nrxqsets;
int isc_ntxqsets;
- uint8_t isc_min_tx_latency; /* disable doorbell update batching */
- uint8_t isc_rx_mvec_enable; /* generate mvecs on rx */
- uint32_t isc_txrx_budget_bytes_max;
+ uint16_t __spare0__;
+ uint32_t __spare1__;
int isc_msix_bar; /* can be model specific - initialize in attach_pre */
int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */
int isc_ntxd[8];
@@ -215,16 +217,23 @@ typedef struct if_softc_ctx {
int isc_rss_table_mask;
int isc_nrxqsets_max;
int isc_ntxqsets_max;
- uint32_t isc_tx_qdepth;
+ uint32_t __spare2__;
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
uint16_t isc_min_frame_size; /* set at init time by driver, only used if
IFLIB_NEED_ETHER_PAD is set. */
uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */
- pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */
+ uint32_t __spare3__;
+ uint32_t __spare4__;
+ uint32_t __spare5__;
+ uint32_t __spare6__;
+ uint32_t __spare7__;
+ uint32_t __spare8__;
+ caddr_t __spare9__;
int isc_disable_msix;
if_txrx_t isc_txrx;
+ struct ifmedia *isc_media;
} *if_softc_ctx_t;
/*
@@ -244,8 +253,8 @@ struct if_shared_ctx {
int isc_admin_intrcnt; /* # of admin/link interrupts */
/* fields necessary for probe */
- pci_vendor_info_t *isc_vendor_info;
- char *isc_driver_version;
+ const pci_vendor_info_t *isc_vendor_info;
+ const char *isc_driver_version;
/* optional function to transform the read values to match the table*/
void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
uint16_t *subdevice_id, uint16_t *rev_id);
@@ -260,7 +269,7 @@ struct if_shared_ctx {
int isc_nfl __aligned(CACHE_LINE_SIZE);
int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
- int isc_rx_process_limit;
+ int __spare0__;
int isc_tx_reclaim_thresh;
int isc_flags;
const char *isc_name;
@@ -284,11 +293,6 @@ typedef enum {
IFLIB_INTR_IOV,
} iflib_intr_type_t;
-#ifndef ETH_ADDR_LEN
-#define ETH_ADDR_LEN 6
-#endif
-
-
/*
* Interface has a separate command queue for RX
*/
@@ -358,7 +362,10 @@ typedef enum {
* Interface needs admin task to ignore interface up/down status
*/
#define IFLIB_ADMIN_ALWAYS_RUN 0x10000
-
+/*
+ * Driver will pass the media
+ */
+#define IFLIB_DRIVER_MEDIA 0x20000
/*
* field accessors
@@ -378,6 +385,8 @@ void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
void iflib_request_reset(if_ctx_t ctx);
uint8_t iflib_in_detach(if_ctx_t ctx);
+uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx);
+
/*
* If the driver can plug cleanly in to newbus use these
*/
@@ -388,6 +397,12 @@ int iflib_device_suspend(device_t);
int iflib_device_resume(device_t);
int iflib_device_shutdown(device_t);
+/*
+ * Use this instead of iflib_device_probe if the driver should report
+ * BUS_PROBE_VENDOR instead of BUS_PROBE_DEFAULT. (For example, an out-of-tree
+ * driver based on iflib).
+ */
+int iflib_device_probe_vendor(device_t);
int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *);
void iflib_device_iov_uninit(device_t);
@@ -400,8 +415,6 @@ int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *);
int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp);
int iflib_device_deregister(if_ctx_t);
-
-
int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name);
int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
@@ -410,33 +423,28 @@ void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t t
void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
-void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name);
+void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu,
+ const char *name);
void iflib_config_gtask_init(void *ctx, struct grouptask *gtask,
gtask_fn_t *fn, const char *name);
-
void iflib_config_gtask_deinit(struct grouptask *gtask);
-
-
void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid);
void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid);
void iflib_admin_intr_deferred(if_ctx_t ctx);
void iflib_iov_intr_deferred(if_ctx_t ctx);
-
void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate);
int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
+int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags);
void iflib_dma_free(iflib_dma_info_t dma);
-
int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count);
void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count);
-
struct sx *iflib_ctx_lock_get(if_ctx_t);
-struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t);
void iflib_led_create(if_ctx_t ctx);
@@ -448,4 +456,5 @@ void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *,
*/
if_pseudo_t iflib_clone_register(if_shared_ctx_t);
void iflib_clone_deregister(if_pseudo_t);
+
#endif /* __IFLIB_H_ */
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index a3da964b..0f7c4800 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -868,6 +868,7 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
("%s: invalid policy %u for %s", __func__, npp->np_policy,
npp->np_name));
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL)
*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
index 65af515f..1dea915d 100644
--- a/freebsd/sys/net/pfil.c
+++ b/freebsd/sys/net/pfil.c
@@ -6,6 +6,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
+ * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1996 Matthew R. Green
* All rights reserved.
*
@@ -34,445 +35,650 @@
*/
#include <sys/param.h>
+#include <sys/conf.h>
#include <sys/kernel.h>
+#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/systm.h>
-#include <sys/condvar.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/ucred.h>
+#include <sys/jail.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/pfil.h>
-static struct mtx pfil_global_lock;
-
-MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
- MTX_DEF);
-
-static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
-static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
-static int pfil_chain_remove(pfil_chain_t *, void *, void *);
-static int pfil_add_hook_priv(void *, void *, int, struct pfil_head *, bool);
+static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks");
+
+static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
+static struct cdevsw pfil_cdevsw = {
+ .d_ioctl = pfil_ioctl,
+ .d_name = PFILDEV,
+ .d_version = D_VERSION,
+};
+static struct cdev *pfil_dev;
+
+static struct mtx pfil_lock;
+MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF);
+#define PFIL_LOCK() mtx_lock(&pfil_lock)
+#define PFIL_UNLOCK() mtx_unlock(&pfil_lock)
+#define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED)
+
+#define PFIL_EPOCH net_epoch_preempt
+#define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et))
+#define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et))
+
+struct pfil_hook {
+ pfil_func_t hook_func;
+ void *hook_ruleset;
+ int hook_flags;
+ int hook_links;
+ enum pfil_types hook_type;
+ const char *hook_modname;
+ const char *hook_rulname;
+ LIST_ENTRY(pfil_hook) hook_list;
+};
+
+struct pfil_link {
+ CK_STAILQ_ENTRY(pfil_link) link_chain;
+ pfil_func_t link_func;
+ void *link_ruleset;
+ int link_flags;
+ struct pfil_hook *link_hook;
+ struct epoch_context link_epoch_ctx;
+};
+
+typedef CK_STAILQ_HEAD(pfil_chain, pfil_link) pfil_chain_t;
+struct pfil_head {
+ int head_nhooksin;
+ int head_nhooksout;
+ pfil_chain_t head_in;
+ pfil_chain_t head_out;
+ int head_flags;
+ enum pfil_types head_type;
+ LIST_ENTRY(pfil_head) head_list;
+ const char *head_name;
+};
LIST_HEAD(pfilheadhead, pfil_head);
-VNET_DEFINE(struct pfilheadhead, pfil_head_list);
+VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) =
+ LIST_HEAD_INITIALIZER(pfil_head_list);
#define V_pfil_head_list VNET(pfil_head_list)
-VNET_DEFINE(struct rmlock, pfil_lock);
-
-#define PFIL_LOCK_INIT_REAL(l, t) \
- rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
-#define PFIL_LOCK_DESTROY_REAL(l) \
- rm_destroy(l)
-#define PFIL_LOCK_INIT(p) do { \
- if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \
- PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \
- (p)->ph_plock = &(p)->ph_lock; \
- } else \
- (p)->ph_plock = &V_pfil_lock; \
-} while (0)
-#define PFIL_LOCK_DESTROY(p) do { \
- if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \
- PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \
-} while (0)
-
-#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t))
-#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t))
-#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock)
-#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock)
-#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock)
-
-#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-/*
- * pfil_run_hooks() runs the specified packet filter hook chain.
- */
+LIST_HEAD(pfilhookhead, pfil_hook);
+VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) =
+ LIST_HEAD_INITIALIZER(pfil_hook_list);
+#define V_pfil_hook_list VNET(pfil_hook_list)
+
+static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t );
+static void pfil_link_free(epoch_context_t);
+
int
-pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp)
+pfil_realloc(pfil_packet_t *p, int flags, struct ifnet *ifp)
{
- struct rm_priotracker rmpt;
- struct packet_filter_hook *pfh;
- struct mbuf *m = *mp;
- int rv = 0;
-
- PFIL_RLOCK(ph, &rmpt);
- KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
- for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
- pfh = TAILQ_NEXT(pfh, pfil_chain)) {
- if (pfh->pfil_func_flags != NULL) {
- rv = (*pfh->pfil_func_flags)(pfh->pfil_arg, &m, ifp,
- dir, flags, inp);
- if (rv != 0 || m == NULL)
- break;
- }
- if (pfh->pfil_func != NULL) {
- rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
- inp);
- if (rv != 0 || m == NULL)
- break;
- }
- }
- PFIL_RUNLOCK(ph, &rmpt);
- *mp = m;
- return (rv);
+ struct mbuf *m;
+
+ MPASS(flags & PFIL_MEMPTR);
+
+ if ((m = m_devget(p->mem, PFIL_LENGTH(flags), 0, ifp, NULL)) == NULL)
+ return (ENOMEM);
+ *p = pfil_packet_align(*p);
+ *p->m = m;
+
+ return (0);
}
-static struct packet_filter_hook *
-pfil_chain_get(int dir, struct pfil_head *ph)
+static __noinline int
+pfil_fake_mbuf(pfil_func_t func, pfil_packet_t *p, struct ifnet *ifp, int flags,
+ void *ruleset, struct inpcb *inp)
{
+ struct mbuf m, *mp;
+ pfil_return_t rv;
+
+ (void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR);
+ m_extadd(&m, p->mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0,
+ EXT_RXRING);
+ m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags);
+ mp = &m;
+ flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
+
+ rv = func(&mp, ifp, flags, ruleset, inp);
+ if (rv == PFIL_PASS && mp != &m) {
+ /*
+ * Firewalls that need pfil_fake_mbuf() most likely don't
+ * know they need return PFIL_REALLOCED.
+ */
+ rv = PFIL_REALLOCED;
+ *p = pfil_packet_align(*p);
+ *p->m = mp;
+ }
- if (dir == PFIL_IN)
- return (TAILQ_FIRST(&ph->ph_in));
- else if (dir == PFIL_OUT)
- return (TAILQ_FIRST(&ph->ph_out));
- else
- return (NULL);
+ return (rv);
}
-#ifndef __rtems__
/*
- * pfil_try_rlock() acquires rm reader lock for specified head
- * if this is immediately possible.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
*/
int
-pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
+ int flags, struct inpcb *inp)
{
-
- return (PFIL_TRY_RLOCK(ph, tracker));
+ struct epoch_tracker et;
+ pfil_chain_t *pch;
+ struct pfil_link *link;
+ pfil_return_t rv;
+ bool realloc = false;
+
+ if (PFIL_DIR(flags) == PFIL_IN)
+ pch = &head->head_in;
+ else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT))
+ pch = &head->head_out;
+ else
+ panic("%s: bogus flags %d", __func__, flags);
+
+ rv = PFIL_PASS;
+ PFIL_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(link, pch, link_chain) {
+ if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR))
+ rv = pfil_fake_mbuf(link->link_func, &p, ifp, flags,
+ link->link_ruleset, inp);
+ else
+ rv = (*link->link_func)(p, ifp, flags,
+ link->link_ruleset, inp);
+ if (rv == PFIL_DROPPED || rv == PFIL_CONSUMED)
+ break;
+ else if (rv == PFIL_REALLOCED) {
+ flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
+ realloc = true;
+ }
+ }
+ PFIL_EPOCH_EXIT(et);
+ if (realloc && rv == PFIL_PASS)
+ rv = PFIL_REALLOCED;
+ return (rv);
}
-#endif /* __rtems__ */
/*
- * pfil_rlock() acquires rm reader lock for specified head.
+ * pfil_head_register() registers a pfil_head with the packet filter hook
+ * mechanism.
*/
-void
-pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+pfil_head_t
+pfil_head_register(struct pfil_head_args *pa)
{
+ struct pfil_head *head, *list;
- PFIL_RLOCK(ph, tracker);
-}
+ MPASS(pa->pa_version == PFIL_VERSION);
-/*
- * pfil_runlock() releases reader lock for specified head.
- */
-void
-pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
-{
+ head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK);
+
+ head->head_nhooksin = head->head_nhooksout = 0;
+ head->head_flags = pa->pa_flags;
+ head->head_type = pa->pa_type;
+ head->head_name = pa->pa_headname;
+ CK_STAILQ_INIT(&head->head_in);
+ CK_STAILQ_INIT(&head->head_out);
+
+ PFIL_LOCK();
+ LIST_FOREACH(list, &V_pfil_head_list, head_list)
+ if (strcmp(pa->pa_headname, list->head_name) == 0) {
+ printf("pfil: duplicate head \"%s\"\n",
+ pa->pa_headname);
+ }
+ LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list);
+ PFIL_UNLOCK();
- PFIL_RUNLOCK(ph, tracker);
+ return (head);
}
/*
- * pfil_wlock() acquires writer lock for specified head.
+ * pfil_head_unregister() removes a pfil_head from the packet filter hook
+ * mechanism. The producer of the hook promises that all outstanding
+ * invocations of the hook have completed before it unregisters the hook.
*/
void
-pfil_wlock(struct pfil_head *ph)
+pfil_head_unregister(pfil_head_t ph)
{
+ struct pfil_link *link, *next;
+
+ PFIL_LOCK();
+ LIST_REMOVE(ph, head_list);
- PFIL_WLOCK(ph);
+ CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) {
+ link->link_hook->hook_links--;
+ free(link, M_PFIL);
+ }
+ CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) {
+ link->link_hook->hook_links--;
+ free(link, M_PFIL);
+ }
+ PFIL_UNLOCK();
}
-/*
- * pfil_wunlock() releases writer lock for specified head.
- */
-void
-pfil_wunlock(struct pfil_head *ph)
+pfil_hook_t
+pfil_add_hook(struct pfil_hook_args *pa)
{
+ struct pfil_hook *hook, *list;
+
+ MPASS(pa->pa_version == PFIL_VERSION);
+
+ hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO);
+ hook->hook_func = pa->pa_func;
+ hook->hook_ruleset = pa->pa_ruleset;
+ hook->hook_flags = pa->pa_flags;
+ hook->hook_type = pa->pa_type;
+ hook->hook_modname = pa->pa_modname;
+ hook->hook_rulname = pa->pa_rulname;
+
+ PFIL_LOCK();
+ LIST_FOREACH(list, &V_pfil_hook_list, hook_list)
+ if (strcmp(pa->pa_modname, list->hook_modname) == 0 &&
+ strcmp(pa->pa_rulname, list->hook_rulname) == 0) {
+ printf("pfil: duplicate hook \"%s:%s\"\n",
+ pa->pa_modname, pa->pa_rulname);
+ }
+ LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list);
+ PFIL_UNLOCK();
- PFIL_WUNLOCK(ph);
+ return (hook);
}
-/*
- * pfil_wowned() returns a non-zero value if the current thread owns
- * an exclusive lock.
- */
-int
-pfil_wowned(struct pfil_head *ph)
+static int
+pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook)
{
+ struct pfil_link *in, *out;
+
+ PFIL_LOCK_ASSERT();
- return (PFIL_WOWNED(ph));
+ if (pa->pa_flags & PFIL_IN) {
+ in = pfil_link_remove(&head->head_in, hook);
+ if (in != NULL) {
+ head->head_nhooksin--;
+ hook->hook_links--;
+ }
+ } else
+ in = NULL;
+ if (pa->pa_flags & PFIL_OUT) {
+ out = pfil_link_remove(&head->head_out, hook);
+ if (out != NULL) {
+ head->head_nhooksout--;
+ hook->hook_links--;
+ }
+ } else
+ out = NULL;
+ PFIL_UNLOCK();
+
+ if (in != NULL)
+ epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free);
+ if (out != NULL)
+ epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free);
+
+ if (in == NULL && out == NULL)
+ return (ENOENT);
+ else
+ return (0);
}
-/*
- * pfil_head_register() registers a pfil_head with the packet filter hook
- * mechanism.
- */
int
-pfil_head_register(struct pfil_head *ph)
+pfil_link(struct pfil_link_args *pa)
{
- struct pfil_head *lph;
-
- PFIL_HEADLIST_LOCK();
- LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
- if (ph->ph_type == lph->ph_type &&
- ph->ph_un.phu_val == lph->ph_un.phu_val) {
- PFIL_HEADLIST_UNLOCK();
- return (EEXIST);
- }
+ struct pfil_link *in, *out, *link;
+ struct pfil_head *head;
+ struct pfil_hook *hook;
+ int error;
+
+ MPASS(pa->pa_version == PFIL_VERSION);
+
+ if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN)
+ in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO);
+ else
+ in = NULL;
+ if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT)
+ out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO);
+ else
+ out = NULL;
+
+ PFIL_LOCK();
+ if (pa->pa_flags & PFIL_HEADPTR)
+ head = pa->pa_head;
+ else
+ LIST_FOREACH(head, &V_pfil_head_list, head_list)
+ if (strcmp(pa->pa_headname, head->head_name) == 0)
+ break;
+ if (pa->pa_flags & PFIL_HOOKPTR)
+ hook = pa->pa_hook;
+ else
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
+ if (strcmp(pa->pa_modname, hook->hook_modname) == 0 &&
+ strcmp(pa->pa_rulname, hook->hook_rulname) == 0)
+ break;
+ if (head == NULL || hook == NULL) {
+ error = ENOENT;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_UNLINK)
+ return (pfil_unlink(pa, head, hook));
+
+ if (head->head_type != hook->hook_type ||
+ ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_IN)
+ CK_STAILQ_FOREACH(link, &head->head_in, link_chain)
+ if (link->link_hook == hook) {
+ error = EEXIST;
+ goto fail;
+ }
+ if (pa->pa_flags & PFIL_OUT)
+ CK_STAILQ_FOREACH(link, &head->head_out, link_chain)
+ if (link->link_hook == hook) {
+ error = EEXIST;
+ goto fail;
+ }
+
+ if (pa->pa_flags & PFIL_IN) {
+ in->link_hook = hook;
+ in->link_func = hook->hook_func;
+ in->link_flags = hook->hook_flags;
+ in->link_ruleset = hook->hook_ruleset;
+ if (pa->pa_flags & PFIL_APPEND)
+ CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain);
+ else
+ CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain);
+ hook->hook_links++;
+ head->head_nhooksin++;
+ }
+ if (pa->pa_flags & PFIL_OUT) {
+ out->link_hook = hook;
+ out->link_func = hook->hook_func;
+ out->link_flags = hook->hook_flags;
+ out->link_ruleset = hook->hook_ruleset;
+ if (pa->pa_flags & PFIL_APPEND)
+ CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain);
+ else
+ CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain);
+ hook->hook_links++;
+ head->head_nhooksout++;
}
- PFIL_LOCK_INIT(ph);
- ph->ph_nhooks = 0;
- TAILQ_INIT(&ph->ph_in);
- TAILQ_INIT(&ph->ph_out);
- LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
- PFIL_HEADLIST_UNLOCK();
+ PFIL_UNLOCK();
+
return (0);
+
+fail:
+ PFIL_UNLOCK();
+ free(in, M_PFIL);
+ free(out, M_PFIL);
+ return (error);
+}
+
+static void
+pfil_link_free(epoch_context_t ctx)
+{
+ struct pfil_link *link;
+
+ link = __containerof(ctx, struct pfil_link, link_epoch_ctx);
+ free(link, M_PFIL);
}
/*
- * pfil_head_unregister() removes a pfil_head from the packet filter hook
- * mechanism. The producer of the hook promises that all outstanding
- * invocations of the hook have completed before it unregisters the hook.
+ * pfil_remove_hook removes a filter from all filtering points.
*/
-int
-pfil_head_unregister(struct pfil_head *ph)
+void
+pfil_remove_hook(pfil_hook_t hook)
{
- struct packet_filter_hook *pfh, *pfnext;
-
- PFIL_HEADLIST_LOCK();
- LIST_REMOVE(ph, ph_list);
- PFIL_HEADLIST_UNLOCK();
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
- free(pfh, M_IFADDR);
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
- free(pfh, M_IFADDR);
- PFIL_LOCK_DESTROY(ph);
- return (0);
+ struct pfil_head *head;
+ struct pfil_link *in, *out;
+
+ PFIL_LOCK();
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+retry:
+ in = pfil_link_remove(&head->head_in, hook);
+ if (in != NULL) {
+ head->head_nhooksin--;
+ hook->hook_links--;
+ epoch_call(PFIL_EPOCH, &in->link_epoch_ctx,
+ pfil_link_free);
+ }
+ out = pfil_link_remove(&head->head_out, hook);
+ if (out != NULL) {
+ head->head_nhooksout--;
+ hook->hook_links--;
+ epoch_call(PFIL_EPOCH, &out->link_epoch_ctx,
+ pfil_link_free);
+ }
+ if (in != NULL || out != NULL)
+ /* What if some stupid admin put same filter twice? */
+ goto retry;
+ }
+ LIST_REMOVE(hook, hook_list);
+ PFIL_UNLOCK();
+ MPASS(hook->hook_links == 0);
+ free(hook, M_PFIL);
}
/*
- * pfil_head_get() returns the pfil_head for a given key/dlt.
+ * Internal: Remove a pfil hook from a hook chain.
*/
-struct pfil_head *
-pfil_head_get(int type, u_long val)
+static struct pfil_link *
+pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook)
{
- struct pfil_head *ph;
+ struct pfil_link *link;
- PFIL_HEADLIST_LOCK();
- LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
- if (ph->ph_type == type && ph->ph_un.phu_val == val)
- break;
- PFIL_HEADLIST_UNLOCK();
- return (ph);
+ PFIL_LOCK_ASSERT();
+
+ CK_STAILQ_FOREACH(link, chain, link_chain)
+ if (link->link_hook == hook) {
+ CK_STAILQ_REMOVE(chain, link, pfil_link, link_chain);
+ return (link);
+ }
+
+ return (NULL);
}
-/*
- * pfil_add_hook_flags() adds a function to the packet filter hook. the
- * flags are:
- * PFIL_IN call me on incoming packets
- * PFIL_OUT call me on outgoing packets
- * PFIL_ALL call me on all of the above
- * PFIL_WAITOK OK to call malloc with M_WAITOK.
- */
-int
-pfil_add_hook_flags(pfil_func_flags_t func, void *arg, int flags,
- struct pfil_head *ph)
+static void
+pfil_init(const void *unused __unused)
{
- return (pfil_add_hook_priv(func, arg, flags, ph, true));
+ struct make_dev_args args;
+ int error;
+
+ make_dev_args_init(&args);
+ args.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
+ args.mda_devsw = &pfil_cdevsw;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_WHEEL;
+ args.mda_mode = 0600;
+ error = make_dev_s(&args, &pfil_dev, PFILDEV);
+ KASSERT(error == 0, ("%s: failed to create dev: %d", __func__, error));
}
+/*
+ * Make sure the pfil bits are first before any possible subsystem which
+ * might piggyback on the SI_SUB_PROTO_PFIL.
+ */
+SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL);
/*
- * pfil_add_hook() adds a function to the packet filter hook. the
- * flags are:
- * PFIL_IN call me on incoming packets
- * PFIL_OUT call me on outgoing packets
- * PFIL_ALL call me on all of the above
- * PFIL_WAITOK OK to call malloc with M_WAITOK.
+ * User control interface.
*/
-int
-pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
+static int pfilioc_listheads(struct pfilioc_list *);
+static int pfilioc_listhooks(struct pfilioc_list *);
+static int pfilioc_link(struct pfilioc_link *);
+
+static int
+pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
+ struct thread *td)
{
- return (pfil_add_hook_priv(func, arg, flags, ph, false));
+ int error;
+
+ CURVNET_SET(TD_TO_VNET(td));
+ error = 0;
+ switch (cmd) {
+ case PFILIOC_LISTHEADS:
+ error = pfilioc_listheads((struct pfilioc_list *)addr);
+ break;
+ case PFILIOC_LISTHOOKS:
+ error = pfilioc_listhooks((struct pfilioc_list *)addr);
+ break;
+ case PFILIOC_LINK:
+ error = pfilioc_link((struct pfilioc_link *)addr);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ CURVNET_RESTORE();
+ return (error);
}
static int
-pfil_add_hook_priv(void *func, void *arg, int flags,
- struct pfil_head *ph, bool hasflags)
+pfilioc_listheads(struct pfilioc_list *req)
{
- struct packet_filter_hook *pfh1 = NULL;
- struct packet_filter_hook *pfh2 = NULL;
- int err;
-
- if (flags & PFIL_IN) {
- pfh1 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
- M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
- if (pfh1 == NULL) {
- err = ENOMEM;
- goto error;
- }
- }
- if (flags & PFIL_OUT) {
- pfh2 = (struct packet_filter_hook *)malloc(sizeof(*pfh1),
- M_IFADDR, (flags & PFIL_WAITOK) ? M_WAITOK : M_NOWAIT);
- if (pfh2 == NULL) {
- err = ENOMEM;
- goto error;
- }
+ struct pfil_head *head;
+ struct pfil_link *link;
+ struct pfilioc_head *iohead;
+ struct pfilioc_hook *iohook;
+ u_int nheads, nhooks, hd, hk;
+ int error;
+
+ PFIL_LOCK();
+restart:
+ nheads = nhooks = 0;
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+ nheads++;
+ nhooks += head->head_nhooksin + head->head_nhooksout;
}
- PFIL_WLOCK(ph);
- if (flags & PFIL_IN) {
- pfh1->pfil_func_flags = hasflags ? func : NULL;
- pfh1->pfil_func = hasflags ? NULL : func;
- pfh1->pfil_arg = arg;
- err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
- if (err)
- goto locked_error;
- ph->ph_nhooks++;
+ PFIL_UNLOCK();
+
+ if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) {
+ req->pio_nheads = nheads;
+ req->pio_nhooks = nhooks;
+ return (0);
}
- if (flags & PFIL_OUT) {
- pfh2->pfil_func_flags = hasflags ? func : NULL;
- pfh2->pfil_func = hasflags ? NULL : func;
- pfh2->pfil_arg = arg;
- err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
- if (err) {
- if (flags & PFIL_IN)
- pfil_chain_remove(&ph->ph_in, func, arg);
- goto locked_error;
+
+ iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK);
+ iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
+
+ hd = hk = 0;
+ PFIL_LOCK();
+ LIST_FOREACH(head, &V_pfil_head_list, head_list) {
+ if (hd + 1 > nheads ||
+ hk + head->head_nhooksin + head->head_nhooksout > nhooks) {
+ /* Configuration changed during malloc(). */
+ free(iohead, M_TEMP);
+ free(iohook, M_TEMP);
+ goto restart;
+ }
+ strlcpy(iohead[hd].pio_name, head->head_name,
+ sizeof(iohead[0].pio_name));
+ iohead[hd].pio_nhooksin = head->head_nhooksin;
+ iohead[hd].pio_nhooksout = head->head_nhooksout;
+ iohead[hd].pio_type = head->head_type;
+ CK_STAILQ_FOREACH(link, &head->head_in, link_chain) {
+ strlcpy(iohook[hk].pio_module,
+ link->link_hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset,
+ link->link_hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ hk++;
}
- ph->ph_nhooks++;
+ CK_STAILQ_FOREACH(link, &head->head_out, link_chain) {
+ strlcpy(iohook[hk].pio_module,
+ link->link_hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset,
+ link->link_hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ hk++;
+ }
+ hd++;
}
- PFIL_WUNLOCK(ph);
- return (0);
-locked_error:
- PFIL_WUNLOCK(ph);
-error:
- if (pfh1 != NULL)
- free(pfh1, M_IFADDR);
- if (pfh2 != NULL)
- free(pfh2, M_IFADDR);
- return (err);
-}
+ PFIL_UNLOCK();
-/*
- * pfil_remove_hook_flags removes a specific function from the packet filter hook
- * chain.
- */
-int
-pfil_remove_hook_flags(pfil_func_flags_t func, void *arg, int flags,
- struct pfil_head *ph)
-{
- return (pfil_remove_hook((pfil_func_t)func, arg, flags, ph));
-}
+ error = copyout(iohead, req->pio_heads,
+ sizeof(*iohead) * min(hd, req->pio_nheads));
+ if (error == 0)
+ error = copyout(iohook, req->pio_hooks,
+ sizeof(*iohook) * min(req->pio_nhooks, hk));
-/*
- * pfil_remove_hook removes a specific function from the packet filter hook
- * chain.
- */
-int
-pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
-{
- int err = 0;
+ req->pio_nheads = hd;
+ req->pio_nhooks = hk;
- PFIL_WLOCK(ph);
- if (flags & PFIL_IN) {
- err = pfil_chain_remove(&ph->ph_in, func, arg);
- if (err == 0)
- ph->ph_nhooks--;
- }
- if ((err == 0) && (flags & PFIL_OUT)) {
- err = pfil_chain_remove(&ph->ph_out, func, arg);
- if (err == 0)
- ph->ph_nhooks--;
- }
- PFIL_WUNLOCK(ph);
- return (err);
-}
+ free(iohead, M_TEMP);
+ free(iohook, M_TEMP);
-/*
- * Internal: Add a new pfil hook into a hook chain.
- */
-static int
-pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
-{
- struct packet_filter_hook *pfh;
-
- /*
- * First make sure the hook is not already there.
- */
- TAILQ_FOREACH(pfh, chain, pfil_chain)
- if (((pfh->pfil_func != NULL && pfh->pfil_func == pfh1->pfil_func) ||
- (pfh->pfil_func_flags != NULL &&
- pfh->pfil_func_flags == pfh1->pfil_func_flags)) &&
- pfh->pfil_arg == pfh1->pfil_arg)
- return (EEXIST);
-
- /*
- * Insert the input list in reverse order of the output list so that
- * the same path is followed in or out of the kernel.
- */
- if (flags & PFIL_IN)
- TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
- else
- TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
- return (0);
+ return (error);
}
-/*
- * Internal: Remove a pfil hook from a hook chain.
- */
static int
-pfil_chain_remove(pfil_chain_t *chain, void *func, void *arg)
+pfilioc_listhooks(struct pfilioc_list *req)
{
- struct packet_filter_hook *pfh;
-
- TAILQ_FOREACH(pfh, chain, pfil_chain)
- if ((pfh->pfil_func == func || pfh->pfil_func_flags == func) &&
- pfh->pfil_arg == arg) {
- TAILQ_REMOVE(chain, pfh, pfil_chain);
- free(pfh, M_IFADDR);
- return (0);
+ struct pfil_hook *hook;
+ struct pfilioc_hook *iohook;
+ u_int nhooks, hk;
+ int error;
+
+ PFIL_LOCK();
+restart:
+ nhooks = 0;
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
+ nhooks++;
+ PFIL_UNLOCK();
+
+ if (req->pio_nhooks < nhooks) {
+ req->pio_nhooks = nhooks;
+ return (0);
+ }
+
+ iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
+
+ hk = 0;
+ PFIL_LOCK();
+ LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) {
+ if (hk + 1 > nhooks) {
+ /* Configuration changed during malloc(). */
+ free(iohook, M_TEMP);
+ goto restart;
}
- return (ENOENT);
-}
+ strlcpy(iohook[hk].pio_module, hook->hook_modname,
+ sizeof(iohook[0].pio_module));
+ strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname,
+ sizeof(iohook[0].pio_ruleset));
+ iohook[hk].pio_type = hook->hook_type;
+ iohook[hk].pio_flags = hook->hook_flags;
+ hk++;
+ }
+ PFIL_UNLOCK();
-/*
- * Stuff that must be initialized for every instance (including the first of
- * course).
- */
-static void
-vnet_pfil_init(const void *unused __unused)
-{
+ error = copyout(iohook, req->pio_hooks,
+ sizeof(*iohook) * min(req->pio_nhooks, hk));
+ req->pio_nhooks = hk;
+ free(iohook, M_TEMP);
- LIST_INIT(&V_pfil_head_list);
- PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
+ return (error);
}
-/*
- * Called for the removal of each instance.
- */
-static void
-vnet_pfil_uninit(const void *unused __unused)
+static int
+pfilioc_link(struct pfilioc_link *req)
{
+ struct pfil_link_args args;
- KASSERT(LIST_EMPTY(&V_pfil_head_list),
- ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
- PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
-}
+ if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND))
+ return (EINVAL);
-/*
- * Starting up.
- *
- * VNET_SYSINIT is called for each existing vnet and each new vnet.
- * Make sure the pfil bits are first before any possible subsystem which
- * might piggyback on the SI_SUB_PROTO_PFIL.
- */
-VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
- vnet_pfil_init, NULL);
-
-/*
- * Closing up shop. These are done in REVERSE ORDER. Not called on reboot.
- *
- * VNET_SYSUNINIT is called for each exiting vnet as it exits.
- */
-VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
- vnet_pfil_uninit, NULL);
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = req->pio_flags;
+ args.pa_headname = req->pio_name;
+ args.pa_modname = req->pio_module;
+ args.pa_rulname = req->pio_ruleset;
+
+ return (pfil_link(&args));
+}
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
index 8fdaf5a6..da045b30 100644
--- a/freebsd/sys/net/pfil.h
+++ b/freebsd/sys/net/pfil.h
@@ -4,6 +4,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
+ * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1996 Matthew R. Green
* All rights reserved.
*
@@ -34,98 +35,180 @@
#ifndef _NET_PFIL_H_
#define _NET_PFIL_H_
-#include <sys/systm.h>
-#include <sys/queue.h>
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
-#include <net/vnet.h>
+#include <sys/ioccom.h>
+enum pfil_types {
+ PFIL_TYPE_IP4,
+ PFIL_TYPE_IP6,
+ PFIL_TYPE_ETHERNET,
+};
+
+#define MAXPFILNAME 64
+
+struct pfilioc_head {
+ char pio_name[MAXPFILNAME];
+ int pio_nhooksin;
+ int pio_nhooksout;
+ enum pfil_types pio_type;
+};
+
+struct pfilioc_hook {
+ char pio_module[MAXPFILNAME];
+ char pio_ruleset[MAXPFILNAME];
+ int pio_flags;
+ enum pfil_types pio_type;
+};
+
+struct pfilioc_list {
+ u_int pio_nheads;
+ u_int pio_nhooks;
+ struct pfilioc_head *pio_heads;
+ struct pfilioc_hook *pio_hooks;
+};
+
+struct pfilioc_link {
+ char pio_name[MAXPFILNAME];
+ char pio_module[MAXPFILNAME];
+ char pio_ruleset[MAXPFILNAME];
+ int pio_flags;
+};
+
+#define PFILDEV "pfil"
+#define PFILIOC_LISTHEADS _IOWR('P', 1, struct pfilioc_list)
+#define PFILIOC_LISTHOOKS _IOWR('P', 2, struct pfilioc_list)
+#define PFILIOC_LINK _IOW('P', 3, struct pfilioc_link)
+
+#define PFIL_IN 0x00010000
+#define PFIL_OUT 0x00020000
+#define PFIL_FWD 0x00040000
+#define PFIL_DIR(f) ((f) & (PFIL_IN|PFIL_OUT))
+#define PFIL_MEMPTR 0x00080000
+#define PFIL_HEADPTR 0x00100000
+#define PFIL_HOOKPTR 0x00200000
+#define PFIL_APPEND 0x00400000
+#define PFIL_UNLINK 0x00800000
+#define PFIL_LENMASK 0x0000ffff
+#define PFIL_LENGTH(f) ((f) & PFIL_LENMASK)
+
+#ifdef _KERNEL
struct mbuf;
struct ifnet;
struct inpcb;
-typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
-typedef int (*pfil_func_flags_t)(void *, struct mbuf **, struct ifnet *,
- int, int, struct inpcb *);
+typedef union {
+ struct mbuf **m;
+ void *mem;
+ uintptr_t __ui;
+} pfil_packet_t __attribute__((__transparent_union__));
+
+static inline pfil_packet_t
+pfil_packet_align(pfil_packet_t p)
+{
+
+ return ((pfil_packet_t ) (((uintptr_t)(p).mem +
+ (_Alignof(void *) - 1)) & - _Alignof(void *)));
+}
+
+static inline struct mbuf *
+pfil_mem2mbuf(void *v)
+{
+
+ return (*(struct mbuf **) (((uintptr_t)(v) +
+ (_Alignof(void *) - 1)) & - _Alignof(void *)));
+}
+
+typedef enum {
+ PFIL_PASS = 0,
+ PFIL_DROPPED,
+ PFIL_CONSUMED,
+ PFIL_REALLOCED,
+} pfil_return_t;
+
+typedef pfil_return_t (*pfil_func_t)(pfil_packet_t, struct ifnet *, int,
+ void *, struct inpcb *);
+/*
+ * A pfil head is created by a packet intercept point.
+ *
+ * A pfil hook is created by a packet filter.
+ *
+ * Hooks are chained on heads. Historically some hooking happens
+ * automatically, e.g. ipfw(4), pf(4) and ipfilter(4) would register
+ * theirselves on IPv4 and IPv6 input/output.
+ */
+
+typedef struct pfil_hook * pfil_hook_t;
+typedef struct pfil_head * pfil_head_t;
/*
- * The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet. Multiple filter hooks are chained
- * together and after each other in the specified order.
+ * Give us a chance to modify pfil_xxx_args structures in future.
*/
-struct packet_filter_hook {
- TAILQ_ENTRY(packet_filter_hook) pfil_chain;
- pfil_func_t pfil_func;
- pfil_func_flags_t pfil_func_flags;
- void *pfil_arg;
+#define PFIL_VERSION 1
+
+/* Argument structure used by packet filters to register themselves. */
+struct pfil_hook_args {
+ int pa_version;
+ int pa_flags;
+ enum pfil_types pa_type;
+ pfil_func_t pa_func;
+ void *pa_ruleset;
+ const char *pa_modname;
+ const char *pa_rulname;
};
-#define PFIL_IN 0x00000001
-#define PFIL_OUT 0x00000002
-#define PFIL_WAITOK 0x00000004
-#define PFIL_FWD 0x00000008
-#define PFIL_ALL (PFIL_IN|PFIL_OUT)
+/* Public functions for pfil hook management by packet filters. */
+pfil_hook_t pfil_add_hook(struct pfil_hook_args *);
+void pfil_remove_hook(pfil_hook_t);
-typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
+/* Argument structure used by ioctl() and packet filters to set filters. */
+struct pfil_link_args {
+ int pa_version;
+ int pa_flags;
+ union {
+ const char *pa_headname;
+ pfil_head_t pa_head;
+ };
+ union {
+ struct {
+ const char *pa_modname;
+ const char *pa_rulname;
+ };
+ pfil_hook_t pa_hook;
+ };
+};
-#define PFIL_TYPE_AF 1 /* key is AF_* type */
-#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+/* Public function to configure filter chains. Used by ioctl() and filters. */
+int pfil_link(struct pfil_link_args *);
-#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */
+/* Argument structure used by inspection points to register themselves. */
+struct pfil_head_args {
+ int pa_version;
+ int pa_flags;
+ enum pfil_types pa_type;
+ const char *pa_headname;
+};
+/* Public functions for pfil head management by inspection points. */
+pfil_head_t pfil_head_register(struct pfil_head_args *);
+void pfil_head_unregister(pfil_head_t);
+
+/* Public functions to run the packet inspection by inspection points. */
+int pfil_run_hooks(struct pfil_head *, pfil_packet_t, struct ifnet *, int,
+ struct inpcb *inp);
/*
- * A pfil head is created by each protocol or packet intercept point.
- * For packet is then run through the hook chain for inspection.
+ * Minimally exposed structure to avoid function call in case of absence
+ * of any filters by protocols and macros to do the check.
*/
-struct pfil_head {
- pfil_chain_t ph_in;
- pfil_chain_t ph_out;
- int ph_type;
- int ph_nhooks;
-#if defined( __linux__ ) || defined( _WIN32 )
- rwlock_t ph_mtx;
-#else
- struct rmlock *ph_plock; /* Pointer to the used lock */
- struct rmlock ph_lock; /* Private lock storage */
- int flags;
-#endif
- union {
- u_long phu_val;
- void *phu_ptr;
- } ph_un;
-#define ph_af ph_un.phu_val
-#define ph_ifnet ph_un.phu_ptr
- LIST_ENTRY(pfil_head) ph_list;
+struct _pfil_head {
+ int head_nhooksin;
+ int head_nhooksout;
};
+#define PFIL_HOOKED_IN(p) (((struct _pfil_head *)(p))->head_nhooksin > 0)
+#define PFIL_HOOKED_OUT(p) (((struct _pfil_head *)(p))->head_nhooksout > 0)
-VNET_DECLARE(struct rmlock, pfil_lock);
-#define V_pfil_lock VNET(pfil_lock)
-
-/* Public functions for pfil hook management by packet filters. */
-struct pfil_head *pfil_head_get(int, u_long);
-int pfil_add_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
-int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
-int pfil_remove_hook_flags(pfil_func_flags_t, void *, int, struct pfil_head *);
-int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
-#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-
-/* Public functions to run the packet inspection by protocols. */
-int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int,
- int, struct inpcb *inp);
-
-/* Public functions for pfil head management by protocols. */
-int pfil_head_register(struct pfil_head *);
-int pfil_head_unregister(struct pfil_head *);
-
-/* Public pfil locking functions for self managed locks by packet filters. */
-int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_wlock(struct pfil_head *);
-void pfil_wunlock(struct pfil_head *);
-int pfil_wowned(struct pfil_head *ph);
+/*
+ * Alloc mbuf to be used instead of memory pointer.
+ */
+int pfil_realloc(pfil_packet_t *, int, struct ifnet *);
+#endif /* _KERNEL */
#endif /* _NET_PFIL_H_ */
diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h
index 2924c06d..bfa7e773 100644
--- a/freebsd/sys/net/pfvar.h
+++ b/freebsd/sys/net/pfvar.h
@@ -41,6 +41,7 @@
#include <sys/cpuset.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
+#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/tree.h>
@@ -95,6 +96,9 @@ struct pf_addr_wrap {
#ifdef _KERNEL
+SYSCTL_DECL(_net_pf);
+MALLOC_DECLARE(M_PFHASH);
+
struct pfi_dynaddr {
TAILQ_ENTRY(pfi_dynaddr) entry;
struct pf_addr pfid_addr4;
@@ -1017,6 +1021,17 @@ struct pfr_tstats {
int pfrts_cnt;
int pfrts_refcnt[PFR_REFCNT_MAX];
};
+
+struct pfr_ktstats { /* statistics block embedded in struct pfr_ktable as pfrkt_kts */
+ struct pfr_table pfrts_t; /* keeps the pfrts_ prefix; reached through the pfrkt_t macro */
+ counter_u64_t pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; /* exposed as pfrkt_packets */
+ counter_u64_t pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; /* exposed as pfrkt_bytes */
+ counter_u64_t pfrkts_match; /* exposed as pfrkt_match */
+ counter_u64_t pfrkts_nomatch; /* exposed as pfrkt_nomatch */
+ long pfrkts_tzero; /* exposed as pfrkt_tzero */
+ int pfrkts_cnt; /* exposed as pfrkt_cnt */
+ int pfrkts_refcnt[PFR_REFCNT_MAX]; /* exposed as pfrkt_refcnt */
+};
#define pfrts_name pfrts_t.pfrt_name
#define pfrts_flags pfrts_t.pfrt_flags
@@ -1030,8 +1045,9 @@ union sockaddr_union {
#endif /* _SOCKADDR_UNION_DEFINED */
struct pfr_kcounters {
- u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
- u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ counter_u64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ counter_u64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfrkc_tzero;
};
SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
@@ -1039,8 +1055,7 @@ struct pfr_kentry {
struct radix_node pfrke_node[2];
union sockaddr_union pfrke_sa;
SLIST_ENTRY(pfr_kentry) pfrke_workq;
- struct pfr_kcounters *pfrke_counters;
- long pfrke_tzero;
+ struct pfr_kcounters pfrke_counters;
u_int8_t pfrke_af;
u_int8_t pfrke_net;
u_int8_t pfrke_not;
@@ -1050,7 +1065,7 @@ struct pfr_kentry {
SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
RB_HEAD(pfr_ktablehead, pfr_ktable);
struct pfr_ktable {
- struct pfr_tstats pfrkt_ts;
+ struct pfr_ktstats pfrkt_kts;
RB_ENTRY(pfr_ktable) pfrkt_tree;
SLIST_ENTRY(pfr_ktable) pfrkt_workq;
struct radix_node_head *pfrkt_ip4;
@@ -1061,18 +1076,18 @@ struct pfr_ktable {
long pfrkt_larg;
int pfrkt_nflags;
};
-#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_t pfrkt_kts.pfrts_t
#define pfrkt_name pfrkt_t.pfrt_name
#define pfrkt_anchor pfrkt_t.pfrt_anchor
#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
#define pfrkt_flags pfrkt_t.pfrt_flags
-#define pfrkt_cnt pfrkt_ts.pfrts_cnt
-#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
-#define pfrkt_packets pfrkt_ts.pfrts_packets
-#define pfrkt_bytes pfrkt_ts.pfrts_bytes
-#define pfrkt_match pfrkt_ts.pfrts_match
-#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
-#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+#define pfrkt_cnt pfrkt_kts.pfrkts_cnt
+#define pfrkt_refcnt pfrkt_kts.pfrkts_refcnt
+#define pfrkt_packets pfrkt_kts.pfrkts_packets
+#define pfrkt_bytes pfrkt_kts.pfrkts_bytes
+#define pfrkt_match pfrkt_kts.pfrkts_match
+#define pfrkt_nomatch pfrkt_kts.pfrkts_nomatch
+#define pfrkt_tzero pfrkt_kts.pfrkts_tzero
/* keep synced with pfi_kif, used in RB_FIND */
struct pfi_kif_cmp {
@@ -1601,7 +1616,7 @@ VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
#define V_pf_stateid VNET(pf_stateid)
TAILQ_HEAD(pf_altqqueue, pf_altq);
-VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[4]);
#define V_pf_altqs VNET(pf_altqs)
VNET_DECLARE(struct pf_palist, pf_pabuf);
#define V_pf_pabuf VNET(pf_pabuf)
@@ -1616,8 +1631,12 @@ VNET_DECLARE(u_int32_t, ticket_pabuf);
#define V_ticket_pabuf VNET(ticket_pabuf)
VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_active);
+#define V_pf_altq_ifs_active VNET(pf_altq_ifs_active)
VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_inactive);
+#define V_pf_altq_ifs_inactive VNET(pf_altq_ifs_inactive)
VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
#define V_pf_unlinked_rules VNET(pf_unlinked_rules)
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 3cd909c1..36f3bf41 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -625,11 +625,12 @@ rtredirect_fib(struct sockaddr *dst,
int error = 0;
short *stat = NULL;
struct rt_addrinfo info;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct rib_head *rnh;
ifa = NULL;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL) {
error = EAFNOSUPPORT;
@@ -724,7 +725,7 @@ done:
if (rt)
RTFREE_LOCKED(rt);
out:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (error)
V_rtstat.rts_badredirect++;
else if (stat != NULL)
@@ -1307,11 +1308,14 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
/*
* Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
* it will be referenced so the caller must free it.
+ *
+ * Assume basic consistency checks are executed by callers:
+ * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
*/
int
rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
{
- struct ifaddr *ifa;
+ struct epoch_tracker et;
int needref, error;
/*
@@ -1320,22 +1324,55 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
*/
error = 0;
needref = (info->rti_ifa == NULL);
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
+
+ /* If we have interface specified by the ifindex in the address, use it */
if (info->rti_ifp == NULL && ifpaddr != NULL &&
- ifpaddr->sa_family == AF_LINK &&
- (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) {
- info->rti_ifp = ifa->ifa_ifp;
+ ifpaddr->sa_family == AF_LINK) {
+ const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr;
+ if (sdl->sdl_index != 0)
+ info->rti_ifp = ifnet_byindex_locked(sdl->sdl_index);
}
+ /*
+ * If we have source address specified, try to find it
+ * TODO: avoid enumerating all ifas on all interfaces.
+ */
if (info->rti_ifa == NULL && ifaaddr != NULL)
info->rti_ifa = ifa_ifwithaddr(ifaaddr);
if (info->rti_ifa == NULL) {
struct sockaddr *sa;
- sa = ifaaddr != NULL ? ifaaddr :
- (gateway != NULL ? gateway : dst);
- if (sa != NULL && info->rti_ifp != NULL)
+ /*
+ * Most common use case for the userland-supplied routes.
+ *
+ * Choose sockaddr to select ifa.
+ * -- if ifp is set --
+ * Order of preference:
+ * 1) IFA address
+ * 2) gateway address
+ * Note: for interface routes link-level gateway address
+ * is specified to indicate the interface index without
+ * specifying RTF_GATEWAY. In this case, ignore gateway
+ * Note: gateway AF may be different from dst AF. In this case,
+ * ignore gateway
+ * 3) final destination.
+ * 4) if all of these fail, try to get at least link-level ifa.
+ * -- else --
+ * try to lookup gateway or dst in the routing table to get ifa
+ */
+ if (info->rti_info[RTAX_IFA] != NULL)
+ sa = info->rti_info[RTAX_IFA];
+ else if ((info->rti_flags & RTF_GATEWAY) != 0 &&
+ gateway->sa_family == dst->sa_family)
+ sa = gateway;
+ else
+ sa = dst;
+ if (info->rti_ifp != NULL) {
info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
- else if (dst != NULL && gateway != NULL)
+ /* Case 4 */
+ if (info->rti_ifa == NULL && gateway != NULL)
+ info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp);
+ } else if (dst != NULL && gateway != NULL)
info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
fibnum);
else if (sa != NULL)
@@ -1348,7 +1385,7 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
ifa_ref(info->rti_ifa);
} else
error = ENETUNREACH;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1585,6 +1622,8 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
switch (req) {
case RTM_DELETE:
if (netmask) {
+ if (dst->sa_len > sizeof(mdst))
+ return (EINVAL);
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
@@ -1990,7 +2029,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
char tempbuf[_SOCKADDR_TMPSIZE];
int didwork = 0;
int a_failure = 0;
- static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+ struct sockaddr_dl *sdl = NULL;
struct rib_head *rnh;
if (flags & RTF_HOST) {
@@ -2045,7 +2084,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
dst = (struct sockaddr *)tempbuf;
}
- }
+ } else if (cmd == RTM_ADD) {
+ sdl = (struct sockaddr_dl *)tempbuf;
+ bzero(sdl, sizeof(struct sockaddr_dl));
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_len = sizeof(struct sockaddr_dl);
+ sdl->sdl_type = ifa->ifa_ifp->if_type;
+ sdl->sdl_index = ifa->ifa_ifp->if_index;
+ }
/*
* Now go through all the requested tables (fibs) and do the
* requested action. Realistically, this will either be fib 0
@@ -2108,8 +2154,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
* doing this for compatibility reasons
*/
if (cmd == RTM_ADD)
- info.rti_info[RTAX_GATEWAY] =
- (struct sockaddr *)&null_sdl;
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl;
else
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
@@ -2136,15 +2181,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
rt->rt_ifa = ifa;
}
#endif
- /*
- * doing this for compatibility reasons
- */
- if (cmd == RTM_ADD) {
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
- rt->rt_ifp->if_type;
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
- rt->rt_ifp->if_index;
- }
RT_ADDREF(rt);
RT_UNLOCK(rt);
rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index c4333838..bdeb9869 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -210,6 +210,7 @@ struct rtentry {
#define NHF_DEFAULT 0x0080 /* Default route */
#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
+#define NHF_HOST 0x0400 /* RTF_HOST */
/* Nexthop request flags */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h
index 9d0d1931..db3db4e3 100644
--- a/freebsd/sys/net/route_var.h
+++ b/freebsd/sys/net/route_var.h
@@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
uint16_t res;
res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+ res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index e1b87095..6c457a9d 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -33,6 +33,7 @@
* @(#)rtsock.c 8.7 (Berkeley) 10/12/95
* $FreeBSD$
*/
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_mpath.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -55,6 +56,11 @@
#include <sys/sysctl.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+#endif
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
@@ -448,6 +454,9 @@ static int
rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
{
+#if defined(INET) || defined(INET6)
+ struct epoch_tracker et;
+#endif
/* First, see if the returned address is part of the jail. */
if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
@@ -468,7 +477,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -480,7 +489,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -510,7 +519,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
* Try to find an address on the given outgoing interface
* that belongs to the jail.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct sockaddr *sa;
sa = ifa->ifa_addr;
@@ -523,7 +532,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (!found) {
/*
* As a last resort return the 'default' jail address.
@@ -627,6 +636,8 @@ route_output(struct mbuf *m, struct socket *so, ...)
if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
senderr(EINVAL);
+ if (rtm->rtm_flags & RTF_RNH_LOCKED)
+ senderr(EINVAL);
info.rti_flags = rtm->rtm_flags;
if (info.rti_info[RTAX_DST] == NULL ||
info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
@@ -798,16 +809,17 @@ route_output(struct mbuf *m, struct socket *so, ...)
if (rt->rt_ifp != NULL &&
rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
+ struct epoch_tracker et;
struct ifaddr *ifa;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
RT_ALL_FIBS);
if (ifa != NULL)
rt_maskedcopy(ifa->ifa_addr,
&laddr,
ifa->ifa_netmask);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
} else
rt_maskedcopy(rt->rt_ifa->ifa_addr,
&laddr,
@@ -1571,7 +1583,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
struct rt_addrinfo info;
struct sockaddr_storage ss;
- IFNET_RLOCK_NOSLEEP_ASSERT();
+ NET_EPOCH_ASSERT();
if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
return 0;
@@ -1765,7 +1777,7 @@ sysctl_iflist(int af, struct walkarg *w)
bzero((caddr_t)&info, sizeof(info));
bzero(&ifd, sizeof(ifd));
- NET_EPOCH_ENTER_ET(et);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1815,7 +1827,7 @@ sysctl_iflist(int af, struct walkarg *w)
info.rti_info[RTAX_BRD] = NULL;
}
done:
- NET_EPOCH_EXIT_ET(et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1823,6 +1835,7 @@ static int
sysctl_ifmalist(int af, struct walkarg *w)
{
struct rt_addrinfo info;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -1831,13 +1844,12 @@ sysctl_ifmalist(int af, struct walkarg *w)
error = 0;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (af && af != ifma->ifma_addr->sa_family)
continue;
@@ -1864,11 +1876,10 @@ sysctl_ifmalist(int af, struct walkarg *w)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
if (error != 0)
break;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1955,11 +1966,13 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
for (error = 0; error == 0 && i <= lim; i++) {
rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
+ struct epoch_tracker et;
+
RIB_RLOCK(rnh);
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
error = rnh->rnh_walktree(&rnh->head,
sysctl_dumpentry, &w);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
RIB_RUNLOCK(rnh);
} else if (af != 0)
error = EAFNOSUPPORT;
@@ -2008,3 +2021,408 @@ static struct domain routedomain = {
};
VNET_DOMAIN_SET(route);
+
+#ifdef DDB
+/*
+ * Unfortunately, RTF_ values are expressed as raw masks rather than powers of
+ * 2, so we cannot use them as nice C99 initializer indices below.
+ */
+static const char * const rtf_flag_strings[] = { /* entry i names flag bit (1 << i); looked up by rt_flag_name() */
+ "UP",
+ "GATEWAY",
+ "HOST",
+ "REJECT",
+ "DYNAMIC",
+ "MODIFIED",
+ "DONE",
+ "UNUSED_0x80",
+ "UNUSED_0x100",
+ "XRESOLVE",
+ "LLDATA",
+ "STATIC",
+ "BLACKHOLE",
+ "UNUSED_0x2000",
+ "PROTO2",
+ "PROTO1",
+ "UNUSED_0x10000",
+ "UNUSED_0x20000",
+ "PROTO3",
+ "FIXEDMTU",
+ "PINNED",
+ "LOCAL",
+ "BROADCAST",
+ "MULTICAST",
+ /* Big gap: indices 24-27 and 29 stay NULL and are reported as "UNKNOWN". */
+ [28] = "STICKY",
+ [30] = "RNH_LOCKED",
+ [31] = "GWFLAG_COMPAT",
+};
+
+static const char * __pure
+rt_flag_name(unsigned idx)
+{ /* Map a flag bit index to its printable name; never returns NULL. */
+ if (idx >= nitems(rtf_flag_strings))
+ return ("INVALID_FLAG"); /* index past the end of the table */
+ if (rtf_flag_strings[idx] == NULL)
+ return ("UNKNOWN"); /* hole left by the designated initializers */
+ return (rtf_flag_strings[idx]);
+}
+
+static void
+rt_dumpaddr_ddb(const char *name, const struct sockaddr *sa)
+{ /* Print "<name> <address> " for one sockaddr through db_printf(). */
+ char buf[INET6_ADDRSTRLEN], *res;
+
+ res = NULL;
+ if (sa == NULL)
+ res = "NULL"; /* literal text, so a missing sockaddr is still reported */
+ else if (sa->sa_family == AF_INET) {
+ res = inet_ntop(AF_INET,
+ &((const struct sockaddr_in *)sa)->sin_addr,
+ buf, sizeof(buf));
+ } else if (sa->sa_family == AF_INET6) {
+ res = inet_ntop(AF_INET6,
+ &((const struct sockaddr_in6 *)sa)->sin6_addr,
+ buf, sizeof(buf));
+ } else if (sa->sa_family == AF_LINK) {
+ res = "on link";
+ }
+
+ if (res != NULL) {
+ db_printf("%s <%s> ", name, res);
+ return;
+ }
+
+ db_printf("%s <af:%d> ", name, sa->sa_family); /* other families, or inet_ntop() returned NULL; sa is non-NULL here */
+}
+
+static int
+rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused)
+{ /* Print one route entry on a single line; always returns 0 (also used as an rnh_walktree callback). */
+ struct sockaddr_storage ss;
+ struct rtentry *rt;
+ int flags, idx;
+
+ /* If RNTORT is important, put it in a header. */
+ rt = (void *)rn;
+
+ rt_dumpaddr_ddb("dst", rt_key(rt));
+ rt_dumpaddr_ddb("gateway", rt->rt_gateway);
+ rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt),
+ &ss));
+ if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) {
+ rt_dumpaddr_ddb("ifp", rt->rt_ifp->if_addr->ifa_addr); /* NOTE(review): if_addr is dereferenced without a NULL check - confirm it cannot be NULL here */
+ rt_dumpaddr_ddb("ifa", rt->rt_ifa->ifa_addr); /* NOTE(review): rt_ifa is dereferenced without a NULL check as well */
+ }
+
+ db_printf("flags ");
+ flags = rt->rt_flags;
+ if (flags == 0)
+ db_printf("none");
+
+ while ((idx = ffs(flags)) > 0) { /* visit the set bits from lowest to highest */
+ idx--; /* ffs() is 1-based, the name table is 0-based */
+
+ if (flags != rt->rt_flags)
+ db_printf(","); /* separator before every name except the first */
+ db_printf("%s", rt_flag_name(idx));
+
+ flags &= ~(1ul << idx); /* clear the bit that was just printed */
+ }
+
+ db_printf("\n");
+ return (0);
+}
+
+DB_SHOW_COMMAND(routetable, db_show_routetable_cmd)
+{ /* Dump the fib 0 routing table of one address family (when an address argument is given) or of AF 1..AF_MAX. */
+ struct rib_head *rnh;
+ int error, i, lim;
+
+ if (have_addr)
+ i = lim = addr; /* NOTE(review): addr is used as the AF number without a range check - confirm rt_tables_get_rnh() tolerates arbitrary values */
+ else {
+ i = 1;
+ lim = AF_MAX;
+ }
+
+ for (; i <= lim; i++) {
+ rnh = rt_tables_get_rnh(0, i);
+ if (rnh == NULL) {
+ if (have_addr) { /* complain only when this AF was asked for explicitly */
+ db_printf("%s: AF %d not supported?\n",
+ __func__, i);
+ break;
+ }
+ continue;
+ }
+
+ if (!have_addr && i > 1)
+ db_printf("\n"); /* blank line between tables when dumping every AF */
+
+ db_printf("Route table for AF %d%s%s%s:\n", i,
+ (i == AF_INET || i == AF_INET6) ? " (" : "",
+ (i == AF_INET) ? "INET" : (i == AF_INET6) ? "INET6" : "",
+ (i == AF_INET || i == AF_INET6) ? ")" : "");
+
+ error = rnh->rnh_walktree(&rnh->head, rt_dumpentry_ddb, NULL);
+ if (error != 0)
+ db_printf("%s: walktree(%d): %d\n", __func__, i,
+ error);
+ }
+}
+
+_DB_FUNC(_show, route, db_show_route_cmd, db_show_table, CS_OWN, NULL)
+{ /* DDB 'show route <address>': lex an INET or INET6 address from the command line, look up its route and dump it. */
+ char buf[INET6_ADDRSTRLEN], *bp;
+ const void *dst_addrp;
+ struct sockaddr *dstp;
+ struct rtentry *rt;
+ union {
+ struct sockaddr_in dest_sin;
+ struct sockaddr_in6 dest_sin6;
+ } u;
+ uint16_t hextets[8];
+ unsigned i, tets;
+ int t, af, exp, tokflags;
+
+ /*
+ * Undecoded address family. No double-colon expansion seen yet.
+ */
+ af = -1;
+ exp = -1;
+ /* Assume INET6 to start; we can work back if guess was wrong. */
+ tokflags = DRT_WSPACE | DRT_HEX | DRT_HEXADECIMAL;
+
+ /*
+ * db_command has lexed 'show route' for us.
+ */
+ t = db_read_token_flags(tokflags);
+ if (t == tWSPACE)
+ t = db_read_token_flags(tokflags);
+
+ /*
+ * tEOL: Just 'show route' isn't a valid mode.
+ * tMINUS: It's either '-h' or some invalid option. Regardless, usage.
+ */
+ if (t == tEOL || t == tMINUS)
+ goto usage;
+
+ db_unread_token(t);
+
+ tets = nitems(hextets);
+
+ /*
+ * Each loop iteration, we expect to read one octet (v4) or hextet
+ * (v6), followed by an appropriate field separator ('.' or ':' or
+ * '::').
+ *
+ * At the start of each loop, we're looking for a number (octet or
+ * hextet).
+ *
+ * INET6 addresses have a special case where they may begin with '::'.
+ */
+ for (i = 0; i < tets; i++) {
+ t = db_read_token_flags(tokflags);
+
+ if (t == tCOLONCOLON) {
+ /* INET6 with leading '::' or invalid. */
+ if (i != 0) {
+ db_printf("Parse error: unexpected extra "
+ "colons.\n");
+ goto exit;
+ }
+
+ af = AF_INET6;
+ exp = i;
+ hextets[i] = 0;
+ continue;
+ } else if (t == tNUMBER) {
+ /*
+ * Lexer separates out '-' as tMINUS, but make the
+ * assumption explicit here.
+ */
+ MPASS(db_tok_number >= 0);
+
+ if (db_tok_number > (af == AF_INET ? UINT8_MAX : UINT16_MAX)) { /* also bound hextets and the not-yet-classified first token */
+ db_printf(af == AF_INET ? "Not a valid v4 octet: %ld\n" :
+ "Not a valid hextet: %lx\n", (long)db_tok_number);
+ goto exit;
+ }
+ hextets[i] = db_tok_number; /* lossless: range-checked just above */
+ } else if (t == tEOL) {
+ /*
+ * We can only detect the end of an IPv6 address in
+ * compact representation with EOL.
+ */
+ if (af != AF_INET6 || exp < 0) {
+ db_printf("Parse failed. Got unexpected EOF "
+ "when the address is not a compact-"
+ "representation IPv6 address.\n");
+ goto exit;
+ }
+ break;
+ } else {
+ db_printf("Parse failed. Unexpected token %d.\n", t);
+ goto exit;
+ }
+
+ /* Next, look for a separator, if appropriate. */
+ if (i == tets - 1)
+ continue;
+
+ t = db_read_token_flags(tokflags);
+ if (af < 0) {
+ if (t == tCOLON) {
+ af = AF_INET6;
+ continue;
+ }
+ if (t == tCOLONCOLON) {
+ af = AF_INET6;
+ i++;
+ hextets[i] = 0;
+ exp = i;
+ continue;
+ }
+ if (t == tDOT) {
+ unsigned hn, dn;
+
+ af = AF_INET;
+ /* Need to fixup the first parsed number. */
+ if (hextets[0] > 0x255 ||
+ (hextets[0] & 0xf0) > 0x90 ||
+ (hextets[0] & 0xf) > 9) {
+ db_printf("Not a valid v4 octet: %x\n",
+ hextets[0]);
+ goto exit;
+ }
+
+ hn = hextets[0]; /* lexed as hex; reread its hex digits as decimal digits */
+ dn = (hn >> 8) * 100 +
+ ((hn >> 4) & 0xf) * 10 +
+ (hn & 0xf);
+
+ hextets[0] = dn;
+
+ /* Switch to decimal for remaining octets. */
+ tokflags &= ~DRT_RADIX_MASK;
+ tokflags |= DRT_DECIMAL;
+
+ tets = 4; /* an INET address has four octets */
+ continue;
+ }
+
+ db_printf("Parse error. Unexpected token %d.\n", t);
+ goto exit;
+ } else if (af == AF_INET) {
+ if (t == tDOT)
+ continue;
+ db_printf("Expected '.' (%d) between octets but got "
+ "(%d).\n", tDOT, t);
+ goto exit;
+
+ } else if (af == AF_INET6) {
+ if (t == tCOLON)
+ continue;
+ if (t == tCOLONCOLON) {
+ if (exp < 0) {
+ i++;
+ hextets[i] = 0;
+ exp = i;
+ continue;
+ }
+ db_printf("Got bogus second '::' in v6 "
+ "address.\n");
+ goto exit;
+ }
+ if (t == tEOL) {
+ /*
+ * Handle in the earlier part of the loop
+ * because we need to handle trailing :: too.
+ */
+ db_unread_token(t);
+ continue;
+ }
+
+ db_printf("Expected ':' (%d) or '::' (%d) between "
+ "hextets but got (%d).\n", tCOLON, tCOLONCOLON, t);
+ goto exit;
+ }
+ }
+
+ /* Check for trailing garbage. */
+ if (i == tets) {
+ t = db_read_token_flags(tokflags);
+ if (t != tEOL) {
+ db_printf("Got unexpected garbage after address "
+ "(%d).\n", t);
+ goto exit;
+ }
+ }
+
+ /*
+ * Need to expand compact INET6 addresses.
+ *
+ * Technically '::' for a single ':0:' is MUST NOT but just in case,
+ * don't bother expanding that form (exp >= 0 && i == tets case).
+ */
+ if (af == AF_INET6 && exp >= 0 && i < tets) {
+ if (exp + 1 < i) { /* move the hextets that followed '::' to the tail of the array */
+ memmove(&hextets[exp + 1 + (nitems(hextets) - i)],
+ &hextets[exp + 1],
+ (i - (exp + 1)) * sizeof(hextets[0]));
+ }
+ memset(&hextets[exp + 1], 0, (nitems(hextets) - i) *
+ sizeof(hextets[0]));
+ }
+
+ memset(&u, 0, sizeof(u));
+ if (af == AF_INET) {
+ u.dest_sin.sin_family = AF_INET;
+ u.dest_sin.sin_len = sizeof(u.dest_sin);
+ u.dest_sin.sin_addr.s_addr = htonl(
+ ((uint32_t)hextets[0] << 24) |
+ ((uint32_t)hextets[1] << 16) |
+ ((uint32_t)hextets[2] << 8) |
+ (uint32_t)hextets[3]);
+ dstp = (void *)&u.dest_sin;
+ dst_addrp = &u.dest_sin.sin_addr;
+ } else if (af == AF_INET6) {
+ u.dest_sin6.sin6_family = AF_INET6;
+ u.dest_sin6.sin6_len = sizeof(u.dest_sin6);
+ for (i = 0; i < nitems(hextets); i++)
+ u.dest_sin6.sin6_addr.s6_addr16[i] = htons(hextets[i]);
+ dstp = (void *)&u.dest_sin6;
+ dst_addrp = &u.dest_sin6.sin6_addr;
+ } else {
+ MPASS(false);
+ /* UNREACHABLE */
+ /* Appease Clang false positive: */
+ dstp = NULL;
+ }
+
+ bp = inet_ntop(af, dst_addrp, buf, sizeof(buf));
+ if (bp != NULL)
+ db_printf("Looking up route to destination '%s'\n", bp);
+
+ CURVNET_SET(vnet0);
+ rt = rtalloc1(dstp, 0, RTF_RNH_LOCKED);
+ CURVNET_RESTORE();
+
+ if (rt == NULL) {
+ db_printf("Could not get route for that server.\n");
+ return;
+ }
+
+ rt_dumpentry_ddb((void *)rt, NULL);
+ RTFREE_LOCKED(rt);
+
+ return;
+usage:
+ db_printf("Usage: 'show route <address>'\n"
+ " Currently accepts only dotted-decimal INET or colon-separated\n"
+ " hextet INET6 addresses.\n");
+exit:
+ db_skip_to_eol(); /* discard the rest of the command line after a parse error or usage message */
+}
+#endif
diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h
index d38fcfc0..9fa465a1 100644
--- a/freebsd/sys/net/sff8472.h
+++ b/freebsd/sys/net/sff8472.h
@@ -379,7 +379,7 @@ enum {
/*
* Table 3.2 Identifier values.
- * Identifier constants has taken from SFF-8024 rev 4.2 table 4.1
+ * Identifier constants have been taken from SFF-8024 rev 4.6 table 4.1
* (as referenced by table 3.2 footer)
* */
enum {
@@ -396,10 +396,10 @@ enum {
SFF_8024_ID_X2 = 0xA, /* X2 */
SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
SFF_8024_ID_QSFP = 0xC, /* QSFP */
- SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ or later */
SFF_8024_ID_CXP = 0xE, /* CXP */
- SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
- SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 or later */
SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
@@ -408,34 +408,49 @@ enum {
SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
SFF_8024_ID_MICROQSFP = 0x17, /* microQSFP */
SFF_8024_ID_QSFP_DD = 0x18, /* QSFP-DD 8X Pluggable Transceiver */
- SFF_8024_ID_LAST = SFF_8024_ID_QSFP_DD
- };
-
-static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
- "GBIC",
- "SFF",
- "SFP/SFP+/SFP28",
- "XBI",
- "Xenpak",
- "XFP",
- "XFF",
- "XFP-E",
- "XPAK",
- "X2",
- "DWDM-SFP/SFP+",
- "QSFP",
- "QSFP+",
- "CXP",
- "HD4X",
- "HD8X",
- "QSFP28",
- "CXP2",
- "CDFP",
- "SMM4",
- "SMM8",
- "CDFP3",
- "microQSFP",
- "QSFP-DD"};
+ SFF_8024_ID_OSFP8X = 0x19, /* OSFP 8X Pluggable Transceiver */
+ SFF_8024_ID_SFP_DD = 0x1A, /* SFP-DD 2X Pluggable Transceiver */
+ SFF_8024_ID_DSFP = 0x1B, /* DSFP Dual SFF Pluggable Transceiver */
+ SFF_8024_ID_X4ML = 0x1C, /* x4 MiniLink/OcuLink */
+ SFF_8024_ID_X8ML = 0x1D, /* x8 MiniLink */
+ SFF_8024_ID_QSFP_CMIS = 0x1E, /* QSFP+ or later w/ Common Management
+ Interface Specification */
+ SFF_8024_ID_LAST = SFF_8024_ID_QSFP_CMIS
+};
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = { /* display names, indexed by SFF_8024_ID_* value */
+ "Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3",
+ "microQSFP",
+ "QSFP-DD",
+ "OSFP8X", /* 0x19 is SFF_8024_ID_OSFP8X: OSFP, not QSFP */
+ "SFP-DD",
+ "DSFP",
+ "x4MiniLink/OcuLink",
+ "x8MiniLink",
+ "QSFP+(CMIS)" /* CMIS = Common Management Interface Specification */
+};
/* Keep compatibility with old definitions */
#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index b4168750..a8c9887e 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -273,7 +273,8 @@ extern struct sx vnet_sxlock;
/* struct _hack is to stop this from being used with static data */
#define VNET_DEFINE(t, n) \
struct _hack; t VNET_NAME(n) __section(VNET_SETNAME) __used
-#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv))
+#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv) \
+ || defined(__powerpc64__))
/*
* As with DPCPU_DEFINE_STATIC we are unable to mark this data as static
* in modules on some architectures.
diff --git a/freebsd/sys/net80211/ieee80211.c b/freebsd/sys/net80211/ieee80211.c
index a81b5343..f003c769 100644
--- a/freebsd/sys/net80211/ieee80211.c
+++ b/freebsd/sys/net80211/ieee80211.c
@@ -407,8 +407,10 @@ ieee80211_ifdetach(struct ieee80211com *ic)
* The VAP is responsible for setting and clearing
* the VIMAGE context.
*/
- while ((vap = TAILQ_FIRST(&ic->ic_vaps)) != NULL)
+ while ((vap = TAILQ_FIRST(&ic->ic_vaps)) != NULL) {
+ ieee80211_com_vdetach(vap);
ieee80211_vap_destroy(vap);
+ }
ieee80211_waitfor_parent(ic);
ieee80211_sysctl_detach(ic);
@@ -1386,6 +1388,8 @@ getflags(const uint8_t bands[], uint32_t flags[], int ht40, int vht80)
/*
* Add one 20 MHz channel into specified channel list.
+ * You MUST NOT mix bands when calling this. It will not add 5ghz
+ * channels if you have any B/G/N band bit set.
*/
/* XXX VHT */
int
@@ -1632,6 +1636,17 @@ ieee80211_add_channel_list_2ghz(struct ieee80211_channel chans[], int maxchans,
}
int
+ieee80211_add_channels_default_2ghz(struct ieee80211_channel chans[],
+ int maxchans, int *nchans, const uint8_t bands[], int ht40)
+{ /* Convenience wrapper: forwards to ieee80211_add_channel_list_2ghz() with the fixed channel list 1..14 and returns its result. */
+ const uint8_t default_chan_list[] =
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
+
+ return (ieee80211_add_channel_list_2ghz(chans, maxchans, nchans,
+ default_chan_list, nitems(default_chan_list), bands, ht40));
+}
+
+int
ieee80211_add_channel_list_5ghz(struct ieee80211_channel chans[], int maxchans,
int *nchans, const uint8_t ieee[], int nieee, const uint8_t bands[],
int ht40)
diff --git a/freebsd/sys/net80211/ieee80211.h b/freebsd/sys/net80211/ieee80211.h
index db46b8f1..61389169 100644
--- a/freebsd/sys/net80211/ieee80211.h
+++ b/freebsd/sys/net80211/ieee80211.h
@@ -951,9 +951,11 @@ enum {
IEEE80211_ELEMID_ERP = 42,
IEEE80211_ELEMID_HTCAP = 45,
IEEE80211_ELEMID_QOS = 46,
+ IEEE80211_ELEMID_RESERVED_47 = 47,
IEEE80211_ELEMID_RSN = 48,
IEEE80211_ELEMID_XRATES = 50,
IEEE80211_ELEMID_APCHANREP = 51,
+ IEEE80211_ELEMID_MOBILITY_DOMAIN = 54,
IEEE80211_ELEMID_HTINFO = 61,
IEEE80211_ELEMID_SECCHAN_OFFSET = 62,
IEEE80211_ELEMID_RRM_ENACAPS = 70,
diff --git a/freebsd/sys/net80211/ieee80211_adhoc.c b/freebsd/sys/net80211/ieee80211_adhoc.c
index fdd02e46..3f3b6d1f 100644
--- a/freebsd/sys/net80211/ieee80211_adhoc.c
+++ b/freebsd/sys/net80211/ieee80211_adhoc.c
@@ -524,11 +524,9 @@ adhoc_input(struct ieee80211_node *ni, struct mbuf *m,
/*
* Save QoS bits for use below--before we strip the header.
*/
- if (subtype == IEEE80211_FC0_SUBTYPE_QOS) {
- qos = (dir == IEEE80211_FC1_DIR_DSTODS) ?
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] :
- ((struct ieee80211_qosframe *)wh)->i_qos[0];
- } else
+ if (subtype == IEEE80211_FC0_SUBTYPE_QOS)
+ qos = ieee80211_getqos(wh)[0];
+ else
qos = 0;
/*
diff --git a/freebsd/sys/net80211/ieee80211_amrr.c b/freebsd/sys/net80211/ieee80211_amrr.c
index 42e9ac1a..a827f470 100644
--- a/freebsd/sys/net80211/ieee80211_amrr.c
+++ b/freebsd/sys/net80211/ieee80211_amrr.c
@@ -104,12 +104,13 @@ static void
amrr_setinterval(const struct ieee80211vap *vap, int msecs)
{
struct ieee80211_amrr *amrr = vap->iv_rs;
- int t;
+
+ if (!amrr)
+ return;
if (msecs < 100)
msecs = 100;
- t = msecs_to_ticks(msecs);
- amrr->amrr_interval = (t < 1) ? 1 : t;
+ amrr->amrr_interval = msecs_to_ticks(msecs);
}
static void
@@ -168,6 +169,12 @@ amrr_node_init(struct ieee80211_node *ni)
struct ieee80211_amrr_node *amn;
uint8_t rate;
+ if (!amrr) {
+ if_printf(vap->iv_ifp, "ratectl structure was not allocated, "
+ "per-node structure allocation skipped\n");
+ return;
+ }
+
if (ni->ni_rctls == NULL) {
ni->ni_rctls = amn = IEEE80211_MALLOC(sizeof(struct ieee80211_amrr_node),
M_80211_RATECTL, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO);
@@ -329,10 +336,19 @@ static int
amrr_rate(struct ieee80211_node *ni, void *arg __unused, uint32_t iarg __unused)
{
struct ieee80211_amrr_node *amn = ni->ni_rctls;
- struct ieee80211_amrr *amrr = amn->amn_amrr;
+ struct ieee80211_amrr *amrr;
const struct ieee80211_rateset *rs = NULL;
int rix;
+ /* XXX should return -1 here, but drivers may not expect this... */
+ if (!amn)
+ {
+ ni->ni_txrate = ni->ni_rates.rs_rates[0];
+ return 0;
+ }
+
+ amrr = amn->amn_amrr;
+
/* 11n or not? Pick the right rateset */
if (amrr_node_is_11n(ni)) {
/* XXX ew */
@@ -371,6 +387,9 @@ amrr_tx_complete(const struct ieee80211_node *ni,
struct ieee80211_amrr_node *amn = ni->ni_rctls;
int retries;
+ if (!amn)
+ return;
+
retries = 0;
if (status->flags & IEEE80211_RATECTL_STATUS_LONG_RETRY)
retries = status->long_retries;
@@ -388,6 +407,9 @@ amrr_tx_update_cb(void *arg, struct ieee80211_node *ni)
struct ieee80211_amrr_node *amn = ni->ni_rctls;
int txcnt, success, retrycnt;
+ if (!amn)
+ return;
+
txcnt = stats->nframes;
success = stats->nsuccess;
retrycnt = 0;
@@ -422,9 +444,12 @@ amrr_sysctl_interval(SYSCTL_HANDLER_ARGS)
{
struct ieee80211vap *vap = arg1;
struct ieee80211_amrr *amrr = vap->iv_rs;
- int msecs = ticks_to_msecs(amrr->amrr_interval);
- int error;
+ int msecs, error;
+
+ if (!amrr)
+ return ENOMEM;
+ msecs = ticks_to_msecs(amrr->amrr_interval);
error = sysctl_handle_int(oidp, &msecs, 0, req);
if (error || !req->newptr)
return error;
@@ -438,6 +463,9 @@ amrr_sysctlattach(struct ieee80211vap *vap,
{
struct ieee80211_amrr *amrr = vap->iv_rs;
+ if (!amrr)
+ return;
+
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap,
0, amrr_sysctl_interval, "I", "amrr operation interval (ms)");
@@ -459,6 +487,9 @@ amrr_node_stats(struct ieee80211_node *ni, struct sbuf *s)
/* XXX TODO: check locking? */
+ if (!amn)
+ return;
+
/* XXX TODO: this should be a method */
if (amrr_node_is_11n(ni)) {
rs = (struct ieee80211_rateset *) &ni->ni_htrates;
diff --git a/freebsd/sys/net80211/ieee80211_crypto.c b/freebsd/sys/net80211/ieee80211_crypto.c
index 2e28538c..264e3f17 100644
--- a/freebsd/sys/net80211/ieee80211_crypto.c
+++ b/freebsd/sys/net80211/ieee80211_crypto.c
@@ -664,14 +664,15 @@ ieee80211_crypto_decap(struct ieee80211_node *ni, struct mbuf *m, int hdrlen,
k = &ni->ni_ucastkey;
/*
- * Insure crypto header is contiguous for all decap work.
+ * Insure crypto header is contiguous and long enough for all
+ * decap work.
*/
cip = k->wk_cipher;
- if (m->m_len < hdrlen + cip->ic_header &&
- (m = m_pullup(m, hdrlen + cip->ic_header)) == NULL) {
+ if (m->m_len < hdrlen + cip->ic_header) {
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2,
- "unable to pullup %s header", cip->ic_name);
- vap->iv_stats.is_rx_wepfail++; /* XXX */
+ "frame is too short (%d < %u) for crypto decap",
+ cip->ic_name, m->m_len, hdrlen + cip->ic_header);
+ vap->iv_stats.is_rx_tooshort++;
*key = NULL;
return (0);
}
diff --git a/freebsd/sys/net80211/ieee80211_dfs.c b/freebsd/sys/net80211/ieee80211_dfs.c
index 2c454516..119c11a7 100644
--- a/freebsd/sys/net80211/ieee80211_dfs.c
+++ b/freebsd/sys/net80211/ieee80211_dfs.c
@@ -158,8 +158,7 @@ cac_timeout(void *arg)
/* XXX clobbers any existing desired channel */
/* NB: dfs->newchan may be NULL, that's ok */
vap->iv_des_chan = dfs->newchan;
- /* XXX recursive lock need ieee80211_new_state_locked */
- ieee80211_new_state(vap, IEEE80211_S_SCAN, 0);
+ ieee80211_new_state_locked(vap, IEEE80211_S_SCAN, 0);
} else {
if_printf(vap->iv_ifp,
"CAC timer on channel %u (%u MHz) expired; "
diff --git a/freebsd/sys/net80211/ieee80211_freebsd.c b/freebsd/sys/net80211/ieee80211_freebsd.c
index 00430f77..f8d5d0f3 100644
--- a/freebsd/sys/net80211/ieee80211_freebsd.c
+++ b/freebsd/sys/net80211/ieee80211_freebsd.c
@@ -70,8 +70,6 @@ SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug,
0, "debugging printfs");
#endif
-static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state");
-
static const char wlanname[] = "wlan";
static struct if_clone *wlan_cloner;
@@ -138,13 +136,12 @@ int
ieee80211_sysctl_msecs_ticks(SYSCTL_HANDLER_ARGS)
{
int msecs = ticks_to_msecs(*(int *)arg1);
- int error, t;
+ int error;
error = sysctl_handle_int(oidp, &msecs, 0, req);
if (error || !req->newptr)
return error;
- t = msecs_to_ticks(msecs);
- *(int *)arg1 = (t < 1) ? 1 : t;
+ *(int *)arg1 = msecs_to_ticks(msecs);
return 0;
}
@@ -309,6 +306,52 @@ ieee80211_sysctl_vdetach(struct ieee80211vap *vap)
}
}
+#define MS(_v, _f) (((_v) & _f##_M) >> _f##_S)
+int
+ieee80211_com_vincref(struct ieee80211vap *vap)
+{
+ uint32_t ostate;
+
+ ostate = atomic_fetchadd_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD);
+
+ if (ostate & IEEE80211_COM_DETACHED) {
+ atomic_subtract_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD);
+ return (ENETDOWN);
+ }
+
+ if (MS(ostate, IEEE80211_COM_REF) == IEEE80211_COM_REF_MAX) {
+ atomic_subtract_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD);
+ return (EOVERFLOW);
+ }
+
+ return (0);
+}
+
+void
+ieee80211_com_vdecref(struct ieee80211vap *vap)
+{
+ uint32_t ostate;
+
+ ostate = atomic_fetchadd_32(&vap->iv_com_state, -IEEE80211_COM_REF_ADD);
+
+ KASSERT(MS(ostate, IEEE80211_COM_REF) != 0,
+ ("com reference counter underflow"));
+
+ (void) ostate;
+}
+
+void
+ieee80211_com_vdetach(struct ieee80211vap *vap)
+{
+ int sleep_time;
+
+ sleep_time = msecs_to_ticks(250);
+ atomic_set_32(&vap->iv_com_state, IEEE80211_COM_DETACHED);
+ while (MS(atomic_load_32(&vap->iv_com_state), IEEE80211_COM_REF) != 0)
+ pause("comref", sleep_time);
+}
+#undef MS
+
int
ieee80211_node_dectestref(struct ieee80211_node *ni)
{
diff --git a/freebsd/sys/net80211/ieee80211_freebsd.h b/freebsd/sys/net80211/ieee80211_freebsd.h
index 8395eb00..4e06b76a 100644
--- a/freebsd/sys/net80211/ieee80211_freebsd.h
+++ b/freebsd/sys/net80211/ieee80211_freebsd.h
@@ -38,6 +38,7 @@
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
+#include <sys/time.h>
/*
* Common state locking definitions.
@@ -224,6 +225,11 @@ typedef struct mtx ieee80211_rt_lock_t;
*/
#include <machine/atomic.h>
+struct ieee80211vap;
+int ieee80211_com_vincref(struct ieee80211vap *);
+void ieee80211_com_vdecref(struct ieee80211vap *);
+void ieee80211_com_vdetach(struct ieee80211vap *);
+
#define ieee80211_node_initref(_ni) \
do { ((_ni)->ni_refcnt = 1); } while (0)
#define ieee80211_node_incref(_ni) \
@@ -235,7 +241,6 @@ int ieee80211_node_dectestref(struct ieee80211_node *ni);
#define ieee80211_node_refcnt(_ni) (_ni)->ni_refcnt
struct ifqueue;
-struct ieee80211vap;
void ieee80211_drain_ifq(struct ifqueue *);
void ieee80211_flush_ifq(struct ifqueue *, struct ieee80211vap *);
@@ -245,9 +250,8 @@ void ieee80211_vap_destroy(struct ieee80211vap *);
(((_ifp)->if_flags & IFF_UP) && \
((_ifp)->if_drv_flags & IFF_DRV_RUNNING))
-/* XXX TODO: cap these at 1, as hz may not be 1000 */
-#define msecs_to_ticks(ms) (((ms)*hz)/1000)
-#define ticks_to_msecs(t) (1000*(t) / hz)
+#define msecs_to_ticks(ms) MSEC_2_TICKS(ms)
+#define ticks_to_msecs(t) TICKS_2_MSEC(t)
#define ticks_to_secs(t) ((t) / hz)
#define ieee80211_time_after(a,b) ((long)(b) - (long)(a) < 0)
diff --git a/freebsd/sys/net80211/ieee80211_hostap.c b/freebsd/sys/net80211/ieee80211_hostap.c
index 0cc43748..2671547f 100644
--- a/freebsd/sys/net80211/ieee80211_hostap.c
+++ b/freebsd/sys/net80211/ieee80211_hostap.c
@@ -710,11 +710,9 @@ hostap_input(struct ieee80211_node *ni, struct mbuf *m,
/*
* Save QoS bits for use below--before we strip the header.
*/
- if (subtype == IEEE80211_FC0_SUBTYPE_QOS) {
- qos = (dir == IEEE80211_FC1_DIR_DSTODS) ?
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] :
- ((struct ieee80211_qosframe *)wh)->i_qos[0];
- } else
+ if (subtype == IEEE80211_FC0_SUBTYPE_QOS)
+ qos = ieee80211_getqos(wh)[0];
+ else
qos = 0;
/*
diff --git a/freebsd/sys/net80211/ieee80211_ht.c b/freebsd/sys/net80211/ieee80211_ht.c
index c6a3a200..da294488 100644
--- a/freebsd/sys/net80211/ieee80211_ht.c
+++ b/freebsd/sys/net80211/ieee80211_ht.c
@@ -888,10 +888,7 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m,
if (IEEE80211_IS_MULTICAST(wh->i_addr1))
return PROCESS;
- if (IEEE80211_IS_DSTODS(wh))
- tid = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0];
- else
- tid = wh->i_qos[0];
+ tid = ieee80211_getqos(wh)[0];
tid &= IEEE80211_QOS_TID;
rap = &ni->ni_rx_ampdu[tid];
if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) {
@@ -1732,7 +1729,7 @@ ieee80211_ht_updateparams(struct ieee80211_node *ni,
const struct ieee80211_ie_htinfo *htinfo;
ieee80211_parse_htcap(ni, htcapie);
- if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS)
+ if (vap->iv_htcaps & IEEE80211_HTC_SMPS)
htcap_update_mimo_ps(ni);
htcap_update_shortgi(ni);
htcap_update_ldpc(ni);
@@ -1885,7 +1882,7 @@ ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie)
struct ieee80211vap *vap = ni->ni_vap;
ieee80211_parse_htcap(ni, htcapie);
- if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS)
+ if (vap->iv_htcaps & IEEE80211_HTC_SMPS)
htcap_update_mimo_ps(ni);
htcap_update_shortgi(ni);
htcap_update_ldpc(ni);
diff --git a/freebsd/sys/net80211/ieee80211_hwmp.c b/freebsd/sys/net80211/ieee80211_hwmp.c
index b8950c5e..681ff7b5 100644
--- a/freebsd/sys/net80211/ieee80211_hwmp.c
+++ b/freebsd/sys/net80211/ieee80211_hwmp.c
@@ -2017,6 +2017,7 @@ done:
*/
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest,
"%s", "queue frame until path found");
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, dest);
/* XXX age chosen randomly */
diff --git a/freebsd/sys/net80211/ieee80211_ioctl.c b/freebsd/sys/net80211/ieee80211_ioctl.c
index 52712514..0396e8be 100644
--- a/freebsd/sys/net80211/ieee80211_ioctl.c
+++ b/freebsd/sys/net80211/ieee80211_ioctl.c
@@ -2206,18 +2206,6 @@ ieee80211_ioctl_setregdomain(struct ieee80211vap *vap,
}
static int
-ieee80211_ioctl_setroam(struct ieee80211vap *vap,
- const struct ieee80211req *ireq)
-{
- if (ireq->i_len != sizeof(vap->iv_roamparms))
- return EINVAL;
- /* XXX validate params */
- /* XXX? ENETRESET to push to device? */
- return copyin(ireq->i_data, vap->iv_roamparms,
- sizeof(vap->iv_roamparms));
-}
-
-static int
checkrate(const struct ieee80211_rateset *rs, int rate)
{
int i;
@@ -2247,6 +2235,73 @@ checkmcs(const struct ieee80211_htrateset *rs, int mcs)
}
static int
+ieee80211_ioctl_setroam(struct ieee80211vap *vap,
+ const struct ieee80211req *ireq)
+{
+ struct ieee80211com *ic = vap->iv_ic;
+ struct ieee80211_roamparams_req *parms;
+ struct ieee80211_roamparam *src, *dst;
+ const struct ieee80211_htrateset *rs_ht;
+ const struct ieee80211_rateset *rs;
+ int changed, error, mode, is11n, nmodes;
+
+ if (ireq->i_len != sizeof(vap->iv_roamparms))
+ return EINVAL;
+
+ parms = IEEE80211_MALLOC(sizeof(*parms), M_TEMP,
+ IEEE80211_M_NOWAIT | IEEE80211_M_ZERO);
+ if (parms == NULL)
+ return ENOMEM;
+
+ error = copyin(ireq->i_data, parms, ireq->i_len);
+ if (error != 0)
+ goto fail;
+
+ changed = 0;
+ nmodes = IEEE80211_MODE_MAX;
+
+ /* validate parameters and check if anything changed */
+ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) {
+ if (isclr(ic->ic_modecaps, mode))
+ continue;
+ src = &parms->params[mode];
+ dst = &vap->iv_roamparms[mode];
+ rs = &ic->ic_sup_rates[mode]; /* NB: 11n maps to legacy */
+ rs_ht = &ic->ic_sup_htrates;
+ is11n = (mode == IEEE80211_MODE_11NA ||
+ mode == IEEE80211_MODE_11NG);
+ /* XXX TODO: 11ac */
+ if (src->rate != dst->rate) {
+ if (!checkrate(rs, src->rate) &&
+ (!is11n || !checkmcs(rs_ht, src->rate))) {
+ error = EINVAL;
+ goto fail;
+ }
+ changed++;
+ }
+ if (src->rssi != dst->rssi)
+ changed++;
+ }
+ if (changed) {
+ /*
+ * Copy new parameters in place and notify the
+ * driver so it can push state to the device.
+ */
+ /* XXX locking? */
+ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) {
+ if (isset(ic->ic_modecaps, mode))
+ vap->iv_roamparms[mode] = parms->params[mode];
+ }
+
+ if (vap->iv_roaming == IEEE80211_ROAMING_DEVICE)
+ error = ERESTART;
+ }
+
+fail: IEEE80211_FREE(parms, M_TEMP);
+ return error;
+}
+
+static int
ieee80211_ioctl_settxparams(struct ieee80211vap *vap,
const struct ieee80211req *ireq)
{
@@ -2517,20 +2572,12 @@ ieee80211_scanreq(struct ieee80211vap *vap, struct ieee80211_scan_req *sr)
sr->sr_duration > IEEE80211_IOC_SCAN_DURATION_MAX)
return EINVAL;
sr->sr_duration = msecs_to_ticks(sr->sr_duration);
- if (sr->sr_duration < 1)
- sr->sr_duration = 1;
}
/* convert min/max channel dwell */
- if (sr->sr_mindwell != 0) {
+ if (sr->sr_mindwell != 0)
sr->sr_mindwell = msecs_to_ticks(sr->sr_mindwell);
- if (sr->sr_mindwell < 1)
- sr->sr_mindwell = 1;
- }
- if (sr->sr_maxdwell != 0) {
+ if (sr->sr_maxdwell != 0)
sr->sr_maxdwell = msecs_to_ticks(sr->sr_maxdwell);
- if (sr->sr_maxdwell < 1)
- sr->sr_maxdwell = 1;
- }
/* NB: silently reduce ssid count to what is supported */
if (sr->sr_nssid > IEEE80211_SCAN_MAX_SSID)
sr->sr_nssid = IEEE80211_SCAN_MAX_SSID;
@@ -3482,10 +3529,14 @@ ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ieee80211vap *vap = ifp->if_softc;
struct ieee80211com *ic = vap->iv_ic;
- int error = 0, wait = 0;
+ int error = 0, wait = 0, ic_used;
struct ifreq *ifr;
struct ifaddr *ifa; /* XXX */
+ ic_used = (cmd != SIOCSIFMTU && cmd != SIOCG80211STATS);
+ if (ic_used && (error = ieee80211_com_vincref(vap)) != 0)
+ return (error);
+
switch (cmd) {
case SIOCSIFFLAGS:
IEEE80211_LOCK(ic);
@@ -3539,9 +3590,13 @@ ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
/*
* Check if the MAC address was changed
* via SIOCSIFLLADDR ioctl.
+ *
+ * NB: device may be detached during initialization;
+ * use if_ioctl for existence check.
*/
if_addr_rlock(ifp);
- if ((ifp->if_flags & IFF_UP) == 0 &&
+ if (ifp->if_ioctl == ieee80211_ioctl &&
+ (ifp->if_flags & IFF_UP) == 0 &&
!IEEE80211_ADDR_EQ(vap->iv_myaddr, IF_LLADDR(ifp)))
IEEE80211_ADDR_COPY(vap->iv_myaddr,
IF_LLADDR(ifp));
@@ -3618,5 +3673,9 @@ ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ether_ioctl(ifp, cmd, data);
break;
}
+
+ if (ic_used)
+ ieee80211_com_vdecref(vap);
+
return (error);
}
diff --git a/freebsd/sys/net80211/ieee80211_mesh.c b/freebsd/sys/net80211/ieee80211_mesh.c
index f747c214..5a83b630 100644
--- a/freebsd/sys/net80211/ieee80211_mesh.c
+++ b/freebsd/sys/net80211/ieee80211_mesh.c
@@ -1227,6 +1227,7 @@ mesh_forward(struct ieee80211vap *vap, struct mbuf *m,
M_WME_SETAC(mcopy, WME_AC_BE);
/* XXX do we know m_nextpkt is NULL? */
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
/*
@@ -1657,12 +1658,7 @@ mesh_input(struct ieee80211_node *ni, struct mbuf *m,
* in the Mesh Control field and a 3 address qos frame
* is used.
*/
- if (IEEE80211_IS_DSTODS(wh))
- *(uint16_t *)qos = *(uint16_t *)
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos;
- else
- *(uint16_t *)qos = *(uint16_t *)
- ((struct ieee80211_qosframe *)wh)->i_qos;
+ *(uint16_t *)qos = *(uint16_t *)ieee80211_getqos(wh);
/*
* NB: The mesh STA sets the Mesh Control Present
diff --git a/freebsd/sys/net80211/ieee80211_output.c b/freebsd/sys/net80211/ieee80211_output.c
index 06fca965..57ea67d3 100644
--- a/freebsd/sys/net80211/ieee80211_output.c
+++ b/freebsd/sys/net80211/ieee80211_output.c
@@ -165,6 +165,7 @@ ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m,
* uses any existing value for rcvif to identify the
* interface it (might have been) received on.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0;
@@ -530,6 +531,7 @@ ieee80211_raw_output(struct ieee80211vap *vap, struct ieee80211_node *ni,
* that the mbuf has the same node value that
* it would if it were going via the normal path.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
/*
@@ -606,6 +608,97 @@ ieee80211_validate_frame(struct mbuf *m,
return (0);
}
+static int
+ieee80211_validate_rate(struct ieee80211_node *ni, uint8_t rate)
+{
+ struct ieee80211com *ic = ni->ni_ic;
+
+ if (IEEE80211_IS_HT_RATE(rate)) {
+ if ((ic->ic_htcaps & IEEE80211_HTC_HT) == 0)
+ return (EINVAL);
+
+ rate = IEEE80211_RV(rate);
+ if (rate <= 31) {
+ if (rate > ic->ic_txstream * 8 - 1)
+ return (EINVAL);
+
+ return (0);
+ }
+
+ if (rate == 32) {
+ if ((ic->ic_htcaps & IEEE80211_HTC_TXMCS32) == 0)
+ return (EINVAL);
+
+ return (0);
+ }
+
+ if ((ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) == 0)
+ return (EINVAL);
+
+ switch (ic->ic_txstream) {
+ case 0:
+ case 1:
+ return (EINVAL);
+ case 2:
+ if (rate > 38)
+ return (EINVAL);
+
+ return (0);
+ case 3:
+ if (rate > 52)
+ return (EINVAL);
+
+ return (0);
+ case 4:
+ default:
+ if (rate > 76)
+ return (EINVAL);
+
+ return (0);
+ }
+ }
+
+ if (!ieee80211_isratevalid(ic->ic_rt, rate))
+ return (EINVAL);
+
+ return (0);
+}
+
+static int
+ieee80211_sanitize_rates(struct ieee80211_node *ni, struct mbuf *m,
+ const struct ieee80211_bpf_params *params)
+{
+ int error;
+
+ if (!params)
+ return (0); /* nothing to do */
+
+ /* NB: most drivers assume that ibp_rate0 is set (!= 0). */
+ if (params->ibp_rate0 != 0) {
+ error = ieee80211_validate_rate(ni, params->ibp_rate0);
+ if (error != 0)
+ return (error);
+ } else {
+ /* XXX pre-setup some default (e.g., mgmt / mcast) rate */
+ /* XXX __DECONST? */
+ (void) m;
+ }
+
+ if (params->ibp_rate1 != 0 &&
+ (error = ieee80211_validate_rate(ni, params->ibp_rate1)) != 0)
+ return (error);
+
+ if (params->ibp_rate2 != 0 &&
+ (error = ieee80211_validate_rate(ni, params->ibp_rate2)) != 0)
+ return (error);
+
+ if (params->ibp_rate3 != 0 &&
+ (error = ieee80211_validate_rate(ni, params->ibp_rate3)) != 0)
+ return (error);
+
+ return (0);
+}
+
/*
* 802.11 output routine. This is (currently) used only to
* connect bpf write calls to the 802.11 layer for injecting
@@ -720,6 +813,10 @@ ieee80211_output(struct ifnet *ifp, struct mbuf *m,
} else
M_WME_SETAC(m, WME_AC_BE);
+ error = ieee80211_sanitize_rates(ni, m, params);
+ if (error != 0)
+ senderr(error);
+
IEEE80211_NODE_STAT(ni, tx_data);
if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
IEEE80211_NODE_STAT(ni, tx_mcast);
@@ -1700,7 +1797,6 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni,
* capability; this may also change when we pull
* aggregation up into net80211
*/
- seqno = ni->ni_txseqs[tid]++;
*(uint16_t *)wh->i_seq =
htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
M_SEQNO_SET(m, seqno);
@@ -1856,14 +1952,8 @@ ieee80211_fragment(struct ieee80211vap *vap, struct mbuf *m0,
whf = mtod(m, struct ieee80211_frame *);
memcpy(whf, wh, hdrsize);
#ifdef IEEE80211_SUPPORT_MESH
- if (vap->iv_opmode == IEEE80211_M_MBSS) {
- if (IEEE80211_IS_DSTODS(wh))
- ((struct ieee80211_qosframe_addr4 *)
- whf)->i_qos[1] &= ~IEEE80211_QOS_MC;
- else
- ((struct ieee80211_qosframe *)
- whf)->i_qos[1] &= ~IEEE80211_QOS_MC;
- }
+ if (vap->iv_opmode == IEEE80211_M_MBSS)
+ ieee80211_getqos(wh)[1] &= ~IEEE80211_QOS_MC;
#endif
*(uint16_t *)&whf->i_seq[0] |= htole16(
(fragno & IEEE80211_SEQ_FRAG_MASK) <<
diff --git a/freebsd/sys/net80211/ieee80211_proto.c b/freebsd/sys/net80211/ieee80211_proto.c
index 129e11e2..c62d0875 100644
--- a/freebsd/sys/net80211/ieee80211_proto.c
+++ b/freebsd/sys/net80211/ieee80211_proto.c
@@ -349,6 +349,9 @@ ieee80211_proto_vattach(struct ieee80211vap *vap)
* driver and/or user applications.
*/
for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++) {
+ if (isclr(ic->ic_modecaps, i))
+ continue;
+
const struct ieee80211_rateset *rs = &ic->ic_sup_rates[i];
vap->iv_txparms[i].ucastrate = IEEE80211_FIXED_RATE_NONE;
diff --git a/freebsd/sys/net80211/ieee80211_proto.h b/freebsd/sys/net80211/ieee80211_proto.h
index c1637c57..717de30d 100644
--- a/freebsd/sys/net80211/ieee80211_proto.h
+++ b/freebsd/sys/net80211/ieee80211_proto.h
@@ -303,6 +303,22 @@ void ieee80211_wme_ic_getparams(struct ieee80211com *ic,
int ieee80211_wme_vap_ac_is_noack(struct ieee80211vap *vap, int ac);
/*
+ * Return pointer to the QoS field from a Qos frame.
+ */
+static __inline uint8_t *
+ieee80211_getqos(void *data)
+{
+ struct ieee80211_frame *wh = data;
+
+ KASSERT(IEEE80211_QOS_HAS_SEQ(wh), ("QoS field is absent!"));
+
+ if (IEEE80211_IS_DSTODS(wh))
+ return (((struct ieee80211_qosframe_addr4 *)wh)->i_qos);
+ else
+ return (((struct ieee80211_qosframe *)wh)->i_qos);
+}
+
+/*
* Return the WME TID from a QoS frame. If no TID
* is present return the index for the "non-QoS" entry.
*/
diff --git a/freebsd/sys/net80211/ieee80211_rssadapt.c b/freebsd/sys/net80211/ieee80211_rssadapt.c
index f05af298..52f81a99 100644
--- a/freebsd/sys/net80211/ieee80211_rssadapt.c
+++ b/freebsd/sys/net80211/ieee80211_rssadapt.c
@@ -119,12 +119,13 @@ static void
rssadapt_setinterval(const struct ieee80211vap *vap, int msecs)
{
struct ieee80211_rssadapt *rs = vap->iv_rs;
- int t;
+
+ if (!rs)
+ return;
if (msecs < 100)
msecs = 100;
- t = msecs_to_ticks(msecs);
- rs->interval = (t < 1) ? 1 : t;
+ rs->interval = msecs_to_ticks(msecs);
}
static void
@@ -179,6 +180,12 @@ rssadapt_node_init(struct ieee80211_node *ni)
struct ieee80211_rssadapt *rsa = vap->iv_rs;
const struct ieee80211_rateset *rs = &ni->ni_rates;
+ if (!rsa) {
+ if_printf(vap->iv_ifp, "ratectl structure was not allocated, "
+ "per-node structure allocation skipped\n");
+ return;
+ }
+
if (ni->ni_rctls == NULL) {
ni->ni_rctls = ra =
IEEE80211_MALLOC(sizeof(struct ieee80211_rssadapt_node),
@@ -233,10 +240,18 @@ rssadapt_rate(struct ieee80211_node *ni, void *arg __unused, uint32_t iarg)
{
struct ieee80211_rssadapt_node *ra = ni->ni_rctls;
u_int pktlen = iarg;
- const struct ieee80211_rateset *rs = &ra->ra_rates;
+ const struct ieee80211_rateset *rs;
uint16_t (*thrs)[IEEE80211_RATE_SIZE];
int rix, rssi;
+ /* XXX should return -1 here, but drivers may not expect this... */
+ if (!ra)
+ {
+ ni->ni_txrate = ni->ni_rates.rs_rates[0];
+ return 0;
+ }
+
+ rs = &ra->ra_rates;
if ((ticks - ra->ra_ticks) > ra->ra_rs->interval) {
rssadapt_updatestats(ra);
ra->ra_ticks = ticks;
@@ -322,6 +337,9 @@ rssadapt_tx_complete(const struct ieee80211_node *ni,
struct ieee80211_rssadapt_node *ra = ni->ni_rctls;
int pktlen, rssi;
+ if (!ra)
+ return;
+
if ((status->flags &
(IEEE80211_RATECTL_STATUS_PKTLEN|IEEE80211_RATECTL_STATUS_RSSI)) !=
(IEEE80211_RATECTL_STATUS_PKTLEN|IEEE80211_RATECTL_STATUS_RSSI))
@@ -346,9 +364,12 @@ rssadapt_sysctl_interval(SYSCTL_HANDLER_ARGS)
{
struct ieee80211vap *vap = arg1;
struct ieee80211_rssadapt *rs = vap->iv_rs;
- int msecs = ticks_to_msecs(rs->interval);
- int error;
+ int msecs, error;
+
+ if (!rs)
+ return ENOMEM;
+ msecs = ticks_to_msecs(rs->interval);
error = sysctl_handle_int(oidp, &msecs, 0, req);
if (error || !req->newptr)
return error;
diff --git a/freebsd/sys/net80211/ieee80211_scan.c b/freebsd/sys/net80211/ieee80211_scan.c
index 9b58ff98..c6f03fec 100644
--- a/freebsd/sys/net80211/ieee80211_scan.c
+++ b/freebsd/sys/net80211/ieee80211_scan.c
@@ -132,13 +132,21 @@ void
ieee80211_scan_vattach(struct ieee80211vap *vap)
{
struct ieee80211com *ic = vap->iv_ic;
+ int m;
vap->iv_bgscanidle = (IEEE80211_BGSCAN_IDLE_DEFAULT*1000)/hz;
vap->iv_bgscanintvl = IEEE80211_BGSCAN_INTVAL_DEFAULT*hz;
vap->iv_scanvalid = IEEE80211_SCAN_VALID_DEFAULT*hz;
vap->iv_roaming = IEEE80211_ROAMING_AUTO;
- memcpy(vap->iv_roamparms, defroam, sizeof(defroam));
+
+ memset(vap->iv_roamparms, 0, sizeof(vap->iv_roamparms));
+ for (m = IEEE80211_MODE_AUTO + 1; m < IEEE80211_MODE_MAX; m++) {
+ if (isclr(ic->ic_modecaps, m))
+ continue;
+
+ memcpy(&vap->iv_roamparms[m], &defroam[m], sizeof(defroam[m]));
+ }
ic->ic_scan_methods->sc_vattach(vap);
}
@@ -296,7 +304,7 @@ ieee80211_scan_dump(struct ieee80211_scan_state *ss)
if_printf(vap->iv_ifp, "scan set ");
ieee80211_scan_dump_channels(ss);
- printf(" dwell min %lums max %lums\n",
+ printf(" dwell min %ums max %ums\n",
ticks_to_msecs(ss->ss_mindwell), ticks_to_msecs(ss->ss_maxdwell));
}
#endif /* IEEE80211_DEBUG */
diff --git a/freebsd/sys/net80211/ieee80211_scan_sta.c b/freebsd/sys/net80211/ieee80211_scan_sta.c
index a7a1fc29..8dda8499 100644
--- a/freebsd/sys/net80211/ieee80211_scan_sta.c
+++ b/freebsd/sys/net80211/ieee80211_scan_sta.c
@@ -474,6 +474,8 @@ static const u_int chanflags[IEEE80211_MODE_MAX] = {
/* check legacy */
[IEEE80211_MODE_11NA] = IEEE80211_CHAN_A,
[IEEE80211_MODE_11NG] = IEEE80211_CHAN_G,
+ [IEEE80211_MODE_VHT_5GHZ] = IEEE80211_CHAN_A,
+ [IEEE80211_MODE_VHT_2GHZ] = IEEE80211_CHAN_G,
};
static void
@@ -496,12 +498,15 @@ add_channels(struct ieee80211vap *vap,
if (c == NULL || isexcluded(vap, c))
continue;
if (mode == IEEE80211_MODE_AUTO) {
+ KASSERT(IEEE80211_IS_CHAN_B(c),
+ ("%s: wrong channel for 'auto' mode %u / %u\n",
+ __func__, c->ic_freq, c->ic_flags));
+
/*
* XXX special-case 11b/g channels so we select
* the g channel if both are present.
*/
- if (IEEE80211_IS_CHAN_B(c) &&
- (cg = find11gchannel(ic, i, c->ic_freq)) != NULL)
+ if ((cg = find11gchannel(ic, i, c->ic_freq)) != NULL)
c = cg;
}
ss->ss_chans[ss->ss_last++] = c;
@@ -620,32 +625,48 @@ makescanlist(struct ieee80211_scan_state *ss, struct ieee80211vap *vap,
*/
for (scan = table; scan->list != NULL; scan++) {
mode = scan->mode;
- if (vap->iv_des_mode != IEEE80211_MODE_AUTO) {
+
+ switch (mode) {
+ case IEEE80211_MODE_11B:
+ if (vap->iv_des_mode == IEEE80211_MODE_11B)
+ break;
+
/*
- * If a desired mode was specified, scan only
- * channels that satisfy that constraint.
+ * The scan table marks 2.4Ghz channels as b
+ * so if the desired mode is 11g / 11ng / 11acg,
+ * then use the 11b channel list but upgrade the mode.
+ *
+ * NB: 11b -> AUTO lets add_channels upgrade an
+ * 11b channel to 11g if available.
*/
- if (vap->iv_des_mode != mode) {
- /*
- * The scan table marks 2.4Ghz channels as b
- * so if the desired mode is 11g, then use
- * the 11b channel list but upgrade the mode.
- */
- if (vap->iv_des_mode == IEEE80211_MODE_11G) {
- if (mode == IEEE80211_MODE_11G) /* Skip the G check */
- continue;
- else if (mode == IEEE80211_MODE_11B)
- mode = IEEE80211_MODE_11G; /* upgrade */
- }
+ if (vap->iv_des_mode == IEEE80211_MODE_AUTO ||
+ vap->iv_des_mode == IEEE80211_MODE_11G ||
+ vap->iv_des_mode == IEEE80211_MODE_11NG ||
+ vap->iv_des_mode == IEEE80211_MODE_VHT_2GHZ) {
+ mode = vap->iv_des_mode;
+ break;
}
- } else {
+
+ continue;
+ case IEEE80211_MODE_11A:
+ /* Use 11a channel list for 11na / 11ac modes */
+ if (vap->iv_des_mode == IEEE80211_MODE_11NA ||
+ vap->iv_des_mode == IEEE80211_MODE_VHT_5GHZ) {
+ mode = vap->iv_des_mode;
+ break;
+ }
+
+ /* FALLTHROUGH */
+ default:
/*
- * This lets add_channels upgrade an 11b channel
- * to 11g if available.
+ * If a desired mode was specified, scan only
+ * channels that satisfy that constraint.
*/
- if (mode == IEEE80211_MODE_11B)
- mode = IEEE80211_MODE_AUTO;
+ if (vap->iv_des_mode != IEEE80211_MODE_AUTO &&
+ vap->iv_des_mode != mode)
+ continue;
}
+
#ifdef IEEE80211_F_XR
/* XR does not operate on turbo channels */
if ((vap->iv_flags & IEEE80211_F_XR) &&
@@ -1335,12 +1356,14 @@ sta_roam_check(struct ieee80211_scan_state *ss, struct ieee80211vap *vap)
mode = ieee80211_chan2mode(ic->ic_bsschan);
roamRate = vap->iv_roamparms[mode].rate;
roamRssi = vap->iv_roamparms[mode].rssi;
+ KASSERT(roamRate != 0 && roamRssi != 0, ("iv_roamparms are not"
+ "initialized for %s mode!", ieee80211_phymode_name[mode]));
+
ucastRate = vap->iv_txparms[mode].ucastrate;
/* NB: the most up to date rssi is in the node, not the scan cache */
curRssi = ic->ic_node_getrssi(ni);
if (ucastRate == IEEE80211_FIXED_RATE_NONE) {
curRate = ni->ni_txrate;
- roamRate &= IEEE80211_RATE_VAL;
IEEE80211_DPRINTF(vap, IEEE80211_MSG_ROAM,
"%s: currssi %d currate %u roamrssi %d roamrate %u\n",
__func__, curRssi, curRate, roamRssi, roamRate);
diff --git a/freebsd/sys/net80211/ieee80211_sta.c b/freebsd/sys/net80211/ieee80211_sta.c
index 1993f566..3dc9ee16 100644
--- a/freebsd/sys/net80211/ieee80211_sta.c
+++ b/freebsd/sys/net80211/ieee80211_sta.c
@@ -788,11 +788,9 @@ sta_input(struct ieee80211_node *ni, struct mbuf *m,
/*
* Save QoS bits for use below--before we strip the header.
*/
- if (subtype == IEEE80211_FC0_SUBTYPE_QOS) {
- qos = (dir == IEEE80211_FC1_DIR_DSTODS) ?
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] :
- ((struct ieee80211_qosframe *)wh)->i_qos[0];
- } else
+ if (subtype == IEEE80211_FC0_SUBTYPE_QOS)
+ qos = ieee80211_getqos(wh)[0];
+ else
qos = 0;
/*
diff --git a/freebsd/sys/net80211/ieee80211_tdma.c b/freebsd/sys/net80211/ieee80211_tdma.c
index b18803fc..c675af9b 100644
--- a/freebsd/sys/net80211/ieee80211_tdma.c
+++ b/freebsd/sys/net80211/ieee80211_tdma.c
@@ -129,6 +129,9 @@ static int tdma_process_params(struct ieee80211_node *ni,
static void
settxparms(struct ieee80211vap *vap, enum ieee80211_phymode mode, int rate)
{
+ if (isclr(vap->iv_ic->ic_modecaps, mode))
+ return;
+
vap->iv_txparms[mode].ucastrate = rate;
vap->iv_txparms[mode].mcastrate = rate;
}
diff --git a/freebsd/sys/net80211/ieee80211_var.h b/freebsd/sys/net80211/ieee80211_var.h
index ee17c806..24ffbe10 100644
--- a/freebsd/sys/net80211/ieee80211_var.h
+++ b/freebsd/sys/net80211/ieee80211_var.h
@@ -400,6 +400,7 @@ struct ieee80211vap {
uint32_t iv_caps; /* capabilities */
uint32_t iv_htcaps; /* HT capabilities */
uint32_t iv_htextcaps; /* HT extended capabilities */
+ uint32_t iv_com_state; /* com usage / detached flag */
enum ieee80211_opmode iv_opmode; /* operation mode */
enum ieee80211_state iv_state; /* state machine state */
enum ieee80211_state iv_nstate; /* pending state */
@@ -685,6 +686,12 @@ MALLOC_DECLARE(M_80211_VAP);
#define IEEE80211_VFHT_BITS \
"\20\1VHT\2VHT40\3VHT80\4VHT80P80\5VHT160"
+#define IEEE80211_COM_DETACHED 0x00000001 /* ieee80211_ifdetach called */
+#define IEEE80211_COM_REF_ADD 0x00000002 /* add / remove reference */
+#define IEEE80211_COM_REF_M 0xfffffffe /* reference counter bits */
+#define IEEE80211_COM_REF_S 1
+#define IEEE80211_COM_REF_MAX (IEEE80211_COM_REF_M >> IEEE80211_COM_REF_S)
+
int ic_printf(struct ieee80211com *, const char *, ...) __printflike(2, 3);
void ieee80211_ifattach(struct ieee80211com *);
void ieee80211_ifdetach(struct ieee80211com *);
@@ -727,6 +734,8 @@ uint32_t ieee80211_get_channel_center_freq1(const struct ieee80211_channel *);
uint32_t ieee80211_get_channel_center_freq2(const struct ieee80211_channel *);
int ieee80211_add_channel_list_2ghz(struct ieee80211_channel[], int, int *,
const uint8_t[], int, const uint8_t[], int);
+int ieee80211_add_channels_default_2ghz(struct ieee80211_channel[], int,
+ int *, const uint8_t[], int);
int ieee80211_add_channel_list_5ghz(struct ieee80211_channel[], int, int *,
const uint8_t[], int, const uint8_t[], int);
struct ieee80211_channel *ieee80211_find_channel(struct ieee80211com *,
diff --git a/freebsd/sys/net80211/ieee80211_wds.c b/freebsd/sys/net80211/ieee80211_wds.c
index 1bceacef..3b57c6a3 100644
--- a/freebsd/sys/net80211/ieee80211_wds.c
+++ b/freebsd/sys/net80211/ieee80211_wds.c
@@ -301,6 +301,7 @@ ieee80211_dwds_mcast(struct ieee80211vap *vap0, struct mbuf *m)
continue;
}
mcopy->m_flags |= M_MCAST;
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
err = ieee80211_parent_xmitpkt(ic, mcopy);
@@ -334,6 +335,7 @@ ieee80211_dwds_discover(struct ieee80211_node *ni, struct mbuf *m)
* XXX handle overflow?
* XXX per/vap beacon interval?
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, ni->ni_macaddr);
(void) ieee80211_ageq_append(&ic->ic_stageq, m,
@@ -585,11 +587,9 @@ wds_input(struct ieee80211_node *ni, struct mbuf *m,
/*
* Save QoS bits for use below--before we strip the header.
*/
- if (subtype == IEEE80211_FC0_SUBTYPE_QOS) {
- qos = (dir == IEEE80211_FC1_DIR_DSTODS) ?
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] :
- ((struct ieee80211_qosframe *)wh)->i_qos[0];
- } else
+ if (subtype == IEEE80211_FC0_SUBTYPE_QOS)
+ qos = ieee80211_getqos(wh)[0];
+ else
qos = 0;
/*
diff --git a/freebsd/sys/net80211/ieee80211_wps.h b/freebsd/sys/net80211/ieee80211_wps.h
new file mode 100644
index 00000000..32cc667e
--- /dev/null
+++ b/freebsd/sys/net80211/ieee80211_wps.h
@@ -0,0 +1,149 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017 J.R. Oldroyd, Open Advisors Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _NET80211_IEEE80211_WPS_H_
+#define _NET80211_IEEE80211_WPS_H_
+
+/*
+ * 802.11 WPS implementation definitions.
+ */
+
+#define IEEE80211_WPS_ATTR_AP_CHANNEL 0x1001
+#define IEEE80211_WPS_ATTR_ASSOC_STATE 0x1002
+#define IEEE80211_WPS_ATTR_AUTH_TYPE 0x1003
+#define IEEE80211_WPS_ATTR_AUTH_TYPE_FLAGS 0x1004
+#define IEEE80211_WPS_ATTR_AUTHENTICATOR 0x1005
+#define IEEE80211_WPS_ATTR_CONFIG_METHODS 0x1008
+#define IEEE80211_WPS_ATTR_CONFIG_ERROR 0x1009
+#define IEEE80211_WPS_ATTR_CONFIRM_URL4 0x100a
+#define IEEE80211_WPS_ATTR_CONFIRM_URL6 0x100b
+#define IEEE80211_WPS_ATTR_CONN_TYPE 0x100c
+#define IEEE80211_WPS_ATTR_CONN_TYPE_FLAGS 0x100d
+#define IEEE80211_WPS_ATTR_CRED 0x100e
+#define IEEE80211_WPS_ATTR_ENCR_TYPE 0x100f
+#define IEEE80211_WPS_ATTR_ENCR_TYPE_FLAGS 0x1010
+#define IEEE80211_WPS_ATTR_DEV_NAME 0x1011
+#define IEEE80211_WPS_ATTR_DEV_PASSWORD_ID 0x1012
+#define IEEE80211_WPS_ATTR_E_HASH1 0x1014
+#define IEEE80211_WPS_ATTR_E_HASH2 0x1015
+#define IEEE80211_WPS_ATTR_E_SNONCE1 0x1016
+#define IEEE80211_WPS_ATTR_E_SNONCE2 0x1017
+#define IEEE80211_WPS_ATTR_ENCR_SETTINGS 0x1018
+#define IEEE80211_WPS_ATTR_ENROLLEE_NONCE 0x101a
+#define IEEE80211_WPS_ATTR_FEATURE_ID 0x101b
+#define IEEE80211_WPS_ATTR_IDENTITY 0x101c
+#define IEEE80211_WPS_ATTR_IDENTITY_PROOF 0x101d
+#define IEEE80211_WPS_ATTR_KEY_WRAP_AUTH 0x101e
+#define IEEE80211_WPS_ATTR_KEY_ID 0x101f
+#define IEEE80211_WPS_ATTR_MAC_ADDR 0x1020
+#define IEEE80211_WPS_ATTR_MANUFACTURER 0x1021
+#define IEEE80211_WPS_ATTR_MSG_TYPE 0x1022
+#define IEEE80211_WPS_ATTR_MODEL_NAME 0x1023
+#define IEEE80211_WPS_ATTR_MODEL_NUMBER 0x1024
+#define IEEE80211_WPS_ATTR_NETWORK_INDEX 0x1026
+#define IEEE80211_WPS_ATTR_NETWORK_KEY 0x1027
+#define IEEE80211_WPS_ATTR_NETWORK_KEY_INDEX 0x1028
+#define IEEE80211_WPS_ATTR_NEW_DEVICE_NAME 0x1029
+#define IEEE80211_WPS_ATTR_NEW_PASSWORD 0x102a
+#define IEEE80211_WPS_ATTR_OOB_DEVICE_PASSWORD 0x102c
+#define IEEE80211_WPS_ATTR_OS_VERSION 0x102d
+#define IEEE80211_WPS_ATTR_POWER_LEVEL 0x102f
+#define IEEE80211_WPS_ATTR_PSK_CURRENT 0x1030
+#define IEEE80211_WPS_ATTR_PSK_MAX 0x1031
+#define IEEE80211_WPS_ATTR_PUBLIC_KEY 0x1032
+#define IEEE80211_WPS_ATTR_RADIO_ENABLE 0x1033
+#define IEEE80211_WPS_ATTR_REBOOT 0x1034
+#define IEEE80211_WPS_ATTR_REGISTRAR_CURRENT 0x1035
+#define IEEE80211_WPS_ATTR_REGISTRAR_ESTBLSHD 0x1036
+#define IEEE80211_WPS_ATTR_REGISTRAR_LIST 0x1037
+#define IEEE80211_WPS_ATTR_REGISTRAR_MAX 0x1038
+#define IEEE80211_WPS_ATTR_REGISTRAR_NONCE 0x1039
+#define IEEE80211_WPS_ATTR_REQUEST_TYPE 0x103a
+#define IEEE80211_WPS_ATTR_RESPONSE_TYPE 0x103b
+#define IEEE80211_WPS_ATTR_RF_BANDS 0x103c
+#define IEEE80211_WPS_ATTR_R_HASH1 0x103d
+#define IEEE80211_WPS_ATTR_R_HASH2 0x103e
+#define IEEE80211_WPS_ATTR_R_SNONCE1 0x103f
+#define IEEE80211_WPS_ATTR_R_SNONCE2 0x1040
+#define IEEE80211_WPS_ATTR_SELECTED_REGISTRAR 0x1041
+#define IEEE80211_WPS_ATTR_SERIAL_NUMBER 0x1042
+#define IEEE80211_WPS_ATTR_WPS_STATE 0x1044
+#define IEEE80211_WPS_ATTR_SSID 0x1045
+#define IEEE80211_WPS_ATTR_TOTAL_NETWORKS 0x1046
+#define IEEE80211_WPS_ATTR_UUID_E 0x1047
+#define IEEE80211_WPS_ATTR_UUID_R 0x1048
+#define IEEE80211_WPS_ATTR_VENDOR_EXT 0x1049
+#define IEEE80211_WPS_ATTR_VERSION 0x104a
+#define IEEE80211_WPS_ATTR_X509_CERT_REQ 0x104b
+#define IEEE80211_WPS_ATTR_X509_CERT 0x104c
+#define IEEE80211_WPS_ATTR_EAP_IDENTITY 0x104d
+#define IEEE80211_WPS_ATTR_MSG_COUNTER 0x104e
+#define IEEE80211_WPS_ATTR_PUBKEY_HASH 0x104f
+#define IEEE80211_WPS_ATTR_REKEY_KEY 0x1050
+#define IEEE80211_WPS_ATTR_KEY_LIFETIME 0x1051
+#define IEEE80211_WPS_ATTR_PERMITTED_CONFIG_METHODS 0x1052
+#define IEEE80211_WPS_ATTR_SELECTED_REGISTRAR_CONFIG_METHODS 0x1053
+#define IEEE80211_WPS_ATTR_PRIMARY_DEV_TYPE 0x1054
+#define IEEE80211_WPS_ATTR_SECONDARY_DEV_TYPE_LIST 0x1055
+#define IEEE80211_WPS_ATTR_PORTABLE_DEV 0x1056
+#define IEEE80211_WPS_ATTR_AP_SETUP_LOCKED 0x1057
+#define IEEE80211_WPS_ATTR_APPLICATION_EXT 0x1058
+#define IEEE80211_WPS_ATTR_EAP_TYPE 0x1059
+#define IEEE80211_WPS_ATTR_IV 0x1060
+#define IEEE80211_WPS_ATTR_KEY_PROVIDED_AUTO 0x1061
+#define IEEE80211_WPS_ATTR_802_1X_ENABLED 0x1062
+#define IEEE80211_WPS_ATTR_AP_SESSION_KEY 0x1063
+#define IEEE80211_WPS_ATTR_WEP_TRANSMIT_KEY 0x1064
+#define IEEE80211_WPS_ATTR_REQUESTED_DEV_TYPE 0x106a
+#define IEEE80211_WPS_ATTR_EXTENSIBILITY_TEST 0x10fa /* _NOT_ defined in the spec */
+
+/* RF bands bitmask */
+#define IEEE80211_WPS_RF_BAND_24GHZ 0x01
+#define IEEE80211_WPS_RF_BAND_50GHZ 0x02
+#define IEEE80211_WPS_RF_BAND_600GHZ 0x04
+
+/* Config methods bitmask */
+#define IEEE80211_WPS_CONFIG_USBA 0x0001
+#define IEEE80211_WPS_CONFIG_ETHERNET 0x0002
+#define IEEE80211_WPS_CONFIG_LABEL 0x0004
+#define IEEE80211_WPS_CONFIG_DISPLAY 0x0008
+#define IEEE80211_WPS_CONFIG_EXT_NFC_TOKEN 0x0010
+#define IEEE80211_WPS_CONFIG_INT_NFC_TOKEN 0x0020
+#define IEEE80211_WPS_CONFIG_NFC_INTERFACE 0x0040
+#define IEEE80211_WPS_CONFIG_PUSHBUTTON 0x0080
+#define IEEE80211_WPS_CONFIG_KEYPAD 0x0100
+#define IEEE80211_WPS_CONFIG_VIRT_PUSHBUTTON 0x0200
+#define IEEE80211_WPS_CONFIG_PHY_PUSHBUTTON 0x0400
+#define IEEE80211_WPS_CONFIG_P2PS 0x1000
+#define IEEE80211_WPS_CONFIG_VIRT_DISPLAY 0x2000
+#define IEEE80211_WPS_CONFIG_PHY_DISPLAY 0x4000
+
+/* Wi-Fi Protected Setup state */
+#define IEEE80211_WPS_STATE_NOT_CONFIGURED 0x01
+#define IEEE80211_WPS_STATE_CONFIGURED 0x02
+#endif /* _NET80211_IEEE80211_WPS_H_ */
diff --git a/freebsd/sys/netinet/cc/cc_newreno.c b/freebsd/sys/netinet/cc/cc_newreno.c
index b1307c92..e1993664 100644
--- a/freebsd/sys/netinet/cc/cc_newreno.c
+++ b/freebsd/sys/netinet/cc/cc_newreno.c
@@ -201,7 +201,7 @@ newreno_ack_received(struct cc_var *ccv, uint16_t type)
static void
newreno_after_idle(struct cc_var *ccv)
{
- int rw;
+ uint32_t rw;
/*
* If we've been idle for more than one retransmit timeout the old
@@ -216,11 +216,7 @@ newreno_after_idle(struct cc_var *ccv)
*
* See RFC5681 Section 4.1. "Restarting Idle Connections".
*/
- if (V_tcp_do_rfc3390)
- rw = min(4 * CCV(ccv, t_maxseg),
- max(2 * CCV(ccv, t_maxseg), 4380));
- else
- rw = CCV(ccv, t_maxseg) * 2;
+ rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp));
CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
}
@@ -301,7 +297,12 @@ newreno_post_recovery(struct cc_var *ccv)
pipe = CCV(ccv, snd_max) - ccv->curack;
if (pipe < CCV(ccv, snd_ssthresh))
- CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
+ /*
+ * Ensure that cwnd does not collapse to 1 MSS under
+ * adverse conditions. Implements RFC6582
+ */
+ CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
+ CCV(ccv, t_maxseg);
else
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
}
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
index e9efd89e..1206691f 100644
--- a/freebsd/sys/netinet/if_ether.c
+++ b/freebsd/sys/netinet/if_ether.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
@@ -163,8 +164,15 @@ SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
"Number of times to retransmit GARP packets;"
" 0 to disable, maximum of 16");
+VNET_DEFINE_STATIC(int, arp_log_level) = LOG_INFO; /* Min. log(9) level. */
+#define V_arp_log_level VNET(arp_log_level)
+SYSCTL_INT(_net_link_ether_arp, OID_AUTO, log_level, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(arp_log_level), 0,
+ "Minimum log(9) level for recording rate limited arp log messages. "
+ "The higher will be log more (emerg=0, info=6 (default), debug=7).");
#define ARP_LOG(pri, ...) do { \
- if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \
+ if ((pri) <= V_arp_log_level && \
+ ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \
log((pri), "arp: " __VA_ARGS__); \
} while (0)
@@ -343,8 +351,8 @@ arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
* - arp header target ip address
* - arp header source ethernet address
*/
-void
-arprequest(struct ifnet *ifp, const struct in_addr *sip,
+static int
+arprequest_internal(struct ifnet *ifp, const struct in_addr *sip,
const struct in_addr *tip, u_char *enaddr)
{
struct mbuf *m;
@@ -361,9 +369,10 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
* The caller did not supply a source address, try to find
* a compatible one among those assigned to this interface.
*/
+ struct epoch_tracker et;
struct ifaddr *ifa;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
@@ -381,17 +390,17 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
IA_MASKSIN(ifa)->sin_addr.s_addr))
break; /* found it. */
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (sip == NULL) {
printf("%s: cannot find matching address\n", __func__);
- return;
+ return (EADDRNOTAVAIL);
}
}
if (enaddr == NULL)
enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return;
+ return (ENOMEM);
m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
2 * ifp->if_addrlen;
m->m_pkthdr.len = m->m_len;
@@ -418,7 +427,7 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
if (error != 0 && error != EAFNOSUPPORT) {
ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
if_name(ifp), error);
- return;
+ return (error);
}
ro.ro_prepend = linkhdr;
@@ -427,10 +436,23 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
m->m_flags |= M_BCAST;
m_clrprotoflags(m); /* Avoid confusing lower layers. */
- (*ifp->if_output)(ifp, m, &sa, &ro);
+ error = (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txrequests);
+ if (error) {
+ ARPSTAT_INC(txerrors);
+ ARP_LOG(LOG_DEBUG, "Failed to send ARP packet on %s: %d\n",
+ if_name(ifp), error);
+ }
+ return (error);
}
+void
+arprequest(struct ifnet *ifp, const struct in_addr *sip,
+ const struct in_addr *tip, u_char *enaddr)
+{
+
+ (void) arprequest_internal(ifp, sip, tip, enaddr);
+}
/*
* Resolve an IP address into an ethernet address - heavy version.
@@ -461,9 +483,11 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
*plle = NULL;
if ((flags & LLE_CREATE) == 0) {
- IF_AFDATA_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
@@ -556,7 +580,7 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN;
if (renew) {
- int canceled;
+ int canceled, e;
LLE_ADDREF(la);
la->la_expire = time_uptime;
@@ -566,7 +590,13 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
LLE_REMREF(la);
la->la_asked++;
LLE_WUNLOCK(la);
- arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
+ e = arprequest_internal(ifp, NULL, &SIN(dst)->sin_addr, NULL);
+ /*
+ * Only overwrite 'error' in case of error; in case of success
+ * the proper return value was already set above.
+ */
+ if (e != 0)
+ return (e);
return (error);
}
@@ -595,6 +625,7 @@ arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
struct llentry **plle)
{
+ struct epoch_tracker et;
struct llentry *la = NULL;
if (pflags != NULL)
@@ -616,7 +647,7 @@ arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
}
}
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
la = lla_lookup(LLTABLE(ifp), plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst);
if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
/* Entry found, let's copy lle info */
@@ -630,12 +661,12 @@ arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
*plle = la;
LLE_WUNLOCK(la);
}
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
if (plle && la)
LLE_WUNLOCK(la);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst,
desten, pflags, plle));
@@ -780,6 +811,7 @@ in_arpinput(struct mbuf *m)
int lladdr_off;
int error;
char addrbuf[INET_ADDRSTRLEN];
+ struct epoch_tracker et;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
@@ -872,17 +904,17 @@ in_arpinput(struct mbuf *m)
* No match, use the first inet address on the receive interface
* as a dummy address for the rest of the function.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET &&
(ifa->ifa_carp == NULL ||
(*carp_iamatch_p)(ifa, &enaddr))) {
ia = ifatoia(ifa);
ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto match;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
/*
* If bridging, fall back to using any inet address.
@@ -939,9 +971,9 @@ match:
sin.sin_family = AF_INET;
sin.sin_addr = isaddr;
dst = (struct sockaddr *)&sin;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (la != NULL)
arp_check_update_lle(ah, isaddr, ifp, bridged, la);
else if (itaddr.s_addr == myaddr.s_addr) {
@@ -1019,9 +1051,9 @@ reply:
struct llentry *lle = NULL;
sin.sin_addr = itaddr;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
@@ -1332,6 +1364,8 @@ garp_rexmit(void *arg)
return;
}
+ CURVNET_SET(ia->ia_ifa.ifa_ifp->if_vnet);
+
/*
* Drop lock while the ARP request is generated.
*/
@@ -1359,6 +1393,8 @@ garp_rexmit(void *arg)
ifa_free(&ia->ia_ifa);
}
}
+
+ CURVNET_RESTORE();
}
/*
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
index 970a01a0..7fac6a70 100644
--- a/freebsd/sys/netinet/igmp.c
+++ b/freebsd/sys/netinet/igmp.c
@@ -694,6 +694,7 @@ static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
const struct igmp *igmp)
{
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
struct igmp_ifsoftc *igi;
struct in_multi *inm;
@@ -735,7 +736,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
* for the interface on which the query arrived,
* except those which are already running.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
@@ -763,7 +764,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
out_locked:
IGMP_UNLOCK();
@@ -779,6 +780,7 @@ static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
const struct igmp *igmp)
{
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
struct igmp_ifsoftc *igi;
struct in_multi *inm;
@@ -836,7 +838,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
*/
CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
ifp, ifp->if_xname);
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
@@ -844,7 +846,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
inm = (struct in_multi *)ifma->ifma_protospec;
igmp_v2_update_group(inm, timer);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
} else {
/*
* Group-specific IGMPv2 query, we need only
@@ -1220,11 +1222,13 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
* Replace 0.0.0.0 with the subnet address if told to do so.
*/
if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
- NET_EPOCH_ENTER();
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL)
ip->ip_src.s_addr = htonl(ia->ia_subnet);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
CTR3(KTR_IGMPV3, "process v1 report 0x%08x on ifp %p(%s)",
@@ -1309,6 +1313,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
/*const*/ struct igmp *igmp)
{
struct rm_priotracker in_ifa_tracker;
+ struct epoch_tracker et;
struct in_ifaddr *ia;
struct in_multi *inm;
@@ -1317,23 +1322,23 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
* leave requires knowing that we are the only member of a
* group.
*/
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (0);
}
IGMPSTAT_INC(igps_rcv_reports);
if (ifp->if_flags & IFF_LOOPBACK) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (0);
}
if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
!in_hosteq(igmp->igmp_group, ip->ip_dst)) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
IGMPSTAT_INC(igps_rcv_badreports);
return (EINVAL);
}
@@ -1349,7 +1354,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
if (ia != NULL)
ip->ip_src.s_addr = htonl(ia->ia_subnet);
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
CTR3(KTR_IGMPV3, "process v2 report 0x%08x on ifp %p(%s)",
ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
@@ -1991,6 +1996,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
struct ifnet *ifp;
struct in_multi *inm;
struct in_multi_head inm_free_tmp;
+ struct epoch_tracker et;
CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
igi->igi_ifp, igi->igi_ifp->if_xname);
@@ -2011,7 +2017,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
* for all memberships scoped to this link.
*/
ifp = igi->igi_ifp;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
@@ -2056,7 +2062,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
inm->inm_timer = 0;
mbufq_drain(&inm->inm_scq);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
inm_release_list_deferred(&inm_free_tmp);
}
@@ -3299,6 +3305,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct mbufq *scq)
static void
igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
{
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
struct ifnet *ifp;
struct in_multi *inm;
@@ -3321,7 +3328,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
ifp = igi->igi_ifp;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
@@ -3352,7 +3359,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
send:
loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
@@ -3524,13 +3531,14 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
ip->ip_src.s_addr = INADDR_ANY;
if (m->m_flags & M_IGMP_LOOP) {
+ struct epoch_tracker et;
struct in_ifaddr *ia;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL)
ip->ip_src = ia->ia_addr.sin_addr;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index db1ebda0..c3f172f3 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -141,20 +141,21 @@ in_localip(struct in_addr in)
int
in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct in_ifaddr *ia;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ia = (struct in_ifaddr *)ifa;
if (ia->ia_addr.sin_addr.s_addr == in.s_addr) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (1);
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -192,15 +193,10 @@ int
in_canforward(struct in_addr in)
{
u_long i = ntohl(in.s_addr);
- u_long net;
- if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i) ||
+ IN_ZERONET(i) || IN_LOOPBACK(i))
return (0);
- if (IN_CLASSA(i)) {
- net = i & IN_CLASSA_NET;
- if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
- return (0);
- }
return (1);
}
@@ -230,6 +226,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
{
struct ifreq *ifr = (struct ifreq *)data;
struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct in_ifaddr *ia;
int error;
@@ -281,7 +278,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
* address was specified, find that one instead of the
* first one on the interface, if possible.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
@@ -299,7 +296,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
}
if (ifa == NULL) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (EADDRNOTAVAIL);
}
@@ -330,7 +327,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -344,6 +341,7 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
const struct sockaddr_in *mask = &ifra->ifra_mask;
const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct in_ifaddr *ia;
bool iaIsFirst;
@@ -380,7 +378,7 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
*/
iaIsFirst = true;
ia = NULL;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct in_ifaddr *it;
@@ -393,7 +391,7 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
ia = it;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ia != NULL)
(void )in_difaddr_ioctl(cmd, data, ifp, td);
@@ -923,7 +921,7 @@ in_ifscrub_all(void)
IFNET_RLOCK();
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
/* Cannot lock here - lock recursion. */
- /* IF_ADDR_RLOCK(ifp); */
+ /* NET_EPOCH_ENTER(et); */
CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
@@ -939,7 +937,7 @@ in_ifscrub_all(void)
(void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
ifp, NULL);
}
- /* IF_ADDR_RUNLOCK(ifp); */
+ /* NET_EPOCH_EXIT(et); */
in_purgemaddrs(ifp);
igmp_domifdetach(ifp);
}
@@ -971,6 +969,7 @@ in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
int
in_broadcast(struct in_addr in, struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
int found;
@@ -984,14 +983,14 @@ in_broadcast(struct in_addr in, struct ifnet *ifp)
* Look through the list of addresses for a match
* with a broadcast address.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET &&
in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) {
found = 1;
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (found);
}
@@ -1382,15 +1381,13 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
- lle = in_lltable_find_dst(llt, sin->sin_addr);
+ KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) !=
+ (LLE_UNLOCKED | LLE_EXCLUSIVE),
+ ("wrong lle request flags: %#x", flags));
+ lle = in_lltable_find_dst(llt, sin->sin_addr);
if (lle == NULL)
return (NULL);
-
- KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
- (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
- flags));
-
if (flags & LLE_UNLOCKED)
return (lle);
@@ -1399,6 +1396,17 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
else
LLE_RLOCK(lle);
+ /*
+ * If the afdata lock is not held, the LLE may have been unlinked while
+ * we were blocked on the LLE lock. Check for this case.
+ */
+ if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(lle);
+ else
+ LLE_RUNLOCK(lle);
+ return (NULL);
+ }
return (lle);
}
diff --git a/freebsd/sys/netinet/in_fib.c b/freebsd/sys/netinet/in_fib.c
index f61909ea..63141326 100644
--- a/freebsd/sys/netinet/in_fib.c
+++ b/freebsd/sys/netinet/in_fib.c
@@ -98,7 +98,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4)
{
struct sockaddr_in *gw;
- struct in_ifaddr *ia;
if ((flags & NHR_IFAIF) != 0)
pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
@@ -115,10 +114,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
gw = (struct sockaddr_in *)rt_key(rte);
if (gw->sin_addr.s_addr == 0)
pnh4->nh_flags |= NHF_DEFAULT;
- /* XXX: Set RTF_BROADCAST if GW address is broadcast */
-
- ia = ifatoia(rte->rt_ifa);
- pnh4->nh_src = IA_SIN(ia)->sin_addr;
+ pnh4->nh_ia = ifatoia(rte->rt_ifa);
+ pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
}
/*
diff --git a/freebsd/sys/netinet/in_fib.h b/freebsd/sys/netinet/in_fib.h
index fa72fd76..f0b4d159 100644
--- a/freebsd/sys/netinet/in_fib.h
+++ b/freebsd/sys/netinet/in_fib.h
@@ -43,12 +43,13 @@ struct nhop4_basic {
/* Extended nexthop info used for control protocols */
struct nhop4_extended {
struct ifnet *nh_ifp; /* Logical egress interface */
+ struct in_ifaddr *nh_ia; /* Associated address */
uint16_t nh_mtu; /* nexthop mtu */
uint16_t nh_flags; /* nhop flags */
uint8_t spare[4];
struct in_addr nh_addr; /* GW/DST IPv4 address */
struct in_addr nh_src; /* default source IPv4 address */
- uint64_t spare2[2];
+ uint64_t spare2;
};
int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index 3b1d57f8..3fc4aa01 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -96,7 +96,9 @@ static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
/*
* Locking:
- * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
+ *
+ * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK,
+ * IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
* - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
* it can be taken by code in net/if.c also.
* - ip_moptions and in_mfilter are covered by the INP_WLOCK.
@@ -146,12 +148,11 @@ static int imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void imf_purge(struct in_mfilter *);
static void imf_rollback(struct in_mfilter *);
static void imf_reap(struct in_mfilter *);
-static int imo_grow(struct ip_moptions *);
-static size_t imo_match_group(const struct ip_moptions *,
+static struct in_mfilter *
+ imo_match_group(const struct ip_moptions *,
const struct ifnet *, const struct sockaddr *);
static struct in_msource *
- imo_match_source(const struct ip_moptions *, const size_t,
- const struct sockaddr *);
+ imo_match_source(struct in_mfilter *, const struct sockaddr *);
static void ims_merge(struct ip_msource *ims,
const struct in_msource *lims, const int rollback);
static int in_getmulti(struct ifnet *, const struct in_addr *,
@@ -335,6 +336,26 @@ imf_init(struct in_mfilter *imf, const int st0, const int st1)
imf->imf_st[1] = st1;
}
+struct in_mfilter *
+ip_mfilter_alloc(const int mflags, const int st0, const int st1)
+{
+ struct in_mfilter *imf;
+
+ imf = malloc(sizeof(*imf), M_INMFILTER, mflags);
+ if (imf != NULL)
+ imf_init(imf, st0, st1);
+
+ return (imf);
+}
+
+void
+ip_mfilter_free(struct in_mfilter *imf)
+{
+
+ imf_purge(imf);
+ free(imf, M_INMFILTER);
+}
+
/*
* Function for looking up an in_multi record for an IPv4 multicast address
* on a given interface. ifp must be valid. If no record found, return NULL.
@@ -369,100 +390,42 @@ inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
struct in_multi *
inm_lookup(struct ifnet *ifp, const struct in_addr ina)
{
+ struct epoch_tracker et;
struct in_multi *inm;
IN_MULTI_LIST_LOCK_ASSERT();
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
inm = inm_lookup_locked(ifp, ina);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (inm);
}
/*
- * Resize the ip_moptions vector to the next power-of-two minus 1.
- * May be called with locks held; do not sleep.
- */
-static int
-imo_grow(struct ip_moptions *imo)
-{
- struct in_multi **nmships;
- struct in_multi **omships;
- struct in_mfilter *nmfilters;
- struct in_mfilter *omfilters;
- size_t idx;
- size_t newmax;
- size_t oldmax;
-
- nmships = NULL;
- nmfilters = NULL;
- omships = imo->imo_membership;
- omfilters = imo->imo_mfilters;
- oldmax = imo->imo_max_memberships;
- newmax = ((oldmax + 1) * 2) - 1;
-
- if (newmax <= IP_MAX_MEMBERSHIPS) {
- nmships = (struct in_multi **)realloc(omships,
- sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
- nmfilters = (struct in_mfilter *)realloc(omfilters,
- sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
- if (nmships != NULL && nmfilters != NULL) {
- /* Initialize newly allocated source filter heads. */
- for (idx = oldmax; idx < newmax; idx++) {
- imf_init(&nmfilters[idx], MCAST_UNDEFINED,
- MCAST_EXCLUDE);
- }
- imo->imo_max_memberships = newmax;
- imo->imo_membership = nmships;
- imo->imo_mfilters = nmfilters;
- }
- }
-
- if (nmships == NULL || nmfilters == NULL) {
- if (nmships != NULL)
- free(nmships, M_IPMOPTS);
- if (nmfilters != NULL)
- free(nmfilters, M_INMFILTER);
- return (ETOOMANYREFS);
- }
-
- return (0);
-}
-
-/*
* Find an IPv4 multicast group entry for this ip_moptions instance
* which matches the specified group, and optionally an interface.
* Return its index into the array, or -1 if not found.
*/
-static size_t
+static struct in_mfilter *
imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
const struct sockaddr *group)
{
const struct sockaddr_in *gsin;
- struct in_multi **pinm;
- int idx;
- int nmships;
+ struct in_mfilter *imf;
+ struct in_multi *inm;
gsin = (const struct sockaddr_in *)group;
- /* The imo_membership array may be lazy allocated. */
- if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
- return (-1);
-
- nmships = imo->imo_num_memberships;
- pinm = &imo->imo_membership[0];
- for (idx = 0; idx < nmships; idx++, pinm++) {
- if (*pinm == NULL)
+ IP_MFILTER_FOREACH(imf, &imo->imo_head) {
+ inm = imf->imf_inm;
+ if (inm == NULL)
continue;
- if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
- in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
+ if ((ifp == NULL || (inm->inm_ifp == ifp)) &&
+ in_hosteq(inm->inm_addr, gsin->sin_addr)) {
break;
}
}
- if (idx >= nmships)
- idx = -1;
-
- return (idx);
+ return (imf);
}
/*
@@ -473,22 +436,13 @@ imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
* it exists, which may not be the desired behaviour.
*/
static struct in_msource *
-imo_match_source(const struct ip_moptions *imo, const size_t gidx,
- const struct sockaddr *src)
+imo_match_source(struct in_mfilter *imf, const struct sockaddr *src)
{
struct ip_msource find;
- struct in_mfilter *imf;
struct ip_msource *ims;
const sockunion_t *psa;
KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
- KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
- ("%s: invalid index %d\n", __func__, (int)gidx));
-
- /* The imo_mfilters array may be lazy allocated. */
- if (imo->imo_mfilters == NULL)
- return (NULL);
- imf = &imo->imo_mfilters[gidx];
/* Source trees are keyed in host byte order. */
psa = (const sockunion_t *)src;
@@ -508,14 +462,14 @@ int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
const struct sockaddr *group, const struct sockaddr *src)
{
- size_t gidx;
+ struct in_mfilter *imf;
struct in_msource *ims;
int mode;
KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- gidx = imo_match_group(imo, ifp, group);
- if (gidx == -1)
+ imf = imo_match_group(imo, ifp, group);
+ if (imf == NULL)
return (MCAST_NOTGMEMBER);
/*
@@ -527,8 +481,8 @@ imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
* NOTE: We are comparing group state here at IGMP t1 (now)
* with socket-layer t0 (since last downcall).
*/
- mode = imo->imo_mfilters[gidx].imf_st[1];
- ims = imo_match_source(imo, gidx, src);
+ mode = imf->imf_st[1];
+ ims = imo_match_source(imf, src);
if ((ims == NULL && mode == MCAST_INCLUDE) ||
(ims != NULL && ims->imsl_st[0] != mode))
@@ -1453,7 +1407,6 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
struct ip_moptions *imo;
struct in_msource *ims;
struct in_multi *inm;
- size_t idx;
uint16_t fmode;
int error, doblock;
@@ -1532,20 +1485,18 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
return (EINVAL);
+ IN_MULTI_LOCK();
+
/*
* Check if we are actually a member of this group.
*/
imo = inp_findmoptions(inp);
- idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->imo_mfilters == NULL) {
+ imf = imo_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_inp_locked;
}
-
- KASSERT(imo->imo_mfilters != NULL,
- ("%s: imo_mfilters not allocated", __func__));
- imf = &imo->imo_mfilters[idx];
- inm = imo->imo_membership[idx];
+ inm = imf->imf_inm;
/*
* Attempting to use the delta-based API on an
@@ -1563,7 +1514,7 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
* Asked to unblock, but nothing to unblock.
* If adding a new block entry, allocate it.
*/
- ims = imo_match_source(imo, idx, &ssa->sa);
+ ims = imo_match_source(imf, &ssa->sa);
if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__,
ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not ");
@@ -1594,14 +1545,13 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
/*
* Begin state merge transaction at IGMP layer.
*/
- IN_MULTI_LOCK();
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
IN_MULTI_LIST_UNLOCK();
- goto out_in_multi_locked;
+ goto out_imf_rollback;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -1610,9 +1560,6 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
-out_in_multi_locked:
-
- IN_MULTI_UNLOCK();
out_imf_rollback:
if (error)
imf_rollback(imf);
@@ -1623,6 +1570,7 @@ out_imf_rollback:
out_inp_locked:
INP_WUNLOCK(inp);
+ IN_MULTI_UNLOCK();
return (error);
}
@@ -1637,9 +1585,6 @@ static struct ip_moptions *
inp_findmoptions(struct inpcb *inp)
{
struct ip_moptions *imo;
- struct in_multi **immp;
- struct in_mfilter *imfp;
- size_t idx;
INP_WLOCK(inp);
if (inp->inp_moptions != NULL)
@@ -1648,29 +1593,16 @@ inp_findmoptions(struct inpcb *inp)
INP_WUNLOCK(inp);
imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
- immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
- M_WAITOK | M_ZERO);
- imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
- M_INMFILTER, M_WAITOK);
imo->imo_multicast_ifp = NULL;
imo->imo_multicast_addr.s_addr = INADDR_ANY;
imo->imo_multicast_vif = -1;
imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
imo->imo_multicast_loop = in_mcast_loop;
- imo->imo_num_memberships = 0;
- imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
- imo->imo_membership = immp;
-
- /* Initialize per-group source filters. */
- for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
- imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
- imo->imo_mfilters = imfp;
+ STAILQ_INIT(&imo->imo_head);
INP_WLOCK(inp);
if (inp->inp_moptions != NULL) {
- free(imfp, M_INMFILTER);
- free(immp, M_IPMOPTS);
free(imo, M_IPMOPTS);
return (inp->inp_moptions);
}
@@ -1681,32 +1613,25 @@ inp_findmoptions(struct inpcb *inp)
static void
inp_gcmoptions(struct ip_moptions *imo)
{
- struct in_mfilter *imf;
+ struct in_mfilter *imf;
struct in_multi *inm;
struct ifnet *ifp;
- size_t idx, nmships;
-
- nmships = imo->imo_num_memberships;
- for (idx = 0; idx < nmships; ++idx) {
- imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
- if (imf)
- imf_leave(imf);
- inm = imo->imo_membership[idx];
- ifp = inm->inm_ifp;
- if (ifp != NULL) {
- CURVNET_SET(ifp->if_vnet);
- (void)in_leavegroup(inm, imf);
- CURVNET_RESTORE();
- } else {
- (void)in_leavegroup(inm, imf);
+
+ while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
+ ip_mfilter_remove(&imo->imo_head, imf);
+
+ imf_leave(imf);
+ if ((inm = imf->imf_inm) != NULL) {
+ if ((ifp = inm->inm_ifp) != NULL) {
+ CURVNET_SET(ifp->if_vnet);
+ (void)in_leavegroup(inm, imf);
+ CURVNET_RESTORE();
+ } else {
+ (void)in_leavegroup(inm, imf);
+ }
}
- if (imf)
- imf_purge(imf);
+ ip_mfilter_free(imf);
}
-
- if (imo->imo_mfilters)
- free(imo->imo_mfilters, M_INMFILTER);
- free(imo->imo_membership, M_IPMOPTS);
free(imo, M_IPMOPTS);
}
@@ -1742,7 +1667,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct sockaddr_storage *ptss;
struct sockaddr_storage *tss;
int error;
- size_t idx, nsrcs, ncsrcs;
+ size_t nsrcs, ncsrcs;
INP_WLOCK_ASSERT(inp);
@@ -1769,12 +1694,11 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
* Lookup group on the socket.
*/
gsa = (sockunion_t *)&msfr.msfr_group;
- idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->imo_mfilters == NULL) {
+ imf = imo_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
INP_WUNLOCK(inp);
return (EADDRNOTAVAIL);
}
- imf = &imo->imo_mfilters[idx];
/*
* Ignore memberships which are in limbo.
@@ -1889,13 +1813,15 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
if (!in_nullhost(imo->imo_multicast_addr)) {
mreqn.imr_address = imo->imo_multicast_addr;
} else if (ifp != NULL) {
+ struct epoch_tracker et;
+
mreqn.imr_ifindex = ifp->if_index;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL)
mreqn.imr_address =
IA_SIN(ia)->sin_addr;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
}
INP_WUNLOCK(inp);
@@ -2032,14 +1958,11 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
struct ip_moptions *imo;
struct in_multi *inm;
struct in_msource *lims;
- size_t idx;
int error, is_new;
ifp = NULL;
- imf = NULL;
lims = NULL;
error = 0;
- is_new = 0;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -2048,41 +1971,50 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ssa->ss.ss_family = AF_UNSPEC;
switch (sopt->sopt_name) {
- case IP_ADD_MEMBERSHIP:
- case IP_ADD_SOURCE_MEMBERSHIP: {
- struct ip_mreq_source mreqs;
+ case IP_ADD_MEMBERSHIP: {
+ struct ip_mreqn mreqn;
- if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
- error = sooptcopyin(sopt, &mreqs,
- sizeof(struct ip_mreq),
- sizeof(struct ip_mreq));
- /*
- * Do argument switcharoo from ip_mreq into
- * ip_mreq_source to avoid using two instances.
- */
- mreqs.imr_interface = mreqs.imr_sourceaddr;
- mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
- } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
- error = sooptcopyin(sopt, &mreqs,
- sizeof(struct ip_mreq_source),
- sizeof(struct ip_mreq_source));
- }
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn))
+ error = sooptcopyin(sopt, &mreqn,
+ sizeof(struct ip_mreqn), sizeof(struct ip_mreqn));
+ else
+ error = sooptcopyin(sopt, &mreqn,
+ sizeof(struct ip_mreq), sizeof(struct ip_mreq));
if (error)
return (error);
gsa->sin.sin_family = AF_INET;
gsa->sin.sin_len = sizeof(struct sockaddr_in);
- gsa->sin.sin_addr = mreqs.imr_multiaddr;
+ gsa->sin.sin_addr = mreqn.imr_multiaddr;
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
- if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
- ssa->sin.sin_family = AF_INET;
- ssa->sin.sin_len = sizeof(struct sockaddr_in);
- ssa->sin.sin_addr = mreqs.imr_sourceaddr;
- }
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn) &&
+ mreqn.imr_ifindex != 0)
+ ifp = ifnet_byindex(mreqn.imr_ifindex);
+ else
+ ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
+ mreqn.imr_address);
+ break;
+ }
+ case IP_ADD_SOURCE_MEMBERSHIP: {
+ struct ip_mreq_source mreqs;
+
+ error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ if (error)
+ return (error);
+ gsa->sin.sin_family = ssa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = ssa->sin.sin_len =
+ sizeof(struct sockaddr_in);
+
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
return (EINVAL);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+
ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
mreqs.imr_interface);
CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
@@ -2138,13 +2070,25 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
return (EADDRNOTAVAIL);
+ IN_MULTI_LOCK();
+
+ /*
+ * Find the membership in the membership list.
+ */
imo = inp_findmoptions(inp);
- idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx == -1) {
+ imf = imo_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
is_new = 1;
+ inm = NULL;
+
+ if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) {
+ error = ENOMEM;
+ goto out_inp_locked;
+ }
} else {
- inm = imo->imo_membership[idx];
- imf = &imo->imo_mfilters[idx];
+ is_new = 0;
+ inm = imf->imf_inm;
+
if (ssa->ss.ss_family != AF_UNSPEC) {
/*
* MCAST_JOIN_SOURCE_GROUP on an exclusive membership
@@ -2171,7 +2115,7 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
* full-state SSM API with the delta-based API,
* which is discouraged in the relevant RFCs.
*/
- lims = imo_match_source(imo, idx, &ssa->sa);
+ lims = imo_match_source(imf, &ssa->sa);
if (lims != NULL /*&&
lims->imsl_st[1] == MCAST_INCLUDE*/) {
error = EADDRNOTAVAIL;
@@ -2204,27 +2148,6 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
*/
INP_WLOCK_ASSERT(inp);
- if (is_new) {
- if (imo->imo_num_memberships == imo->imo_max_memberships) {
- error = imo_grow(imo);
- if (error)
- goto out_inp_locked;
- }
- /*
- * Allocate the new slot upfront so we can deal with
- * grafting the new source filter in same code path
- * as for join-source on existing membership.
- */
- idx = imo->imo_num_memberships;
- imo->imo_membership[idx] = NULL;
- imo->imo_num_memberships++;
- KASSERT(imo->imo_mfilters != NULL,
- ("%s: imf_mfilters vector was not allocated", __func__));
- imf = &imo->imo_mfilters[idx];
- KASSERT(RB_EMPTY(&imf->imf_sources),
- ("%s: imf_sources not empty", __func__));
- }
-
/*
* Graft new source into filter list for this inpcb's
* membership of the group. The in_multi may not have
@@ -2240,7 +2163,11 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
/* Membership starts in IN mode */
if (is_new) {
CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
- imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
+ imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
+ if (imf == NULL) {
+ error = ENOMEM;
+ goto out_inp_locked;
+ }
} else {
CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
}
@@ -2249,34 +2176,41 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
CTR1(KTR_IGMPV3, "%s: merge imf state failed",
__func__);
error = ENOMEM;
- goto out_imo_free;
+ goto out_inp_locked;
}
} else {
/* No address specified; Membership starts in EX mode */
if (is_new) {
CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
- imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ if (imf == NULL) {
+ error = ENOMEM;
+ goto out_inp_locked;
+ }
}
}
/*
* Begin state merge transaction at IGMP layer.
*/
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- IN_MULTI_LOCK();
-
if (is_new) {
+ in_pcbref(inp);
+ INP_WUNLOCK(inp);
+
error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
- &inm);
+ &imf->imf_inm);
+
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ error = ENXIO;
+ goto out_inp_unlocked;
+ }
if (error) {
CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
__func__);
- IN_MULTI_LIST_UNLOCK();
- goto out_imo_free;
+ goto out_inp_locked;
}
- inm_acquire(inm);
- imo->imo_membership[idx] = inm;
+ inm_acquire(imf->imf_inm);
} else {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
IN_MULTI_LIST_LOCK();
@@ -2285,7 +2219,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
IN_MULTI_LIST_UNLOCK();
- goto out_in_multi_locked;
+ imf_rollback(imf);
+ imf_reap(imf);
+ goto out_inp_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
error = igmp_change_state(inm);
@@ -2293,40 +2229,30 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
if (error) {
CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
__func__);
- goto out_in_multi_locked;
+ imf_rollback(imf);
+ imf_reap(imf);
+ goto out_inp_locked;
}
}
+ if (is_new)
+ ip_mfilter_insert(&imo->imo_head, imf);
-out_in_multi_locked:
+ imf_commit(imf);
+ imf = NULL;
+out_inp_locked:
+ INP_WUNLOCK(inp);
+out_inp_unlocked:
IN_MULTI_UNLOCK();
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (ENXIO);
- if (error) {
- imf_rollback(imf);
- if (is_new)
- imf_purge(imf);
- else
- imf_reap(imf);
- } else {
- imf_commit(imf);
- }
-out_imo_free:
- if (error && is_new) {
- inm = imo->imo_membership[idx];
- if (inm != NULL) {
+ if (is_new && imf) {
+ if (imf->imf_inm != NULL) {
IN_MULTI_LIST_LOCK();
- inm_release_deferred(inm);
+ inm_release_deferred(imf->imf_inm);
IN_MULTI_LIST_UNLOCK();
}
- imo->imo_membership[idx] = NULL;
- --imo->imo_num_memberships;
+ ip_mfilter_free(imf);
}
-
-out_inp_locked:
- INP_WUNLOCK(inp);
return (error);
}
@@ -2345,12 +2271,12 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
struct ip_moptions *imo;
struct in_msource *ims;
struct in_multi *inm;
- size_t idx;
- int error, is_final;
+ int error;
+ bool is_final;
ifp = NULL;
error = 0;
- is_final = 1;
+ is_final = true;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -2450,20 +2376,21 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
return (EINVAL);
+ IN_MULTI_LOCK();
+
/*
- * Find the membership in the membership array.
+ * Find the membership in the membership list.
*/
imo = inp_findmoptions(inp);
- idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx == -1) {
+ imf = imo_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_inp_locked;
}
- inm = imo->imo_membership[idx];
- imf = &imo->imo_mfilters[idx];
+ inm = imf->imf_inm;
if (ssa->ss.ss_family != AF_UNSPEC)
- is_final = 0;
+ is_final = false;
/*
* Begin state merge transaction at socket layer.
@@ -2475,13 +2402,14 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
* MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
*/
if (is_final) {
+ ip_mfilter_remove(&imo->imo_head, imf);
imf_leave(imf);
} else {
if (imf->imf_st[0] == MCAST_EXCLUDE) {
error = EADDRNOTAVAIL;
goto out_inp_locked;
}
- ims = imo_match_source(imo, idx, &ssa->sa);
+ ims = imo_match_source(imf, &ssa->sa);
if (ims == NULL) {
CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent",
__func__, ntohl(ssa->sin.sin_addr.s_addr), "not ");
@@ -2500,17 +2428,7 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
/*
* Begin state merge transaction at IGMP layer.
*/
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- IN_MULTI_LOCK();
-
- if (is_final) {
- /*
- * Give up the multicast address record to which
- * the membership points.
- */
- (void)in_leavegroup_locked(inm, imf);
- } else {
+ if (!is_final) {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
IN_MULTI_LIST_LOCK();
error = inm_merge(inm, imf);
@@ -2518,7 +2436,9 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
IN_MULTI_LIST_UNLOCK();
- goto out_in_multi_locked;
+ imf_rollback(imf);
+ imf_reap(imf);
+ goto out_inp_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2527,34 +2447,27 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (error) {
CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
__func__);
+ imf_rollback(imf);
+ imf_reap(imf);
+ goto out_inp_locked;
}
}
-
-out_in_multi_locked:
-
- IN_MULTI_UNLOCK();
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (ENXIO);
-
- if (error)
- imf_rollback(imf);
- else
- imf_commit(imf);
-
+ imf_commit(imf);
imf_reap(imf);
- if (is_final) {
- /* Remove the gap in the membership and filter array. */
- for (++idx; idx < imo->imo_num_memberships; ++idx) {
- imo->imo_membership[idx-1] = imo->imo_membership[idx];
- imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
- }
- imo->imo_num_memberships--;
- }
-
out_inp_locked:
INP_WUNLOCK(inp);
+
+ if (is_final && imf) {
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void) in_leavegroup_locked(imf->imf_inm, imf);
+ ip_mfilter_free(imf);
+ }
+
+ IN_MULTI_UNLOCK();
return (error);
}
@@ -2644,7 +2557,6 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct in_mfilter *imf;
struct ip_moptions *imo;
struct in_multi *inm;
- size_t idx;
int error;
error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
@@ -2676,18 +2588,19 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (ifp == NULL)
return (EADDRNOTAVAIL);
+ IN_MULTI_LOCK();
+
/*
* Take the INP write lock.
* Check if this socket is a member of this group.
*/
imo = inp_findmoptions(inp);
- idx = imo_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->imo_mfilters == NULL) {
+ imf = imo_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_inp_locked;
}
- inm = imo->imo_membership[idx];
- imf = &imo->imo_mfilters[idx];
+ inm = imf->imf_inm;
/*
* Begin state merge transaction at socket layer.
@@ -2764,7 +2677,6 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
goto out_imf_rollback;
INP_WLOCK_ASSERT(inp);
- IN_MULTI_LOCK();
/*
* Begin state merge transaction at IGMP layer.
@@ -2775,7 +2687,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
IN_MULTI_LIST_UNLOCK();
- goto out_in_multi_locked;
+ goto out_imf_rollback;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2784,10 +2696,6 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
-out_in_multi_locked:
-
- IN_MULTI_UNLOCK();
-
out_imf_rollback:
if (error)
imf_rollback(imf);
@@ -2798,6 +2706,7 @@ out_imf_rollback:
out_inp_locked:
INP_WUNLOCK(inp);
+ IN_MULTI_UNLOCK();
return (error);
}
@@ -2968,6 +2877,7 @@ static int
sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
{
struct in_addr src, group;
+ struct epoch_tracker et;
struct ifnet *ifp;
struct ifmultiaddr *ifma;
struct in_multi *inm;
@@ -3014,7 +2924,7 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
IN_MULTI_LIST_LOCK();
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
@@ -3043,7 +2953,7 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
IN_MULTI_LIST_UNLOCK();
@@ -3052,7 +2962,14 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
-static const char *inm_modestrs[] = { "un", "in", "ex" };
+static const char *inm_modestrs[] = {
+ [MCAST_UNDEFINED] = "un",
+ [MCAST_INCLUDE] = "in",
+ [MCAST_EXCLUDE] = "ex",
+};
+_Static_assert(MCAST_UNDEFINED == 0 &&
+ MCAST_EXCLUDE + 1 == nitems(inm_modestrs),
+ "inm_modestrs: no longer matches #defines");
static const char *
inm_mode_str(const int mode)
@@ -3064,16 +2981,20 @@ inm_mode_str(const int mode)
}
static const char *inm_statestrs[] = {
- "not-member",
- "silent",
- "idle",
- "lazy",
- "sleeping",
- "awakening",
- "query-pending",
- "sg-query-pending",
- "leaving"
+ [IGMP_NOT_MEMBER] = "not-member",
+ [IGMP_SILENT_MEMBER] = "silent",
+ [IGMP_REPORTING_MEMBER] = "reporting",
+ [IGMP_IDLE_MEMBER] = "idle",
+ [IGMP_LAZY_MEMBER] = "lazy",
+ [IGMP_SLEEPING_MEMBER] = "sleeping",
+ [IGMP_AWAKENING_MEMBER] = "awakening",
+ [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending",
+ [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending",
+ [IGMP_LEAVING_MEMBER] = "leaving",
};
+_Static_assert(IGMP_NOT_MEMBER == 0 &&
+ IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs),
+ "inm_statetrs: no longer matches #defines");
static const char *
inm_state_str(const int state)
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index c00593e5..6147c566 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -92,6 +92,9 @@ __FBSDID("$FreeBSD$");
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#ifdef INET
+#include <netinet/in_var.h>
+#endif
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#ifdef TCPHPTS
@@ -99,16 +102,13 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-#endif
-#ifdef INET
-#include <netinet/in_var.h>
-#endif
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */
+#endif
#include <netipsec/ipsec_support.h>
@@ -216,6 +216,22 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
&VNET_NAME(ipport_randomtime), 0,
"Minimum time to keep sequental port "
"allocation before switching to a random one");
+
+#ifdef RATELIMIT
+counter_u64_t rate_limit_active;
+counter_u64_t rate_limit_alloc_fail;
+counter_u64_t rate_limit_set_ok;
+
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD, 0,
+ "IP Rate Limiting");
+SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD,
+ &rate_limit_active, "Active rate limited connections");
+SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD,
+ &rate_limit_alloc_fail, "Rate limited connection failures");
+SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD,
+ &rate_limit_set_ok, "Rate limited setting succeeded");
+#endif /* RATELIMIT */
+
#endif /* INET */
/*
@@ -516,6 +532,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
if (inp == NULL)
return (ENOBUFS);
bzero(&inp->inp_start_zero, inp_zero_size);
+#ifdef NUMA
+ inp->inp_numa_domain = M_NODOM;
+#endif
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
inp->inp_cred = crhold(so->so_cred);
@@ -1024,6 +1043,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct sockaddr *sa;
struct sockaddr_in *sin;
struct route sro;
+ struct epoch_tracker et;
int error;
KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
@@ -1059,7 +1079,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
* network and try to find a corresponding interface to take
* the source address from.
*/
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
struct in_ifaddr *ia;
struct ifnet *ifp;
@@ -1223,7 +1243,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
}
done:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (sro.ro_rt != NULL)
RTFREE(sro.ro_rt);
return (error);
@@ -1576,6 +1596,7 @@ in_pcbfree_deferred(epoch_context_t ctx)
inp = __containerof(ctx, struct inpcb, inp_epoch_ctx);
INP_WLOCK(inp);
+ CURVNET_SET(inp->inp_vnet);
#ifdef INET
struct ip_moptions *imo = inp->inp_moptions;
inp->inp_moptions = NULL;
@@ -1608,6 +1629,7 @@ in_pcbfree_deferred(epoch_context_t ctx)
#ifdef INET
inp_freemoptions(imo);
#endif
+ CURVNET_RESTORE();
}
/*
@@ -1785,8 +1807,9 @@ void
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
struct inpcb *inp;
+ struct in_multi *inm;
+ struct in_mfilter *imf;
struct ip_moptions *imo;
- int i, gap;
INP_INFO_WLOCK(pcbinfo);
CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
@@ -1807,17 +1830,18 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
*
* XXX This can all be deferred to an epoch_call
*/
- for (i = 0, gap = 0; i < imo->imo_num_memberships;
- i++) {
- if (imo->imo_membership[i]->inm_ifp == ifp) {
- IN_MULTI_LOCK_ASSERT();
- in_leavegroup_locked(imo->imo_membership[i], NULL);
- gap++;
- } else if (gap != 0)
- imo->imo_membership[i - gap] =
- imo->imo_membership[i];
+restart:
+ IP_MFILTER_FOREACH(imf, &imo->imo_head) {
+ if ((inm = imf->imf_inm) == NULL)
+ continue;
+ if (inm->inm_ifp != ifp)
+ continue;
+ ip_mfilter_remove(&imo->imo_head, imf);
+ IN_MULTI_LOCK_ASSERT();
+ in_leavegroup_locked(inm, NULL);
+ ip_mfilter_free(imf);
+ goto restart;
}
- imo->imo_num_memberships -= gap;
}
INP_WUNLOCK(inp);
}
@@ -3174,6 +3198,7 @@ in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate)
{
union if_snd_tag_modify_params params = {
.rate_limit.max_rate = max_pacing_rate,
+ .rate_limit.flags = M_NOWAIT,
};
struct m_snd_tag *mst;
struct ifnet *ifp;
@@ -3260,7 +3285,8 @@ in_pcbquery_txrlevel(struct inpcb *inp, uint32_t *p_txqueue_level)
*/
int
in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
- uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate)
+ uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st)
+
{
union if_snd_tag_alloc_params params = {
.rate_limit.hdr.type = (max_pacing_rate == -1U) ?
@@ -3268,12 +3294,13 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
.rate_limit.hdr.flowid = flowid,
.rate_limit.hdr.flowtype = flowtype,
.rate_limit.max_rate = max_pacing_rate,
+ .rate_limit.flags = M_NOWAIT,
};
int error;
INP_WLOCK_ASSERT(inp);
- if (inp->inp_snd_tag != NULL)
+ if (*st != NULL)
return (EINVAL);
if (ifp->if_snd_tag_alloc == NULL) {
@@ -3281,16 +3308,37 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
} else {
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
- /*
- * At success increment the refcount on
- * the send tag's network interface:
- */
- if (error == 0)
- if_ref(inp->inp_snd_tag->ifp);
+#ifdef INET
+ if (error == 0) {
+ counter_u64_add(rate_limit_set_ok, 1);
+ counter_u64_add(rate_limit_active, 1);
+ } else
+ counter_u64_add(rate_limit_alloc_fail, 1);
+#endif
}
return (error);
}
+void
+in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst)
+{
+ if (ifp == NULL)
+ return;
+
+ /*
+ * If the device was detached while we still had reference(s)
+ * on the ifp, we assume if_snd_tag_free() was replaced with
+ * stubs.
+ */
+ ifp->if_snd_tag_free(mst);
+
+ /* release reference count on network interface */
+ if_rele(ifp);
+#ifdef INET
+ counter_u64_add(rate_limit_active, -1);
+#endif
+}
+
/*
* Free an existing TX rate limit tag based on the "inp->inp_snd_tag",
* if any:
@@ -3299,7 +3347,6 @@ void
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
struct m_snd_tag *mst;
- struct ifnet *ifp;
INP_WLOCK_ASSERT(inp);
@@ -3309,19 +3356,57 @@ in_pcbdetach_txrtlmt(struct inpcb *inp)
if (mst == NULL)
return;
- ifp = mst->ifp;
- if (ifp == NULL)
- return;
+ m_snd_tag_rele(mst);
+}
+
+int
+in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate)
+{
+ int error;
/*
- * If the device was detached while we still had reference(s)
- * on the ifp, we assume if_snd_tag_free() was replaced with
- * stubs.
+ * If the existing send tag is for the wrong interface due to
+ * a route change, first drop the existing tag. Set the
+ * CHANGED flag so that we will keep trying to allocate a new
+ * tag if we fail to allocate one this time.
*/
- ifp->if_snd_tag_free(mst);
+ if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
+ in_pcbdetach_txrtlmt(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ }
- /* release reference count on network interface */
- if_rele(ifp);
+ /*
+ * NOTE: When attaching to a network interface a reference is
+ * made to ensure the network interface doesn't go away until
+ * all ratelimit connections are gone. The network interface
+ * pointers compared below represent valid network interfaces,
+ * except when comparing towards NULL.
+ */
+ if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
+ error = 0;
+ } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
+ if (inp->inp_snd_tag != NULL)
+ in_pcbdetach_txrtlmt(inp);
+ error = 0;
+ } else if (inp->inp_snd_tag == NULL) {
+ /*
+ * In order to utilize packet pacing with RSS, we need
+ * to wait until there is a valid RSS hash before we
+ * can proceed:
+ */
+ if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
+ error = EAGAIN;
+ } else {
+ error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
+ mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag);
+ }
+ } else {
+ error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
+ }
+ if (error == 0 || error == EOPNOTSUPP)
+ inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
+
+ return (error);
}
/*
@@ -3366,36 +3451,8 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
*/
max_pacing_rate = socket->so_max_pacing_rate;
- /*
- * NOTE: When attaching to a network interface a reference is
- * made to ensure the network interface doesn't go away until
- * all ratelimit connections are gone. The network interface
- * pointers compared below represent valid network interfaces,
- * except when comparing towards NULL.
- */
- if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) {
- error = 0;
- } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) {
- if (inp->inp_snd_tag != NULL)
- in_pcbdetach_txrtlmt(inp);
- error = 0;
- } else if (inp->inp_snd_tag == NULL) {
- /*
- * In order to utilize packet pacing with RSS, we need
- * to wait until there is a valid RSS hash before we
- * can proceed:
- */
- if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) {
- error = EAGAIN;
- } else {
- error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb),
- mb->m_pkthdr.flowid, max_pacing_rate);
- }
- } else {
- error = in_pcbmodify_txrtlmt(inp, max_pacing_rate);
- }
- if (error == 0 || error == EOPNOTSUPP)
- inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
+ error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate);
+
if (did_upgrade)
INP_DOWNGRADE(inp);
}
@@ -3406,16 +3463,11 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
void
in_pcboutput_eagain(struct inpcb *inp)
{
- struct socket *socket;
bool did_upgrade;
if (inp == NULL)
return;
- socket = inp->inp_socket;
- if (socket == NULL)
- return;
-
if (inp->inp_snd_tag == NULL)
return;
@@ -3442,4 +3494,16 @@ in_pcboutput_eagain(struct inpcb *inp)
if (did_upgrade)
INP_DOWNGRADE(inp);
}
+
+#ifdef INET
+static void
+rl_init(void *st)
+{
+ rate_limit_active = counter_u64_alloc(M_WAITOK);
+ rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK);
+ rate_limit_set_ok = counter_u64_alloc(M_WAITOK);
+}
+
+SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL);
+#endif
#endif /* RATELIMIT */
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index ecbd7a22..7d15b24b 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -57,9 +57,6 @@
#endif
#include <sys/ck.h>
-#define in6pcb inpcb /* for KAME src sync over BSD*'s */
-#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
-
/*
* struct inpcb is the common protocol control block structure used in most
* IP transport protocols.
@@ -272,7 +269,7 @@ struct inpcb {
inp_hpts_calls :1, /* (i) from output hpts */
inp_input_calls :1, /* (i) from input hpts */
inp_spare_bits2 : 4;
- uint8_t inp_spare_byte; /* Compiler hole */
+ uint8_t inp_numa_domain; /* numa domain */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
@@ -342,7 +339,6 @@ struct inpcb {
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
#define in6p_zoneid inp_inc.inc6_zoneid
-#define in6p_flowinfo inp_flow
#define inp_vnet inp_pcbinfo->ipi_vnet
@@ -632,12 +628,12 @@ int inp_so_options(const struct inpcb *inp);
#define INP_INFO_LOCK_INIT(ipi, d) \
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER_ET((et))
+#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER((et))
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
-#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT_ET(*(tp)->t_inpcb->inp_et)
+#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
+#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT(*(tp)->t_inpcb->inp_et)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
@@ -670,8 +666,8 @@ int inp_so_options(const struct inpcb *inp);
#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT_ET(inp_hash_et)
-#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT_ET((et))
+#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT(inp_hash_et)
+#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
@@ -759,7 +755,9 @@ int inp_so_options(const struct inpcb *inp);
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
#define INP_REUSEPORT_LB 0x00002000 /* SO_REUSEPORT_LB option is set */
-
+#define INP_SUPPORTS_MBUFQ 0x00004000 /* Supports the mbuf queue method of LRO */
+#define INP_MBUF_QUEUE_READY 0x00008000 /* The transport is pacing, inputs can be queued */
+#define INP_DONT_SACK_QUEUE 0x00010000 /* If a sack arrives do not wake me */
/*
* Flags passed to in_pcblookup*() functions.
*/
@@ -771,7 +769,6 @@ int inp_so_options(const struct inpcb *inp);
INPLOOKUP_WLOCKPCB)
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
-#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
@@ -881,8 +878,13 @@ struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
#ifdef RATELIMIT
-int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t);
+int
+in_pcboutput_txrtlmt_locked(struct inpcb *, struct ifnet *,
+ struct mbuf *, uint32_t);
+int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t,
+ uint32_t, struct m_snd_tag **);
void in_pcbdetach_txrtlmt(struct inpcb *);
+void in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst);
int in_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
int in_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
int in_pcbquery_txrlevel(struct inpcb *, uint32_t *);
diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h
index 5b7a464b..50112481 100644
--- a/freebsd/sys/netinet/in_var.h
+++ b/freebsd/sys/netinet/in_var.h
@@ -232,9 +232,61 @@ struct in_mfilter {
struct ip_msource_tree imf_sources; /* source list for (S,G) */
u_long imf_nsrc; /* # of source entries */
uint8_t imf_st[2]; /* state before/at commit */
+ struct in_multi *imf_inm; /* associated multicast address */
+ STAILQ_ENTRY(in_mfilter) imf_entry; /* list entry */
};
/*
+ * Helper types and functions for IPv4 multicast filters.
+ */
+STAILQ_HEAD(ip_mfilter_head, in_mfilter);
+
+struct in_mfilter *ip_mfilter_alloc(int mflags, int st0, int st1);
+void ip_mfilter_free(struct in_mfilter *);
+
+/* Initialize the filter list 'head' with STAILQ_INIT(). */
+static inline void
+ip_mfilter_init(struct ip_mfilter_head *head)
+{
+
+ STAILQ_INIT(head);
+}
+
+/*
+ * Return the first filter on 'head' (STAILQ_FIRST()). Callers compare
+ * the result against NULL to detect a list with no entries.
+ */
+static inline struct in_mfilter *
+ip_mfilter_first(const struct ip_mfilter_head *head)
+{
+
+ return (STAILQ_FIRST(head));
+}
+
+/* Append 'imf' to the tail of 'head', linked through imf_entry. */
+static inline void
+ip_mfilter_insert(struct ip_mfilter_head *head, struct in_mfilter *imf)
+{
+
+ STAILQ_INSERT_TAIL(head, imf, imf_entry);
+}
+
+/*
+ * Unlink 'imf' from 'head' with STAILQ_REMOVE(). The filter itself is
+ * not freed here; that is left to the caller.
+ */
+static inline void
+ip_mfilter_remove(struct ip_mfilter_head *head, struct in_mfilter *imf)
+{
+
+ STAILQ_REMOVE(head, imf, in_mfilter, imf_entry);
+}
+
+/* Iterate 'imf' over every filter on 'head' (wraps STAILQ_FOREACH). */
+#define IP_MFILTER_FOREACH(imf, head) \
+ STAILQ_FOREACH(imf, head, imf_entry)
+
+/*
+ * Return the number of filters on 'head'. The count is obtained by
+ * walking the whole list, so the cost grows with the list length.
+ */
+static inline size_t
+ip_mfilter_count(struct ip_mfilter_head *head)
+{
+ struct in_mfilter *imf;
+ size_t num = 0;
+
+ STAILQ_FOREACH(imf, head, imf_entry)
+ num++;
+ return (num);
+}
+
+/*
* IPv4 group descriptor.
*
* For every entry on an ifnet's if_multiaddrs list which represents
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index 1fb208a2..03ce7bbe 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -646,6 +646,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
struct carp_softc *sc;
uint64_t tmp_counter;
struct timeval sc_tv, ch_tv;
+ struct epoch_tracker et;
int error;
/*
@@ -659,7 +660,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
* (these should never happen, and as noted above, we may
* miss real loops; this is just a double-check).
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
error = 0;
match = NULL;
IFNET_FOREACH_IFA(ifp, ifa) {
@@ -673,7 +674,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
ifa = error ? NULL : match;
if (ifa != NULL)
ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ifa == NULL) {
if (error == ELOOP) {
@@ -881,18 +882,19 @@ carp_send_ad_error(struct carp_softc *sc, int error)
static struct ifaddr *
carp_best_ifa(int af, struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifaddr *ifa, *best;
if (af >= AF_MAX)
return (NULL);
best = NULL;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family == af &&
(best == NULL || ifa_preferred(best, ifa)))
best = ifa;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (best != NULL)
ifa_ref(best);
return (best);
@@ -1169,10 +1171,11 @@ carp_send_na(struct carp_softc *sc)
struct ifaddr *
carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
ifa = NULL;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1184,7 +1187,7 @@ carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
ifa_ref(ifa);
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (ifa);
}
@@ -1192,16 +1195,17 @@ carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
caddr_t
carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
IFNET_FOREACH_IFA(ifp, ifa)
if (ifa->ifa_addr->sa_family == AF_INET6 &&
IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
struct carp_softc *sc = ifa->ifa_carp;
struct m_tag *mtag;
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
mtag = m_tag_get(PACKET_TAG_CARP,
sizeof(struct carp_softc *), M_NOWAIT);
@@ -1214,7 +1218,7 @@ carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
return (LLADDR(&sc->sc_addr));
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (NULL);
}
@@ -1369,25 +1373,24 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
case AF_INET:
{
struct ip_moptions *imo = &cif->cif_imo;
+ struct in_mfilter *imf;
struct in_addr addr;
- if (imo->imo_membership)
+ if (ip_mfilter_first(&imo->imo_head) != NULL)
return (0);
- imo->imo_membership = (struct in_multi **)malloc(
- (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- imo->imo_mfilters = NULL;
- imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
+ ip_mfilter_init(&imo->imo_head);
imo->imo_multicast_vif = -1;
addr.s_addr = htonl(INADDR_CARP_GROUP);
if ((error = in_joingroup(ifp, &addr, NULL,
- &imo->imo_membership[0])) != 0) {
- free(imo->imo_membership, M_CARP);
+ &imf->imf_inm)) != 0) {
+ ip_mfilter_free(imf);
break;
}
- imo->imo_num_memberships++;
+
+ ip_mfilter_insert(&imo->imo_head, imf);
imo->imo_multicast_ifp = ifp;
imo->imo_multicast_ttl = CARP_DFLTTL;
imo->imo_multicast_loop = 0;
@@ -1398,17 +1401,16 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
case AF_INET6:
{
struct ip6_moptions *im6o = &cif->cif_im6o;
+ struct in6_mfilter *im6f[2];
struct in6_addr in6;
- struct in6_multi *in6m;
- if (im6o->im6o_membership)
+ if (ip6_mfilter_first(&im6o->im6o_head))
return (0);
- im6o->im6o_membership = (struct in6_multi **)malloc(
- (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
- M_ZERO | M_WAITOK);
- im6o->im6o_mfilters = NULL;
- im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+ im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
+ im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
+
+ ip6_mfilter_init(&im6o->im6o_head);
im6o->im6o_multicast_hlim = CARP_DFLTTL;
im6o->im6o_multicast_ifp = ifp;
@@ -1417,17 +1419,15 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
in6.s6_addr16[0] = htons(0xff02);
in6.s6_addr8[15] = 0x12;
if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
- free(im6o->im6o_membership, M_CARP);
+ ip6_mfilter_free(im6f[0]);
+ ip6_mfilter_free(im6f[1]);
break;
}
- in6m = NULL;
- if ((error = in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) {
- free(im6o->im6o_membership, M_CARP);
+ if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
+ ip6_mfilter_free(im6f[0]);
+ ip6_mfilter_free(im6f[1]);
break;
}
- in6m_acquire(in6m);
- im6o->im6o_membership[0] = in6m;
- im6o->im6o_num_memberships++;
/* Join solicited multicast address. */
bzero(&in6, sizeof(in6));
@@ -1436,20 +1436,21 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
in6.s6_addr32[2] = htonl(1);
in6.s6_addr32[3] = 0;
in6.s6_addr8[12] = 0xff;
+
if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
- in6_leavegroup(im6o->im6o_membership[0], NULL);
- free(im6o->im6o_membership, M_CARP);
+ ip6_mfilter_free(im6f[0]);
+ ip6_mfilter_free(im6f[1]);
break;
}
- in6m = NULL;
- if ((error = in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) {
- in6_leavegroup(im6o->im6o_membership[0], NULL);
- free(im6o->im6o_membership, M_CARP);
+
+ if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
+ in6_leavegroup(im6f[0]->im6f_in6m, NULL);
+ ip6_mfilter_free(im6f[0]);
+ ip6_mfilter_free(im6f[1]);
break;
}
- in6m_acquire(in6m);
- im6o->im6o_membership[1] = in6m;
- im6o->im6o_num_memberships++;
+ ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
+ ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
break;
}
#endif
@@ -1464,35 +1465,38 @@ carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
static void
carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
{
-
+#ifdef INET
+ struct ip_moptions *imo = &cif->cif_imo;
+ struct in_mfilter *imf;
+#endif
+#ifdef INET6
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+ struct in6_mfilter *im6f;
+#endif
sx_assert(&carp_sx, SA_XLOCKED);
switch (sa) {
#ifdef INET
case AF_INET:
- if (cif->cif_naddrs == 0) {
- struct ip_moptions *imo = &cif->cif_imo;
-
- in_leavegroup(imo->imo_membership[0], NULL);
- KASSERT(imo->imo_mfilters == NULL,
- ("%s: imo_mfilters != NULL", __func__));
- free(imo->imo_membership, M_CARP);
- imo->imo_membership = NULL;
+ if (cif->cif_naddrs != 0)
+ break;
+ while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
+ ip_mfilter_remove(&imo->imo_head, imf);
+ in_leavegroup(imf->imf_inm, NULL);
+ ip_mfilter_free(imf);
}
break;
#endif
#ifdef INET6
case AF_INET6:
- if (cif->cif_naddrs6 == 0) {
- struct ip6_moptions *im6o = &cif->cif_im6o;
-
- in6_leavegroup(im6o->im6o_membership[0], NULL);
- in6_leavegroup(im6o->im6o_membership[1], NULL);
- KASSERT(im6o->im6o_mfilters == NULL,
- ("%s: im6o_mfilters != NULL", __func__));
- free(im6o->im6o_membership, M_CARP);
- im6o->im6o_membership = NULL;
+ if (cif->cif_naddrs6 != 0)
+ break;
+
+ while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
+ ip6_mfilter_remove(&im6o->im6o_head, im6f);
+ in6_leavegroup(im6f->im6f_in6m, NULL);
+ ip6_mfilter_free(im6f);
}
break;
#endif
@@ -2178,21 +2182,6 @@ static struct protosw in6_carp_protosw = {
};
#endif
-#ifdef VIMAGE
-#if defined(__i386__)
-/*
- * XXX This is a hack to work around an absolute relocation outside
- * set_vnet by one (on the stop symbol) for carpstats. Add a dummy variable
- * to the end of the file in the hope that the linker will just keep the
- * order (as it seems to do at the moment). It is understood to be fragile.
- * See PR 230857 for a longer discussion of the problem and the referenced
- * review for possible alternate solutions. Each is a hack; we just need
- * the least intrusive one for the next release.
- */
-VNET_DEFINE(char, carp_zzz) = 0xde;
-#endif
-#endif
-
static void
carp_mod_cleanup(void)
{
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 6a99645e..a7f602b2 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -186,7 +186,7 @@ div_input(struct mbuf **mp, int *offp, int proto)
* then pass them along with mbuf chain.
*/
static void
-divert_packet(struct mbuf *m, int incoming)
+divert_packet(struct mbuf *m, bool incoming)
{
struct ip *ip;
struct inpcb *inp;
@@ -467,19 +467,20 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
* Clear the port and the ifname to make sure
* there are no distractions for ifa_ifwithaddr.
*/
+ struct epoch_tracker et;
struct ifaddr *ifa;
bzero(sin->sin_zero, sizeof(sin->sin_zero));
sin->sin_port = 0;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifa_ifwithaddr((struct sockaddr *) sin);
if (ifa == NULL) {
error = EADDRNOTAVAIL;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
goto cantsend;
}
m->m_pkthdr.rcvif = ifa->ifa_ifp;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
#ifdef MAC
mac_socket_create_mbuf(so, m);
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
index 05deb4d8..1d55c3d2 100644
--- a/freebsd/sys/netinet/ip_fastfwd.c
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -92,11 +92,11 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/sysctl.h>
-#include <net/pfil.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_dl.h>
+#include <net/pfil.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -230,12 +230,11 @@ ip_tryforward(struct mbuf *m)
/*
* Run through list of ipfilter hooks for input packets
*/
- if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ if (!PFIL_HOOKED_IN(V_inet_pfil_head))
goto passin;
- if (pfil_run_hooks(
- &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, 0, NULL) ||
- m == NULL)
+ if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN,
+ NULL) != PFIL_PASS)
goto drop;
M_ASSERTVALID(m);
@@ -323,13 +322,12 @@ passin:
/*
* Step 5: outgoing firewall packet processing
*/
- if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ if (!PFIL_HOOKED_OUT(V_inet_pfil_head))
goto passout;
- if (pfil_run_hooks(&V_inet_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, PFIL_FWD,
- NULL) || m == NULL) {
+ if (pfil_run_hooks(V_inet_pfil_head, &m, nh.nh_ifp,
+ PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS)
goto drop;
- }
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
index 41351215..7a01c82b 100644
--- a/freebsd/sys/netinet/ip_fw.h
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -134,6 +134,13 @@ typedef struct _ip_fw3_opheader {
#define IP_FW_NPTV6_STATS 154 /* Get NPTv6 instance statistics */
#define IP_FW_NPTV6_RESET_STATS 155 /* Reset NPTv6 instance statistics */
+#define IP_FW_NAT64CLAT_CREATE 160 /* Create clat NAT64 instance */
+#define IP_FW_NAT64CLAT_DESTROY 161 /* Destroy clat NAT64 instance */
+#define IP_FW_NAT64CLAT_CONFIG 162 /* Modify clat NAT64 instance */
+#define IP_FW_NAT64CLAT_LIST 163 /* List clat NAT64 instances */
+#define IP_FW_NAT64CLAT_STATS 164 /* Get NAT64CLAT instance statistics */
+#define IP_FW_NAT64CLAT_RESET_STATS 165 /* Reset NAT64CLAT instance statistics */
+
/*
* The kernel representation of ipfw rules is made of a list of
* 'instructions' (for all practical purposes equivalent to BPF
@@ -286,6 +293,7 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_EXTERNAL_DATA, /* variable length data */
O_SKIP_ACTION, /* none */
+ O_TCPMSS, /* arg1=MSS value */
O_LAST_OPCODE /* not an opcode! */
};
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
index 1758bfff..560478d6 100644
--- a/freebsd/sys/netinet/ip_gre.c
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/systm.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
@@ -60,15 +61,19 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_encap.h>
#include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <net/if_gre.h>
+#include <machine/in_cksum.h>
#define GRE_TTL 30
VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
@@ -76,14 +81,22 @@ VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets");
+/*
+ * IPv4 wrapper around the generic struct gre_socket. Code in this file
+ * recovers it from a gre_socket pointer with __containerof(..., base).
+ */
+struct in_gre_socket {
+ struct gre_socket base; /* generic part; embedded, not a pointer */
+ in_addr_t addr; /* address the socket is looked up by */
+};
+VNET_DEFINE_STATIC(struct gre_sockets *, ipv4_sockets) = NULL;
VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL;
VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL;
+#define V_ipv4_sockets VNET(ipv4_sockets)
#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
#define V_ipv4_srchashtbl VNET(ipv4_srchashtbl)
#define GRE_HASH(src, dst) (V_ipv4_hashtbl[\
in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)])
#define GRE_SRCHASH(src) (V_ipv4_srchashtbl[\
fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
+#define GRE_SOCKHASH(src) (V_ipv4_sockets[\
+ fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)])
#define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\
(sc)->gre_oip.ip_dst.s_addr)
@@ -96,17 +109,43 @@ in_gre_hashval(in_addr_t src, in_addr_t dst)
return (fnv_32_buf(&dst, sizeof(dst), ret));
}
+/*
+ * Search the GRE_SOCKHASH(addr) bucket for a socket whose recorded
+ * IPv4 address equals 'addr'. Returns the matching gre_socket; callers
+ * treat a NULL result as "no socket for this address".
+ */
+static struct gre_socket*
+in_gre_lookup_socket(in_addr_t addr)
+{
+ struct gre_socket *gs;
+ struct in_gre_socket *s;
+
+ CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) {
+ /* Step from the generic part to the IPv4 wrapper. */
+ s = __containerof(gs, struct in_gre_socket, base);
+ if (s->addr == addr)
+ break;
+ }
+ return (gs);
+}
+
static int
-in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst)
+in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst,
+ uint32_t opts)
{
+ struct gre_list *head;
struct gre_softc *tmp;
+ struct gre_socket *gs;
if (sc->gre_family == AF_INET &&
sc->gre_oip.ip_src.s_addr == src &&
- sc->gre_oip.ip_dst.s_addr == dst)
+ sc->gre_oip.ip_dst.s_addr == dst &&
+ (sc->gre_options & GRE_UDPENCAP) == (opts & GRE_UDPENCAP))
return (EEXIST);
- CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) {
+ if (opts & GRE_UDPENCAP) {
+ gs = in_gre_lookup_socket(src);
+ if (gs == NULL)
+ return (0);
+ head = &gs->list;
+ } else
+ head = &GRE_HASH(src, dst);
+
+ CK_LIST_FOREACH(tmp, head, chain) {
if (tmp == sc)
continue;
if (tmp->gre_oip.ip_src.s_addr == src &&
@@ -183,35 +222,228 @@ in_gre_srcaddr(void *arg __unused, const struct sockaddr *sa,
}
+/*
+ * UDP tunneling input handler; in_gre_setup_socket() registers it with
+ * udp_set_kernel_tunneling() and passes the gre_socket as 'ctx'.
+ *
+ * The IPv4 address in 'sa' (presumably the sender of the datagram --
+ * confirm against the UDP tunneling callback contract) is matched
+ * against the outer destination address of every tunnel on the
+ * socket's list. On a match with an IFF_UP interface the mbuf is handed
+ * to gre_input() with the offset advanced past the UDP header; in every
+ * other case the mbuf is freed here.
+ */
 static void
+in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
+ const struct sockaddr *sa, void *ctx)
+{
+ struct epoch_tracker et;
+ struct gre_socket *gs;
+ struct gre_softc *sc;
+ in_addr_t dst;
+
+ NET_EPOCH_ENTER(et);
+ /*
+ * udp_append() holds reference to inp, it is safe to check
+ * inp_flags2 without INP_RLOCK().
+ * If socket was closed before we have entered NET_EPOCH section,
+ * INP_FREED flag should be set. Otherwise it should be safe to
+ * make access to ctx data, because gre_so will be freed by
+ * gre_sofree() via epoch_call().
+ */
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return;
+ }
+
+ gs = (struct gre_socket *)ctx;
+ dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
+ /* Find the tunnel on this socket whose remote end is 'dst'. */
+ CK_LIST_FOREACH(sc, &gs->list, chain) {
+ if (sc->gre_oip.ip_dst.s_addr == dst)
+ break;
+ }
+ if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
+ gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
+ NET_EPOCH_EXIT(et);
+ return;
+ }
+ /* No matching tunnel, or its interface is down: drop the packet. */
+ m_freem(m);
+ NET_EPOCH_EXIT(et);
+}
+
+/*
+ * Attach 'sc' to the UDP encapsulation socket for its outer source
+ * address, creating that socket when none exists yet.
+ *
+ * A newly created socket gets in_gre_udp_input() installed as its
+ * tunneling handler, has IP_BINDANY set, is bound to the source address
+ * and GRE_UDPPORT, and is then inserted into the GRE_SOCKHASH() table.
+ *
+ * Returns 0 after linking 'sc' onto the socket's list and storing the
+ * socket in sc->gre_so. Returns the error of the failing step otherwise;
+ * in that case a socket created here is closed, its wrapper freed, and
+ * 'sc' is not linked.
+ */
+static int
+in_gre_setup_socket(struct gre_softc *sc)
+{
+ struct sockopt sopt;
+ struct sockaddr_in sin;
+ struct in_gre_socket *s;
+ struct gre_socket *gs;
+ in_addr_t addr;
+ int error, value;
+
+ /*
+ * NOTE: we are protected with gre_ioctl_sx lock.
+ *
+ * First check whether a socket is already configured.
+ * If so, check that the source address was not changed.
+ * If the address is different, check that there are no other
+ * tunnels and close the socket.
+ */
+ addr = sc->gre_oip.ip_src.s_addr;
+ gs = sc->gre_so;
+ if (gs != NULL) {
+ s = __containerof(gs, struct in_gre_socket, base);
+ if (s->addr != addr) {
+ if (CK_LIST_EMPTY(&gs->list)) {
+ CK_LIST_REMOVE(gs, chain);
+ soclose(gs->so);
+ /* The memory is released later by gre_sofree(). */
+ epoch_call(net_epoch_preempt, &gs->epoch_ctx,
+ gre_sofree);
+ }
+ /* Either way this softc no longer uses the old socket. */
+ gs = sc->gre_so = NULL;
+ }
+ }
+
+ if (gs == NULL) {
+ /*
+ * Check whether a socket for the given address is already
+ * configured.
+ */
+ gs = in_gre_lookup_socket(addr);
+ if (gs == NULL) {
+ s = malloc(sizeof(*s), M_GRE, M_WAITOK | M_ZERO);
+ s->addr = addr;
+ gs = &s->base;
+
+ error = socreate(sc->gre_family, &gs->so,
+ SOCK_DGRAM, IPPROTO_UDP, curthread->td_ucred,
+ curthread);
+ if (error != 0) {
+ if_printf(GRE2IFP(sc),
+ "cannot create socket: %d\n", error);
+ /* No socket exists yet, so only the wrapper is freed. */
+ free(s, M_GRE);
+ return (error);
+ }
+
+ error = udp_set_kernel_tunneling(gs->so,
+ in_gre_udp_input, NULL, gs);
+ if (error != 0) {
+ if_printf(GRE2IFP(sc),
+ "cannot set UDP tunneling: %d\n", error);
+ goto fail;
+ }
+
+ memset(&sopt, 0, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_BINDANY;
+ sopt.sopt_val = &value;
+ sopt.sopt_valsize = sizeof(value);
+ value = 1;
+ error = sosetopt(gs->so, &sopt);
+ if (error != 0) {
+ if_printf(GRE2IFP(sc),
+ "cannot set IP_BINDANY opt: %d\n", error);
+ goto fail;
+ }
+
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_addr.s_addr = addr;
+ sin.sin_port = htons(GRE_UDPPORT);
+ error = sobind(gs->so, (struct sockaddr *)&sin,
+ curthread);
+ if (error != 0) {
+ if_printf(GRE2IFP(sc),
+ "cannot bind socket: %d\n", error);
+ goto fail;
+ }
+ /* Add socket to the chain */
+ CK_LIST_INSERT_HEAD(&GRE_SOCKHASH(addr), gs, chain);
+ }
+ }
+
+ /* Add softc to the socket's list */
+ CK_LIST_INSERT_HEAD(&gs->list, sc, chain);
+ sc->gre_so = gs;
+ return (0);
+fail:
+ /*
+ * Reached only from the creation path above, after socreate()
+ * succeeded: both gs->so and 's' are valid and owned by this function.
+ */
+ soclose(gs->so);
+ free(s, M_GRE);
+ return (error);
+}
+
+/*
+ * Fill in the outer IPv4 header fields and the GRE header of 'sc', then
+ * link it into the lookup structures. With GRE_UDPENCAP set the tunnel
+ * uses the greudp header layout and is attached to a UDP socket through
+ * in_gre_setup_socket(); otherwise it uses the greip layout and goes
+ * into GRE_HASH_SC(). In both cases it is also added to GRE_SRCHASH().
+ *
+ * Returns 0 on success, or the error from in_gre_setup_socket(), in
+ * which case 'sc' has not been linked anywhere.
+ */
+static int
 in_gre_attach(struct gre_softc *sc)
 {
+ struct grehdr *gh;
+ int error;
- sc->gre_hlen = sizeof(struct greip);
+ if (sc->gre_options & GRE_UDPENCAP) {
+ sc->gre_csumflags = CSUM_UDP;
+ sc->gre_hlen = sizeof(struct greudp);
+ sc->gre_oip.ip_p = IPPROTO_UDP;
+ gh = &sc->gre_udphdr->gi_gre;
+ gre_update_udphdr(sc, &sc->gre_udp,
+ in_pseudo(sc->gre_oip.ip_src.s_addr,
+ sc->gre_oip.ip_dst.s_addr, 0));
+ } else {
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_iphdr->gi_gre;
+ }
 sc->gre_oip.ip_v = IPVERSION;
 sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
- sc->gre_oip.ip_p = IPPROTO_GRE;
- gre_updatehdr(sc, &sc->gre_gihdr->gi_gre);
- CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
+ gre_update_hdr(sc, gh);
+
+ /*
+ * If we return error, this means that sc is not linked,
+ * and caller should reset gre_family and free(sc->gre_hdr).
+ */
+ if (sc->gre_options & GRE_UDPENCAP) {
+ error = in_gre_setup_socket(sc);
+ if (error != 0)
+ return (error);
+ } else
+ CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain);
 CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr),
 sc, srchash);
+
+ /* Set IFF_DRV_RUNNING if interface is ready */
+ in_gre_set_running(sc);
+ return (0);
 }
+/*
+ * Apply a GRESKEY, GRESOPTS or GRESPORT request to an AF_INET tunnel:
+ * unlink 'sc' from its lists, wait with GRE_WAIT(), store 'value' in
+ * the field selected by 'cmd', and re-attach with in_gre_attach().
+ *
+ * Returns EEXIST when a GRESOPTS request would toggle GRE_UDPENCAP and
+ * in_gre_checkdup() reports EADDRNOTAVAIL for the resulting tunnel.
+ * Otherwise returns the result of in_gre_attach(); on failure
+ * gre_family is reset to 0 and gre_hdr is freed.
+ * NOTE(review): sc->gre_hdr is not cleared after the free() in this
+ * fragment -- confirm no later path dereferences or frees it again.
+ */
-void
+int
 in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value)
 {
-
- MPASS(cmd == GRESKEY || cmd == GRESOPTS);
+ int error;
 /* NOTE: we are protected with gre_ioctl_sx lock */
+ MPASS(cmd == GRESKEY || cmd == GRESOPTS || cmd == GRESPORT);
 MPASS(sc->gre_family == AF_INET);
+
+ /*
+ * If we are going to change the encapsulation protocol, check
+ * for duplicate tunnels. Return EEXIST here so as not to confuse
+ * the user.
+ */
+ if (cmd == GRESOPTS &&
+ (sc->gre_options & GRE_UDPENCAP) != (value & GRE_UDPENCAP) &&
+ in_gre_checkdup(sc, sc->gre_oip.ip_src.s_addr,
+ sc->gre_oip.ip_dst.s_addr, value) == EADDRNOTAVAIL)
+ return (EEXIST);
+
 CK_LIST_REMOVE(sc, chain);
 CK_LIST_REMOVE(sc, srchash);
 GRE_WAIT();
- if (cmd == GRESKEY)
+ switch (cmd) {
+ case GRESKEY:
 sc->gre_key = value;
- else
+ break;
+ case GRESOPTS:
 sc->gre_options = value;
- in_gre_attach(sc);
+ break;
+ case GRESPORT:
+ sc->gre_port = value;
+ break;
+ }
+ error = in_gre_attach(sc);
+ if (error != 0) {
+ sc->gre_family = 0;
+ free(sc->gre_hdr, M_GRE);
+ }
+ return (error);
 }
int
@@ -243,9 +475,10 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
if (V_ipv4_hashtbl == NULL) {
V_ipv4_hashtbl = gre_hashinit();
V_ipv4_srchashtbl = gre_hashinit();
+ V_ipv4_sockets = (struct gre_sockets *)gre_hashinit();
}
error = in_gre_checkdup(sc, src->sin_addr.s_addr,
- dst->sin_addr.s_addr);
+ dst->sin_addr.s_addr, sc->gre_options);
if (error == EADDRNOTAVAIL)
break;
if (error == EEXIST) {
@@ -253,7 +486,7 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
error = 0;
break;
}
- ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t),
+ ip = malloc(sizeof(struct greudp) + 3 * sizeof(uint32_t),
M_GRE, M_WAITOK | M_ZERO);
ip->ip_src.s_addr = src->sin_addr.s_addr;
ip->ip_dst.s_addr = dst->sin_addr.s_addr;
@@ -269,8 +502,11 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data)
sc->gre_hdr = ip;
sc->gre_oseq = 0;
sc->gre_iseq = UINT32_MAX;
- in_gre_attach(sc);
- in_gre_set_running(sc);
+ error = in_gre_attach(sc);
+ if (error != 0) {
+ sc->gre_family = 0;
+ free(sc->gre_hdr, M_GRE);
+ }
break;
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
@@ -356,5 +592,6 @@ in_gre_uninit(void)
V_ipv4_hashtbl = NULL;
GRE_WAIT();
gre_hashdestroy(V_ipv4_srchashtbl);
+ gre_hashdestroy((struct gre_list *)V_ipv4_sockets);
}
}
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index 7e75f3d9..e86d0afc 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -156,7 +156,7 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW,
VNET_DEFINE_STATIC(int, icmptstamprepl) = 1;
#define V_icmptstamprepl VNET(icmptstamprepl)
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmptstamprepl), 0,
"Respond to ICMP Timestamp packets");
@@ -395,6 +395,7 @@ freeit:
int
icmp_input(struct mbuf **mp, int *offp, int proto)
{
+ struct epoch_tracker et;
struct icmp *icp;
struct in_ifaddr *ia;
struct mbuf *m = *mp;
@@ -422,7 +423,7 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
inet_ntoa_r(ip->ip_dst, dstbuf), icmplen);
}
#endif
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
if (icmplen < ICMP_MINLEN) {
ICMPSTAT_INC(icps_tooshort);
goto freeit;
@@ -430,7 +431,7 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
i = hlen + min(icmplen, ICMP_ADVLENMIN);
if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
ICMPSTAT_INC(icps_tooshort);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
@@ -548,7 +549,7 @@ icmp_input(struct mbuf **mp, int *offp, int proto)
if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
/* This should actually not happen */
ICMPSTAT_INC(icps_tooshort);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
@@ -641,7 +642,7 @@ reflect:
ICMPSTAT_INC(icps_reflect);
ICMPSTAT_INC(icps_outhist[icp->icmp_type]);
icmp_reflect(m);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (IPPROTO_DONE);
case ICMP_REDIRECT:
@@ -718,13 +719,13 @@ reflect:
}
raw:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
*mp = m;
rip_input(mp, offp, proto);
return (IPPROTO_DONE);
freeit:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -736,6 +737,7 @@ static void
icmp_reflect(struct mbuf *m)
{
struct rm_priotracker in_ifa_tracker;
+ struct epoch_tracker et;
struct ip *ip = mtod(m, struct ip *);
struct ifaddr *ifa;
struct ifnet *ifp;
@@ -779,7 +781,7 @@ icmp_reflect(struct mbuf *m)
*/
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
@@ -787,11 +789,11 @@ icmp_reflect(struct mbuf *m)
if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
t.s_addr) {
t = IA_SIN(ia)->sin_addr;
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto match;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
/*
* If the packet was transiting through us, use the address of
@@ -800,16 +802,16 @@ icmp_reflect(struct mbuf *m)
* criteria apply.
*/
if (V_icmp_rfi && ifp != NULL) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ia = ifatoia(ifa);
t = IA_SIN(ia)->sin_addr;
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto match;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
/*
* If the incoming packet was not addressed directly to us, use
@@ -818,16 +820,16 @@ icmp_reflect(struct mbuf *m)
* with normal source selection.
*/
if (V_reply_src[0] != '\0' && (ifp = ifunit(V_reply_src))) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ia = ifatoia(ifa);
t = IA_SIN(ia)->sin_addr;
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto match;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
/*
* If the packet was transiting through us, use the address of
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index 136a774f..4dc4acd0 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -59,11 +59,11 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/sysctl.h>
-#include <net/pfil.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_dl.h>
+#include <net/pfil.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/rss_config.h>
@@ -136,7 +136,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_checkinterface), 0,
"Verify packet arrives on correct interface");
-VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */
+VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */
static struct netisr_handler ip_nh = {
.nh_name = "ip",
@@ -303,6 +303,7 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
void
ip_init(void)
{
+ struct pfil_head_args args;
struct protosw *pr;
int i;
@@ -313,11 +314,11 @@ ip_init(void)
ipreass_init();
/* Initialize packet filter hooks. */
- V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
- V_inet_pfil_hook.ph_af = AF_INET;
- if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
- printf("%s: WARNING: unable to register pfil hook, "
- "error %d\n", __func__, i);
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = PFIL_IN | PFIL_OUT;
+ args.pa_type = PFIL_TYPE_IP4;
+ args.pa_headname = PFIL_INET_NAME;
+ V_inet_pfil_head = pfil_head_register(&args);
if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
&V_ipsec_hhh_in[HHOOK_IPSEC_INET],
@@ -379,10 +380,7 @@ ip_destroy(void *unused __unused)
#endif
netisr_unregister_vnet(&ip_nh);
- if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
- printf("%s: WARNING: unable to unregister pfil hook, "
- "error %d\n", __func__, error);
-
+ pfil_head_unregister(V_inet_pfil_head);
error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
if (error != 0) {
printf("%s: WARNING: unable to deregister input helper hook "
@@ -503,10 +501,10 @@ ip_input(struct mbuf *m)
IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
- /* 127/8 must not appear on wire - RFC1122 */
+ /* IN_LOOPBACK must not appear on the wire - RFC1122 */
ifp = m->m_pkthdr.rcvif;
- if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
- (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
+ IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
IPSTAT_INC(ips_badaddr);
goto bad;
@@ -601,11 +599,12 @@ tooshort:
*/
/* Jump over all PFIL processing if hooks are not active. */
- if (!PFIL_HOOKED(&V_inet_pfil_hook))
+ if (!PFIL_HOOKED_IN(V_inet_pfil_head))
goto passin;
odst = ip->ip_dst;
- if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, 0, NULL) != 0)
+ if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
+ PFIL_PASS)
return;
if (m == NULL) /* consumed by filter */
return;
@@ -711,7 +710,9 @@ passin:
* into the stack for SIMPLEX interfaces handled by ether_output().
*/
if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
- IF_ADDR_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
@@ -721,7 +722,7 @@ passin:
counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto ours;
}
#ifdef BOOTP_COMPAT
@@ -729,12 +730,12 @@ passin:
counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto ours;
}
#endif
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
ia = NULL;
}
/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
@@ -954,6 +955,7 @@ ip_forward(struct mbuf *m, int srcrt)
struct sockaddr_in *sin;
struct in_addr dest;
struct route ro;
+ struct epoch_tracker et;
int error, type = 0, code = 0, mtu = 0;
if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
@@ -982,7 +984,7 @@ ip_forward(struct mbuf *m, int srcrt)
#else
in_rtalloc_ign(&ro, 0, M_GETFIB(m));
#endif
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
if (ro.ro_rt != NULL) {
ia = ifatoia(ro.ro_rt->rt_ifa);
} else
@@ -1134,7 +1136,7 @@ ip_forward(struct mbuf *m, int srcrt)
}
icmp_error(mcopy, type, code, dest.s_addr, mtu);
out:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
#define CHECK_SO_CT(sp, ct) \
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index 987549c6..c939196a 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -876,16 +876,18 @@ add_vif(struct vifctl *vifcp)
*/
ifp = NULL;
} else {
+ struct epoch_tracker et;
+
sin.sin_addr = vifcp->vifc_lcl_addr;
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
if (ifa == NULL) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
VIF_UNLOCK();
return EADDRNOTAVAIL;
}
ifp = ifa->ifa_ifp;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
@@ -1680,7 +1682,6 @@ static void
send_packet(struct vif *vifp, struct mbuf *m)
{
struct ip_moptions imo;
- struct in_multi *imm[2];
int error __unused;
VIF_LOCK_ASSERT();
@@ -1689,9 +1690,7 @@ send_packet(struct vif *vifp, struct mbuf *m)
imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
imo.imo_multicast_loop = 1;
imo.imo_multicast_vif = -1;
- imo.imo_num_memberships = 0;
- imo.imo_max_memberships = 2;
- imo.imo_membership = &imm[0];
+ STAILQ_INIT(&imo.imo_head);
/*
* Re-entrancy should not be a problem here, because
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
index c90077ea..fbbe96d3 100644
--- a/freebsd/sys/netinet/ip_options.c
+++ b/freebsd/sys/netinet/ip_options.c
@@ -111,6 +111,7 @@ ip_dooptions(struct mbuf *m, int pass)
uint32_t ntime;
struct nhop4_extended nh_ext;
struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+ struct epoch_tracker et;
/* Ignore or reject packets with IP options. */
if (V_ip_doopts == 0)
@@ -121,7 +122,7 @@ ip_dooptions(struct mbuf *m, int pass)
goto bad_unlocked;
}
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
dst = ip->ip_dst;
cp = (u_char *)(ip + 1);
cnt = (ip->ip_hl << 2) - sizeof (struct ip);
@@ -227,7 +228,7 @@ dropit:
#endif
IPSTAT_INC(ips_cantforward);
m_freem(m);
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (1);
}
}
@@ -382,14 +383,14 @@ dropit:
cp[IPOPT_OFFSET] += sizeof(uint32_t);
}
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
if (forward && V_ipforwarding) {
ip_forward(m, 1);
return (1);
}
return (0);
bad:
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
bad_unlocked:
icmp_error(m, type, code, 0, 0);
IPSTAT_INC(ips_badoptions);
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index 477388d7..1a068b87 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -37,17 +37,19 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_ratelimit.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_mbuf_stress_test.h>
#include <rtems/bsd/local/opt_mpath.h>
+#include <rtems/bsd/local/opt_ratelimit.h>
#include <rtems/bsd/local/opt_route.h>
-#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_rss.h>
+#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -74,6 +76,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
@@ -110,24 +113,33 @@ extern int in_mcast_loop;
extern struct protosw inetsw[];
static inline int
-ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
- struct sockaddr_in *dst, int *fibnum, int *error)
+ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
+ struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error)
{
struct m_tag *fwd_tag = NULL;
struct mbuf *m;
struct in_addr odst;
struct ip *ip;
+ int pflags = PFIL_OUT;
+
+ if (flags & IP_FORWARDING)
+ pflags |= PFIL_FWD;
m = *mp;
ip = mtod(m, struct ip *);
/* Run through list of hooks for output packets. */
odst.s_addr = ip->ip_dst.s_addr;
- *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp);
- m = *mp;
- if ((*error) != 0 || m == NULL)
+ switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, pflags, inp)) {
+ case PFIL_DROPPED:
+ *error = EPERM;
+ /* FALLTHROUGH */
+ case PFIL_CONSUMED:
return 1; /* Finished */
-
+ case PFIL_PASS:
+ *error = 0;
+ }
+ m = *mp;
ip = mtod(m, struct ip *);
/* See if destination IP address was changed by packet filter. */
@@ -200,6 +212,83 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
return 0;
}
+static int
+ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr_in *gw, struct route *ro)
+{
+#ifdef KERN_TLS
+ struct ktls_session *tls = NULL;
+#endif
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef KERN_TLS
+ /*
+ * If this is an unencrypted TLS record, save a reference to
+ * the record. This local reference is used to call
+ * ktls_output_eagain after the mbuf has been freed (thus
+ * dropping the mbuf's reference) in if_output.
+ */
+ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
+ tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls);
+ mst = tls->snd_tag;
+
+ /*
+ * If a TLS session doesn't have a valid tag, it must
+ * have had an earlier ifp mismatch, so drop this
+ * packet.
+ */
+ if (mst == NULL) {
+ error = EAGAIN;
+ goto done;
+ }
+ }
+#endif
+#ifdef RATELIMIT
+ if (inp != NULL && mst == NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef KERN_TLS
+ if (tls != NULL) {
+ if (error == EAGAIN)
+ error = ktls_output_eagain(inp, tls);
+ ktls_free(tls);
+ }
+#endif
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@@ -217,23 +306,24 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
struct ip_moptions *imo, struct inpcb *inp)
{
struct rm_priotracker in_ifa_tracker;
+ struct epoch_tracker et;
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m0;
int hlen = sizeof (struct ip);
int mtu;
int error = 0;
- struct sockaddr_in *dst;
+ struct sockaddr_in *dst, sin;
const struct sockaddr_in *gw;
struct in_ifaddr *ia;
+ struct in_addr src;
int isbroadcast;
uint16_t ip_len, ip_off;
- struct route iproute;
- struct rtentry *rte; /* cache for ro->ro_rt */
uint32_t fibnum;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
int no_route_but_check_spd = 0;
#endif
+
M_ASSERTPKTHDR(m);
if (inp != NULL) {
@@ -243,11 +333,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
}
- }
-
- if (ro == NULL) {
- ro = &iproute;
- bzero(ro, sizeof (*ro));
+#ifdef NUMA
+ m->m_pkthdr.numa_domain = inp->inp_numa_domain;
+#endif
}
if (opt) {
@@ -274,26 +362,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
/*
* dst/gw handling:
*
- * dst can be rewritten but always points to &ro->ro_dst.
* gw is readonly but can point either to dst OR rt_gateway,
* therefore we need restore gw if we're redoing lookup.
*/
- gw = dst = (struct sockaddr_in *)&ro->ro_dst;
fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
- rte = ro->ro_rt;
- if (rte == NULL) {
+ if (ro != NULL)
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ else
+ dst = &sin;
+ if (ro == NULL || ro->ro_rt == NULL) {
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = ip->ip_dst;
}
- NET_EPOCH_ENTER();
+ gw = dst;
+ NET_EPOCH_ENTER(et);
again:
/*
* Validate route against routing table additions;
* a better/more specific route might have been added.
*/
- if (inp)
+ if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
/*
* If there is a cached route,
@@ -303,15 +393,12 @@ again:
* cache with IPv6.
* Also check whether routing cache needs invalidation.
*/
- rte = ro->ro_rt;
- if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp) ||
- dst->sin_family != AF_INET ||
- dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+ if (ro != NULL && ro->ro_rt != NULL &&
+ ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
+ dst->sin_family != AF_INET ||
+ dst->sin_addr.s_addr != ip->ip_dst.s_addr))
RO_INVALIDATE_CACHE(ro);
- rte = NULL;
- }
ia = NULL;
/*
* If routing to interface only, short circuit routing lookup.
@@ -331,8 +418,10 @@ again:
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
+ mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = 1;
+ src = IA_SIN(ia)->sin_addr;
} else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
M_GETFIB(m)))) == NULL &&
@@ -343,9 +432,11 @@ again:
goto bad;
}
ifp = ia->ia_ifp;
+ mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = ifp->if_flags & IFF_BROADCAST ?
in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
+ src = IA_SIN(ia)->sin_addr;
} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
imo != NULL && imo->imo_multicast_ifp != NULL) {
/*
@@ -353,15 +444,21 @@ again:
* packets if the interface is specified.
*/
ifp = imo->imo_multicast_ifp;
+ mtu = ifp->if_mtu;
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
isbroadcast = 0; /* fool gcc */
- } else {
- /*
- * We want to do any cloning requested by the link layer,
- * as this is probably required in all cases for correct
- * operation (as it is for ARP).
- */
- if (rte == NULL) {
+ /* Interface may have no addresses. */
+ if (ia != NULL)
+ src = IA_SIN(ia)->sin_addr;
+ else
+ src.s_addr = INADDR_ANY;
+ } else if (ro != NULL) {
+ if (ro->ro_rt == NULL) {
+ /*
+ * We want to do any cloning requested by the link
+ * layer, as this is probably required in all cases
+ * for correct operation (as it is for ARP).
+ */
#ifdef RADIX_MPATH
rtalloc_mpath_fib(ro,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
@@ -369,12 +466,47 @@ again:
#else
in_rtalloc_ign(ro, 0, fibnum);
#endif
- rte = ro->ro_rt;
+ if (ro->ro_rt == NULL ||
+ (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ro->ro_rt->rt_ifp == NULL ||
+ !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+ /*
+ * There is no route for this packet, but it is
+ * possible that a matching SPD entry exists.
+ */
+ no_route_but_check_spd = 1;
+ mtu = 0; /* Silence GCC warning. */
+ goto sendit;
+#endif
+ IPSTAT_INC(ips_noroute);
+ error = EHOSTUNREACH;
+ goto bad;
+ }
}
- if (rte == NULL ||
- (rte->rt_flags & RTF_UP) == 0 ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp)) {
+ ia = ifatoia(ro->ro_rt->rt_ifa);
+ ifp = ro->ro_rt->rt_ifp;
+ counter_u64_add(ro->ro_rt->rt_pksent, 1);
+ rt_update_ro_flags(ro);
+ if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+ gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ if (ro->ro_rt->rt_flags & RTF_HOST)
+ isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+ else if (ifp->if_flags & IFF_BROADCAST)
+ isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+ else
+ isbroadcast = 0;
+ if (ro->ro_rt->rt_flags & RTF_HOST)
+ mtu = ro->ro_rt->rt_mtu;
+ else
+ mtu = ifp->if_mtu;
+ src = IA_SIN(ia)->sin_addr;
+ } else {
+ struct nhop4_extended nh;
+
+ bzero(&nh, sizeof(nh));
+ if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
+ 0) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* There is no route for this packet, but it is
@@ -388,31 +520,29 @@ again:
error = EHOSTUNREACH;
goto bad;
}
- ia = ifatoia(rte->rt_ifa);
- ifp = rte->rt_ifp;
- counter_u64_add(rte->rt_pksent, 1);
- rt_update_ro_flags(ro);
- if (rte->rt_flags & RTF_GATEWAY)
- gw = (struct sockaddr_in *)rte->rt_gateway;
- if (rte->rt_flags & RTF_HOST)
- isbroadcast = (rte->rt_flags & RTF_BROADCAST);
- else if (ifp->if_flags & IFF_BROADCAST)
- isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
- else
- isbroadcast = 0;
+ ifp = nh.nh_ifp;
+ mtu = nh.nh_mtu;
+ /*
+ * We are rewriting here dst to be gw actually, contradicting
+ * comment at the beginning of the function. However, in this
+ * case we are always dealing with on stack dst.
+ * In case if pfil(9) sends us back to beginning of the
+ * function, the dst would be rewritten by ip_output_pfil().
+ */
+ MPASS(dst == &sin);
+ dst->sin_addr = nh.nh_addr;
+ ia = nh.nh_ia;
+ src = nh.nh_src;
+ isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
+ (NHF_HOST | NHF_BROADCAST)) ||
+ ((ifp->if_flags & IFF_BROADCAST) &&
+ in_ifaddr_broadcast(dst->sin_addr, ia)));
}
- /*
- * Calculate MTU. If we have a route that is up, use that,
- * otherwise use the interface's MTU.
- */
- if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
- mtu = rte->rt_mtu;
- else
- mtu = ifp->if_mtu;
/* Catch a possible divide by zero later. */
- KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
- __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+ KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
+ __func__, mtu, ro,
+ (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
@@ -448,11 +578,8 @@ again:
* If source address not specified yet, use address
* of outgoing interface.
*/
- if (ip->ip_src.s_addr == INADDR_ANY) {
- /* Interface may have no addresses. */
- if (ia != NULL)
- ip->ip_src = IA_SIN(ia)->sin_addr;
- }
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ ip->ip_src = src;
if ((imo == NULL && in_mcast_loop) ||
(imo && imo->imo_multicast_loop)) {
@@ -515,12 +642,8 @@ again:
* If the source address is not specified yet, use the address
* of the outoing interface.
*/
- if (ip->ip_src.s_addr == INADDR_ANY) {
- /* Interface may have no addresses. */
- if (ia != NULL) {
- ip->ip_src = IA_SIN(ia)->sin_addr;
- }
- }
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ ip->ip_src = src;
/*
* Look for broadcast address and
@@ -569,8 +692,9 @@ sendit:
#endif /* IPSEC */
/* Jump over all PFIL processing if hooks are not active. */
- if (PFIL_HOOKED(&V_inet_pfil_hook)) {
- switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
+ if (PFIL_HOOKED_OUT(V_inet_pfil_head)) {
+ switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum,
+ &error)) {
case 1: /* Finished */
goto done;
@@ -580,9 +704,10 @@ sendit:
case -1: /* Need to try again */
/* Reset everything for a new round */
- RO_RTFREE(ro);
- ro->ro_prepend = NULL;
- rte = NULL;
+ if (ro != NULL) {
+ RO_RTFREE(ro);
+ ro->ro_prepend = NULL;
+ }
gw = dst;
ip = mtod(m, struct ip *);
goto again;
@@ -590,9 +715,9 @@ sendit:
}
}
- /* 127/8 must not appear on wire - RFC1122. */
- if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
- (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ /* IN_LOOPBACK must not appear on the wire - RFC1122. */
+ if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
+ IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
IPSTAT_INC(ips_badaddr);
error = EADDRNOTAVAIL;
@@ -602,11 +727,30 @@ sendit:
m->m_pkthdr.csum_flags |= CSUM_IP;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ IPSTAT_INC(ips_odropped);
+ error = ENOBUFS;
+ goto bad;
+ }
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ } else if ((ifp->if_capenable & IFCAP_NOMAP) == 0) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ IPSTAT_INC(ips_odropped);
+ error = ENOBUFS;
+ goto bad;
+ }
}
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ IPSTAT_INC(ips_odropped);
+ error = ENOBUFS;
+ goto bad;
+ }
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
@@ -649,23 +793,7 @@ sendit:
*/
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
goto done;
}
@@ -701,23 +829,7 @@ sendit:
IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
mtod(m, struct ip *), NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
} else
m_freem(m);
}
@@ -726,16 +838,7 @@ sendit:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute)
- RO_RTFREE(ro);
- else if (rte == NULL)
- /*
- * If the caller supplied a route but somehow the reference
- * to it has been released need to prevent the caller
- * calling RTFREE on it again.
- */
- ro->ro_rt = NULL;
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (error);
bad:
m_freem(m);
@@ -783,11 +886,23 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
* fragmented packets, then do it here.
*/
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m0 = mb_unmapped_to_ext(m0);
+ if (m0 == NULL) {
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#ifdef SCTP
if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
+ m0 = mb_unmapped_to_ext(m0);
+ if (m0 == NULL) {
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
sctp_delayed_cksum(m0, hlen);
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
@@ -1264,7 +1379,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
if (inp->inp_options) {
struct mbuf *options;
- options = m_dup(inp->inp_options, M_NOWAIT);
+ options = m_copym(inp->inp_options, 0,
+ M_COPYALL, M_NOWAIT);
INP_RUNLOCK(inp);
if (options != NULL) {
error = sooptcopyout(sopt,
diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c
index 70a6edae..63353485 100644
--- a/freebsd/sys/netinet/ip_reass.c
+++ b/freebsd/sys/netinet/ip_reass.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
+#include <sys/kernel.h>
#include <sys/hash.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h
index 86615a15..7580a7b4 100644
--- a/freebsd/sys/netinet/ip_var.h
+++ b/freebsd/sys/netinet/ip_var.h
@@ -82,6 +82,7 @@ struct ipoption {
char ipopt_list[MAX_IPOPTLEN]; /* options proper */
};
+#if defined(_NETINET_IN_VAR_H_) && defined(_KERNEL)
/*
* Structure attached to inpcb.ip_moptions and
* passed to ip_output when IP multicast options are in use.
@@ -93,12 +94,11 @@ struct ip_moptions {
u_long imo_multicast_vif; /* vif num outgoing multicasts */
u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
u_char imo_multicast_loop; /* 1 => hear sends if a member */
- u_short imo_num_memberships; /* no. memberships this socket */
- u_short imo_max_memberships; /* max memberships this socket */
- struct in_multi **imo_membership; /* group memberships */
- struct in_mfilter *imo_mfilters; /* source filters */
- struct epoch_context imo_epoch_ctx;
+ struct ip_mfilter_head imo_head; /* group membership list */
};
+#else
+struct ip_moptions;
+#endif
struct ipstat {
uint64_t ips_total; /* total packets received */
@@ -241,8 +241,9 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
extern void (*ip_rsvp_force_done)(struct socket *);
extern int (*rsvp_input_p)(struct mbuf **, int *, int);
-VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */
-#define V_inet_pfil_hook VNET(inet_pfil_hook)
+VNET_DECLARE(struct pfil_head *, inet_pfil_head);
+#define V_inet_pfil_head VNET(inet_pfil_head)
+#define PFIL_INET_NAME "inet"
void in_delayed_cksum(struct mbuf *m);
@@ -291,13 +292,11 @@ VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr)
/* Divert hooks. */
-extern void (*ip_divert_ptr)(struct mbuf *m, int incoming);
+extern void (*ip_divert_ptr)(struct mbuf *m, bool incoming);
/* ng_ipfw hooks -- XXX make it the same as divert and dummynet */
-extern int (*ng_ipfw_input_p)(struct mbuf **, int,
- struct ip_fw_args *, int);
-
+extern int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
extern int (*ip_dn_ctl_ptr)(struct sockopt *);
-extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
+extern int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
#endif /* _KERNEL */
#endif /* !_NETINET_IP_VAR_H_ */
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.c b/freebsd/sys/netinet/libalias/alias_sctp.c
index 4f7d4940..ea05cc4d 100644
--- a/freebsd/sys/netinet/libalias/alias_sctp.c
+++ b/freebsd/sys/netinet/libalias/alias_sctp.c
@@ -77,6 +77,7 @@
#ifdef _KERNEL
#include <machine/stdarg.h>
#include <sys/param.h>
+#include <sys/gsb_crc32.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
diff --git a/freebsd/sys/netinet/netdump/netdump.h b/freebsd/sys/netinet/netdump/netdump.h
index 12a527ee..4e952332 100644
--- a/freebsd/sys/netinet/netdump/netdump.h
+++ b/freebsd/sys/netinet/netdump/netdump.h
@@ -60,20 +60,20 @@ struct netdump_ack {
uint32_t na_seqno; /* Match acks with msgs. */
} __packed;
-struct netdump_conf {
+struct netdump_conf_freebsd12 {
#ifndef __rtems__
- struct diocskerneldump_arg ndc_kda;
+ struct diocskerneldump_arg_freebsd12 ndc12_kda;
#endif /* __rtems__ */
- char ndc_iface[IFNAMSIZ];
- struct in_addr ndc_server;
- struct in_addr ndc_client;
- struct in_addr ndc_gateway;
+ char ndc12_iface[IFNAMSIZ];
+ struct in_addr ndc12_server;
+ struct in_addr ndc12_client;
+ struct in_addr ndc12_gateway;
};
-#define _PATH_NETDUMP "/dev/netdump"
+#define NETDUMPGCONF_FREEBSD12 _IOR('n', 1, struct netdump_conf_freebsd12)
+#define NETDUMPSCONF_FREEBSD12 _IOW('n', 2, struct netdump_conf_freebsd12)
-#define NETDUMPGCONF _IOR('n', 1, struct netdump_conf)
-#define NETDUMPSCONF _IOW('n', 2, struct netdump_conf)
+#define _PATH_NETDUMP "/dev/netdump"
#ifdef _KERNEL
#ifdef NETDUMP
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index 1cac8d12..17217a5c 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -102,10 +102,9 @@ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
int (*ip_dn_ctl_ptr)(struct sockopt *);
-int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
-void (*ip_divert_ptr)(struct mbuf *, int);
-int (*ng_ipfw_input_p)(struct mbuf **, int,
- struct ip_fw_args *, int);
+int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *);
+void (*ip_divert_ptr)(struct mbuf *, bool);
+int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool);
#ifdef INET
/*
@@ -456,6 +455,8 @@ rip_output(struct mbuf *m, struct socket *so, ...)
u_long dst;
int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
IP_ALLOWBROADCAST;
+ int cnt, hlen;
+ u_char opttype, optlen, *cp;
va_start(ap, so);
dst = va_arg(ap, u_long);
@@ -510,26 +511,61 @@ rip_output(struct mbuf *m, struct socket *so, ...)
m_freem(m);
return(EMSGSIZE);
}
- INP_RLOCK(inp);
ip = mtod(m, struct ip *);
- error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
- if (error != 0) {
- INP_RUNLOCK(inp);
- m_freem(m);
- return (error);
+ hlen = ip->ip_hl << 2;
+ if (m->m_len < hlen) {
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ return (EINVAL);
+ ip = mtod(m, struct ip *);
}
+ INP_RLOCK(inp);
/*
* Don't allow both user specified and setsockopt options,
* and don't allow packet length sizes that will crash.
*/
- if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
- || (ntohs(ip->ip_len) != m->m_pkthdr.len)
- || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
+ if ((hlen < sizeof (*ip))
+ || ((hlen > sizeof (*ip)) && inp->inp_options)
+ || (ntohs(ip->ip_len) != m->m_pkthdr.len)) {
INP_RUNLOCK(inp);
m_freem(m);
return (EINVAL);
}
+ error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
+ if (error != 0) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (error);
+ }
+ /*
+ * Don't allow IP options which do not have the required
+ * structure as specified in section 3.1 of RFC 791 on
+ * pages 15-23.
+ */
+ cp = (u_char *)(ip + 1);
+ cnt = hlen - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opttype = cp[IPOPT_OPTVAL];
+ if (opttype == IPOPT_EOL)
+ break;
+ if (opttype == IPOPT_NOP) {
+ optlen = 1;
+ continue;
+ }
+ if (cnt < IPOPT_OLEN + sizeof(u_char)) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (EINVAL);
+ }
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(u_char) ||
+ optlen > cnt) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (EINVAL);
+ }
+ }
/*
* This doesn't allow application to specify ID of zero,
* but we got this limitation from the beginning of history.
diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h
index 64fd5442..27e5fd49 100644
--- a/freebsd/sys/netinet/sctp.h
+++ b/freebsd/sys/netinet/sctp.h
@@ -491,6 +491,7 @@ struct sctp_error_auth_invalid_hmac {
* time */
#define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst
* in sat */
+#define SCTP_MAX_SENDALL_LIMIT 1024
/* Data Chuck Specific Flags */
#define SCTP_DATA_FRAG_MASK 0x03
@@ -516,6 +517,7 @@ struct sctp_error_auth_invalid_hmac {
#define SCTP_PCB_FLAGS_BOUNDALL 0x00000004
#define SCTP_PCB_FLAGS_ACCEPTING 0x00000008
#define SCTP_PCB_FLAGS_UNBOUND 0x00000010
+#define SCTP_PCB_FLAGS_SND_ITERATOR_UP 0x00000020
#define SCTP_PCB_FLAGS_CLOSE_IP 0x00040000
#define SCTP_PCB_FLAGS_WAS_CONNECTED 0x00080000
#define SCTP_PCB_FLAGS_WAS_ABORTED 0x00100000
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
index 2c66f65c..584a9d3b 100644
--- a/freebsd/sys/netinet/sctp_asconf.c
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -705,6 +705,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
if (param_length <= sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) too short\n", param_length);
sctp_m_freem(m_ack);
+ return;
}
/* get the entire parameter */
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
@@ -1368,7 +1369,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
if (sctp_asconf_queue_mgmt(stcb,
stcb->asoc.asconf_addr_del_pending,
SCTP_DEL_IP_ADDRESS) == 0) {
- SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_add: queing pending delete\n");
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_add: queuing pending delete\n");
pending_delete_queued = 1;
/* clear out the pending delete info */
stcb->asoc.asconf_del_pending = 0;
@@ -1956,12 +1957,10 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
case AF_INET:
{
struct sockaddr_in *sin;
- struct in6pcb *inp6;
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
/* invalid if we are a v6 only endpoint */
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6))
+ SCTP_IPV6_V6ONLY(inp))
return;
sin = &ifa->address.sin;
@@ -2034,11 +2033,8 @@ sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP_UNU
case AF_INET:
{
/* invalid if we are a v6 only endpoint */
- struct in6pcb *inp6;
-
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6)) {
+ SCTP_IPV6_V6ONLY(inp)) {
cnt_invalid++;
if (asc->cnt == cnt_invalid)
return (1);
@@ -2149,13 +2145,11 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
case AF_INET:
{
/* invalid if we are a v6 only endpoint */
- struct in6pcb *inp6;
struct sockaddr_in *sin;
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
/* invalid if we are a v6 only endpoint */
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6))
+ SCTP_IPV6_V6ONLY(inp))
continue;
sin = &ifa->address.sin;
@@ -2172,7 +2166,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
continue;
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6)) {
+ SCTP_IPV6_V6ONLY(inp)) {
cnt_invalid++;
if (asc->cnt == cnt_invalid)
return;
diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c
index 8301a98f..3555bb87 100644
--- a/freebsd/sys/netinet/sctp_auth.c
+++ b/freebsd/sys/netinet/sctp_auth.c
@@ -525,7 +525,7 @@ sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
} else if (new_skey->keyid == skey->keyid) {
/* replace the existing key */
/* verify this key *can* be replaced */
- if ((skey->deactivated) && (skey->refcount > 1)) {
+ if ((skey->deactivated) || (skey->refcount > 1)) {
SCTPDBG(SCTP_DEBUG_AUTH1,
"can't replace shared key id %u\n",
new_skey->keyid);
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c
index 0f0ddd89..962fae37 100644
--- a/freebsd/sys/netinet/sctp_bsd_addr.c
+++ b/freebsd/sys/netinet/sctp_bsd_addr.c
@@ -210,11 +210,13 @@ sctp_init_ifns_for_vrf(int vrfid)
IFNET_RLOCK();
CK_STAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_link) {
+ struct epoch_tracker et;
+
if (sctp_is_desired_interface_type(ifn) == 0) {
/* non desired type */
continue;
}
- IF_ADDR_RLOCK(ifn);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) {
if (ifa->ifa_addr == NULL) {
continue;
@@ -267,7 +269,7 @@ sctp_init_ifns_for_vrf(int vrfid)
sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
}
}
- IF_ADDR_RUNLOCK(ifn);
+ NET_EPOCH_EXIT(et);
}
IFNET_RUNLOCK();
}
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
index d07381d5..7ee7e311 100644
--- a/freebsd/sys/netinet/sctp_constants.h
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -983,6 +983,9 @@ __FBSDID("$FreeBSD$");
((((uint8_t *)&(a)->s_addr)[0] == 169) && \
(((uint8_t *)&(a)->s_addr)[1] == 254))
+/* Maximum size of optval for IPPROTO_SCTP level socket options. */
+#define SCTP_SOCKET_OPTION_LIMIT (64 * 1024)
+
#if defined(_KERNEL)
#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
diff --git a/freebsd/sys/netinet/sctp_crc32.c b/freebsd/sys/netinet/sctp_crc32.c
index e22abeb6..a387d528 100644
--- a/freebsd/sys/netinet/sctp_crc32.c
+++ b/freebsd/sys/netinet/sctp_crc32.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_sctp.h>
+#include <sys/gsb_crc32.h>
#ifdef SCTP
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -133,16 +134,16 @@ sctp_delayed_cksum(struct mbuf *m, uint32_t offset)
SCTP_STAT_INCR(sctps_sendswcrc);
offset += offsetof(struct sctphdr, checksum);
- if (offset + sizeof(uint32_t) > (uint32_t)(m->m_len)) {
+ if (offset + sizeof(uint32_t) > (uint32_t)(m->m_pkthdr.len)) {
#ifdef INVARIANTS
- panic("sctp_delayed_cksum(): m->m_len: %d, offset: %u.",
- m->m_len, offset);
+ panic("sctp_delayed_cksum(): m->m_pkthdr.len: %d, offset: %u.",
+ m->m_pkthdr.len, offset);
#else
- SCTP_PRINTF("sctp_delayed_cksum(): m->m_len: %d, offset: %u.\n",
- m->m_len, offset);
+ SCTP_PRINTF("sctp_delayed_cksum(): m->m_pkthdr.len: %d, offset: %u.\n",
+ m->m_pkthdr.len, offset);
#endif
return;
}
- *(uint32_t *)(m->m_data + offset) = checksum;
+ m_copyback(m, (int)offset, (int)sizeof(uint32_t), (caddr_t)&checksum);
}
#endif
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index 28e3f5b2..59654ac6 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -917,6 +917,9 @@ restart:
break;
}
}
+ if (cnt_added && strm->pd_api_started) {
+ sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
if ((control->length > pd_point) && (strm->pd_api_started == 0)) {
strm->pd_api_started = 1;
control->pdapi_started = 1;
@@ -949,6 +952,15 @@ sctp_inject_old_unordered_data(struct sctp_tcb *stcb,
SCTPDBG(SCTP_DEBUG_XXX,
"chunk is a first fsn: %u becomes fsn_included\n",
chk->rec.data.fsn);
+ at = TAILQ_FIRST(&control->reasm);
+ if (at && SCTP_TSN_GT(chk->rec.data.fsn, at->rec.data.fsn)) {
+ /*
+ * The first chunk in the reassembly is a smaller
+ * TSN than this one, even though this has a first,
+ * it must be from a subsequent msg.
+ */
+ goto place_chunk;
+ }
if (control->first_frag_seen) {
/*
* In old un-ordered we can reassembly on one
@@ -1469,6 +1481,16 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
"The last fsn is now in place fsn: %u\n",
chk->rec.data.fsn);
control->last_frag_seen = 1;
+ if (SCTP_TSN_GT(control->top_fsn, chk->rec.data.fsn)) {
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "New fsn: %u is not at top_fsn: %u -- abort\n",
+ chk->rec.data.fsn,
+ control->top_fsn);
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
+ return;
+ }
}
if (asoc->idata_supported || control->first_frag_seen) {
/*
@@ -1484,7 +1506,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
*/
sctp_abort_in_reasm(stcb, control, chk,
abort_flag,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
return;
}
}
@@ -1496,7 +1518,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
chk->rec.data.fsn, control->top_fsn);
sctp_abort_in_reasm(stcb, control,
chk, abort_flag,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
return;
}
if (asoc->idata_supported || control->first_frag_seen) {
@@ -1517,7 +1539,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
chk->rec.data.fsn, control->fsn_included);
sctp_abort_in_reasm(stcb, control, chk,
abort_flag,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
return;
}
}
@@ -1532,7 +1554,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
control->top_fsn);
sctp_abort_in_reasm(stcb, control, chk,
abort_flag,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
return;
}
}
@@ -1575,7 +1597,7 @@ sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
at->rec.data.fsn);
sctp_abort_in_reasm(stcb, control,
chk, abort_flag,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
return;
}
}
@@ -3088,13 +3110,12 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
* update RTO too ?
*/
if (tp1->do_rtt) {
- if (*rto_ok) {
- tp1->whoTo->RTO =
- sctp_calculate_rto(stcb,
- &stcb->asoc,
- tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (*rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
*rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
@@ -4066,16 +4087,12 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
/* update RTO too? */
if (tp1->do_rtt) {
- if (rto_ok) {
- tp1->whoTo->RTO =
- /*
- * sa_ignore
- * NO_NULL_CHK
- */
- sctp_calculate_rto(stcb,
- asoc, tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
@@ -4684,12 +4701,12 @@ hopeless_peer:
/* update RTO too? */
if (tp1->do_rtt) {
- if (rto_ok) {
- tp1->whoTo->RTO =
- sctp_calculate_rto(stcb,
- asoc, tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
index 59ceac3a..3f3099e8 100644
--- a/freebsd/sys/netinet/sctp_indata.h
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -61,7 +61,6 @@ sctp_build_readq_entry(struct sctp_tcb *stcb,
(_ctl)->sinfo_ppid = ppid; \
(_ctl)->sinfo_context = context; \
(_ctl)->fsn_included = 0xffffffff; \
- (_ctl)->top_fsn = 0xffffffff; \
(_ctl)->sinfo_tsn = tsn; \
(_ctl)->sinfo_cumtsn = tsn; \
(_ctl)->sinfo_assoc_id = sctp_get_associd((in_it)); \
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index 5386aae4..f380c208 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -446,22 +446,48 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
{
struct sctp_association *asoc;
struct mbuf *op_err;
- int retval, abort_flag;
- uint32_t initack_limit;
+ int retval, abort_flag, cookie_found;
+ int initack_limit;
int nat_friendly = 0;
/* First verify that we have no illegal param's */
abort_flag = 0;
+ cookie_found = 0;
op_err = sctp_arethere_unrecognized_parameters(m,
(offset + sizeof(struct sctp_init_chunk)),
- &abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly);
+ &abort_flag, (struct sctp_chunkhdr *)cp,
+ &nat_friendly, &cookie_found);
if (abort_flag) {
/* Send an abort and notify peer */
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_no_unlock = 1;
return (-1);
}
+ if (!cookie_found) {
+ uint16_t len;
+
+ len = (uint16_t)(sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
+ /* We abort with an error of missing mandatory param */
+ op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ struct sctp_error_missing_param *cause;
+
+ SCTP_BUF_LEN(op_err) = len;
+ cause = mtod(op_err, struct sctp_error_missing_param *);
+ /* Subtract the reserved param */
+ cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
+ cause->cause.length = htons(len);
+ cause->num_missing_params = htonl(1);
+ cause->type[0] = htons(SCTP_STATE_COOKIE);
+ }
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
+ vrf_id, net->port);
+ *abort_no_unlock = 1;
+ return (-3);
+ }
asoc = &stcb->asoc;
asoc->peer_supports_nat = (uint8_t)nat_friendly;
/* process the peer's parameters in the INIT-ACK */
@@ -524,42 +550,10 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
/* calculate the RTO */
- net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
+ sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
SCTP_RTT_FROM_NON_DATA);
- retval = sctp_send_cookie_echo(m, offset, stcb, net);
- if (retval < 0) {
- /*
- * No cookie, we probably should send a op error. But in any
- * case if there is no cookie in the INIT-ACK, we can
- * abandon the peer, its broke.
- */
- if (retval == -3) {
- uint16_t len;
-
- len = (uint16_t)(sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
- /* We abort with an error of missing mandatory param */
- op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
- if (op_err != NULL) {
- struct sctp_error_missing_param *cause;
-
- SCTP_BUF_LEN(op_err) = len;
- cause = mtod(op_err, struct sctp_error_missing_param *);
- /* Subtract the reserved param */
- cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
- cause->cause.length = htons(len);
- cause->num_missing_params = htonl(1);
- cause->type[0] = htons(SCTP_STATE_COOKIE);
- }
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
- src, dst, sh, op_err,
- mflowtype, mflowid,
- vrf_id, net->port);
- *abort_no_unlock = 1;
- }
- return (retval);
- }
-
- return (0);
+ retval = sctp_send_cookie_echo(m, offset, initack_limit, stcb, net);
+ return (retval);
}
static void
@@ -656,7 +650,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
/* Now lets do a RTO with this */
- r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv,
+ sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv,
SCTP_RTT_FROM_NON_DATA);
if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) {
r_net->dest_state |= SCTP_ADDR_REACHABLE;
@@ -711,34 +705,37 @@ static int
sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
{
/*
- * return 0 means we want you to proceed with the abort non-zero
- * means no abort processing
+ * Return 0 means we want you to proceed with the abort non-zero
+ * means no abort processing.
*/
+ uint32_t new_vtag;
struct sctpasochead *head;
if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
(SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
+ new_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_INP_INFO_WLOCK();
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ } else {
+ return (0);
}
if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
- stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ stcb->asoc.my_vtag = new_vtag;
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
/*
* put it in the bucket in the vtag hash of assoc's for the
* system
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
SCTP_INP_INFO_WUNLOCK();
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
return (1);
- }
- if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
+ } else {
/*
* treat like a case where the cookie expired i.e.: - dump
* current cookie. - generate a new vtag. - resend init.
@@ -748,15 +745,15 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_toss_old_cookies(stcb, &stcb->asoc);
- stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ stcb->asoc.my_vtag = new_vtag;
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
/*
* put it in the bucket in the vtag hash of assoc's for the
* system
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
SCTP_INP_INFO_WUNLOCK();
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
return (1);
}
return (0);
@@ -1682,8 +1679,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
old.tv_sec = cookie->time_entered.tv_sec;
old.tv_usec = cookie->time_entered.tv_usec;
net->hb_responded = 1;
- net->RTO = sctp_calculate_rto(stcb, asoc, net,
- &old,
+ sctp_calculate_rto(stcb, asoc, net, &old,
SCTP_RTT_FROM_NON_DATA);
if (stcb->asoc.sctp_autoclose_ticks &&
@@ -2157,8 +2153,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
ntohl(initack_cp->init.initiate_tag), vrf_id,
ntohs(initack_cp->init.num_outbound_streams),
port,
- (struct thread *)NULL
- );
+ (struct thread *)NULL,
+ SCTP_DONT_INITIALIZE_AUTH_PARAMS);
if (stcb == NULL) {
struct mbuf *op_err;
@@ -2407,8 +2403,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
/* calculate the RTT and set the encaps port */
old.tv_sec = cookie->time_entered.tv_sec;
old.tv_usec = cookie->time_entered.tv_usec;
- (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
- &old, SCTP_RTT_FROM_NON_DATA);
+ sctp_calculate_rto(stcb, asoc, *netp, &old, SCTP_RTT_FROM_NON_DATA);
}
/* respond with a COOKIE-ACK */
sctp_send_cookie_ack(stcb);
@@ -2984,8 +2979,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
if (asoc->overall_error_count == 0) {
- net->RTO = sctp_calculate_rto(stcb, asoc, net,
- &asoc->time_entered,
+ sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
SCTP_RTT_FROM_NON_DATA);
}
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index abe8e2c9..68528de0 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -97,9 +97,6 @@ __FBSDID("$FreeBSD$");
#include <crypto/sha1.h>
#include <crypto/sha2/sha256.h>
-#ifndef in6pcb
-#define in6pcb inpcb
-#endif
/* Declare all the malloc names for all the various mallocs */
MALLOC_DECLARE(SCTP_M_MAP);
MALLOC_DECLARE(SCTP_M_STRMI);
@@ -368,10 +365,10 @@ typedef struct callout sctp_os_timer_t;
*/
/* get the v6 hop limit */
-#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
+#define SCTP_GET_HLIM(inp, ro) in6_selecthlim(&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
/* is the endpoint v6only? */
-#define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
+#define SCTP_IPV6_V6ONLY(sctp_inpcb) ((sctp_inpcb)->ip_inp.inp.inp_flags & IN6P_IPV6_V6ONLY)
/* is the socket non-blocking? */
#define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO)
#define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO)
@@ -431,7 +428,7 @@ typedef struct rtentry sctp_rtentry_t;
m_clrprotoflags(o_pak); \
if (local_stcb && local_stcb->sctp_ep) \
result = ip6_output(o_pak, \
- ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
+ ((struct inpcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
(ro), 0, 0, ifp, NULL); \
else \
result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index b01ec41f..3890ea11 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -4291,10 +4291,12 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if (net->port) {
mtu -= sizeof(struct udphdr);
}
- if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
- sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ if (mtu < net->mtu) {
+ if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ }
+ net->mtu = mtu;
}
- net->mtu = mtu;
}
} else if (ro->ro_rt == NULL) {
/* route was freed */
@@ -4336,7 +4338,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
* at the SCTP layer. So use the value from
* the IP layer.
*/
- flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
+ flowlabel = ntohl(((struct inpcb *)inp)->inp_flow);
}
flowlabel &= 0x000fffff;
len = SCTP_MIN_OVERHEAD;
@@ -4391,7 +4393,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
* at the SCTP layer. So use the value from
* the IP layer.
*/
- tos_value = (ntohl(((struct in6pcb *)inp)->in6p_flowinfo) >> 20) & 0xff;
+ tos_value = (ntohl(((struct inpcb *)inp)->inp_flow) >> 20) & 0xff;
}
tos_value &= 0xfc;
if (ecn_ok) {
@@ -4649,10 +4651,12 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
if (net->port) {
mtu -= sizeof(struct udphdr);
}
- if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
- sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ if (mtu < net->mtu) {
+ if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) {
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ }
+ net->mtu = mtu;
}
- net->mtu = mtu;
}
} else if (ifp) {
if (ND_IFINFO(ifp)->linkmtu &&
@@ -4968,7 +4972,10 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
struct mbuf *
sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
- int param_offset, int *abort_processing, struct sctp_chunkhdr *cp, int *nat_friendly)
+ int param_offset, int *abort_processing,
+ struct sctp_chunkhdr *cp,
+ int *nat_friendly,
+ int *cookie_found)
{
/*
* Given a mbuf containing an INIT or INIT-ACK with the param_offset
@@ -4986,17 +4993,20 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
*/
struct sctp_paramhdr *phdr, params;
- struct mbuf *mat, *op_err;
+ struct mbuf *mat, *m_tmp, *op_err, *op_err_last;
int at, limit, pad_needed;
uint16_t ptype, plen, padded_size;
- int err_at;
*abort_processing = 0;
+ if (cookie_found != NULL) {
+ *cookie_found = 0;
+ }
mat = in_initpkt;
- err_at = 0;
limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
at = param_offset;
op_err = NULL;
+ op_err_last = NULL;
+ pad_needed = 0;
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
@@ -5019,12 +5029,17 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
switch (ptype) {
/* Param's with variable size */
case SCTP_HEARTBEAT_INFO:
- case SCTP_STATE_COOKIE:
case SCTP_UNRECOG_PARAM:
case SCTP_ERROR_CAUSE_IND:
/* ok skip fwd */
at += padded_size;
break;
+ case SCTP_STATE_COOKIE:
+ if (cookie_found != NULL) {
+ *cookie_found = 1;
+ }
+ at += padded_size;
+ break;
/* Param's with variable size within a range */
case SCTP_CHUNK_LIST:
case SCTP_SUPPORTED_CHUNK_EXT:
@@ -5113,55 +5128,44 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
break;
case SCTP_HOSTNAME_ADDRESS:
{
- /* We can NOT handle HOST NAME addresses!! */
+ /* Hostname parameters are deprecated. */
+ struct sctp_gen_error_cause *cause;
int l_len;
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
*abort_processing = 1;
- if (op_err == NULL) {
- /* Ok need to try to get a mbuf */
+ sctp_m_freem(op_err);
+ op_err = NULL;
+ op_err_last = NULL;
#ifdef INET6
- l_len = SCTP_MIN_OVERHEAD;
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = SCTP_MIN_V4_OVERHEAD;
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
- l_len += sizeof(struct sctp_chunkhdr);
- l_len += sizeof(struct sctp_gen_error_cause);
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
- if (op_err) {
- SCTP_BUF_LEN(op_err) = 0;
- /*
- * Pre-reserve space for IP,
- * SCTP, and chunk header.
- */
+ l_len += sizeof(struct sctp_chunkhdr);
+ l_len += sizeof(struct sctp_gen_error_cause);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ /*
+ * Pre-reserve space for IP, SCTP,
+ * and chunk header.
+ */
#ifdef INET6
- SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
#else
- SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
-#endif
- SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
- SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
- }
- }
- if (op_err) {
- /* If we have space */
- struct sctp_gen_error_cause cause;
-
- if (err_at % 4) {
- uint32_t cpthis = 0;
-
- pad_needed = 4 - (err_at % 4);
- m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
- err_at += pad_needed;
- }
- cause.code = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
- cause.length = htons((uint16_t)(sizeof(struct sctp_gen_error_cause) + plen));
- m_copyback(op_err, err_at, sizeof(struct sctp_gen_error_cause), (caddr_t)&cause);
- err_at += sizeof(struct sctp_gen_error_cause);
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
+#endif
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+ cause = mtod(op_err, struct sctp_gen_error_cause *);
+ cause->code = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
+ cause->length = htons((uint16_t)(sizeof(struct sctp_gen_error_cause) + plen));
SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT);
if (SCTP_BUF_NEXT(op_err) == NULL) {
sctp_m_freem(op_err);
- return (NULL);
+ op_err = NULL;
+ op_err_last = NULL;
}
}
return (op_err);
@@ -5197,37 +5201,55 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
#endif
SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ op_err_last = op_err;
}
}
- if (op_err) {
+ if (op_err != NULL) {
/* If we have space */
- struct sctp_paramhdr s;
-
- if (err_at % 4) {
- uint32_t cpthis = 0;
+ struct sctp_paramhdr *param;
- pad_needed = 4 - (err_at % 4);
- m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
- err_at += pad_needed;
+ if (pad_needed > 0) {
+ op_err_last = sctp_add_pad_tombuf(op_err_last, pad_needed);
}
- s.param_type = htons(SCTP_UNRECOG_PARAM);
- s.param_length = htons((uint16_t)sizeof(struct sctp_paramhdr) + plen);
- m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)&s);
- err_at += sizeof(struct sctp_paramhdr);
- SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT);
- if (SCTP_BUF_NEXT(op_err) == NULL) {
+ if (op_err_last == NULL) {
+ sctp_m_freem(op_err);
+ op_err = NULL;
+ op_err_last = NULL;
+ goto more_processing;
+ }
+ if (M_TRAILINGSPACE(op_err_last) < (int)sizeof(struct sctp_paramhdr)) {
+ m_tmp = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
+ if (m_tmp == NULL) {
+ sctp_m_freem(op_err);
+ op_err = NULL;
+ op_err_last = NULL;
+ goto more_processing;
+ }
+ SCTP_BUF_LEN(m_tmp) = 0;
+ SCTP_BUF_NEXT(m_tmp) = NULL;
+ SCTP_BUF_NEXT(op_err_last) = m_tmp;
+ op_err_last = m_tmp;
+ }
+ param = (struct sctp_paramhdr *)(mtod(op_err_last, caddr_t)+SCTP_BUF_LEN(op_err_last));
+ param->param_type = htons(SCTP_UNRECOG_PARAM);
+ param->param_length = htons((uint16_t)sizeof(struct sctp_paramhdr) + plen);
+ SCTP_BUF_LEN(op_err_last) += sizeof(struct sctp_paramhdr);
+ SCTP_BUF_NEXT(op_err_last) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT);
+ if (SCTP_BUF_NEXT(op_err_last) == NULL) {
sctp_m_freem(op_err);
- /*
- * we are out of memory but
- * we still need to have a
- * look at what to do (the
- * system is in trouble
- * though).
- */
op_err = NULL;
+ op_err_last = NULL;
goto more_processing;
+ } else {
+ while (SCTP_BUF_NEXT(op_err_last) != NULL) {
+ op_err_last = SCTP_BUF_NEXT(op_err_last);
+ }
+ }
+ if (plen % 4 != 0) {
+ pad_needed = 4 - (plen % 4);
+ } else {
+ pad_needed = 0;
}
- err_at += plen;
}
}
more_processing:
@@ -5248,7 +5270,11 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
invalid_size:
SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
*abort_processing = 1;
- if ((op_err == NULL) && phdr) {
+ sctp_m_freem(op_err);
+ op_err = NULL;
+ op_err_last = NULL;
+ if (phdr != NULL) {
+ struct sctp_paramhdr *param;
int l_len;
#ifdef INET6
l_len = SCTP_MIN_OVERHEAD;
@@ -5267,25 +5293,15 @@ invalid_size:
#endif
SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ SCTP_BUF_LEN(op_err) = 2 * sizeof(struct sctp_paramhdr);
+ param = mtod(op_err, struct sctp_paramhdr *);
+ param->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ param->param_length = htons(2 * sizeof(struct sctp_paramhdr));
+ param++;
+ param->param_type = htons(ptype);
+ param->param_length = htons(plen);
}
}
- if ((op_err) && phdr) {
- struct sctp_paramhdr s;
-
- if (err_at % 4) {
- uint32_t cpthis = 0;
-
- pad_needed = 4 - (err_at % 4);
- m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
- err_at += pad_needed;
- }
- s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr));
- m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
- err_at += sizeof(s);
- /* Only copy back the p-hdr that caused the issue */
- m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr);
- }
return (op_err);
}
@@ -5565,7 +5581,9 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
abort_flag = 0;
op_err = sctp_arethere_unrecognized_parameters(init_pkt,
(offset + sizeof(struct sctp_init_chunk)),
- &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
+ &abort_flag,
+ (struct sctp_chunkhdr *)init_chk,
+ &nat_friendly, NULL);
if (abort_flag) {
do_a_abort:
if (op_err == NULL) {
@@ -5584,8 +5602,7 @@ do_a_abort:
m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m == NULL) {
/* No memory, INIT timer will re-attempt. */
- if (op_err)
- sctp_m_freem(op_err);
+ sctp_m_freem(op_err);
return;
}
chunk_len = (uint16_t)sizeof(struct sctp_init_ack_chunk);
@@ -5774,8 +5791,11 @@ do_a_abort:
net->ro._s_addr = sctp_source_address_selection(inp,
stcb, (sctp_route_t *)&net->ro,
net, 0, vrf_id);
- if (net->ro._s_addr == NULL)
+ if (net->ro._s_addr == NULL) {
+ sctp_m_freem(op_err);
+ sctp_m_freem(m);
return;
+ }
net->src_addr_selected = 1;
@@ -5804,8 +5824,11 @@ do_a_abort:
net->ro._s_addr = sctp_source_address_selection(inp,
stcb, (sctp_route_t *)&net->ro,
net, 0, vrf_id);
- if (net->ro._s_addr == NULL)
+ if (net->ro._s_addr == NULL) {
+ sctp_m_freem(op_err);
+ sctp_m_freem(m);
return;
+ }
net->src_addr_selected = 1;
}
@@ -5876,6 +5899,7 @@ do_a_abort:
so = inp->sctp_socket;
if (so == NULL) {
/* memory problem */
+ sctp_m_freem(op_err);
sctp_m_freem(m);
return;
} else {
@@ -6802,15 +6826,19 @@ sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED)
*/
/* now free everything */
+ if (ca->inp) {
+ /* Lets clear the flag to allow others to run. */
+ ca->inp->sctp_flags &= ~SCTP_PCB_FLAGS_SND_ITERATOR_UP;
+ }
sctp_m_freem(ca->m);
SCTP_FREE(ca, SCTP_M_COPYAL);
}
static struct mbuf *
-sctp_copy_out_all(struct uio *uio, int len)
+sctp_copy_out_all(struct uio *uio, ssize_t len)
{
struct mbuf *ret, *at;
- int left, willcpy, cancpy, error;
+ ssize_t left, willcpy, cancpy, error;
ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
if (ret == NULL) {
@@ -6825,17 +6853,17 @@ sctp_copy_out_all(struct uio *uio, int len)
at = ret;
while (left > 0) {
/* Align data to the end */
- error = uiomove(mtod(at, caddr_t), willcpy, uio);
+ error = uiomove(mtod(at, caddr_t), (int)willcpy, uio);
if (error) {
err_out_now:
sctp_m_freem(at);
return (NULL);
}
- SCTP_BUF_LEN(at) = willcpy;
+ SCTP_BUF_LEN(at) = (int)willcpy;
SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
left -= willcpy;
if (left > 0) {
- SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA);
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg((unsigned int)left, 0, M_WAITOK, 1, MT_DATA);
if (SCTP_BUF_NEXT(at) == NULL) {
goto err_out_now;
}
@@ -6855,6 +6883,14 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
int ret;
struct sctp_copy_all *ca;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SND_ITERATOR_UP) {
+ /* There is another. */
+ return (EBUSY);
+ }
+ if (uio->uio_resid > SCTP_MAX_SENDALL_LIMIT) {
+ /* You must be less than the max! */
+ return (EMSGSIZE);
+ }
SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
SCTP_M_COPYAL);
if (ca == NULL) {
@@ -6875,7 +6911,7 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
/* get length and mbuf chain */
if (uio) {
- ca->sndlen = (int)uio->uio_resid;
+ ca->sndlen = uio->uio_resid;
ca->m = sctp_copy_out_all(uio, ca->sndlen);
if (ca->m == NULL) {
SCTP_FREE(ca, SCTP_M_COPYAL);
@@ -6891,6 +6927,7 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
ca->sndlen += SCTP_BUF_LEN(mat);
}
}
+ inp->sctp_flags |= SCTP_PCB_FLAGS_SND_ITERATOR_UP;
ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
SCTP_ASOC_ANY_STATE,
@@ -8979,7 +9016,7 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
int
sctp_send_cookie_echo(struct mbuf *m,
- int offset,
+ int offset, int limit,
struct sctp_tcb *stcb,
struct sctp_nets *net)
{
@@ -9005,18 +9042,30 @@ sctp_send_cookie_echo(struct mbuf *m,
}
ptype = ntohs(phdr->param_type);
plen = ntohs(phdr->param_length);
+ if (plen < sizeof(struct sctp_paramhdr)) {
+ return (-6);
+ }
if (ptype == SCTP_STATE_COOKIE) {
int pad;
/* found the cookie */
- if ((pad = (plen % 4))) {
- plen += 4 - pad;
+ if (at + plen > limit) {
+ return (-7);
}
cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
if (cookie == NULL) {
/* No memory */
return (-2);
}
+ if ((pad = (plen % 4)) > 0) {
+ pad = 4 - pad;
+ }
+ if (pad > 0) {
+ cookie = sctp_pad_lastmbuf(cookie, pad, NULL);
+ if (cookie == NULL) {
+ return (-8);
+ }
+ }
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
@@ -9042,7 +9091,7 @@ sctp_send_cookie_echo(struct mbuf *m,
chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
chk->rec.chunk_id.can_take_data = 0;
chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
- chk->send_size = plen;
+ chk->send_size = SCTP_SIZE32(plen);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->asoc = &stcb->asoc;
@@ -9068,7 +9117,6 @@ sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
struct sctp_chunkhdr *chdr;
struct sctp_tmit_chunk *chk;
-
if (net == NULL)
/* must have a net pointer */
return;
@@ -9086,13 +9134,8 @@ sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
chdr = mtod(outchain, struct sctp_chunkhdr *);
chdr->chunk_type = SCTP_HEARTBEAT_ACK;
chdr->chunk_flags = 0;
- if (chk_length % 4) {
- /* need pad */
- uint32_t cpthis = 0;
- int padlen;
-
- padlen = 4 - (chk_length % 4);
- m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis);
+ if (chk_length % 4 != 0) {
+ sctp_pad_lastmbuf(outchain, 4 - (chk_length % 4), NULL);
}
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
@@ -12372,7 +12415,7 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
struct sctp_sndrcvinfo *srcv,
struct uio *uio,
struct sctp_nets *net,
- int max_send_len,
+ ssize_t max_send_len,
int user_marks_eor,
int *error)
{
@@ -12518,7 +12561,7 @@ sctp_lower_sosend(struct socket *so,
struct thread *p
)
{
- unsigned int sndlen = 0, max_len;
+ ssize_t sndlen = 0, max_len, local_add_more;
int error, len;
struct mbuf *top = NULL;
int queue_only = 0, queue_only_for_init = 0;
@@ -12540,7 +12583,7 @@ sctp_lower_sosend(struct socket *so,
int got_all_of_the_send = 0;
int hold_tcblock = 0;
int non_blocking = 0;
- uint32_t local_add_more, local_soresv = 0;
+ ssize_t local_soresv = 0;
uint16_t port;
uint16_t sinfo_flags;
sctp_assoc_t sinfo_assoc_id;
@@ -12570,12 +12613,12 @@ sctp_lower_sosend(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
- sndlen = (unsigned int)uio->uio_resid;
+ sndlen = uio->uio_resid;
} else {
top = SCTP_HEADER_TO_CHAIN(i_pak);
sndlen = SCTP_HEADER_LEN(i_pak);
}
- SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %zu\n",
(void *)addr,
sndlen);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
@@ -12636,6 +12679,12 @@ sctp_lower_sosend(struct socket *so,
sinfo_flags = inp->def_send.sinfo_flags;
sinfo_assoc_id = inp->def_send.sinfo_assoc_id;
}
+ if (flags & MSG_EOR) {
+ sinfo_flags |= SCTP_EOR;
+ }
+ if (flags & MSG_EOF) {
+ sinfo_flags |= SCTP_EOF;
+ }
if (sinfo_flags & SCTP_SENDALL) {
/* its a sendall */
error = sctp_sendall(inp, uio, top, srcv);
@@ -12753,7 +12802,8 @@ sctp_lower_sosend(struct socket *so,
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
inp->sctp_ep.pre_open_stream_count,
inp->sctp_ep.port,
- p);
+ p,
+ SCTP_INITIALIZE_AUTH_PARAMS);
if (stcb == NULL) {
/* Error is setup for us in the call */
goto out_unlocked;
@@ -12782,9 +12832,6 @@ sctp_lower_sosend(struct socket *so,
SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
- /* initialize authentication params for the assoc */
- sctp_initialize_auth_params(inp, stcb);
-
if (control) {
if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
@@ -12805,9 +12852,17 @@ sctp_lower_sosend(struct socket *so,
}
} else
asoc = &stcb->asoc;
- if (srcv == NULL)
+ if (srcv == NULL) {
srcv = (struct sctp_sndrcvinfo *)&asoc->def_send;
- if (srcv->sinfo_flags & SCTP_ADDR_OVER) {
+ sinfo_flags = srcv->sinfo_flags;
+ if (flags & MSG_EOR) {
+ sinfo_flags |= SCTP_EOR;
+ }
+ if (flags & MSG_EOF) {
+ sinfo_flags |= SCTP_EOF;
+ }
+ }
+ if (sinfo_flags & SCTP_ADDR_OVER) {
if (addr)
net = sctp_findnet(stcb, addr);
else
@@ -12831,20 +12886,20 @@ sctp_lower_sosend(struct socket *so,
free_cnt_applied = 1;
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
- if (sndlen > asoc->smallest_mtu) {
+ if (sndlen > (ssize_t)asoc->smallest_mtu) {
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
error = EMSGSIZE;
goto out_unlocked;
}
}
if (SCTP_SO_IS_NBIO(so)
- || (flags & MSG_NBIO)
+ || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0
) {
non_blocking = 1;
}
/* would we block? */
if (non_blocking) {
- uint32_t amount;
+ ssize_t amount;
if (hold_tcblock == 0) {
SCTP_TCB_LOCK(stcb);
@@ -12859,13 +12914,13 @@ sctp_lower_sosend(struct socket *so,
if ((SCTP_SB_LIMIT_SND(so) < (amount + inqueue_bytes + stcb->asoc.sb_send_resv)) ||
(stcb->asoc.chunks_on_out_queue >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK);
- if (sndlen > SCTP_SB_LIMIT_SND(so))
+ if (sndlen > (ssize_t)SCTP_SB_LIMIT_SND(so))
error = EMSGSIZE;
else
error = EWOULDBLOCK;
goto out_unlocked;
}
- stcb->asoc.sb_send_resv += sndlen;
+ stcb->asoc.sb_send_resv += (uint32_t)sndlen;
SCTP_TCB_UNLOCK(stcb);
hold_tcblock = 0;
} else {
@@ -12914,7 +12969,7 @@ sctp_lower_sosend(struct socket *so,
(SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
(SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
(asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
- if (srcv->sinfo_flags & SCTP_ABORT) {
+ if (sinfo_flags & SCTP_ABORT) {
;
} else {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
@@ -12929,9 +12984,9 @@ sctp_lower_sosend(struct socket *so,
}
#endif /* __rtems__ */
/* Are we aborting? */
- if (srcv->sinfo_flags & SCTP_ABORT) {
+ if (sinfo_flags & SCTP_ABORT) {
struct mbuf *mm;
- int tot_demand, tot_out = 0, max_out;
+ ssize_t tot_demand, tot_out = 0, max_out;
SCTP_STAT_INCR(sctps_sends_with_abort);
if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
@@ -12965,7 +13020,7 @@ sctp_lower_sosend(struct socket *so,
error = EMSGSIZE;
goto out;
}
- mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA);
+ mm = sctp_get_mbuf_for_msg((unsigned int)tot_demand, 0, M_WAITOK, 1, MT_DATA);
}
if (mm == NULL) {
SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -12985,7 +13040,7 @@ sctp_lower_sosend(struct socket *so,
ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
ph->param_length = htons((uint16_t)(sizeof(struct sctp_paramhdr) + tot_out));
ph++;
- SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
+ SCTP_BUF_LEN(mm) = (int)(tot_out + sizeof(struct sctp_paramhdr));
if (top == NULL) {
error = uiomove((caddr_t)ph, (int)tot_out, uio);
if (error) {
@@ -13026,12 +13081,7 @@ sctp_lower_sosend(struct socket *so,
/* Calculate the maximum we can send */
inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
- if (non_blocking) {
- /* we already checked for non-blocking above. */
- max_len = sndlen;
- } else {
- max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
- }
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
} else {
max_len = 0;
}
@@ -13048,7 +13098,7 @@ sctp_lower_sosend(struct socket *so,
/* Unless E_EOR mode is on, we must make a send FIT in one call. */
if ((user_marks_eor == 0) &&
- (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
+ (sndlen > (ssize_t)SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
/* It will NEVER fit */
SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
error = EMSGSIZE;
@@ -13065,7 +13115,7 @@ sctp_lower_sosend(struct socket *so,
}
if (user_marks_eor) {
- local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
+ local_add_more = (ssize_t)min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold));
} else {
/*-
* For non-eeor the whole message must fit in
@@ -13078,7 +13128,7 @@ sctp_lower_sosend(struct socket *so,
goto skip_preblock;
}
if (((max_len <= local_add_more) &&
- (SCTP_SB_LIMIT_SND(so) >= local_add_more)) ||
+ ((ssize_t)SCTP_SB_LIMIT_SND(so) >= local_add_more)) ||
(max_len == 0) ||
((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
/* No room right now ! */
@@ -13086,7 +13136,7 @@ sctp_lower_sosend(struct socket *so,
inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + local_add_more)) ||
((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) {
- SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %d) || (%d+%d > %d)\n",
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %zd) || (%d+%d > %d)\n",
(unsigned int)SCTP_SB_LIMIT_SND(so),
inqueue_bytes,
local_add_more,
@@ -13138,7 +13188,7 @@ skip_preblock:
* case NOTE: uio will be null when top/mbuf is passed
*/
if (sndlen == 0) {
- if (srcv->sinfo_flags & SCTP_EOF) {
+ if (sinfo_flags & SCTP_EOF) {
got_all_of_the_send = 1;
goto dataless_eof;
} else {
@@ -13187,7 +13237,7 @@ skip_preblock:
}
sctp_snd_sb_alloc(stcb, sp->length);
atomic_add_int(&asoc->stream_queue_cnt, 1);
- if (srcv->sinfo_flags & SCTP_UNORDERED) {
+ if (sinfo_flags & SCTP_UNORDERED) {
SCTP_STAT_INCR(sctps_sends_with_unord);
}
TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
@@ -13219,16 +13269,16 @@ skip_preblock:
else
max_len = 0;
- if ((max_len > SCTP_BASE_SYSCTL(sctp_add_more_threshold)) ||
+ if ((max_len > (ssize_t)SCTP_BASE_SYSCTL(sctp_add_more_threshold)) ||
(max_len && (SCTP_SB_LIMIT_SND(so) < SCTP_BASE_SYSCTL(sctp_add_more_threshold))) ||
- (uio->uio_resid && (uio->uio_resid <= (int)max_len))) {
+ (uio->uio_resid && (uio->uio_resid <= max_len))) {
sndout = 0;
new_tail = NULL;
if (hold_tcblock) {
SCTP_TCB_UNLOCK(stcb);
hold_tcblock = 0;
}
- mm = sctp_copy_resume(uio, max_len, user_marks_eor, &error, &sndout, &new_tail);
+ mm = sctp_copy_resume(uio, (int)max_len, user_marks_eor, &error, &sndout, &new_tail);
if ((mm == NULL) || error) {
if (mm) {
sctp_m_freem(mm);
@@ -13262,15 +13312,15 @@ skip_preblock:
sctp_snd_sb_alloc(stcb, sndout);
atomic_add_int(&sp->length, sndout);
len += sndout;
- if (srcv->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
+ if (sinfo_flags & SCTP_SACK_IMMEDIATELY) {
sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY;
}
/* Did we reach EOR? */
if ((uio->uio_resid == 0) &&
((user_marks_eor == 0) ||
- (srcv->sinfo_flags & SCTP_EOF) ||
- (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
+ (sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (sinfo_flags & SCTP_EOR)))) {
sp->msg_is_complete = 1;
} else {
sp->msg_is_complete = 0;
@@ -13291,7 +13341,7 @@ skip_preblock:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- sctp_prune_prsctp(stcb, asoc, srcv, sndlen);
+ sctp_prune_prsctp(stcb, asoc, srcv, (int)sndlen);
inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb));
if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes)
max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
@@ -13388,10 +13438,10 @@ skip_preblock:
stcb,
SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
}
- if (hold_tcblock == 1) {
- SCTP_TCB_UNLOCK(stcb);
- hold_tcblock = 0;
- }
+ }
+ if (hold_tcblock == 1) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
}
SOCKBUF_LOCK(&so->so_snd);
/*-
@@ -13413,7 +13463,7 @@ skip_preblock:
min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
- asoc, (size_t)uio->uio_resid);
+ asoc, uio->uio_resid);
}
be.error = 0;
stcb->block_entry = &be;
@@ -13472,7 +13522,7 @@ skip_preblock:
/* We send in a 0, since we do NOT have any locks */
error = sctp_msg_append(stcb, net, top, srcv, 0);
top = NULL;
- if (srcv->sinfo_flags & SCTP_EOF) {
+ if (sinfo_flags & SCTP_EOF) {
/*
* This should only happen for Panda for the mbuf
* send case, which does NOT yet support EEOR mode.
@@ -13487,7 +13537,7 @@ skip_preblock:
}
dataless_eof:
/* EOF thing ? */
- if ((srcv->sinfo_flags & SCTP_EOF) &&
+ if ((sinfo_flags & SCTP_EOF) &&
(got_all_of_the_send == 1)) {
SCTP_STAT_INCR(sctps_sends_with_eof);
error = 0;
diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h
index 1b3d22d9..6d78cf90 100644
--- a/freebsd/sys/netinet/sctp_output.h
+++ b/freebsd/sys/netinet/sctp_output.h
@@ -92,11 +92,11 @@ sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
struct mbuf *
sctp_arethere_unrecognized_parameters(struct mbuf *, int, int *,
- struct sctp_chunkhdr *, int *);
+ struct sctp_chunkhdr *, int *, int *);
void sctp_queue_op_err(struct sctp_tcb *, struct mbuf *);
int
-sctp_send_cookie_echo(struct mbuf *, int, struct sctp_tcb *,
+sctp_send_cookie_echo(struct mbuf *, int, int, struct sctp_tcb *,
struct sctp_nets *);
void sctp_send_cookie_ack(struct sctp_tcb *);
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index 782e5f1d..45342dc3 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -2847,7 +2847,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
struct sockaddr_in *sin;
/* IPV6_V6ONLY socket? */
- if (SCTP_IPV6_V6ONLY(ip_inp)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
return (EINVAL);
}
@@ -3648,10 +3648,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
#ifdef INET6
if (ip_pcb->inp_vflag & INP_IPV6) {
- struct in6pcb *in6p;
-
- in6p = (struct in6pcb *)inp;
- ip6_freepcbopts(in6p->in6p_outputopts);
+ ip6_freepcbopts(ip_pcb->in6p_outputopts);
}
#endif /* INET6 */
ip_pcb->inp_vflag = 0;
@@ -4161,11 +4158,9 @@ sctp_aloc_a_assoc_id(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
struct sctpasochead *head;
struct sctp_tcb *lstcb;
- SCTP_INP_WLOCK(inp);
try_again:
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
/* TSNH */
- SCTP_INP_WUNLOCK(inp);
return (0);
}
/*
@@ -4184,8 +4179,7 @@ try_again:
head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)];
LIST_INSERT_HEAD(head, stcb, sctp_tcbasocidhash);
stcb->asoc.in_asocid_hash = 1;
- SCTP_INP_WUNLOCK(inp);
- return id;
+ return (id);
}
/*
@@ -4197,8 +4191,8 @@ struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
int *error, uint32_t override_tag, uint32_t vrf_id,
uint16_t o_streams, uint16_t port,
- struct thread *p
-)
+ struct thread *p,
+ int initialize_auth_params)
{
/* note the p argument is only valid in unbound sockets */
@@ -4348,7 +4342,6 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
memset(stcb, 0, sizeof(*stcb));
asoc = &stcb->asoc;
- asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb);
SCTP_TCB_LOCK_INIT(stcb);
SCTP_TCB_SEND_LOCK_INIT(stcb);
stcb->rport = rport;
@@ -4359,7 +4352,6 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
/* failed */
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
- LIST_REMOVE(stcb, sctp_tcbasocidhash);
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
SCTP_DECR_ASOC_COUNT();
*error = err;
@@ -4372,7 +4364,6 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
/* inpcb freed while alloc going on */
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
- LIST_REMOVE(stcb, sctp_tcbasocidhash);
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb);
SCTP_INP_WUNLOCK(inp);
SCTP_INP_INFO_WUNLOCK();
@@ -4383,6 +4374,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
}
SCTP_TCB_LOCK(stcb);
+ asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb);
/* now that my_vtag is set, add it to the hash */
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
/* put it in the bucket in the vtag hash of assoc's for the system */
@@ -4430,6 +4422,9 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
inp->sctp_hashmark)];
LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
}
+ if (initialize_auth_params == SCTP_INITIALIZE_AUTH_PARAMS) {
+ sctp_initialize_auth_params(inp, stcb);
+ }
SCTP_INP_WUNLOCK(inp);
SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", (void *)stcb);
return (stcb);
@@ -4918,12 +4913,11 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
if (so) {
SOCKBUF_LOCK(&so->so_rcv);
- if (so->so_rcv.sb_cc == 0) {
- so->so_state &= ~(SS_ISCONNECTING |
- SS_ISDISCONNECTING |
- SS_ISCONFIRMING |
- SS_ISCONNECTED);
- }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ so->so_state |= SS_ISDISCONNECTED;
socantrcvmore_locked(so);
socantsendmore(so);
sctp_sowwakeup(inp, so);
@@ -4991,6 +4985,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
* in case.
*/
/* anything on the wheel needs to be removed */
+ SCTP_TCB_SEND_LOCK(stcb);
for (i = 0; i < asoc->streamoutcnt; i++) {
struct sctp_stream_out *outs;
@@ -4999,7 +4994,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&outs->outqueue, sp, next);
- stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 0);
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1);
sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
if (so) {
@@ -5021,6 +5016,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED);
}
}
+ SCTP_TCB_SEND_UNLOCK(stcb);
/* sa_ignore FREED_MEMORY */
TAILQ_FOREACH_SAFE(strrst, &asoc->resetHead, next_resp, nstrrst) {
TAILQ_REMOVE(&asoc->resetHead, strrst, next_resp);
@@ -5779,7 +5775,7 @@ sctp_startup_mcore_threads(void)
#endif
void
-sctp_pcb_init()
+sctp_pcb_init(void)
{
/*
* SCTP initialization for the PCB structures should be called by
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
index 5b41ae8a..cbe51c7d 100644
--- a/freebsd/sys/netinet/sctp_pcb.h
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -362,7 +362,7 @@ struct sctp_inpcb {
*/
union {
struct inpcb inp;
- char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
+ char align[(sizeof(struct inpcb) + SCTP_ALIGNM1) &
~SCTP_ALIGNM1];
} ip_inp;
@@ -578,9 +578,13 @@ int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id);
void sctp_inpcb_free(struct sctp_inpcb *, int, int);
+#define SCTP_DONT_INITIALIZE_AUTH_PARAMS 0
+#define SCTP_INITIALIZE_AUTH_PARAMS 1
+
struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
- int *, uint32_t, uint32_t, uint16_t, uint16_t, struct thread *);
+ int *, uint32_t, uint32_t, uint16_t, uint16_t, struct thread *,
+ int);
int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
index c4eafc26..451ae72b 100644
--- a/freebsd/sys/netinet/sctp_structs.h
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -166,7 +166,7 @@ struct sctp_copy_all {
struct sctp_inpcb *inp; /* ep */
struct mbuf *m;
struct sctp_sndrcvinfo sndrcv;
- int sndlen;
+ ssize_t sndlen;
int cnt_sent;
int cnt_failed;
};
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index b519971c..d8fbabc4 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -632,7 +632,6 @@ connected_type:
/* now what about control */
if (control) {
if (inp->control) {
- SCTP_PRINTF("huh? control set?\n");
sctp_m_freem(inp->control);
inp->control = NULL;
}
@@ -968,9 +967,9 @@ sctp_shutdown(struct socket *so)
abort_anyway:
op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
+ SCTP_INP_RUNLOCK(inp);
sctp_abort_an_association(stcb->sctp_ep, stcb,
op_err, SCTP_SO_LOCKED);
- SCTP_INP_RUNLOCK(inp);
return (0);
}
}
@@ -1124,22 +1123,25 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
}
#ifdef INET6
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ if (actual + sizeof(struct sockaddr_in6) > limit) {
+ return (actual);
+ }
in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)sas);
((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
actual += sizeof(struct sockaddr_in6);
} else {
#endif
- memcpy(sas, sin, sizeof(*sin));
+ if (actual + sizeof(struct sockaddr_in) > limit) {
+ return (actual);
+ }
+ memcpy(sas, sin, sizeof(struct sockaddr_in));
((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
- sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin));
- actual += sizeof(*sin);
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in));
+ actual += sizeof(struct sockaddr_in);
#ifdef INET6
}
#endif
- if (actual >= limit) {
- return (actual);
- }
} else {
continue;
}
@@ -1184,13 +1186,13 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
(IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
continue;
}
- memcpy(sas, sin6, sizeof(*sin6));
- ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
- sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin6));
- actual += sizeof(*sin6);
- if (actual >= limit) {
+ if (actual + sizeof(struct sockaddr_in6) > limit) {
return (actual);
}
+ memcpy(sas, sin6, sizeof(struct sockaddr_in6));
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
+ actual += sizeof(struct sockaddr_in6);
} else {
continue;
}
@@ -1204,6 +1206,7 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
}
} else {
struct sctp_laddr *laddr;
+ size_t sa_len;
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (stcb) {
@@ -1211,6 +1214,10 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
continue;
}
}
+ sa_len = laddr->ifa->address.sa.sa_len;
+ if (actual + sa_len > limit) {
+ return (actual);
+ }
if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
continue;
switch (laddr->ifa->address.sa.sa_family) {
@@ -1228,12 +1235,8 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
/* TSNH */
break;
}
- sas = (struct sockaddr_storage *)((caddr_t)sas +
- laddr->ifa->address.sa.sa_len);
- actual += laddr->ifa->address.sa.sa_len;
- if (actual >= limit) {
- return (actual);
- }
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sa_len);
+ actual += sa_len;
}
}
return (actual);
@@ -1351,13 +1354,12 @@ static int
sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
size_t optsize, void *p, int delay)
{
- int error = 0;
+ int error;
int creat_lock_on = 0;
struct sctp_tcb *stcb = NULL;
struct sockaddr *sa;
unsigned int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
uint32_t vrf_id;
- int bad_addresses = 0;
sctp_assoc_t *a_id;
SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
@@ -1396,17 +1398,12 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
totaddrp = (unsigned int *)optval;
totaddr = *totaddrp;
sa = (struct sockaddr *)(totaddrp + 1);
- stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (unsigned int)(optsize - sizeof(int)), &bad_addresses);
- if ((stcb != NULL) || bad_addresses) {
+ error = sctp_connectx_helper_find(inp, sa, totaddr, &num_v4, &num_v6, (unsigned int)(optsize - sizeof(int)));
+ if (error != 0) {
/* Already have or am bring up an association */
SCTP_ASOC_CREATE_UNLOCK(inp);
creat_lock_on = 0;
- if (stcb)
- SCTP_TCB_UNLOCK(stcb);
- if (bad_addresses == 0) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
- error = EALREADY;
- }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
goto out_now;
}
#ifdef INET6
@@ -1417,10 +1414,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
(num_v4 > 0)) {
- struct in6pcb *inp6;
-
- inp6 = (struct in6pcb *)inp;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, ignore connections destined
* to a v4 addr or v4-mapped addr
@@ -1448,8 +1442,8 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
inp->sctp_ep.pre_open_stream_count,
inp->sctp_ep.port,
- (struct thread *)p
- );
+ (struct thread *)p,
+ SCTP_INITIALIZE_AUTH_PARAMS);
if (stcb == NULL) {
/* Gak! no memory */
goto out_now;
@@ -1480,16 +1474,11 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
/* Fill in the return id */
if (error) {
- (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
goto out_now;
}
a_id = (sctp_assoc_t *)optval;
*a_id = sctp_get_associd(stcb);
- /* initialize authentication parameters for the assoc */
- sctp_initialize_auth_params(inp, stcb);
-
if (delay) {
/* doing delayed connection */
stcb->asoc.delayed_connection = 1;
@@ -2238,8 +2227,8 @@ flags_out:
SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
if (stcb) {
- left = (*optsize) - sizeof(struct sctp_getaddresses);
- *optsize = sizeof(struct sctp_getaddresses);
+ left = (*optsize) - sizeof(sctp_assoc_t);
+ *optsize = sizeof(sctp_assoc_t);
sas = (struct sockaddr_storage *)&saddr->addr[0];
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
@@ -2313,7 +2302,7 @@ flags_out:
if (stcb) {
SCTP_TCB_UNLOCK(stcb);
}
- *optsize = sizeof(struct sockaddr_storage) + actual;
+ *optsize = sizeof(sctp_assoc_t) + actual;
break;
}
case SCTP_PEER_ADDR_PARAMS:
@@ -2642,42 +2631,47 @@ flags_out:
sstat->sstat_instrms = stcb->asoc.streamincnt;
sstat->sstat_outstrms = stcb->asoc.streamoutcnt;
sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb, &stcb->asoc);
- memcpy(&sstat->sstat_primary.spinfo_address,
- &stcb->asoc.primary_destination->ro._l_addr,
- ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
net = stcb->asoc.primary_destination;
- ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
- /*
- * Again the user can get info from sctp_constants.h
- * for what the state of the network is.
- */
- if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
- /* It's unconfirmed */
- sstat->sstat_primary.spinfo_state = SCTP_UNCONFIRMED;
- } else if (net->dest_state & SCTP_ADDR_REACHABLE) {
- /* It's active */
- sstat->sstat_primary.spinfo_state = SCTP_ACTIVE;
- } else {
- /* It's inactive */
- sstat->sstat_primary.spinfo_state = SCTP_INACTIVE;
- }
- sstat->sstat_primary.spinfo_cwnd = net->cwnd;
- sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
- sstat->sstat_primary.spinfo_rto = net->RTO;
- sstat->sstat_primary.spinfo_mtu = net->mtu;
- switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
+ if (net != NULL) {
+ memcpy(&sstat->sstat_primary.spinfo_address,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
+ ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
+ /*
+ * Again the user can get info from
+ * sctp_constants.h for what the state of
+ * the network is.
+ */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /* It's unconfirmed */
+ sstat->sstat_primary.spinfo_state = SCTP_UNCONFIRMED;
+ } else if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* It's active */
+ sstat->sstat_primary.spinfo_state = SCTP_ACTIVE;
+ } else {
+ /* It's inactive */
+ sstat->sstat_primary.spinfo_state = SCTP_INACTIVE;
+ }
+ sstat->sstat_primary.spinfo_cwnd = net->cwnd;
+ sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
+ sstat->sstat_primary.spinfo_rto = net->RTO;
+ sstat->sstat_primary.spinfo_mtu = net->mtu;
+ switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
#if defined(INET)
- case AF_INET:
- sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
- break;
+ case AF_INET:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
#endif
#if defined(INET6)
- case AF_INET6:
- sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
- break;
+ case AF_INET6:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
+ break;
#endif
- default:
- break;
+ default:
+ break;
+ }
+ } else {
+ memset(&sstat->sstat_primary, 0, sizeof(struct sctp_paddrinfo));
}
sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
@@ -3744,13 +3738,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
uint32_t vrf_id;
if (optval == NULL) {
- SCTP_PRINTF("optval is NULL\n");
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
- SCTP_PRINTF("inp is NULL?\n");
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
@@ -4065,10 +4057,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
+ SCTP_TCB_SEND_LOCK(stcb);
stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
stcb->asoc.stream_scheduling_module = av->assoc_value;
stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
+ SCTP_TCB_SEND_UNLOCK(stcb);
SCTP_TCB_UNLOCK(stcb);
} else {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
@@ -4084,10 +4078,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
SCTP_TCB_LOCK(stcb);
+ SCTP_TCB_SEND_LOCK(stcb);
stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
stcb->asoc.stream_scheduling_module = av->assoc_value;
stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
+ SCTP_TCB_SEND_UNLOCK(stcb);
SCTP_TCB_UNLOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
@@ -4624,6 +4620,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if (sizeof(struct sctp_reset_streams) +
strrst->srs_number_streams * sizeof(uint16_t) > optsize) {
error = EINVAL;
@@ -4656,13 +4658,13 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
for (i = 0; i < strrst->srs_number_streams; i++) {
if ((send_in) &&
- (strrst->srs_stream_list[i] > stcb->asoc.streamincnt)) {
+ (strrst->srs_stream_list[i] >= stcb->asoc.streamincnt)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
}
if ((send_out) &&
- (strrst->srs_stream_list[i] > stcb->asoc.streamoutcnt)) {
+ (strrst->srs_stream_list[i] >= stcb->asoc.streamoutcnt)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -4738,6 +4740,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if (stcb->asoc.stream_reset_outstanding) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
error = EALREADY;
@@ -4808,6 +4816,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
+ if (SCTP_GET_STATE(stcb) != SCTP_STATE_OPEN) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if (stcb->asoc.stream_reset_outstanding) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
error = EALREADY;
@@ -5314,10 +5328,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
net->dest_state &= ~SCTP_ADDR_NOHB;
}
if (paddrp->spp_flags & SPP_HB_DEMAND) {
- /* on demand HB */
- sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED);
- sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) {
+ sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
}
if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
@@ -6117,6 +6132,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_INP_RUNLOCK(inp);
}
}
+ } else {
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
}
break;
}
@@ -6211,6 +6230,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
if (info->pr_policy > SCTP_PR_SCTP_MAX) {
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -6330,6 +6352,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
}
if (thlds->spt_pathcpthld != 0xffff) {
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
error = EINVAL;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
break;
@@ -6830,6 +6855,10 @@ sctp_ctloutput(struct socket *so, struct sockopt *sopt)
return (error);
}
optsize = sopt->sopt_valsize;
+ if (optsize > SCTP_SOCKET_OPTION_LIMIT) {
+ SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (ENOBUFS);
+ }
if (optsize) {
SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
if (optval == NULL) {
@@ -6886,14 +6915,14 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
#ifdef INET6
case AF_INET6:
{
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- sin6p = (struct sockaddr_in6 *)addr;
- if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) {
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6->sin6_addr)) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
return (error);
}
@@ -6903,14 +6932,14 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
#ifdef INET
case AF_INET:
{
- struct sockaddr_in *sinp;
+ struct sockaddr_in *sin;
if (addr->sa_len != sizeof(struct sockaddr_in)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- sinp = (struct sockaddr_in *)addr;
- if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sinp->sin_addr)) != 0) {
+ sin = (struct sockaddr_in *)addr;
+ if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sin->sin_addr)) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
return (error);
}
@@ -6992,7 +7021,8 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
inp->sctp_ep.pre_open_stream_count,
- inp->sctp_ep.port, p);
+ inp->sctp_ep.port, p,
+ SCTP_INITIALIZE_AUTH_PARAMS);
if (stcb == NULL) {
/* Gak! no memory */
goto out_now;
@@ -7005,9 +7035,6 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
- /* initialize authentication parameters for the assoc */
- sctp_initialize_auth_params(inp, stcb);
-
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
out_now:
@@ -7201,28 +7228,56 @@ sctp_accept(struct socket *so, struct sockaddr **addr)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (ECONNRESET);
}
- SCTP_INP_RLOCK(inp);
+ SCTP_INP_WLOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
- SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_WUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
return (EOPNOTSUPP);
}
if (so->so_state & SS_ISDISCONNECTED) {
- SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_WUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED);
return (ECONNABORTED);
}
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
- SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_WUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (ECONNRESET);
}
SCTP_TCB_LOCK(stcb);
- SCTP_INP_RUNLOCK(inp);
store = stcb->asoc.primary_destination->ro._l_addr;
SCTP_CLEAR_SUBSTATE(stcb, SCTP_STATE_IN_ACCEPT_QUEUE);
- SCTP_TCB_UNLOCK(stcb);
+ /* Wake any delayed sleep action */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
+ if (sowriteable(inp->sctp_socket)) {
+ sowwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
+ }
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
+ if (soreadable(inp->sctp_socket)) {
+ sctp_defered_wakeup_cnt++;
+ sorwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
+ }
+ }
+ }
+ SCTP_INP_WUNLOCK(inp);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
switch (store.sa.sa_family) {
#ifdef INET
case AF_INET:
@@ -7264,40 +7319,6 @@ sctp_accept(struct socket *so, struct sockaddr **addr)
/* TSNH */
break;
}
- /* Wake any delayed sleep action */
- if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
- SCTP_INP_WLOCK(inp);
- inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
- if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
- inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
- SCTP_INP_WUNLOCK(inp);
- SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
- if (sowriteable(inp->sctp_socket)) {
- sowwakeup_locked(inp->sctp_socket);
- } else {
- SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
- }
- SCTP_INP_WLOCK(inp);
- }
- if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
- inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
- SCTP_INP_WUNLOCK(inp);
- SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
- if (soreadable(inp->sctp_socket)) {
- sctp_defered_wakeup_cnt++;
- sorwakeup_locked(inp->sctp_socket);
- } else {
- SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
- }
- SCTP_INP_WLOCK(inp);
- }
- SCTP_INP_WUNLOCK(inp);
- }
- if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
- SCTP_TCB_LOCK(stcb);
- sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
- }
return (0);
}
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index ddf136ef..e75f158e 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -548,7 +548,7 @@ sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t wake_cnt, int from)
}
void
-sctp_log_block(uint8_t from, struct sctp_association *asoc, size_t sendlen)
+sctp_log_block(uint8_t from, struct sctp_association *asoc, ssize_t sendlen)
{
#if defined(SCTP_LOCAL_TRACE_BUF)
struct sctp_cwnd_log sctp_clog;
@@ -2471,25 +2471,24 @@ sctp_mtu_size_reset(struct sctp_inpcb *inp,
/*
- * given an association and starting time of the current RTT period return
- * RTO in number of msecs net should point to the current network
+ * Given an association and starting time of the current RTT period, update
+ * RTO in number of msecs. net should point to the current network.
+ * Return 1, if an RTO update was performed, return 0 if no update was
+ * performed due to invalid starting point.
*/
-uint32_t
+int
sctp_calculate_rto(struct sctp_tcb *stcb,
struct sctp_association *asoc,
struct sctp_nets *net,
struct timeval *old,
int rtt_from_sack)
{
- /*-
- * given an association and the starting time of the current RTT
- * period (in value1/value2) return RTO in number of msecs.
- */
+ struct timeval now;
+ uint64_t rtt_us; /* RTT in us */
int32_t rtt; /* RTT in ms */
uint32_t new_rto;
int first_measure = 0;
- struct timeval now;
/************************/
/* 1. calculate new RTT */
@@ -2500,10 +2499,19 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
} else {
(void)SCTP_GETTIME_TIMEVAL(&now);
}
+ if ((old->tv_sec > now.tv_sec) ||
+ ((old->tv_sec == now.tv_sec) && (old->tv_usec > now.tv_usec))) {
+ /* The starting point is in the future. */
+ return (0);
+ }
timevalsub(&now, old);
+ rtt_us = (uint64_t)1000000 * (uint64_t)now.tv_sec + (uint64_t)now.tv_usec;
+ if (rtt_us > SCTP_RTO_UPPER_BOUND * 1000) {
+ /* The RTT is larger than a sane value. */
+ return (0);
+ }
/* store the current RTT in us */
- net->rtt = (uint64_t)1000000 * (uint64_t)now.tv_sec +
- (uint64_t)now.tv_usec;
+ net->rtt = rtt_us;
/* compute rtt in ms */
rtt = (int32_t)(net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
@@ -2535,7 +2543,7 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
* Paper "Congestion Avoidance and Control", Annex A.
*
* (net->lastsa >> SCTP_RTT_SHIFT) is the srtt
- * (net->lastsa >> SCTP_RTT_VAR_SHIFT) is the rttvar
+ * (net->lastsv >> SCTP_RTT_VAR_SHIFT) is the rttvar
*/
if (net->RTO_measured) {
rtt -= (net->lastsa >> SCTP_RTT_SHIFT);
@@ -2576,8 +2584,8 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
if (new_rto > stcb->asoc.maxrto) {
new_rto = stcb->asoc.maxrto;
}
- /* we are now returning the RTO */
- return (new_rto);
+ net->RTO = new_rto;
+ return (1);
}
/*
@@ -3948,7 +3956,7 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int holds_lock,
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&outs->outqueue, sp, next);
- stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, holds_lock);
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1);
sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
@@ -3997,7 +4005,7 @@ sctp_abort_notification(struct sctp_tcb *stcb, uint8_t from_peer, uint16_t error
return;
}
/* Tell them we lost the asoc */
- sctp_report_all_outbound(stcb, error, 1, so_locked);
+ sctp_report_all_outbound(stcb, error, 0, so_locked);
if (from_peer) {
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_REM_ABORTED, stcb, error, abort, so_locked);
} else {
@@ -4569,12 +4577,14 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
if (inp_read_lock_held == 0)
SCTP_INP_READ_LOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
- sctp_free_remote_addr(control->whoFrom);
- if (control->data) {
- sctp_m_freem(control->data);
- control->data = NULL;
+ if (!control->on_strm_q) {
+ sctp_free_remote_addr(control->whoFrom);
+ if (control->data) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
}
- sctp_free_a_readq(stcb, control);
if (inp_read_lock_held == 0)
SCTP_INP_READ_UNLOCK(inp);
return;
@@ -4619,8 +4629,10 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
control->tail_mbuf = prev;
} else {
/* Everything got collapsed out?? */
- sctp_free_remote_addr(control->whoFrom);
- sctp_free_a_readq(stcb, control);
+ if (!control->on_strm_q) {
+ sctp_free_remote_addr(control->whoFrom);
+ sctp_free_a_readq(stcb, control);
+ }
if (inp_read_lock_held == 0)
SCTP_INP_READ_UNLOCK(inp);
return;
@@ -5221,8 +5233,9 @@ sctp_sorecvmsg(struct socket *so,
*
*/
struct sctp_inpcb *inp = NULL;
- int my_len = 0;
- int cp_len = 0, error = 0;
+ ssize_t my_len = 0;
+ ssize_t cp_len = 0;
+ int error = 0;
struct sctp_queued_to_read *control = NULL, *ctl = NULL, *nxt = NULL;
struct mbuf *m = NULL;
struct sctp_tcb *stcb = NULL;
@@ -5231,7 +5244,7 @@ sctp_sorecvmsg(struct socket *so,
int out_flags = 0, in_flags = 0;
int block_allowed = 1;
uint32_t freed_so_far = 0;
- uint32_t copied_so_far = 0;
+ ssize_t copied_so_far = 0;
int in_eeor_mode = 0;
int no_rcv_needed = 0;
uint32_t rwnd_req = 0;
@@ -5575,7 +5588,7 @@ found_one:
* will go to the sctp_user_rcvd() that will not
* lock until it KNOWs it MUST send a WUP-SACK.
*/
- freed_so_far = stcb->freed_by_sorcv_sincelast;
+ freed_so_far = (uint32_t)stcb->freed_by_sorcv_sincelast;
stcb->freed_by_sorcv_sincelast = 0;
}
}
@@ -5730,8 +5743,8 @@ get_more_data:
m = control->data;
while (m) {
/* Move out all we can */
- cp_len = (int)uio->uio_resid;
- my_len = (int)SCTP_BUF_LEN(m);
+ cp_len = uio->uio_resid;
+ my_len = SCTP_BUF_LEN(m);
if (cp_len > my_len) {
/* not enough in this buf */
cp_len = my_len;
@@ -5741,7 +5754,7 @@ get_more_data:
hold_rlock = 0;
}
if (cp_len > 0)
- error = uiomove(mtod(m, char *), cp_len, uio);
+ error = uiomove(mtod(m, char *), (int)cp_len, uio);
/* re-read */
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
goto release;
@@ -5786,7 +5799,7 @@ get_more_data:
control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
}
copied_so_far += cp_len;
- freed_so_far += cp_len;
+ freed_so_far += (uint32_t)cp_len;
freed_so_far += MSIZE;
atomic_subtract_int(&control->length, cp_len);
control->data = sctp_m_free(m);
@@ -5826,9 +5839,9 @@ get_more_data:
}
if ((in_flags & MSG_PEEK) == 0) {
SCTP_BUF_RESV_UF(m, cp_len);
- SCTP_BUF_LEN(m) -= cp_len;
+ SCTP_BUF_LEN(m) -= (int)cp_len;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
- sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, cp_len);
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, (int)cp_len);
}
atomic_subtract_int(&so->so_rcv.sb_cc, cp_len);
if ((control->do_not_ref_stcb == 0) &&
@@ -5836,7 +5849,7 @@ get_more_data:
atomic_subtract_int(&stcb->asoc.sb_cc, cp_len);
}
copied_so_far += cp_len;
- freed_so_far += cp_len;
+ freed_so_far += (uint32_t)cp_len;
freed_so_far += MSIZE;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb,
@@ -5927,7 +5940,7 @@ get_more_data:
}
if ((uio->uio_resid == 0) ||
((in_eeor_mode) &&
- (copied_so_far >= (uint32_t)max(so->so_rcv.sb_lowat, 1)))) {
+ (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))) {
goto release;
}
/*
@@ -6064,7 +6077,7 @@ wait_some_more:
control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
}
sctp_sbfree(control, stcb, &so->so_rcv, m);
- freed_so_far += SCTP_BUF_LEN(m);
+ freed_so_far += (uint32_t)SCTP_BUF_LEN(m);
freed_so_far += MSIZE;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
sctp_sblog(&so->so_rcv,
@@ -6392,30 +6405,33 @@ out_now:
return (added);
}
-struct sctp_tcb *
+int
sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
- unsigned int *totaddr,
- unsigned int *num_v4, unsigned int *num_v6, int *error,
- unsigned int limit, int *bad_addr)
+ unsigned int totaddr,
+ unsigned int *num_v4, unsigned int *num_v6,
+ unsigned int limit)
{
struct sockaddr *sa;
- struct sctp_tcb *stcb = NULL;
+ struct sctp_tcb *stcb;
unsigned int incr, at, i;
at = 0;
sa = addr;
- *error = *num_v6 = *num_v4 = 0;
+ *num_v6 = *num_v4 = 0;
/* account and validate addresses */
- for (i = 0; i < *totaddr; i++) {
+ if (totaddr == 0) {
+ return (EINVAL);
+ }
+ for (i = 0; i < totaddr; i++) {
+ if (at + sizeof(struct sockaddr) > limit) {
+ return (EINVAL);
+ }
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
incr = (unsigned int)sizeof(struct sockaddr_in);
if (sa->sa_len != incr) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- *error = EINVAL;
- *bad_addr = 1;
- return (NULL);
+ return (EINVAL);
}
(*num_v4) += 1;
break;
@@ -6428,46 +6444,34 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
sin6 = (struct sockaddr_in6 *)sa;
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
/* Must be non-mapped for connectx */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- *error = EINVAL;
- *bad_addr = 1;
- return (NULL);
+ return (EINVAL);
}
incr = (unsigned int)sizeof(struct sockaddr_in6);
if (sa->sa_len != incr) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- *error = EINVAL;
- *bad_addr = 1;
- return (NULL);
+ return (EINVAL);
}
(*num_v6) += 1;
break;
}
#endif
default:
- *totaddr = i;
- incr = 0;
- /* we are done */
- break;
+ return (EINVAL);
}
- if (i == *totaddr) {
- break;
+ if ((at + incr) > limit) {
+ return (EINVAL);
}
SCTP_INP_INCR_REF(inp);
stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
if (stcb != NULL) {
- /* Already have or am bring up an association */
- return (stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ return (EALREADY);
} else {
SCTP_INP_DECR_REF(inp);
}
- if ((at + incr) > limit) {
- *totaddr = i;
- break;
- }
+ at += incr;
sa = (struct sockaddr *)((caddr_t)sa + incr);
}
- return ((struct sctp_tcb *)NULL);
+ return (0);
}
/*
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index c12fb210..c67c021f 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -133,7 +133,7 @@ uint32_t sctp_get_next_mtu(uint32_t);
void
sctp_timeout_handler(void *);
-uint32_t
+int
sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *,
struct sctp_nets *, struct timeval *, int);
@@ -211,10 +211,9 @@ int
sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
int totaddr, int *error);
-struct sctp_tcb *
-sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
- unsigned int *totaddr, unsigned int *num_v4, unsigned int *num_v6,
- int *error, unsigned int limit, int *bad_addr);
+int
+sctp_connectx_helper_find(struct sctp_inpcb *, struct sockaddr *,
+ unsigned int, unsigned int *, unsigned int *, unsigned int);
int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
#ifdef INET6
@@ -367,7 +366,7 @@ void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc
void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from);
void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
-void sctp_log_block(uint8_t, struct sctp_association *, size_t);
+void sctp_log_block(uint8_t, struct sctp_association *, ssize_t);
void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
int sctp_fill_stat_log(void *, size_t *);
diff --git a/freebsd/sys/netinet/tcp_hpts.h b/freebsd/sys/netinet/tcp_hpts.h
index 04c86769..293daa2c 100644
--- a/freebsd/sys/netinet/tcp_hpts.h
+++ b/freebsd/sys/netinet/tcp_hpts.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2018 Netflix Inc.
+ * Copyright (c) 2016-2018 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -45,112 +45,80 @@ TAILQ_HEAD(hptsh, inpcb);
/* Number of useconds in a hpts tick */
#define HPTS_TICKS_PER_USEC 10
-#define HPTS_MS_TO_SLOTS(x) (x * 100)
+#define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1)
#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
#define HPTS_USEC_IN_SEC 1000000
#define HPTS_MSEC_IN_SEC 1000
#define HPTS_USEC_IN_MSEC 1000
-#define DEFAULT_HPTS_LOG 3072
-
-/*
- * Log flags consist of
- * 7f 7f 1 1 bits
- * p_cpu | p_num | INPUT_ACTIVE | HPTS_ACTIVE
- *
- * So for example cpu 10, number 10 would with
- * input active would show up as:
- * p_flags = 0001010 0001010 1 0
- * <or>
- * p_flags = 0x142a
- */
-#define HPTS_HPTS_ACTIVE 0x01
-#define HPTS_INPUT_ACTIVE 0x02
-
-#define HPTSLOG_IMMEDIATE 1
-#define HPTSLOG_INSERT_NORMAL 2
-#define HPTSLOG_INSERT_SLEEPER 3
-#define HPTSLOG_SLEEP_AFTER 4
-#define HPTSLOG_SLEEP_BEFORE 5
-#define HPTSLOG_INSERTED 6
-#define HPTSLOG_WAKEUP_HPTS 7
-#define HPTSLOG_SETTORUN 8
-#define HPTSLOG_HPTSI 9
-#define HPTSLOG_TOLONG 10
-#define HPTSLOG_AWAKENS 11
-#define HPTSLOG_TIMESOUT 12
-#define HPTSLOG_SLEEPSET 13
-#define HPTSLOG_WAKEUP_INPUT 14
-#define HPTSLOG_RESCHEDULE 15
-#define HPTSLOG_AWAKE 16
-#define HPTSLOG_INP_DONE 17
-
-struct hpts_log {
- struct inpcb *inp;
- int32_t event;
- uint32_t cts;
- int32_t line;
- uint32_t ticknow;
- uint32_t t_paceslot;
- uint32_t t_hptsreq;
- uint32_t p_curtick;
- uint32_t p_prevtick;
- uint32_t slot_req;
- uint32_t p_on_queue_cnt;
- uint32_t p_nxt_slot;
- uint32_t p_cur_slot;
- uint32_t p_hpts_sleep_time;
- uint16_t p_flags;
- uint8_t p_onhpts;
- uint8_t p_oninput;
- uint8_t is_notempty;
-};
struct hpts_diag {
- uint32_t p_hpts_active;
- uint32_t p_nxt_slot;
- uint32_t p_cur_slot;
- uint32_t slot_req;
- uint32_t inp_hptsslot;
- uint32_t slot_now;
- uint32_t have_slept;
- uint32_t hpts_sleep_time;
- uint32_t yet_to_sleep;
- uint32_t need_new_to;
- int32_t co_ret;
- uint8_t p_on_min_sleep;
+ uint32_t p_hpts_active; /* bbr->flex7 x */
+ uint32_t p_nxt_slot; /* bbr->flex1 x */
+ uint32_t p_cur_slot; /* bbr->flex2 x */
+ uint32_t p_prev_slot; /* bbr->delivered */
+ uint32_t p_runningtick; /* bbr->inflight */
+ uint32_t slot_req; /* bbr->flex3 x */
+ uint32_t inp_hptsslot; /* bbr->flex4 x */
+ uint32_t slot_remaining; /* bbr->flex5 x */
+ uint32_t have_slept; /* bbr->epoch x */
+ uint32_t hpts_sleep_time; /* bbr->applimited x */
+ uint32_t yet_to_sleep; /* bbr->lt_epoch x */
+ uint32_t need_new_to; /* bbr->flex6 x */
+ uint32_t wheel_tick; /* bbr->bw_inuse x */
+ uint32_t maxticks; /* bbr->delRate x */
+ uint32_t wheel_cts; /* bbr->rttProp x */
+ int32_t co_ret; /* bbr->pkts_out x */
+ uint32_t p_curtick; /* upper bbr->cur_del_rate */
+ uint32_t p_lasttick; /* lower bbr->cur_del_rate */
+ uint8_t p_on_min_sleep; /* bbr->flex8 x */
};
+/* Magic flags to tell what's cooking on the pacing wheel */
+#define PACE_TMR_DELACK 0x01 /* Delayed ack timer running */
+#define PACE_TMR_RACK 0x02 /* RACK timer running */
+#define PACE_TMR_TLP 0x04 /* TLP timer running */
+#define PACE_TMR_RXT 0x08 /* Retransmit timer running */
+#define PACE_TMR_PERSIT 0x10 /* Persists timer running */
+#define PACE_TMR_KEEP 0x20 /* Keep alive timer running */
+#define PACE_PKT_OUTPUT 0x40 /* Output Packets being paced */
+#define PACE_TMR_MASK (PACE_TMR_KEEP|PACE_TMR_PERSIT|PACE_TMR_RXT|PACE_TMR_TLP|PACE_TMR_RACK|PACE_TMR_DELACK)
+
#ifdef _KERNEL
/* Each hpts has its own p_mtx which is used for locking */
struct tcp_hpts_entry {
/* Cache line 0x00 */
struct mtx p_mtx; /* Mutex for hpts */
- uint32_t p_hpts_active; /* Flag that says hpts is awake */
- uint32_t p_curtick; /* Current tick in 10 us the hpts is at */
- uint32_t p_prevtick; /* Previous tick in 10 us the hpts ran */
+ uint16_t p_hpts_active; /* Flag that says hpts is awake */
+ uint8_t p_hpts_wake_scheduled; /* Have we scheduled a wakeup? */
+ uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
+ uint32_t p_curtick; /* Tick in 10 us the hpts is going to */
+ uint32_t p_runningtick; /* Current tick we are at if we are running */
+ uint32_t p_prev_slot; /* Previous slot we were on */
uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */
uint32_t p_nxt_slot; /* The next slot outside the current range of
* slots that the hpts is running on. */
int32_t p_on_queue_cnt; /* Count on queue in this hpts */
- uint32_t enobuf_cnt;
- uint16_t p_log_at;
+ uint32_t p_lasttick; /* Last tick before the current one */
uint8_t p_direct_wake :1, /* boolean */
- p_log_wrapped :1, /* boolean */
- p_on_min_sleep:1; /* boolean */
- uint8_t p_fill;
+ p_on_min_sleep:1, /* boolean */
+ p_avail:6;
+ uint8_t p_fill[3]; /* Fill to 32 bits */
/* Cache line 0x40 */
void *p_inp;
struct hptsh p_input; /* For the tcp-input runner */
/* Hptsi wheel */
struct hptsh *p_hptss;
- struct hpts_log *p_log;
- uint32_t p_logsize;
int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
uint32_t hit_no_enobuf;
uint32_t p_dyn_adjust;
uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
* of 255ms */
+ uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */
+ uint32_t saved_lasttick; /* for logging */
+ uint32_t saved_curtick; /* for logging */
+ uint32_t saved_curslot; /* for logging */
+ uint32_t saved_prev_slot; /* for logging */
uint32_t p_delayed_by; /* How much were we delayed by */
/* Cache line 0x80 */
struct sysctl_ctx_list hpts_ctx;
@@ -236,13 +204,9 @@ tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts
int
__tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line);
#define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
-void
-tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos);
int
-__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
- int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line);
-#define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__)
+__tcp_queue_to_input(struct inpcb *inp, int32_t line);
+#define tcp_queue_to_input(a) __tcp_queue_to_input(a, __LINE__)
uint16_t tcp_hpts_delayedby(struct inpcb *inp);
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index d00504dc..4bf12298 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -214,11 +214,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autorcvbuf), 0,
"Enable automatic receive buffer sizing");
-VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_autorcvbuf_inc), 0,
- "Incrementor step size of automatic receive buffer");
-
VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
@@ -373,31 +368,14 @@ cc_conn_init(struct tcpcb *tp)
/*
* Set the initial slow-start flight size.
*
- * RFC5681 Section 3.1 specifies the default conservative values.
- * RFC3390 specifies slightly more aggressive values.
- * RFC6928 increases it to ten segments.
- * Support for user specified value for initial flight size.
- *
* If a SYN or SYN/ACK was lost and retransmitted, we have to
* reduce the initial CWND to one segment as congestion is likely
* requiring us to be cautious.
*/
if (tp->snd_cwnd == 1)
tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */
- else if (V_tcp_initcwnd_segments)
- tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg,
- max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
- else if (V_tcp_do_rfc3390)
- tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380));
- else {
- /* Per RFC5681 Section 3.1 */
- if (maxseg > 2190)
- tp->snd_cwnd = 2 * maxseg;
- else if (maxseg > 1095)
- tp->snd_cwnd = 3 * maxseg;
- else
- tp->snd_cwnd = 4 * maxseg;
- }
+ else
+ tp->snd_cwnd = tcp_compute_initwnd(maxseg);
if (CC_ALGO(tp)->conn_init != NULL)
CC_ALGO(tp)->conn_init(tp->ccv);
@@ -578,6 +556,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
int optlen = 0;
#ifdef INET
int len;
+ uint8_t ipttl;
#endif
int tlen = 0, off;
int drop_hdrlen;
@@ -700,6 +679,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
* Checksum extended TCP header and data.
*/
len = off0 + tlen;
+ ipttl = ip->ip_ttl;
bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
ipov->ih_len = htons(tlen);
th->th_sum = in_cksum(m, len);
@@ -708,6 +688,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
/* Reset TOS bits */
ip->ip_tos = iptos;
/* Re-initialization for later version check */
+ ip->ip_ttl = ipttl;
ip->ip_v = IPVERSION;
ip->ip_hl = off0 >> 2;
}
@@ -1468,13 +1449,16 @@ drop:
* The criteria to step up the receive buffer one notch are:
* 1. Application has not set receive buffer size with
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
- * 2. the number of bytes received during the time it takes
- * one timestamp to be reflected back to us (the RTT);
- * 3. received bytes per RTT is within seven eighth of the
- * current socket buffer size;
- * 4. receive buffer size has not hit maximal automatic size;
+ * 2. the number of bytes received during 1/2 of an sRTT
+ * is at least 3/8 of the current socket buffer size.
+ * 3. receive buffer size has not hit maximal automatic size;
+ *
+ * If all of the criteria are met we increase the socket buffer
+ * by one half (bounded by the max). This allows us to keep ahead
+ * of slow-start but also makes it so our peer never gets limited
+ * by our rwnd which we then open up causing a burst.
*
- * This algorithm does one step per RTT at most and only if
+ * This algorithm does two steps per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
@@ -1491,11 +1475,10 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
- (tp->t_srtt >> TCP_RTT_SHIFT)) {
- if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+ ((tp->t_srtt >> TCP_RTT_SHIFT)/2)) {
+ if (tp->rfbuf_cnt > ((so->so_rcv.sb_hiwat / 2)/ 4 * 3) &&
so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
- newsize = min(so->so_rcv.sb_hiwat +
- V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+ newsize = min((so->so_rcv.sb_hiwat + (so->so_rcv.sb_hiwat/2)), V_tcp_autorcvbuf_max);
}
TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);
@@ -1505,7 +1488,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->rfbuf_cnt += tlen; /* add up */
}
-
return (newsize);
}
@@ -2029,7 +2011,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
else
tp->t_flags |= TF_ACKNOW;
- if ((thflags & TH_ECE) && V_tcp_do_ecn) {
+ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+ V_tcp_do_ecn) {
tp->t_flags |= TF_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
@@ -2279,6 +2262,18 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
TCPSTAT_INC(tcps_rcvpartduppack);
TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
}
+ /*
+ * DSACK - add SACK block for dropped range
+ */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
+ /*
+ * ACK now, as the next in-sequence segment
+ * will clear the DSACK block again
+ */
+ tp->t_flags |= TF_ACKNOW;
+ }
drop_hdrlen += todrop; /* drop from the top afterwards */
th->th_seq += todrop;
tlen -= todrop;
@@ -2403,8 +2398,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->rcv_scale = tp->request_r_scale;
- tp->snd_wnd = tiwin;
}
+ tp->snd_wnd = tiwin;
/*
* Make transitions:
* SYN-RECEIVED -> ESTABLISHED
@@ -3007,6 +3002,8 @@ dodata: /* XXX */
if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
+ tcp_seq save_rnxt = tp->rcv_nxt;
+ int save_tlen = tlen;
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -3046,11 +3043,41 @@ dodata: /* XXX */
* m_adj() doesn't actually frees any mbufs
* when trimming from the head.
*/
- thflags = tcp_reass(tp, th, &save_start, &tlen, m);
+ tcp_seq temp = save_start;
+ thflags = tcp_reass(tp, th, &temp, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
- if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
- tcp_update_sack_list(tp, save_start, save_start + tlen);
+ if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0)) {
+ if ((tlen == 0) && (SEQ_LT(save_start, save_rnxt))) {
+ /*
+ * DSACK actually handled in the fastpath
+ * above.
+ */
+ tcp_update_sack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
+ if ((tp->rcv_numsacks >= 1) &&
+ (tp->sackblks[0].end == save_start)) {
+ /*
+ * Partial overlap, recorded at todrop
+ * above.
+ */
+ tcp_update_sack_list(tp,
+ tp->sackblks[0].start,
+ tp->sackblks[0].end);
+ } else {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ }
+ } else if (tlen >= save_tlen) {
+ /* Update of sackblks. */
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if (tlen > 0) {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + tlen);
+ }
+ }
#if 0
/*
* Note the amount of data that peer has sent into
@@ -3820,3 +3847,30 @@ tcp_compute_pipe(struct tcpcb *tp)
tp->sackhint.sack_bytes_rexmit -
tp->sackhint.sacked_bytes);
}
+
+/*
+ * Compute the Initial Window (also used as the Restart Window) in bytes
+ * for a connection whose maximum segment size is maxseg.
+ *
+ * Precedence:
+ *  - a user specified initial flight size (V_tcp_initcwnd_segments),
+ *    capped at max(2 * maxseg, segments * 1460);
+ *  - the slightly more aggressive RFC3390 values when V_tcp_do_rfc3390
+ *    is set;
+ *  - otherwise the default conservative values of RFC5681 Section 3.1.
+ */
+uint32_t
+tcp_compute_initwnd(uint32_t maxseg)
+{
+	uint32_t nsegs;
+
+	if (V_tcp_initcwnd_segments) {
+		return (min(V_tcp_initcwnd_segments * maxseg,
+		    max(2 * maxseg, V_tcp_initcwnd_segments * 1460)));
+	}
+	if (V_tcp_do_rfc3390) {
+		return (min(4 * maxseg, max(2 * maxseg, 4380)));
+	}
+
+	/* Per RFC5681 Section 3.1: the larger the MSS, the fewer segments. */
+	if (maxseg > 2190)
+		nsegs = 2;
+	else if (maxseg > 1095)
+		nsegs = 3;
+	else
+		nsegs = 4;
+	return (nsegs * maxseg);
+}
diff --git a/freebsd/sys/netinet/tcp_log_buf.h b/freebsd/sys/netinet/tcp_log_buf.h
index e569395a..e0575a43 100644
--- a/freebsd/sys/netinet/tcp_log_buf.h
+++ b/freebsd/sys/netinet/tcp_log_buf.h
@@ -1,8 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2016-2018
- * Netflix Inc. All rights reserved.
+ * Copyright (c) 2016-2018 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -176,7 +175,7 @@ enum tcp_log_events {
TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */
TCP_LOG_PRR, /* Doing PRR 6 */
TCP_LOG_REORDER,/* Detected reorder 7 */
- TCP_LOG_PACER, /* Pacer sending a packet 8 */
+ TCP_LOG_HPTS, /* Hpts sending a packet 8 */
BBR_LOG_BBRUPD, /* We updated BBR info 9 */
BBR_LOG_BBRSND, /* We did a slot calculation and sending is done 10 */
BBR_LOG_ACKCLEAR, /* A ack clears all outstanding 11 */
@@ -195,31 +194,38 @@ enum tcp_log_events {
BBR_LOG_PERSIST, /* BBR changed to/from a persists 24 */
TCP_LOG_FLOWEND, /* End of a flow 25 */
BBR_LOG_RTO, /* BBR's timeout includes BBR info 26 */
- BBR_LOG_DOSEG_DONE, /* pacer do_segment completes 27 */
- BBR_LOG_EXIT_GAIN, /* pacer do_segment completes 28 */
+ BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */
+ BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */
BBR_LOG_THRESH_CALC, /* Doing threshold calculation 29 */
BBR_LOG_EXTRACWNDGAIN, /* Removed 30 */
TCP_LOG_USERSEND, /* User level sends data 31 */
- UNUSED_32, /* Unused 32 */
- UNUSED_33, /* Unused 33 */
+ BBR_RSM_CLEARED, /* RSM cleared of ACK flags 32 */
+ BBR_LOG_STATE_TARGET, /* Log of target at state 33 */
BBR_LOG_TIME_EPOCH, /* A timed based Epoch occured 34 */
BBR_LOG_TO_PROCESS, /* A to was processed 35 */
BBR_LOG_BBRTSO, /* TSO update 36 */
- BBR_LOG_PACERDIAG, /* Pacer diag insert 37 */
+ BBR_LOG_HPTSDIAG, /* Hpts diag insert 37 */
BBR_LOG_LOWGAIN, /* Low gain accounting 38 */
BBR_LOG_PROGRESS, /* Progress timer event 39 */
TCP_LOG_SOCKET_OPT, /* A socket option is set 40 */
BBR_LOG_TIMERPREP, /* A BBR var to debug out TLP issues 41 */
BBR_LOG_ENOBUF_JMP, /* We had a enobuf jump 42 */
- BBR_LOG_PACING_CALC, /* calc the pacing time 43 */
+ BBR_LOG_HPTSI_CALC, /* calc the hptsi time 43 */
BBR_LOG_RTT_SHRINKS, /* We had a log reduction of rttProp 44 */
BBR_LOG_BW_RED_EV, /* B/W reduction events 45 */
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
- BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
+ BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */
TCP_LOG_REASS, /* Reassembly buffer logging 50 */
- TCP_LOG_END /* End (keep at end) 51 */
+ TCP_HDWR_TLS, /* TCP Hardware TLS logs 51 */
+ BBR_LOG_HDWR_PACE, /* TCP Hardware pacing log 52 */
+ BBR_LOG_TSTMP_VAL, /* Temp debug timestamp validation 53 */
+ TCP_LOG_CONNEND, /* End of connection 54 */
+ TCP_LOG_LRO, /* LRO entry 55 */
+ TCP_SACK_FILTER_RES, /* Results of SACK Filter 56 */
+ TCP_SAD_DETECTION, /* Sack Attack Detection 57 */
+ TCP_LOG_END /* End (keep at end) 58 */
};
enum tcp_log_states {
@@ -276,8 +282,8 @@ struct tcp_log_dev_log_queue {
#ifdef _KERNEL
-#define TCP_LOG_BUF_DEFAULT_SESSION_LIMIT 10000
-#define TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT 1000000
+#define TCP_LOG_BUF_DEFAULT_SESSION_LIMIT 5000
+#define TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT 5000000
/*
* TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c
index 7242eb5c..1bc90c9f 100644
--- a/freebsd/sys/netinet/tcp_lro.c
+++ b/freebsd/sys/netinet/tcp_lro.c
@@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockbuf.h>
#include <sys/sysctl.h>
#include <net/if.h>
@@ -58,11 +60,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet6/in6_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
-
+#include <netinet/tcp_hpts.h>
+#include <netinet/tcp_log_buf.h>
#include <netinet6/ip6_var.h>
#include <machine/in_cksum.h>
@@ -81,10 +86,46 @@ static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"TCP LRO");
+/*
+ * Incremented by tcp_lro_reg_mbufq() and decremented by
+ * tcp_lro_dereg_mbufq(); tcp_lro_flush() skips the inpcb lookup while
+ * this is zero.
+ */
+static long tcplro_stacks_wanting_mbufq = 0;
+/*
+ * Statistics counters, exported read-only below under net.inet.tcp.lro.
+ * NOTE(review): their allocation is not visible in this chunk -- confirm
+ * they are allocated before the first counter_u64_add() on them.
+ */
+counter_u64_t tcp_inp_lro_direct_queue;
+counter_u64_t tcp_inp_lro_wokeup_queue;
+counter_u64_t tcp_inp_lro_compressed;
+counter_u64_t tcp_inp_lro_single_push;
+counter_u64_t tcp_inp_lro_locks_taken;
+counter_u64_t tcp_inp_lro_sack_wake;
+
static unsigned tcp_lro_entries = TCP_LRO_ENTRIES;
+/*
+ * Read/write knob net.inet.tcp.lro.hold_lock; when zero (the default)
+ * tcp_lro_flush() drops the inpcb write lock before compressing mbufs.
+ */
+static int32_t hold_lock_over_compress = 0;
+SYSCTL_INT(_net_inet_tcp_lro, OID_AUTO, hold_lock, CTLFLAG_RW,
+ &hold_lock_over_compress, 0,
+ "Do we hold the lock over the compress of mbufs?");
SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
"default number of LRO entries");
+/* Read-only views of the statistics counters declared above. */
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, fullqueue, CTLFLAG_RD,
+ &tcp_inp_lro_direct_queue, "Number of lro's fully queued to transport");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, wokeup, CTLFLAG_RD,
+ &tcp_inp_lro_wokeup_queue, "Number of lro's where we woke up transport via hpts");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, compressed, CTLFLAG_RD,
+ &tcp_inp_lro_compressed, "Number of lro's compressed and sent to transport");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, single, CTLFLAG_RD,
+ &tcp_inp_lro_single_push, "Number of lro's sent with single segment");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lockcnt, CTLFLAG_RD,
+ &tcp_inp_lro_locks_taken, "Number of lro's inp_wlocks taken");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, sackwakeups, CTLFLAG_RD,
+ &tcp_inp_lro_sack_wake, "Number of wakeups caused by sack/fin");
+
+/*
+ * Atomically bump tcplro_stacks_wanting_mbufq.  While that count is
+ * non-zero tcp_lro_flush() performs the inpcb lookup needed for mbuf
+ * queuing.
+ */
+void
+tcp_lro_reg_mbufq(void)
+{
+	(void)atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, 1);
+}
+
+/*
+ * Atomically decrement tcplro_stacks_wanting_mbufq; the counterpart of
+ * tcp_lro_reg_mbufq().
+ */
+void
+tcp_lro_dereg_mbufq(void)
+{
+	(void)atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, -1);
+}
static __inline void
tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
@@ -164,6 +205,36 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
return (0);
}
+/*
+ * Locate the TCP header inside mbuf m, which must start with an
+ * Ethernet header.  The layer-3 type is taken from le->eh_type and the
+ * TCP header is assumed to follow directly after a fixed-size
+ * struct ip / struct ip6_hdr.
+ *
+ * Returns NULL when le->eh_type is not a compiled-in address family.
+ */
+static struct tcphdr *
+tcp_lro_get_th(struct lro_entry *le, struct mbuf *m)
+{
+	struct ether_header *ehdr;
+
+	ehdr = mtod(m, struct ether_header *);
+	switch (le->eh_type) {
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+		return ((struct tcphdr *)((struct ip6_hdr *)(ehdr + 1) + 1));
+#endif
+#ifdef INET
+	case ETHERTYPE_IP:
+		return ((struct tcphdr *)((struct ip *)(ehdr + 1) + 1));
+#endif
+	}
+	return (NULL);
+}
+
void
tcp_lro_free(struct lro_ctrl *lc)
{
@@ -194,7 +265,6 @@ tcp_lro_free(struct lro_ctrl *lc)
lc->lro_mbuf_data = NULL;
}
-#ifdef TCP_LRO_UPDATE_CSUM
static uint16_t
tcp_lro_csum_th(struct tcphdr *th)
{
@@ -277,7 +347,6 @@ tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
return (c & 0xffff);
}
-#endif
static void
tcp_lro_rx_done(struct lro_ctrl *lc)
@@ -299,7 +368,7 @@ tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
if (LIST_EMPTY(&lc->lro_active))
return;
- getmicrotime(&tv);
+ getmicrouptime(&tv);
timevalsub(&tv, timeout);
LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
if (timevalcmp(&tv, &le->mtime, >=)) {
@@ -309,11 +378,113 @@ tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
}
}
-void
-tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
+#ifdef INET6
+/*
+ * IPv6 admission check for LRO.  On success stores the address of the
+ * TCP header (immediately after the fixed IPv6 header) in *th and
+ * returns 0; returns TCP_LRO_NOT_SUPPORTED when the next header is not
+ * TCP.  lc and m are not used.
+ */
+static int
+tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
+    struct tcphdr **th)
+{
+
+	/* XXX-BZ we should check the flow-label. */
+
+	/* XXX-BZ We do not yet support ext. hdrs. */
+	if (ip6->ip6_nxt == IPPROTO_TCP) {
+		/* The TCP header follows the IPv6 header directly. */
+		*th = (struct tcphdr *)(ip6 + 1);
+		return (0);
+	}
+
+	return (TCP_LRO_NOT_SUPPORTED);
+}
+#endif
+
+#ifdef INET
+/*
+ * IPv4 admission check for LRO.
+ *
+ * Returns TCP_LRO_NOT_SUPPORTED when the payload is not TCP, and
+ * TCP_LRO_CANNOT when the header carries options, the packet is a
+ * fragment, or the IP header checksum is bad (lc->lro_bad_csum is
+ * incremented in that last case).  Otherwise stores the address of the
+ * TCP header in *th and returns 0.
+ */
+static int
+tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
+ struct tcphdr **th)
{
+ int csum_flags;
+ uint16_t csum;
+
+ if (ip4->ip_p != IPPROTO_TCP)
+ return (TCP_LRO_NOT_SUPPORTED);
+
+ /* Ensure there are no options. */
+ if ((ip4->ip_hl << 2) != sizeof (*ip4))
+ return (TCP_LRO_CANNOT);
+
+ /* .. and the packet is not fragmented. */
+ if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
+ return (TCP_LRO_CANNOT);
- if (le->append_cnt > 0) {
+ /* Legacy IP has a header checksum that needs to be correct. */
+ csum_flags = m->m_pkthdr.csum_flags;
+ if (csum_flags & CSUM_IP_CHECKED) {
+ /* The mbuf flags say the checksum was already checked; use the verdict. */
+ if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
+ }
+ } else {
+ /* Not checked yet: verify the header checksum in software. */
+ csum = in_cksum_hdr(ip4);
+ if (__predict_false((csum) != 0)) {
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
+ }
+ }
+ /* Find the TCP header (we assured there are no IP options). */
+ *th = (struct tcphdr *)(ip4 + 1);
+ return (0);
+}
+#endif
+
+/*
+ * Emit a TCP_LOG_LRO record describing the state of LRO entry le.
+ *
+ * tp            connection to log against; must not be NULL.  Nothing is
+ *               logged when tp->t_logstate is TCP_LOG_STATE_OFF.
+ * lc, le        LRO control block and entry; le->m_head must be valid.
+ * m             packet being processed, or NULL.
+ * frm           small integer identifying the call site (stored in flex8).
+ * tcp_data_len, th_seq, th_ack, th_win
+ *               values of the segment being processed (0 when unknown).
+ *
+ * The mbuf addresses are recorded for debugging only.  They are converted
+ * through uintptr_t so that the pointer-to-integer conversion is well
+ * formed on targets where pointers are narrower than 64 bits.
+ */
+static void
+tcp_lro_log(struct tcpcb *tp, struct lro_ctrl *lc,
+    struct lro_entry *le, struct mbuf *m, int frm, int32_t tcp_data_len,
+    uint32_t th_seq, uint32_t th_ack, uint16_t th_win)
+{
+	union tcp_log_stackspecific log;
+	struct timeval tv;
+	uint32_t cts;
+
+	if (tp->t_logstate == TCP_LOG_STATE_OFF)
+		return;
+
+	cts = tcp_get_usecs(&tv);
+	memset(&log, 0, sizeof(log));
+	log.u_bbr.flex8 = frm;
+	log.u_bbr.flex1 = tcp_data_len;
+	if (m)
+		log.u_bbr.flex2 = m->m_pkthdr.len;
+	else
+		log.u_bbr.flex2 = 0;
+	log.u_bbr.flex3 = le->append_cnt;
+	log.u_bbr.flex4 = le->p_len;
+	log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
+	log.u_bbr.delRate = le->m_head->m_flags;
+	log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
+	log.u_bbr.flex6 = lc->lro_length_lim;
+	log.u_bbr.flex7 = lc->lro_ackcnt_lim;
+	log.u_bbr.inflight = th_seq;
+	log.u_bbr.timeStamp = cts;
+	log.u_bbr.epoch = le->next_seq;
+	log.u_bbr.delivered = th_ack;
+	log.u_bbr.lt_epoch = le->ack_seq;
+	log.u_bbr.pacing_gain = th_win;
+	log.u_bbr.cwnd_gain = le->window;
+	/* Pointer values: go through uintptr_t, never straight to uint64_t. */
+	log.u_bbr.cur_del_rate = (uintptr_t)m;
+	log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
+	log.u_bbr.pkts_out = le->mbuf_cnt;	/* Total mbufs added */
+	log.u_bbr.applimited = le->ulp_csum;
+	log.u_bbr.lost = le->mbuf_appended;
+	TCP_LOG_EVENTP(tp, NULL,
+	    &tp->t_inpcb->inp_socket->so_rcv,
+	    &tp->t_inpcb->inp_socket->so_snd,
+	    TCP_LOG_LRO, 0,
+	    0, &log, false, &tv);
+}
+
+static void
+tcp_flush_out_le(struct tcpcb *tp, struct lro_ctrl *lc, struct lro_entry *le, int locked)
+{
+ if (le->append_cnt > 1) {
struct tcphdr *th;
uint16_t p_len;
@@ -337,13 +508,10 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
case ETHERTYPE_IP:
{
struct ip *ip4;
-#ifdef TCP_LRO_UPDATE_CSUM
uint32_t cl;
uint16_t c;
-#endif
ip4 = le->le_ip4;
-#ifdef TCP_LRO_UPDATE_CSUM
/* Fix IP header checksum for new length. */
c = ~ip4->ip_sum;
cl = c;
@@ -353,9 +521,6 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
cl = (cl >> 16) + (cl & 0xffff);
c = cl;
ip4->ip_sum = ~c;
-#else
- ip4->ip_sum = TCP_LRO_INVALID_CSUM;
-#endif
ip4->ip_len = p_len;
th = (struct tcphdr *)(ip4 + 1);
le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
@@ -381,7 +546,6 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
ts_ptr[1] = htonl(le->tsval);
ts_ptr[2] = le->tsecr;
}
-#ifdef TCP_LRO_UPDATE_CSUM
/* Update the TCP header checksum. */
le->ulp_csum += p_len;
le->ulp_csum += tcp_lro_csum_th(th);
@@ -390,14 +554,431 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
(le->ulp_csum & 0xffff);
th->th_sum = (le->ulp_csum & 0xffff);
th->th_sum = ~th->th_sum;
-#else
- th->th_sum = TCP_LRO_INVALID_CSUM;
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, NULL, 7, 0, 0, 0, 0);
+ }
+ }
+ /*
+ * Break any chain, this is not set to NULL on the singleton
+ * case m_nextpkt points to m_head. Other case set them
+ * m_nextpkt to NULL in push_and_replace.
+ */
+ le->m_head->m_nextpkt = NULL;
+ le->m_head->m_pkthdr.lro_nsegs = le->append_cnt;
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, le->m_head, 8, 0, 0, 0, 0);
+ }
+ (*lc->ifp->if_input)(lc->ifp, le->m_head);
+ lc->lro_queued += le->append_cnt;
+}
+
+/*
+ * Re-initialise LRO entry le so that mbuf m becomes its head packet.
+ *
+ * The layer-3 header pointers, addresses, ports, payload length,
+ * timestamp state, sequence/ack/window tracking and the running ULP
+ * checksum are all recomputed from the headers found in m.  The layer-3
+ * type is taken from le->eh_type, which is left untouched.
+ * m->m_pkthdr.lro_len is read as the TCP payload length of m.
+ *
+ * The per-family header check is expected to succeed; a failure is
+ * only caught by the KASSERT below.
+ */
+static void
+tcp_set_le_to_m(struct lro_ctrl *lc, struct lro_entry *le, struct mbuf *m)
+{
+ struct ether_header *eh;
+ void *l3hdr = NULL; /* Keep compiler happy. */
+ struct tcphdr *th;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
+#endif
+#ifdef INET
+ struct ip *ip4 = NULL; /* Keep compiler happy. */
+#endif
+ uint32_t *ts_ptr;
+ int error, l, ts_failed = 0;
+ uint16_t tcp_data_len;
+ uint16_t csum;
+
+ error = -1;
+ eh = mtod(m, struct ether_header *);
+ /*
+ * We must reset the other pointers since the mbuf
+ * we were pointing to is about to go away.
+ */
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
+ error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
+ le->le_ip6 = ip6;
+ le->source_ip6 = ip6->ip6_src;
+ le->dest_ip6 = ip6->ip6_dst;
+ /* For IPv6 p_len excludes the IPv6 header itself. */
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ l3hdr = ip4 = (struct ip *)(eh + 1);
+ error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
+ le->le_ip4 = ip4;
+ le->source_ip4 = ip4->ip_src.s_addr;
+ le->dest_ip4 = ip4->ip_dst.s_addr;
+ /* For IPv4 p_len still includes the IP header. */
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
+ break;
#endif
}
+ KASSERT(error == 0, ("%s: le=%p tcp_lro_rx_xxx failed\n",
+ __func__, le));
+ /* l becomes the length of the TCP options, ts_ptr their start. */
+ ts_ptr = (uint32_t *)(th + 1);
+ l = (th->th_off << 2);
+ l -= sizeof(*th);
+ if (l != 0 &&
+ (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /* We have failed to find a timestamp some other option? */
+ ts_failed = 1;
+ }
+ if ((l != 0) && (ts_failed == 0)) {
+ /* tsval is converted with ntohl(); tsecr is stored as read. */
+ le->timestamp = 1;
+ le->tsval = ntohl(*(ts_ptr + 1));
+ le->tsecr = *(ts_ptr + 2);
+ } else
+ le->timestamp = 0;
+ le->source_port = th->th_sport;
+ le->dest_port = th->th_dport;
+ /* Pull out the csum */
+ tcp_data_len = m->m_pkthdr.lro_len;
+ /* next_seq is converted with ntohl(); ack_seq and window are stored as read. */
+ le->next_seq = ntohl(th->th_seq) + tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ csum = th->th_sum;
+ /* Setup the data pointers */
+ le->m_head = m;
+ le->m_tail = m_last(m);
+ le->append_cnt = 0;
+ le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
+ ~csum);
+ /* The head packet itself counts as the first appended segment. */
+ le->append_cnt++;
+ th->th_sum = csum; /* Restore checksum on first packet. */
+}
- le->m_head->m_pkthdr.lro_nsegs = le->append_cnt + 1;
- (*lc->ifp->if_input)(lc->ifp, le->m_head);
- lc->lro_queued += le->append_cnt + 1;
+/*
+ * Push the aggregate currently held in le up the stack and make m the
+ * new head of le.  Whatever was queued behind the old head (its
+ * m_nextpkt list) is carried over and re-attached behind m.
+ *
+ * tp and locked are only forwarded to tcp_flush_out_le().
+ */
+static void
+tcp_push_and_replace(struct tcpcb *tp, struct lro_ctrl *lc, struct lro_entry *le, struct mbuf *m, int locked)
+{
+	struct mbuf *old_head = le->m_head;
+	struct mbuf *pending;
+
+	/* Detach the remainder of the list from the outgoing head. */
+	pending = old_head->m_nextpkt;
+	old_head->m_nextpkt = NULL;
+
+	/* Deliver the old aggregate. */
+	tcp_flush_out_le(tp, lc, le, locked);
+
+	/*
+	 * Rebuild the entry (TCP header pointers, checksum state and the
+	 * other per-head fields) around the replacement packet.
+	 */
+	tcp_set_le_to_m(lc, le, m);
+
+	/* Hang the saved remainder behind the new head. */
+	m->m_nextpkt = pending;
+}
+
+/*
+ * Condense the packets queued on le (le->m_head followed by its
+ * m_nextpkt list) into as few aggregates as possible.
+ *
+ * Each queued packet is either merged into the current head or, when
+ * it cannot be merged, the current head is pushed up the stack with
+ * tcp_push_and_replace() and that packet becomes the new head.  The
+ * function returns once le->m_head has no m_nextpkt successor left;
+ * that final head is NOT pushed here.
+ *
+ * tp and locked are only used for logging (when tp != NULL and locked
+ * is non-zero) and are forwarded to tcp_push_and_replace().
+ */
+static void
+tcp_lro_condense(struct tcpcb *tp, struct lro_ctrl *lc, struct lro_entry *le, int locked)
+{
+ /*
+ * Walk through the mbuf chain we
+ * have on tap and compress/condense
+ * as required.
+ */
+ uint32_t *ts_ptr;
+ struct mbuf *m;
+ struct tcphdr *th;
+ uint16_t tcp_data_len, csum_upd;
+ int l;
+
+ /*
+ * First we must check the lead (m_head)
+ * we must make sure that it is *not*
+ * something that should be sent up
+ * right away (sack etc).
+ */
+again:
+
+ m = le->m_head->m_nextpkt;
+ if (m == NULL) {
+ /* Just the one left */
+ return;
+ }
+ th = tcp_lro_get_th(le, le->m_head);
+ KASSERT(th != NULL,
+ ("le:%p m:%p th comes back NULL?", le, le->m_head));
+ l = (th->th_off << 2);
+ l -= sizeof(*th);
+ ts_ptr = (uint32_t *)(th + 1);
+ if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * It's not the timestamp. We can't
+ * use this guy as the head.
+ */
+ /*
+ * Unlink m first: tcp_push_and_replace() saves the head's
+ * m_nextpkt list and re-attaches it behind m.
+ */
+ le->m_head->m_nextpkt = m->m_nextpkt;
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+ /*
+ * Make sure that previously seen segments/ACKs are delivered
+ * before this segment, e.g. FIN.
+ */
+ le->m_head->m_nextpkt = m->m_nextpkt;
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ while((m = le->m_head->m_nextpkt) != NULL) {
+ /*
+ * condense m into le, first
+ * pull m out of the list.
+ */
+ le->m_head->m_nextpkt = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ /* Setup my data */
+ tcp_data_len = m->m_pkthdr.lro_len;
+ th = tcp_lro_get_th(le, m);
+ KASSERT(th != NULL,
+ ("le:%p m:%p th comes back NULL?", le, m));
+ ts_ptr = (uint32_t *)(th + 1);
+ l = (th->th_off << 2);
+ l -= sizeof(*th);
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 1, 0, 0, 0, 0);
+ }
+ /* Too many segments already accounted to this head: start anew. */
+ if (le->append_cnt >= lc->lro_ackcnt_lim) {
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 2, 0, 0, 0, 0);
+ }
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
+ /* Flush now if appending will result in overflow. */
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 3, tcp_data_len, 0, 0, 0);
+ }
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * Maybe a sack in the new one? We need to
+ * start all over after flushing the
+ * current le. We will go up to the beginning
+ * and flush it (calling the replace again possibly
+ * or just returning).
+ */
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ /* Any flag besides ACK/PUSH: deliver what we have first. */
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ if (l != 0) {
+ uint32_t tsval = ntohl(*(ts_ptr + 1));
+ /* Make sure timestamp values are increasing. */
+ if (TSTMP_GT(le->tsval, tsval)) {
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ le->tsval = tsval;
+ le->tsecr = *(ts_ptr + 2);
+ }
+ /* Try to append the new segment. */
+ if (__predict_false(ntohl(th->th_seq) != le->next_seq ||
+ (tcp_data_len == 0 &&
+ le->ack_seq == th->th_ack &&
+ le->window == th->th_win))) {
+ /* Out of order packet or duplicate ACK. */
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 4, tcp_data_len,
+ ntohl(th->th_seq),
+ th->th_ack,
+ th->th_win);
+ }
+ tcp_push_and_replace(tp, lc, le, m, locked);
+ goto again;
+ }
+ /*
+ * le->ack_seq and le->window hold the values as read from the
+ * header; ntohl() is applied only for the ordering comparison.
+ */
+ if (tcp_data_len || SEQ_GT(ntohl(th->th_ack), ntohl(le->ack_seq))) {
+ le->next_seq += tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ } else if (th->th_ack == le->ack_seq) {
+ le->window = WIN_MAX(le->window, th->th_win);
+ }
+ csum_upd = m->m_pkthdr.lro_csum;
+ le->ulp_csum += csum_upd;
+ if (tcp_data_len == 0) {
+ /* No payload: the entry state was updated above; free the mbuf. */
+ le->append_cnt++;
+ le->mbuf_cnt--;
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 5, tcp_data_len,
+ ntohl(th->th_seq),
+ th->th_ack,
+ th->th_win);
+ }
+ m_freem(m);
+ continue;
+ }
+ le->append_cnt++;
+ le->mbuf_appended++;
+ le->p_len += tcp_data_len;
+ /*
+ * Adjust the mbuf so that m_data points to the first byte of
+ * the ULP payload. Adjust the mbuf to avoid complications and
+ * append new segment to existing mbuf chain.
+ */
+ m_adj(m, m->m_pkthdr.len - tcp_data_len);
+ if (tp && locked) {
+ tcp_lro_log(tp, lc, le, m, 6, tcp_data_len,
+ ntohl(th->th_seq),
+ th->th_ack,
+ th->th_win);
+ }
+ /* Link m behind the current tail and advance the tail pointer. */
+ m_demote_pkthdr(m);
+ le->m_tail->m_next = m;
+ le->m_tail = m_last(m);
+ }
+}
+
+#ifdef TCPHPTS
+/*
+ * Move the packet list held by le (le->m_head .. le->m_last_mbuf) onto
+ * the end of tp's input packet queue (t_in_pkt / t_tail_pkt).  The
+ * entry's m_head and m_last_mbuf are cleared afterwards.
+ */
+static void
+tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
+{
+	struct mbuf *first = le->m_head;
+
+	/* Link behind the current tail, or start a new queue. */
+	if (tp->t_in_pkt != NULL)
+		tp->t_tail_pkt->m_nextpkt = first;
+	else
+		tp->t_in_pkt = first;
+	tp->t_tail_pkt = le->m_last_mbuf;
+
+	le->m_head = NULL;
+	le->m_last_mbuf = NULL;
+}
+#endif
+
+void
+tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
+{
+ struct tcpcb *tp = NULL;
+ int locked = 0;
+#ifdef TCPHPTS
+ struct inpcb *inp = NULL;
+ int need_wakeup = 0, can_queue = 0;
+ struct epoch_tracker et;
+
+ /* Now lets lookup the inp first */
+ CURVNET_SET(lc->ifp->if_vnet);
+ if (tcplro_stacks_wanting_mbufq == 0)
+ goto skip_lookup;
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ inp = in6_pcblookup(&V_tcbinfo, &le->source_ip6,
+ le->source_port, &le->dest_ip6,le->dest_port,
+ INPLOOKUP_WLOCKPCB,
+ lc->ifp);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ inp = in_pcblookup(&V_tcbinfo, le->le_ip4->ip_src,
+ le->source_port, le->le_ip4->ip_dst, le->dest_port,
+ INPLOOKUP_WLOCKPCB,
+ lc->ifp);
+ break;
+#endif
+ }
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ if (inp && ((inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)) ||
+ (inp->inp_flags2 & INP_FREED))) {
+ /* We don't want this guy */
+ INP_WUNLOCK(inp);
+ inp = NULL;
+ }
+ if (inp && (inp->inp_flags2 & INP_SUPPORTS_MBUFQ)) {
+ /* The transport supports mbuf queuing */
+ can_queue = 1;
+ if (le->need_wakeup ||
+ ((inp->inp_in_input == 0) &&
+ ((inp->inp_flags2 & INP_MBUF_QUEUE_READY) == 0))) {
+ /*
+ * Either the transport is off on a keep-alive
+ * (it has the queue_ready flag clear and its
+ * not already been woken) or the entry has
+ * some urgent thing (FIN or possibly SACK blocks).
+ * This means we need to wake the transport up by
+ * putting it on the input pacer.
+ */
+ need_wakeup = 1;
+ if ((inp->inp_flags2 & INP_DONT_SACK_QUEUE) &&
+ (le->need_wakeup != 1)) {
+ /*
+ * Prohibited from a sack wakeup.
+ */
+ need_wakeup = 0;
+ }
+ }
+ /* Do we need to be awoken due to lots of data or acks? */
+ if ((le->tcp_tot_p_len >= lc->lro_length_lim) ||
+ (le->mbuf_cnt >= lc->lro_ackcnt_lim))
+ need_wakeup = 1;
+ }
+ if (inp) {
+ tp = intotcpcb(inp);
+ locked = 1;
+ } else
+ tp = NULL;
+ if (can_queue) {
+ counter_u64_add(tcp_inp_lro_direct_queue, 1);
+ tcp_lro_log(tp, lc, le, NULL, 22, need_wakeup,
+ inp->inp_flags2, inp->inp_in_input, le->need_wakeup);
+ tcp_queue_pkts(tp, le);
+ if (need_wakeup) {
+ /*
+ * We must get the guy to wakeup via
+ * hpts.
+ */
+ counter_u64_add(tcp_inp_lro_wokeup_queue, 1);
+ if (le->need_wakeup)
+ counter_u64_add(tcp_inp_lro_sack_wake, 1);
+ tcp_queue_to_input(inp);
+ }
+ }
+ if (inp && (hold_lock_over_compress == 0)) {
+ /* Unlock it */
+ locked = 0;
+ tp = NULL;
+ counter_u64_add(tcp_inp_lro_locks_taken, 1);
+ INP_WUNLOCK(inp);
+ }
+ if (can_queue == 0) {
+skip_lookup:
+#endif /* TCPHPTS */
+ /* Old fashioned lro method */
+ if (le->m_head != le->m_last_mbuf) {
+ counter_u64_add(tcp_inp_lro_compressed, 1);
+ tcp_lro_condense(tp, lc, le, locked);
+ } else
+ counter_u64_add(tcp_inp_lro_single_push, 1);
+ tcp_flush_out_le(tp, lc, le, locked);
+#ifdef TCPHPTS
+ }
+ if (inp && locked) {
+ counter_u64_add(tcp_inp_lro_locks_taken, 1);
+ INP_WUNLOCK(inp);
+ }
+ CURVNET_RESTORE();
+#endif
lc->lro_flushed++;
bzero(le, sizeof(*le));
LIST_INSERT_HEAD(&lc->lro_free, le, next);
@@ -539,65 +1120,12 @@ done:
lc->lro_mbuf_count = 0;
}
-#ifdef INET6
-static int
-tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
- struct tcphdr **th)
-{
-
- /* XXX-BZ we should check the flow-label. */
-
- /* XXX-BZ We do not yet support ext. hdrs. */
- if (ip6->ip6_nxt != IPPROTO_TCP)
- return (TCP_LRO_NOT_SUPPORTED);
-
- /* Find the TCP header. */
- *th = (struct tcphdr *)(ip6 + 1);
-
- return (0);
-}
-#endif
-
-#ifdef INET
-static int
-tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
- struct tcphdr **th)
+static void
+lro_set_mtime(struct timeval *tv, struct timespec *ts)
{
- int csum_flags;
- uint16_t csum;
-
- if (ip4->ip_p != IPPROTO_TCP)
- return (TCP_LRO_NOT_SUPPORTED);
-
- /* Ensure there are no options. */
- if ((ip4->ip_hl << 2) != sizeof (*ip4))
- return (TCP_LRO_CANNOT);
-
- /* .. and the packet is not fragmented. */
- if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
- return (TCP_LRO_CANNOT);
-
- /* Legacy IP has a header checksum that needs to be correct. */
- csum_flags = m->m_pkthdr.csum_flags;
- if (csum_flags & CSUM_IP_CHECKED) {
- if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
- lc->lro_bad_csum++;
- return (TCP_LRO_CANNOT);
- }
- } else {
- csum = in_cksum_hdr(ip4);
- if (__predict_false((csum) != 0)) {
- lc->lro_bad_csum++;
- return (TCP_LRO_CANNOT);
- }
- }
-
- /* Find the TCP header (we assured there are no IP options). */
- *th = (struct tcphdr *)(ip4 + 1);
-
- return (0);
+ tv->tv_sec = ts->tv_sec;
+ tv->tv_usec = ts->tv_nsec / 1000;
}
-#endif
static int
tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
@@ -615,12 +1143,17 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
uint32_t *ts_ptr;
tcp_seq seq;
int error, ip_len, l;
- uint16_t eh_type, tcp_data_len;
+ uint16_t eh_type, tcp_data_len, need_flush;
struct lro_head *bucket;
- int force_flush = 0;
+ struct timespec arrv;
/* We expect a contiguous header [eh, ip, tcp]. */
-
+ if ((m->m_flags & (M_TSTMP_LRO|M_TSTMP)) == 0) {
+ /* If no hardware or arrival stamp on the packet add arrival */
+ nanouptime(&arrv);
+ m->m_pkthdr.rcv_tstmp = (arrv.tv_sec * 1000000000) + arrv.tv_nsec;
+ m->m_flags |= M_TSTMP_LRO;
+ }
eh = mtod(m, struct ether_header *);
eh_type = ntohs(eh->ether_type);
switch (eh_type) {
@@ -679,49 +1212,35 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
m_adj(m, -l);
}
-
/*
* Check TCP header constraints.
*/
- /* Ensure no bits set besides ACK or PSH. */
- if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
- if (th->th_flags & TH_SYN)
- return (TCP_LRO_CANNOT);
- /*
- * Make sure that previously seen segements/ACKs are delivered
- * before this segement, e.g. FIN.
- */
- force_flush = 1;
- }
-
- /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
- /* XXX-BZ Ideally we'd flush on PUSH? */
-
- /*
- * Check for timestamps.
- * Since the only option we handle are timestamps, we only have to
- * handle the simple case of aligned timestamps.
- */
+ if (th->th_flags & TH_SYN)
+ return (TCP_LRO_CANNOT);
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
+ need_flush = 1;
+ else
+ need_flush = 0;
l = (th->th_off << 2);
+ ts_ptr = (uint32_t *)(th + 1);
tcp_data_len -= l;
l -= sizeof(*th);
- ts_ptr = (uint32_t *)(th + 1);
if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
- (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
- /*
- * Make sure that previously seen segements/ACKs are delivered
- * before this segement.
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * We have an option besides Timestamps, maybe
+ * it is a sack (most likely) which means we
+ * will probably need to wake up a sleeper (if
+ * the guy does queueing).
*/
- force_flush = 1;
+ need_flush = 2;
}
/* If the driver did not pass in the checksum, set it now. */
if (csum == 0x0000)
csum = th->th_sum;
-
seq = ntohl(th->th_seq);
-
if (!use_hash) {
bucket = &lc->lro_hash[0];
} else if (M_HASHTYPE_ISHASH(m)) {
@@ -738,13 +1257,13 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
#ifdef INET6
case ETHERTYPE_IPV6:
hash = ip6->ip6_src.s6_addr32[0] +
- ip6->ip6_dst.s6_addr32[0];
+ ip6->ip6_dst.s6_addr32[0];
hash += ip6->ip6_src.s6_addr32[1] +
- ip6->ip6_dst.s6_addr32[1];
+ ip6->ip6_dst.s6_addr32[1];
hash += ip6->ip6_src.s6_addr32[2] +
- ip6->ip6_dst.s6_addr32[2];
+ ip6->ip6_dst.s6_addr32[2];
hash += ip6->ip6_src.s6_addr32[3] +
- ip6->ip6_dst.s6_addr32[3];
+ ip6->ip6_dst.s6_addr32[3];
break;
#endif
default:
@@ -766,9 +1285,9 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
#ifdef INET6
case ETHERTYPE_IPV6:
if (bcmp(&le->source_ip6, &ip6->ip6_src,
- sizeof(struct in6_addr)) != 0 ||
+ sizeof(struct in6_addr)) != 0 ||
bcmp(&le->dest_ip6, &ip6->ip6_dst,
- sizeof(struct in6_addr)) != 0)
+ sizeof(struct in6_addr)) != 0)
continue;
break;
#endif
@@ -780,108 +1299,34 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
break;
#endif
}
-
- if (force_flush) {
- /* Timestamps mismatch; this is a FIN, etc */
- tcp_lro_active_remove(le);
- tcp_lro_flush(lc, le);
- return (TCP_LRO_CANNOT);
- }
-
- /* Flush now if appending will result in overflow. */
- if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
- tcp_lro_active_remove(le);
- tcp_lro_flush(lc, le);
- break;
- }
-
- /* Try to append the new segment. */
- if (__predict_false(seq != le->next_seq ||
- (tcp_data_len == 0 &&
- le->ack_seq == th->th_ack &&
- le->window == th->th_win))) {
- /* Out of order packet or duplicate ACK. */
- tcp_lro_active_remove(le);
- tcp_lro_flush(lc, le);
- return (TCP_LRO_CANNOT);
- }
-
- if (l != 0) {
- uint32_t tsval = ntohl(*(ts_ptr + 1));
- /* Make sure timestamp values are increasing. */
- /* XXX-BZ flip and use TSTMP_GEQ macro for this? */
- if (__predict_false(le->tsval > tsval ||
- *(ts_ptr + 2) == 0))
- return (TCP_LRO_CANNOT);
- le->tsval = tsval;
- le->tsecr = *(ts_ptr + 2);
- }
- if (tcp_data_len || SEQ_GT(ntohl(th->th_ack), ntohl(le->ack_seq))) {
- le->next_seq += tcp_data_len;
- le->ack_seq = th->th_ack;
- le->window = th->th_win;
- le->append_cnt++;
- } else if (th->th_ack == le->ack_seq) {
- le->window = WIN_MAX(le->window, th->th_win);
- le->append_cnt++;
+ if (tcp_data_len || SEQ_GT(ntohl(th->th_ack), ntohl(le->ack_seq)) ||
+ (th->th_ack == le->ack_seq)) {
+ m->m_pkthdr.lro_len = tcp_data_len;
} else {
/* no data and old ack */
- le->append_cnt++;
- m_freem(m);
- return (0);
- }
-#ifdef TCP_LRO_UPDATE_CSUM
- le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
- tcp_data_len, ~csum);
-#endif
-
- if (tcp_data_len == 0) {
m_freem(m);
- /*
- * Flush this LRO entry, if this ACK should not
- * be further delayed.
- */
- if (le->append_cnt >= lc->lro_ackcnt_lim) {
- tcp_lro_active_remove(le);
- tcp_lro_flush(lc, le);
- }
return (0);
}
-
- le->p_len += tcp_data_len;
-
- /*
- * Adjust the mbuf so that m_data points to the first byte of
- * the ULP payload. Adjust the mbuf to avoid complications and
- * append new segment to existing mbuf chain.
- */
- m_adj(m, m->m_pkthdr.len - tcp_data_len);
- m_demote_pkthdr(m);
-
- le->m_tail->m_next = m;
- le->m_tail = m_last(m);
-
- /*
- * If a possible next full length packet would cause an
- * overflow, pro-actively flush now.
- */
- if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
- tcp_lro_active_remove(le);
- tcp_lro_flush(lc, le);
- } else
- getmicrotime(&le->mtime);
-
+ if (need_flush)
+ le->need_wakeup = need_flush;
+ /* Save off the data-only csum */
+ m->m_pkthdr.rcvif = lc->ifp;
+ m->m_pkthdr.lro_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th,
+ tcp_data_len, ~csum);
+ th->th_sum = csum; /* Restore checksum */
+ /* Save off the tail I am appending to (prev) */
+ le->m_prev_last = le->m_last_mbuf;
+ /* Mark me in the last spot */
+ le->m_last_mbuf->m_nextpkt = m;
+ /* Now set the tail to me */
+ le->m_last_mbuf = m;
+ le->mbuf_cnt++;
+ m->m_nextpkt = NULL;
+ /* Add to the total size of data */
+ le->tcp_tot_p_len += tcp_data_len;
+ lro_set_mtime(&le->mtime, &arrv);
return (0);
}
-
- if (force_flush) {
- /*
- * Nothing to flush, but this segment can not be further
- * aggregated/delayed.
- */
- return (TCP_LRO_CANNOT);
- }
-
/* Try to find an empty slot. */
if (LIST_EMPTY(&lc->lro_free))
return (TCP_LRO_NO_ENTRIES);
@@ -890,7 +1335,7 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
le = LIST_FIRST(&lc->lro_free);
LIST_REMOVE(le, next);
tcp_lro_active_insert(lc, bucket, le);
- getmicrotime(&le->mtime);
+ lro_set_mtime(&le->mtime, &arrv);
/* Start filling in details. */
switch (eh_type) {
@@ -912,10 +1357,9 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
break;
#endif
- }
+ }
le->source_port = th->th_sport;
le->dest_port = th->th_dport;
-
le->next_seq = seq + tcp_data_len;
le->ack_seq = th->th_ack;
le->window = th->th_win;
@@ -924,26 +1368,31 @@ tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
le->tsval = ntohl(*(ts_ptr + 1));
le->tsecr = *(ts_ptr + 2);
}
-
-#ifdef TCP_LRO_UPDATE_CSUM
- /*
- * Do not touch the csum of the first packet. However save the
- * "adjusted" checksum of just the source and destination addresses,
- * the next header and the TCP payload. The length and TCP header
- * parts may change, so we remove those from the saved checksum and
- * re-add with final values on tcp_lro_flush() if needed.
- */
KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
- __func__, le, le->ulp_csum));
+ __func__, le, le->ulp_csum));
+ le->append_cnt = 0;
le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
- ~csum);
- th->th_sum = csum; /* Restore checksum on first packet. */
-#endif
-
+ ~csum);
+ le->append_cnt++;
+ th->th_sum = csum; /* Restore checksum */
le->m_head = m;
+ m->m_pkthdr.rcvif = lc->ifp;
+ le->mbuf_cnt = 1;
+ if (need_flush)
+ le->need_wakeup = need_flush;
+ else
+ le->need_wakeup = 0;
le->m_tail = m_last(m);
-
+ le->m_last_mbuf = m;
+ m->m_nextpkt = NULL;
+ le->m_prev_last = NULL;
+ /*
+ * We keep the total size here for cross checking when we may need
+ * to flush/wakeup in the MBUF_QUEUE case.
+ */
+ le->tcp_tot_p_len = tcp_data_len;
+ m->m_pkthdr.lro_len = tcp_data_len;
return (0);
}
@@ -957,6 +1406,8 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
void
tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
{
+ struct timespec arrv;
+
/* sanity checks */
if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
lc->lro_mbuf_max == 0)) {
@@ -973,7 +1424,15 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
(*lc->ifp->if_input) (lc->ifp, mb);
return;
}
-
+ /* Arrival Stamp the packet */
+
+ if ((mb->m_flags & M_TSTMP) == 0) {
+ /* If no hardware or arrival stamp on the packet add arrival */
+ nanouptime(&arrv);
+ mb->m_pkthdr.rcv_tstmp = ((arrv.tv_sec * 1000000000) +
+ arrv.tv_nsec);
+ mb->m_flags |= M_TSTMP_LRO;
+ }
/* create sequence number */
lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
(((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
diff --git a/freebsd/sys/netinet/tcp_lro.h b/freebsd/sys/netinet/tcp_lro.h
index 855f4ee4..1c6d2dd5 100644
--- a/freebsd/sys/netinet/tcp_lro.h
+++ b/freebsd/sys/netinet/tcp_lro.h
@@ -45,6 +45,8 @@ struct lro_entry {
LIST_ENTRY(lro_entry) hash_next;
struct mbuf *m_head;
struct mbuf *m_tail;
+ struct mbuf *m_last_mbuf;
+ struct mbuf *m_prev_last;
union {
struct ip *ip4;
struct ip6_hdr *ip6;
@@ -67,10 +69,22 @@ struct lro_entry {
uint32_t ack_seq; /* tcp_seq */
uint32_t tsval;
uint32_t tsecr;
+ uint32_t tcp_tot_p_len; /* TCP payload length of chain */
uint16_t window;
uint16_t timestamp; /* flag, not a TCP hdr field. */
+ uint16_t need_wakeup;
+ uint16_t mbuf_cnt; /* Count of mbufs collected see note */
+ uint16_t mbuf_appended;
struct timeval mtime;
};
+/*
+ * Note: The mbuf_cnt field tracks our number of mbufs added to the m_next
+ * list. Each mbuf counted can have data and of course it will
+ * have an ack as well (by definition any inbound tcp segment will
+ * have an ack value). We use this count to tell us how many ACKs
+ * are present for our ack-count threshold. If we exceed that or
+ * the data threshold we will wake up the endpoint.
+ */
LIST_HEAD(lro_head, lro_entry);
#define le_ip4 leip.ip4
@@ -115,6 +129,8 @@ void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
void tcp_lro_flush_all(struct lro_ctrl *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
+void tcp_lro_reg_mbufq(void);
+void tcp_lro_dereg_mbufq(void);
#define TCP_LRO_NO_ENTRIES -2
#define TCP_LRO_CANNOT -1
diff --git a/freebsd/sys/netinet/tcp_offload.c b/freebsd/sys/netinet/tcp_offload.c
index f3ab3b50..89b21859 100644
--- a/freebsd/sys/netinet/tcp_offload.c
+++ b/freebsd/sys/netinet/tcp_offload.c
@@ -35,7 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/types.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index b641f04c..ff09fa31 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
#include <sys/param.h>
@@ -48,6 +49,9 @@ __FBSDID("$FreeBSD$");
#include <sys/hhook.h>
#endif
#include <sys/kernel.h>
+#ifdef KERN_TLS
+#include <sys/ktls.h>
+#endif
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
@@ -193,7 +197,7 @@ tcp_output(struct tcpcb *tp)
uint32_t recwin, sendwin;
int off, flags, error = 0; /* Keep compiler happy */
u_int if_hw_tsomaxsegcount = 0;
- u_int if_hw_tsomaxsegsize;
+ u_int if_hw_tsomaxsegsize = 0;
struct mbuf *m;
struct ip *ip = NULL;
#ifdef TCPDEBUG
@@ -221,6 +225,11 @@ tcp_output(struct tcpcb *tp)
isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
#endif
+#ifdef KERN_TLS
+ const bool hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0;
+#else
+ const bool hw_tls = false;
+#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -658,7 +667,8 @@ after_sack_rexmit:
if (adv >= (int32_t)(2 * tp->t_maxseg) &&
(adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
recwin <= (so->so_rcv.sb_hiwat / 8) ||
- so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
+ so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg ||
+ adv >= TCP_MAXWIN << tp->rcv_scale))
goto send;
if (2 * adv >= (int32_t)so->so_rcv.sb_hiwat)
goto send;
@@ -1001,7 +1011,7 @@ send:
* to the offset in the socket buffer chain.
*/
mb = sbsndptr_noadv(&so->so_snd, off, &moff);
- if (len <= MHLEN - hdrlen - max_linkhdr) {
+ if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
m_copydata(mb, moff, len,
mtod(m, caddr_t) + hdrlen);
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
@@ -1014,7 +1024,7 @@ send:
msb = &so->so_snd;
m->m_next = tcp_m_copym(mb, moff,
&len, if_hw_tsomaxsegcount,
- if_hw_tsomaxsegsize, msb);
+ if_hw_tsomaxsegsize, msb, hw_tls);
if (len <= (tp->t_maxseg - optlen)) {
/*
* Must have ran out of mbufs for the copy
@@ -1285,15 +1295,9 @@ send:
m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
}
-#if defined(IPSEC) || defined(IPSEC_SUPPORT)
- KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL),
- ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u",
- __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL)));
-#else
- KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL),
- ("%s: mbuf chain shorter than expected: %d + %u + %u != %u",
- __func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
-#endif
+ KASSERT(len + hdrlen == m_length(m, NULL),
+ ("%s: mbuf chain shorter than expected: %d + %u != %u",
+ __func__, len, hdrlen, m_length(m, NULL)));
#ifdef TCP_HHOOK
/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
@@ -1515,7 +1519,13 @@ timer:
if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
tp->snd_max = tp->snd_nxt + xlen;
}
-
+ if ((error == 0) &&
+ (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0)) {
+ /* Clean up any DSACKs sent */
+ tcp_clean_dsack_blocks(tp);
+ }
if (error) {
/* Record the error. */
TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_OUT,
@@ -1817,8 +1827,12 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
*/
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
- int32_t seglimit, int32_t segsize, struct sockbuf *sb)
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls)
{
+#ifdef KERN_TLS
+ struct ktls_session *tls, *ntls;
+ struct mbuf *start;
+#endif
struct mbuf *n, **np;
struct mbuf *top;
int32_t off = off0;
@@ -1850,6 +1864,13 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
np = &top;
top = NULL;
pkthdrlen = NULL;
+#ifdef KERN_TLS
+ if (m->m_flags & M_NOMAP)
+ tls = m->m_ext.ext_pgs->tls;
+ else
+ tls = NULL;
+ start = m;
+#endif
while (len > 0) {
if (m == NULL) {
KASSERT(len == M_COPYALL,
@@ -1859,6 +1880,38 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
*pkthdrlen = len_cp;
break;
}
+#ifdef KERN_TLS
+ if (hw_tls) {
+ if (m->m_flags & M_NOMAP)
+ ntls = m->m_ext.ext_pgs->tls;
+ else
+ ntls = NULL;
+
+ /*
+ * Avoid mixing TLS records with handshake
+ * data or TLS records from different
+ * sessions.
+ */
+ if (tls != ntls) {
+ MPASS(m != start);
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+
+ /*
+ * Don't end a send in the middle of a TLS
+ * record if it spans multiple TLS records.
+ */
+ if (tls != NULL && (m != start) && len < m->m_len) {
+ *plen = len_cp;
+ if (pkthdrlen != NULL)
+ *pkthdrlen = len_cp;
+ break;
+ }
+ }
+#endif
mlen = min(len, m->m_len - off);
if (seglimit) {
/*
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index fea3b716..9d4bf3be 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -40,6 +40,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
+/* For debugging we want counters and BB logging */
+/* #define TCP_REASS_COUNTERS 1 */
+/* #define TCP_REASS_LOGGING 1 */
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>
@@ -74,8 +78,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#ifdef TCP_REASS_LOGGING
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_hpts.h>
+#endif
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
@@ -94,10 +100,6 @@ __FBSDID("$FreeBSD$");
#define TCP_R_LOG_DUMP 10
#define TCP_R_LOG_TRIM 11
-/* For debugging we want counters and BB logging */
-/* #define TCP_REASS_COUNTERS 1 */
-/* #define TCP_REASS_LOGGING 1 */
-
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
@@ -542,6 +544,10 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
* and should be rewritten (see NetBSD for optimizations).
*/
+ KASSERT(th == NULL || (seq_start != NULL && tlenp != NULL),
+ ("tcp_reass called with illegal parameter combination "
+ "(tp=%p, th=%p, seq_start=%p, tlenp=%p, m=%p)",
+ tp, th, seq_start, tlenp, m));
/*
* Call with th==NULL after become established to
* force pre-ESTABLISHED data up to user socket.
@@ -581,7 +587,8 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
*/
lenofoh = tcp_reass_overhead_of_chain(m, &mlast);
sb = &tp->t_inpcb->inp_socket->so_rcv;
- if ((sb->sb_mbcnt + tp->t_segqmbuflen + lenofoh) > sb->sb_mbmax) {
+ if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
+ (sb->sb_mbcnt + tp->t_segqmbuflen + lenofoh) > sb->sb_mbmax) {
/* No room */
TCPSTAT_INC(tcps_rcvreassfull);
#ifdef TCP_REASS_COUNTERS
@@ -590,6 +597,11 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
#ifdef TCP_REASS_LOGGING
tcp_log_reassm(tp, NULL, NULL, th->th_seq, lenofoh, TCP_R_LOG_LIMIT_REACHED, 0);
#endif
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: mbuf count limit reached, "
+ "segment dropped\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
m_freem(m);
*tlenp = 0;
#ifdef TCP_REASS_LOGGING
@@ -938,6 +950,20 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start,
* is understood.
*/
new_entry:
+ if (th->th_seq == tp->rcv_nxt && TCPS_HAVEESTABLISHED(tp->t_state)) {
+ tp->rcv_nxt += *tlenp;
+ flags = th->th_flags & TH_FIN;
+ TCPSTAT_INC(tcps_rcvoopack);
+ TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ m_freem(m);
+ } else {
+ sbappendstream_locked(&so->so_rcv, m, 0);
+ }
+ sorwakeup_locked(so);
+ return (flags);
+ }
if (tcp_new_limits) {
if ((tp->t_segqlen > tcp_reass_queue_guard) &&
(*tlenp < MSIZE)) {
@@ -962,9 +988,7 @@ new_entry:
return (0);
}
} else {
-
- if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
- tp->t_segqlen >= min((so->so_rcv.sb_hiwat / tp->t_maxseg) + 1,
+ if (tp->t_segqlen >= min((so->so_rcv.sb_hiwat / tp->t_maxseg) + 1,
tcp_reass_maxqueuelen)) {
TCPSTAT_INC(tcps_rcvreassfull);
*tlenp = 0;
@@ -1044,12 +1068,20 @@ present:
} else {
#ifdef TCP_REASS_LOGGING
tcp_reass_log_new_in(tp, q->tqe_start, q->tqe_len, q->tqe_m, TCP_R_LOG_READ, q);
- tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 1);
+ if (th != NULL) {
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 1);
+ } else {
+ tcp_log_reassm(tp, q, NULL, 0, 0, TCP_R_LOG_READ, 1);
+ }
#endif
sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
}
#ifdef TCP_REASS_LOGGING
- tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 2);
+ if (th != NULL) {
+ tcp_log_reassm(tp, q, NULL, th->th_seq, *tlenp, TCP_R_LOG_READ, 2);
+ } else {
+ tcp_log_reassm(tp, q, NULL, 0, 0, TCP_R_LOG_READ, 2);
+ }
#endif
KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt,
("tp:%p seg queue goes negative", tp));
@@ -1065,7 +1097,11 @@ present:
tp, &tp->t_segq, tp->t_segqmbuflen);
#else
#ifdef TCP_REASS_LOGGING
- tcp_log_reassm(tp, NULL, NULL, th->th_seq, *tlenp, TCP_R_LOG_ZERO, 0);
+ if (th != NULL) {
+ tcp_log_reassm(tp, NULL, NULL, th->th_seq, *tlenp, TCP_R_LOG_ZERO, 0);
+ } else {
+ tcp_log_reassm(tp, NULL, NULL, 0, 0, TCP_R_LOG_ZERO, 0);
+ }
#endif
tp->t_segqmbuflen = 0;
#endif
diff --git a/freebsd/sys/netinet/tcp_sack.c b/freebsd/sys/netinet/tcp_sack.c
index 91c032c8..6d6198dd 100644
--- a/freebsd/sys/netinet/tcp_sack.c
+++ b/freebsd/sys/netinet/tcp_sack.c
@@ -151,6 +151,108 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_sack_globalholes), 0,
"Global number of TCP SACK holes currently allocated");
+
+/*
+ * Find overlaps between the received range rcv_start..rcv_end and the stored
+ * SACK blocks, and place any overlap as a DSACK block first in tp->sackblks[].
+ */
+void
+tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+ struct sackblk head_blk,mid_blk,saved_blks[MAX_SACK_BLKS];
+ int i, j, n, identical;
+ tcp_seq start, end;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+ if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+ log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n",
+ rcv_start, rcv_end, tp->rcv_nxt);
+ }
+
+ if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
+ ((rcv_end == tp->rcv_nxt) &&
+ (tp->rcv_numsacks > 0 ) &&
+ (tp->sackblks[0].end == tp->rcv_nxt))) {
+ saved_blks[0].start = rcv_start;
+ saved_blks[0].end = rcv_end;
+ } else {
+ saved_blks[0].start = saved_blks[0].end = 0;
+ }
+
+ head_blk.start = head_blk.end = 0;
+ mid_blk.start = rcv_start;
+ mid_blk.end = rcv_end;
+ identical = 0;
+
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ start = tp->sackblks[i].start;
+ end = tp->sackblks[i].end;
+ if (SEQ_LT(rcv_end, start)) {
+ /* received range ends before this SACK block starts */
+ continue;
+ }
+ if (SEQ_GT(rcv_start, end)) {
+ /* received range starts after this SACK block ends */
+ continue;
+ }
+ if (SEQ_GT(tp->rcv_nxt, end)) {
+ if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, end)) &&
+ (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) ||
+ (head_blk.start == head_blk.end))) {
+ head_blk.start = SEQ_MAX(rcv_start, start);
+ head_blk.end = SEQ_MIN(rcv_end, end);
+ }
+ continue;
+ }
+ if (((head_blk.start == head_blk.end) ||
+ SEQ_LT(start, head_blk.start)) &&
+ (SEQ_GT(end, rcv_start) &&
+ SEQ_LEQ(start, rcv_end))) {
+ head_blk.start = start;
+ head_blk.end = end;
+ }
+ mid_blk.start = SEQ_MIN(mid_blk.start, start);
+ mid_blk.end = SEQ_MAX(mid_blk.end, end);
+ if ((mid_blk.start == start) &&
+ (mid_blk.end == end))
+ identical = 1;
+ }
+ if (SEQ_LT(head_blk.start, head_blk.end)) {
+ /* store the overlap of head_blk with the received range */
+ saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
+ saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end);
+ }
+ n = 1;
+ /*
+ * Second, if not ACKed, store the SACK block that
+ * overlaps with the DSACK block unless it is identical
+ */
+ if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
+ !((mid_blk.start == saved_blks[0].start) &&
+ (mid_blk.end == saved_blks[0].end))) ||
+ identical == 1) {
+ saved_blks[n].start = mid_blk.start;
+ saved_blks[n++].end = mid_blk.end;
+ }
+ for (j = 0; (j < tp->rcv_numsacks) && (n < MAX_SACK_BLKS); j++) {
+ if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
+ SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
+ (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt))))
+ saved_blks[n++] = tp->sackblks[j];
+ }
+ j = 0;
+ for (i = 0; i < n; i++) {
+ /* skip a stale initial entry, i.e. one without start < end */
+ if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
+ tp->sackblks[j++] = saved_blks[i];
+ }
+ }
+ tp->rcv_numsacks = j;
+}
+
/*
* This function is called upon receipt of new valid data (while not in
* header prediction mode), and it updates the ordered list of sacks.
@@ -170,11 +272,18 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
INP_WLOCK_ASSERT(tp->t_inpcb);
/* Check arguments. */
- KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
-
- /* SACK block for the received segment. */
- head_blk.start = rcv_start;
- head_blk.end = rcv_end;
+ KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
+
+ if ((rcv_start == rcv_end) &&
+ (tp->rcv_numsacks >= 1) &&
+ (rcv_end == tp->sackblks[0].end)) {
+ /* retaining DSACK block below rcv_nxt (todrop) */
+ head_blk = tp->sackblks[0];
+ } else {
+ /* SACK block for the received segment. */
+ head_blk.start = rcv_start;
+ head_blk.end = rcv_end;
+ }
/*
* Merge updated SACK blocks into head_blk, and save unchanged SACK
@@ -195,12 +304,54 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
* Merge this SACK block into head_blk. This SACK
* block itself will be discarded.
*/
- if (SEQ_GT(head_blk.start, start))
+ /*
+ * |-|
+ * |---| merge
+ *
+ * |-|
+ * |---| merge
+ *
+ * |-----|
+ * |-| DSACK smaller
+ *
+ * |-|
+ * |-----| DSACK smaller
+ */
+ if (head_blk.start == end)
head_blk.start = start;
- if (SEQ_LT(head_blk.end, end))
+ else if (head_blk.end == start)
head_blk.end = end;
+ else {
+ if (SEQ_LT(head_blk.start, start)) {
+ tcp_seq temp = start;
+ start = head_blk.start;
+ head_blk.start = temp;
+ }
+ if (SEQ_GT(head_blk.end, end)) {
+ tcp_seq temp = end;
+ end = head_blk.end;
+ head_blk.end = temp;
+ }
+ if ((head_blk.start != start) ||
+ (head_blk.end != end)) {
+ if ((num_saved >= 1) &&
+ SEQ_GEQ(saved_blks[num_saved-1].start, start) &&
+ SEQ_LEQ(saved_blks[num_saved-1].end, end))
+ num_saved--;
+ saved_blks[num_saved].start = start;
+ saved_blks[num_saved].end = end;
+ num_saved++;
+ }
+ }
} else {
/*
+ * This block supersedes the prior block
+ */
+ if ((num_saved >= 1) &&
+ SEQ_GEQ(saved_blks[num_saved-1].start, start) &&
+ SEQ_LEQ(saved_blks[num_saved-1].end, end))
+ num_saved--;
+ /*
* Save this SACK block.
*/
saved_blks[num_saved].start = start;
@@ -213,7 +364,7 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
* Update SACK list in tp->sackblks[].
*/
num_head = 0;
- if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
+ if (SEQ_LT(rcv_start, rcv_end)) {
/*
* The received data segment is an out-of-order segment. Put
* head_blk at the top of SACK list.
@@ -227,6 +378,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
if (num_saved >= MAX_SACK_BLKS)
num_saved--;
}
+ if ((rcv_start == rcv_end) &&
+ (rcv_start == tp->sackblks[0].end)) {
+ num_head = 1;
+ }
if (num_saved > 0) {
/*
* Copy the saved SACK blocks back.
@@ -239,6 +394,45 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
tp->rcv_numsacks = num_head + num_saved;
}
+void
+tcp_clean_dsack_blocks(struct tcpcb *tp)
+{
+ struct sackblk saved_blks[MAX_SACK_BLKS];
+ int num_saved, i;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ /*
+ * Drop every stored block that is empty or inverted (start >= end)
+ * or that starts at or below rcv_nxt; those are treated as DSACK
+ * blocks here. The remaining blocks keep their relative order.
+ */
+ num_saved = 0;
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ tcp_seq start = tp->sackblks[i].start;
+ tcp_seq end = tp->sackblks[i].end;
+ if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
+ /*
+ * Discard this D-SACK block.
+ */
+ continue;
+ }
+ /*
+ * Save this SACK block.
+ */
+ saved_blks[num_saved].start = start;
+ saved_blks[num_saved].end = end;
+ num_saved++;
+ }
+ if (num_saved > 0) {
+ /*
+ * Copy the saved SACK blocks back.
+ */
+ bcopy(saved_blks, &tp->sackblks[0],
+ sizeof(struct sackblk) * num_saved);
+ }
+ tp->rcv_numsacks = num_saved;
+}
+
/*
* Delete all receiver-side SACK information.
*/
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index 52d2ca2a..223f33f7 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
#include <sys/param.h>
@@ -56,6 +57,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_HHOOK
#include <sys/khelp.h>
#endif
+#ifdef KERN_TLS
+#include <sys/ktls.h>
+#endif
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -201,6 +205,11 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc1323), 0,
"Enable rfc1323 (high performance TCP) extensions");
+VNET_DEFINE(int, tcp_ts_offset_per_conn) = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ts_offset_per_conn, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_ts_offset_per_conn), 0,
+ "Initialize TCP timestamps per connection instead of per host pair");
+
static int tcp_log_debug = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
&tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
@@ -263,21 +272,10 @@ static struct tcp_function_block tcp_def_funcblk = {
.tfb_tcp_fb_fini = tcp_default_fb_fini,
};
-int t_functions_inited = 0;
static int tcp_fb_cnt = 0;
struct tcp_funchead t_functions;
static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
-static void
-init_tcp_functions(void)
-{
- if (t_functions_inited == 0) {
- TAILQ_INIT(&t_functions);
- rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
- t_functions_inited = 1;
- }
-}
-
static struct tcp_function_block *
find_tcp_functions_locked(struct tcp_function_set *fs)
{
@@ -565,13 +563,10 @@ sysctl_net_inet_list_func_info(SYSCTL_HANDLER_ARGS)
bzero(&tfi, sizeof(tfi));
tfi.tfi_refcnt = f->tf_fb->tfb_refcnt;
tfi.tfi_id = f->tf_fb->tfb_id;
- (void)strncpy(tfi.tfi_alias, f->tf_name,
- TCP_FUNCTION_NAME_LEN_MAX);
- tfi.tfi_alias[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
- (void)strncpy(tfi.tfi_name,
- f->tf_fb->tfb_tcp_block_name,
- TCP_FUNCTION_NAME_LEN_MAX);
- tfi.tfi_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
+ (void)strlcpy(tfi.tfi_alias, f->tf_name,
+ sizeof(tfi.tfi_alias));
+ (void)strlcpy(tfi.tfi_name,
+ f->tf_fb->tfb_tcp_block_name, sizeof(tfi.tfi_name));
error = SYSCTL_OUT(req, &tfi, sizeof(tfi));
/*
* Don't stop on error, as that is the
@@ -787,10 +782,9 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
KASSERT(names != NULL && *num_names > 0,
("%s: Called with 0-length name list", __func__));
KASSERT(names != NULL, ("%s: Called with NULL name list", __func__));
+ KASSERT(rw_initialized(&tcp_function_lock),
+ ("%s: called too early", __func__));
- if (t_functions_inited == 0) {
- init_tcp_functions();
- }
if ((blk->tfb_tcp_output == NULL) ||
(blk->tfb_tcp_do_segment == NULL) ||
(blk->tfb_tcp_ctloutput == NULL) ||
@@ -819,8 +813,12 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
}
}
+ if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
+ *num_names = 0;
+ return (EINVAL);
+ }
+
refcount_init(&blk->tfb_refcnt, 0);
- blk->tfb_flags = 0;
blk->tfb_id = atomic_fetchadd_int(&next_tcp_stack_id, 1);
for (i = 0; i < *num_names; i++) {
n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
@@ -830,9 +828,8 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
}
n->tf_fb = blk;
- (void)strncpy(fs.function_set_name, names[i],
- TCP_FUNCTION_NAME_LEN_MAX);
- fs.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
+ (void)strlcpy(fs.function_set_name, names[i],
+ sizeof(fs.function_set_name));
rw_wlock(&tcp_function_lock);
if (find_tcp_functions_locked(&fs) != NULL) {
/* Duplicate name space not allowed */
@@ -841,8 +838,7 @@ register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
error = EALREADY;
goto cleanup;
}
- (void)strncpy(n->tf_name, names[i], TCP_FUNCTION_NAME_LEN_MAX);
- n->tf_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
+ (void)strlcpy(n->tf_name, names[i], sizeof(n->tf_name));
TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
tcp_fb_cnt++;
rw_wunlock(&tcp_function_lock);
@@ -929,8 +925,8 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
bool force)
{
struct tcp_function *f;
-
- if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
+
+ if (blk == &tcp_def_funcblk) {
/* You can't un-register the default */
return (EPERM);
}
@@ -1088,6 +1084,9 @@ tcp_init(void)
tcp_keepintvl = TCPTV_KEEPINTVL;
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
tcp_msl = TCPTV_MSL;
+ tcp_rexmit_initial = TCPTV_RTOBASE;
+ if (tcp_rexmit_initial < 1)
+ tcp_rexmit_initial = 1;
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
@@ -1096,8 +1095,10 @@ tcp_init(void)
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
+
/* Setup the tcp function block list */
- init_tcp_functions();
+ TAILQ_INIT(&t_functions);
+ rw_init(&tcp_function_lock, "tcp_func_lock");
register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
#ifdef TCP_BLACKBOX
/* Initialize the TCP logging data. */
@@ -1130,6 +1131,13 @@ tcp_init(void)
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
+
+ tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
+ tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
+ tcp_inp_lro_compressed = counter_u64_alloc(M_WAITOK);
+ tcp_inp_lro_single_push = counter_u64_alloc(M_WAITOK);
+ tcp_inp_lro_locks_taken = counter_u64_alloc(M_WAITOK);
+ tcp_inp_lro_sack_wake = counter_u64_alloc(M_WAITOK);
#ifdef TCPPCAP
tcp_pcap_init();
#endif
@@ -1666,9 +1674,9 @@ tcp_newtcpcb(struct inpcb *inp)
* reasonable initial retransmit time.
*/
tp->t_srtt = TCPTV_SRTTBASE;
- tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+ tp->t_rttvar = ((tcp_rexmit_initial - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
tp->t_rttmin = tcp_rexmit_min;
- tp->t_rxtcur = TCPTV_RTOBASE;
+ tp->t_rxtcur = tcp_rexmit_initial;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->t_rcvtime = ticks;
@@ -2648,7 +2656,17 @@ tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len)
uint32_t
tcp_new_ts_offset(struct in_conninfo *inc)
{
- return (tcp_keyed_hash(inc, V_ts_offset_secret,
+ struct in_conninfo inc_store, *local_inc;
+
+ if (!V_tcp_ts_offset_per_conn) {
+ memcpy(&inc_store, inc, sizeof(struct in_conninfo));
+ inc_store.inc_lport = 0;
+ inc_store.inc_fport = 0;
+ local_inc = &inc_store;
+ } else {
+ local_inc = inc;
+ }
+ return (tcp_keyed_hash(local_inc, V_ts_offset_secret,
sizeof(V_ts_offset_secret)));
}
@@ -3075,6 +3093,120 @@ SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
0, sysctl_drop, "", "Drop TCP connection");
+#ifdef KERN_TLS
+static int
+sysctl_switch_tls(SYSCTL_HANDLER_ARGS)
+{
+ /* addrs[0] is a foreign socket, addrs[1] is a local one. */
+ struct sockaddr_storage addrs[2];
+ struct inpcb *inp;
+ struct sockaddr_in *fin, *lin;
+ struct epoch_tracker et;
+#ifdef INET6
+ struct sockaddr_in6 *fin6, *lin6;
+#endif
+ int error;
+
+ inp = NULL;
+ fin = lin = NULL;
+#ifdef INET6
+ fin6 = lin6 = NULL;
+#endif
+ error = 0;
+
+ if (req->oldptr != NULL || req->oldlen != 0)
+ return (EINVAL);
+ if (req->newptr == NULL)
+ return (EPERM);
+ if (req->newlen < sizeof(addrs))
+ return (ENOMEM);
+ error = SYSCTL_IN(req, &addrs, sizeof(addrs));
+ if (error)
+ return (error);
+
+ switch (addrs[0].ss_family) {
+#ifdef INET6
+ case AF_INET6:
+ fin6 = (struct sockaddr_in6 *)&addrs[0];
+ lin6 = (struct sockaddr_in6 *)&addrs[1];
+ if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
+ lin6->sin6_len != sizeof(struct sockaddr_in6))
+ return (EINVAL);
+ if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
+ if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
+ return (EINVAL);
+ in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
+ in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
+ fin = (struct sockaddr_in *)&addrs[0];
+ lin = (struct sockaddr_in *)&addrs[1];
+ break;
+ }
+ error = sa6_embedscope(fin6, V_ip6_use_defzone);
+ if (error)
+ return (error);
+ error = sa6_embedscope(lin6, V_ip6_use_defzone);
+ if (error)
+ return (error);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ fin = (struct sockaddr_in *)&addrs[0];
+ lin = (struct sockaddr_in *)&addrs[1];
+ if (fin->sin_len != sizeof(struct sockaddr_in) ||
+ lin->sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ break;
+#endif
+ default:
+ return (EINVAL);
+ }
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ switch (addrs[0].ss_family) {
+#ifdef INET6
+ case AF_INET6:
+ inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
+ fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
+ INPLOOKUP_WLOCKPCB, NULL);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
+ lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
+ break;
+#endif
+ }
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ if (inp != NULL) {
+ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0 ||
+ inp->inp_socket == NULL) {
+ error = ECONNRESET;
+ INP_WUNLOCK(inp);
+ } else {
+ struct socket *so;
+
+ so = inp->inp_socket;
+ soref(so);
+ error = ktls_set_tx_mode(so,
+ arg2 == 0 ? TCP_TLS_MODE_SW : TCP_TLS_MODE_IFNET);
+ INP_WUNLOCK(inp);
+ SOCK_LOCK(so);
+ sorele(so);
+ }
+ } else
+ error = ESRCH;
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_sw_tls,
+ CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
+ 0, sysctl_switch_tls, "", "Switch TCP connection to SW TLS");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_ifnet_tls,
+ CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
+ 1, sysctl_switch_tls, "", "Switch TCP connection to ifnet TLS");
+#endif
+
/*
* Generate a standardized TCP log line for use throughout the
* tcp subsystem. Memory allocation is done with M_NOWAIT to
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index bfbf9f42..468aaab7 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -156,7 +156,12 @@ static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
/*
* Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
- * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds,
+ * 3 retransmits corresponds to a timeout with default values of
+ * tcp_rexmit_initial * ( 1 +
+ * tcp_backoff[1] +
+ * tcp_backoff[2] +
+ * tcp_backoff[3]) + 3 * tcp_rexmit_slop,
+ * 1000 ms * (1 + 2 + 4 + 8) + 3 * 200 ms = 15600 ms,
* the odds are that the user has given up attempting to connect by then.
*/
#define SYNCACHE_MAXREXMTS 3
@@ -421,9 +426,10 @@ syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
int rexmt;
if (sc->sc_rxmits == 0)
- rexmt = TCPTV_RTOBASE;
+ rexmt = tcp_rexmit_initial;
else
- TCPT_RANGESET(rexmt, TCPTV_RTOBASE * tcp_syn_backoff[sc->sc_rxmits],
+ TCPT_RANGESET(rexmt,
+ tcp_rexmit_initial * tcp_backoff[sc->sc_rxmits],
tcp_rexmit_min, TCPTV_REXMTMAX);
sc->sc_rxttime = ticks + rexmt;
sc->sc_rxmits++;
@@ -773,6 +779,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
inp->inp_flowid = m->m_pkthdr.flowid;
inp->inp_flowtype = M_HASHTYPE_GET(m);
+#ifdef NUMA
+ inp->inp_numa_domain = m->m_pkthdr.numa_domain;
+#endif
}
/*
@@ -1143,6 +1152,28 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
#endif /* TCP_SIGNATURE */
+
+ /*
+ * RFC 7323 PAWS: If we have a timestamp on this segment and
+ * it's less than ts_recent, drop it.
+ * XXXMT: RFC 7323 also requires to send an ACK.
+ * In tcp_input.c this is only done for TCP segments
+ * with user data, so be consistent here and just drop
+ * the segment.
+ */
+ if (sc->sc_flags & SCF_TIMESTAMP && to->to_flags & TOF_TS &&
+ TSTMP_LT(to->to_tsval, sc->sc_tsreflect)) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG,
+ "%s; %s: SEG.TSval %u < TS.Recent %u, "
+ "segment dropped\n", s, __func__,
+ to->to_tsval, sc->sc_tsreflect);
+ free(s, M_TCPLOG);
+ }
+ return (-1); /* Do not send RST */
+ }
+
/*
* Pull out the entry to unlock the bucket row.
*
@@ -1522,7 +1553,6 @@ skip_alloc:
sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
- sc->sc_iss = arc4random();
sc->sc_flags = 0;
sc->sc_flowlabel = 0;
@@ -1596,6 +1626,8 @@ skip_alloc:
if (V_tcp_syncookies)
sc->sc_iss = syncookie_generate(sch, sc);
+ else
+ sc->sc_iss = arc4random();
#ifdef INET6
if (autoflowlabel) {
if (V_tcp_syncookies)
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index c50af2bb..cf6ceff5 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -112,6 +112,11 @@ int tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
&tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
+int tcp_rexmit_initial;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_rexmit_initial, 0, sysctl_msec_to_ticks, "I",
+ "Initial Retransmission Timeout");
+
int tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
@@ -235,9 +240,6 @@ tcp_slowtimo(void)
VNET_LIST_RUNLOCK_NOSLEEP();
}
-int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
- { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
-
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
@@ -673,7 +675,7 @@ tcp_timer_rexmt(void * xtp)
TCPSTAT_INC(tcps_rexmttimeo);
if ((tp->t_state == TCPS_SYN_SENT) ||
(tp->t_state == TCPS_SYN_RECEIVED))
- rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
+ rexmt = tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index a2ab6ca5..3e985bdf 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -77,7 +77,7 @@
#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) */
#define TCPTV_SRTTBASE 0 /* base roundtrip time;
if 0, no idea yet */
-#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
+#define TCPTV_RTOBASE ( 1*hz) /* assumed RTO if no info */
#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */
#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
@@ -194,12 +194,12 @@ extern int tcp_keepintvl; /* time between keepalive probes */
extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
+extern int tcp_rexmit_initial;
extern int tcp_rexmit_min;
extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
-extern int tcp_syn_backoff[];
extern int tcp_totbackoff;
extern int tcp_rexmit_drop_options;
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
index 8a28283f..6965d391 100644
--- a/freebsd/sys/netinet/tcp_timewait.c
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -304,7 +304,7 @@ tcp_twstart(struct tcpcb *tp)
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
recwin < (tp->rcv_adv - tp->rcv_nxt))
recwin = (tp->rcv_adv - tp->rcv_nxt);
- tw->last_win = htons((u_short)(recwin >> tp->rcv_scale));
+ tw->last_win = (u_short)(recwin >> tp->rcv_scale);
/*
* Set t_recent if timestamps are used on the connection.
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index 27ab745b..9f670278 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
#include <sys/param.h>
@@ -54,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#ifdef INET6
@@ -346,17 +348,17 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
- sin6p = (struct sockaddr_in6 *)nam;
- if (nam->sa_len != sizeof (*sin6p))
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
/*
* Must check for multicast addresses and disallow binding
* to them.
*/
- if (sin6p->sin6_family == AF_INET6 &&
- IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ if (sin6->sin6_family == AF_INET6 &&
+ IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EAFNOSUPPORT);
TCPDEBUG0;
@@ -374,12 +376,12 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag |= INP_IPV6;
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
inp->inp_vflag |= INP_IPV4;
- else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
- in6_sin6_2_sin(&sin, sin6p);
+ in6_sin6_2_sin(&sin, sin6);
if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
error = EAFNOSUPPORT;
INP_HASH_WUNLOCK(&V_tcbinfo);
@@ -568,18 +570,18 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
TCPDEBUG0;
- sin6p = (struct sockaddr_in6 *)nam;
- if (nam->sa_len != sizeof (*sin6p))
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
/*
* Must disallow TCP ``connections'' to multicast addresses.
*/
- if (sin6p->sin6_family == AF_INET6
- && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ if (sin6->sin6_family == AF_INET6
+ && IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EAFNOSUPPORT);
inp = sotoinpcb(so);
@@ -601,7 +603,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
* therefore probably require the hash lock, which isn't held here.
* Is this a significant problem?
*/
- if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
@@ -613,7 +615,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
}
- in6_sin6_2_sin(&sin, sin6p);
+ in6_sin6_2_sin(&sin, sin6);
if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
error = EAFNOSUPPORT;
goto out;
@@ -643,7 +645,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
inp->inp_inc.inc_flags |= INC_ISIPV6;
- if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
+ if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
@@ -974,22 +976,22 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
#ifdef INET6
case AF_INET6:
{
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
- sin6p = (struct sockaddr_in6 *)nam;
- if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (sin6->sin6_len != sizeof(*sin6)) {
if (m)
m_freem(m);
error = EINVAL;
goto out;
}
- if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
if (m)
m_freem(m);
error = EAFNOSUPPORT;
goto out;
}
- if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
error = EINVAL;
@@ -1005,7 +1007,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
}
inp->inp_vflag &= ~INP_IPV6;
sinp = &sin;
- in6_sin6_2_sin(sinp, sin6p);
+ in6_sin6_2_sin(sinp, sin6);
if (IN_MULTICAST(
ntohl(sinp->sin_addr.s_addr))) {
error = EAFNOSUPPORT;
@@ -1036,7 +1038,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
inp->inp_vflag &= ~INP_IPV4;
inp->inp_inc.inc_flags |= INC_ISIPV6;
if ((error = prison_remote_ip6(td->td_ucred,
- &sin6p->sin6_addr))) {
+ &sin6->sin6_addr))) {
if (m)
m_freem(m);
goto out;
@@ -1192,8 +1194,7 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
INP_WUNLOCK(inp);
- for (int i = 0; i < count; i++)
- m = m_free(m);
+ mb_free_notready(m, count);
return (ECONNRESET);
}
tp = intotcpcb(inp);
@@ -1580,11 +1581,9 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
error = 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
- INP_WLOCK(inp);
if (sopt->sopt_level != IPPROTO_TCP) {
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
- INP_WUNLOCK(inp);
error = ip6_ctloutput(so, sopt);
/*
* In case of the IPV6_USE_MIN_MTU socket option,
@@ -1629,12 +1628,12 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
#endif
#ifdef INET
{
- INP_WUNLOCK(inp);
error = ip_ctloutput(so, sopt);
}
#endif
return (error);
}
+ INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
INP_WUNLOCK(inp);
return (ECONNRESET);
@@ -1760,6 +1759,9 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
int error, opt, optval;
u_int ui;
struct tcp_info ti;
+#ifdef KERN_TLS
+ struct tls_enable tls;
+#endif
struct cc_algo *algo;
char *pbuf, buf[TCP_LOG_ID_LEN];
size_t len;
@@ -1922,6 +1924,29 @@ unlock_and_done:
INP_WUNLOCK(inp);
break;
+#ifdef KERN_TLS
+ case TCP_TXTLS_ENABLE:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &tls, sizeof(tls),
+ sizeof(tls));
+ if (error)
+ break;
+ error = ktls_enable_tx(so, &tls);
+ break;
+ case TCP_TXTLS_MODE:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+ if (error)
+ return (error);
+ if (ui != TCP_TLS_MODE_SW && ui != TCP_TLS_MODE_IFNET)
+ return (EINVAL);
+
+ INP_WLOCK_RECHECK(inp);
+ error = ktls_set_tx_mode(so, ui);
+ INP_WUNLOCK(inp);
+ break;
+#endif
+
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
@@ -2203,6 +2228,13 @@ unlock_and_done:
error = EINVAL;
break;
#endif
+#ifdef KERN_TLS
+ case TCP_TXTLS_MODE:
+ optval = ktls_get_tx_mode(so);
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(optval));
+ break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index 2fbe07ad..48136abe 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -102,7 +102,8 @@ struct tcpcb {
t_state:4, /* state of this connection */
t_idle_reduce : 1,
t_delayed_ack: 7, /* Delayed ack variable */
- bits_spare : 4;
+ t_fin_is_rst: 1, /* Are fin's treated as resets */
+ bits_spare : 3;
u_int t_flags;
tcp_seq snd_una; /* sent but unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -271,6 +272,11 @@ struct tcp_function_block {
void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
int, int, uint8_t);
+ int (*tfb_do_queued_segments)(struct socket *, struct tcpcb *, int);
+ int (*tfb_do_segment_nounlock)(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *,
+ int, int, uint8_t,
+ int, struct timeval *);
void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
int, int, uint8_t,
@@ -754,7 +760,6 @@ extern int tcp_log_in_vain;
VNET_DECLARE(int, drop_synfin);
VNET_DECLARE(int, path_mtu_discovery);
VNET_DECLARE(int, tcp_abc_l_var);
-VNET_DECLARE(int, tcp_autorcvbuf_inc);
VNET_DECLARE(int, tcp_autorcvbuf_max);
VNET_DECLARE(int, tcp_autosndbuf_inc);
VNET_DECLARE(int, tcp_autosndbuf_max);
@@ -789,7 +794,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcb VNET(tcb)
#define V_tcbinfo VNET(tcbinfo)
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
-#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
@@ -798,6 +802,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
#define V_tcp_do_ecn VNET(tcp_do_ecn)
#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
+#define V_tcp_ts_offset_per_conn VNET(tcp_ts_offset_per_conn)
#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
@@ -882,6 +887,13 @@ struct tcp_function_block *
find_and_ref_tcp_fb(struct tcp_function_block *fs);
int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
+extern counter_u64_t tcp_inp_lro_direct_queue;
+extern counter_u64_t tcp_inp_lro_wokeup_queue;
+extern counter_u64_t tcp_inp_lro_compressed;
+extern counter_u64_t tcp_inp_lro_single_push;
+extern counter_u64_t tcp_inp_lro_locks_taken;
+extern counter_u64_t tcp_inp_lro_sack_wake;
+
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int tcp_maxseg(const struct tcpcb *);
@@ -934,7 +946,9 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
+void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
@@ -942,10 +956,11 @@ void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
int tcp_compute_pipe(struct tcpcb *);
+uint32_t tcp_compute_initwnd(uint32_t);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
- int32_t seglimit, int32_t segsize, struct sockbuf *sb);
+ int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
static inline void
diff --git a/freebsd/sys/netinet/toecore.h b/freebsd/sys/netinet/toecore.h
index f2374d70..f8bda614 100644
--- a/freebsd/sys/netinet/toecore.h
+++ b/freebsd/sys/netinet/toecore.h
@@ -35,6 +35,8 @@
#error "no user-serviceable parts inside"
#endif
+#include <sys/_eventhandler.h>
+
struct tcpopt;
struct tcphdr;
struct in_conninfo;
@@ -108,7 +110,6 @@ struct toedev {
struct tcp_info *);
};
-#include <sys/eventhandler.h>
typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index 33b89c21..f89660d6 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -1162,9 +1162,23 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
src.sin_family = 0;
sin = (struct sockaddr_in *)addr;
+retry:
if (sin == NULL ||
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_WLOCK(inp);
+ /*
+ * In case we lost a race and another thread bound addr/port
+ * on the inp we cannot keep the wlock (which still would be
+ * fine) as further down, based on these values we make
+ * decisions for the pcbinfo lock. If the locks are not in
+ * synch the assertions on unlock will fire, hence we go for
+ * one retry loop.
+ */
+ if (sin != NULL && (inp->inp_laddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != 0)) {
+ INP_WUNLOCK(inp);
+ goto retry;
+ }
unlock_inp = UH_WLOCKED;
} else {
INP_RLOCK(inp);
@@ -1264,36 +1278,44 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
}
/*
- * Depending on whether or not the application has bound or connected
- * the socket, we may have to do varying levels of work. The optimal
- * case is for a connected UDP socket, as a global lock isn't
- * required at all.
- *
- * In order to decide which we need, we require stability of the
- * inpcb binding, which we ensure by acquiring a read lock on the
- * inpcb. This doesn't strictly follow the lock order, so we play
- * the trylock and retry game; note that we may end up with more
- * conservative locks than required the second time around, so later
- * assertions have to accept that. Further analysis of the number of
- * misses under contention is required.
- *
- * XXXRW: Check that hash locking update here is correct.
+ * In the old days, depending on whether or not the application had
+ * bound or connected the socket, we had to do varying levels of work.
+ * The optimal case was for a connected UDP socket, as a global lock
+ * wasn't required at all.
+ * In order to decide which we need, we required stability of the
+ * inpcb binding, which we ensured by acquiring a read lock on the
+ * inpcb. This didn't strictly follow the lock order, so we played
+ * the trylock and retry game.
+ * With the re-introduction of the route-cache in some cases, we started
+ * to acquire an early inp wlock and a possible race during re-lock
+ * went away. With the introduction of epoch(9) some read locking
+ * became epoch(9) and the lock-order issues also went away.
+ * Due to route-cache we may now hold more conservative locks than
+ * otherwise required and have split up the 2nd case in case 2 and 3
+ * in order to keep the udpinfo lock level in sync with the inp one
+ * for the IP_SENDSRCADDR case below.
*/
pr = inp->inp_socket->so_proto->pr_protocol;
pcbinfo = udp_get_inpcbinfo(pr);
- sin = (struct sockaddr_in *)addr;
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_HASH_WLOCK(pcbinfo);
unlock_udbinfo = UH_WLOCKED;
- } else if ((sin != NULL && (
- (sin->sin_addr.s_addr == INADDR_ANY) ||
- (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
- (inp->inp_laddr.s_addr == INADDR_ANY) ||
- (inp->inp_lport == 0))) ||
- (src.sin_family == AF_INET)) {
+ } else if (sin != NULL &&
+ (sin->sin_addr.s_addr == INADDR_ANY ||
+ sin->sin_addr.s_addr == INADDR_BROADCAST ||
+ inp->inp_laddr.s_addr == INADDR_ANY ||
+ inp->inp_lport == 0)) {
INP_HASH_RLOCK_ET(pcbinfo, et);
unlock_udbinfo = UH_RLOCKED;
+ } else if (src.sin_family == AF_INET) {
+ if (unlock_inp == UH_WLOCKED) {
+ INP_HASH_WLOCK(pcbinfo);
+ unlock_udbinfo = UH_WLOCKED;
+ } else {
+ INP_HASH_RLOCK_ET(pcbinfo, et);
+ unlock_udbinfo = UH_RLOCKED;
+ }
} else
unlock_udbinfo = UH_UNLOCKED;
@@ -1503,8 +1525,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
if (flowtype != M_HASHTYPE_NONE) {
m->m_pkthdr.flowid = flowid;
M_HASHTYPE_SET(m, flowtype);
+ }
#ifdef RSS
- } else {
+ else {
uint32_t hash_val, hash_type;
/*
* Calculate an appropriate RSS hash for UDP and
@@ -1527,10 +1550,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
m->m_pkthdr.flowid = hash_val;
M_HASHTYPE_SET(m, hash_type);
}
-#endif
}
-#ifdef RSS
/*
* Don't override with the inp cached flowid value.
*
@@ -1565,12 +1586,22 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
release:
if (unlock_udbinfo == UH_WLOCKED) {
KASSERT(unlock_inp == UH_WLOCKED,
- ("%s: excl udbinfo lock, shared inp lock", __func__));
+ ("%s: excl udbinfo lock %#03x, shared inp lock %#03x, "
+ "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
+ "src fam %#04x",
+ __func__, unlock_udbinfo, unlock_inp, sin,
+ (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
+ inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
} else if (unlock_udbinfo == UH_RLOCKED) {
KASSERT(unlock_inp == UH_RLOCKED,
- ("%s: shared udbinfo lock, excl inp lock", __func__));
+ ("%s: shared udbinfo lock %#03x, excl inp lock %#03x, "
+ "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
+ "src fam %#04x",
+ __func__, unlock_udbinfo, unlock_inp, sin,
+ (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
+ inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
INP_HASH_RUNLOCK_ET(pcbinfo, et);
INP_RUNLOCK(inp);
} else if (unlock_inp == UH_WLOCKED)
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 0b0c7b91..6f16c712 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -40,20 +40,17 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/hash.h>
+#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/domain.h>
-#include <sys/eventhandler.h>
#include <sys/protosw.h>
#include <sys/socket.h>
-#include <sys/errno.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <machine/atomic.h>
-
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
@@ -65,14 +62,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
-#include <netinet/in_systm.h> /* for ECN definitions */
-#include <netinet/ip.h> /* for ECN definitions */
+#include <netinet/in_systm.h> /* For ECN definitions. */
+#include <netinet/ip.h> /* For ECN definitions. */
+#ifdef MAC
#include <security/mac/mac_framework.h>
+#endif
-/*
- * Reassembly headers are stored in hash buckets.
- */
+/* Reassembly headers are stored in hash buckets. */
#define IP6REASS_NHASH_LOG2 10
#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
#define IP6REASS_HMASK (IP6REASS_NHASH - 1)
@@ -91,22 +88,47 @@ struct ip6qbucket {
int count;
};
-VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
-volatile u_int frag6_nfrags = 0;
-VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
-VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
+struct ip6asfrag {
+ struct ip6asfrag *ip6af_down;
+ struct ip6asfrag *ip6af_up;
+ struct mbuf *ip6af_m;
+ int ip6af_offset; /* offset in ip6af_m to next header */
+ int ip6af_frglen; /* fragmentable part length */
+ int ip6af_off; /* fragment offset */
+ u_int16_t ip6af_mff; /* more fragment bit in frag off */
+};
+#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
+
+static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");
+
+/* System wide (global) maximum and count of packets in reassembly queues. */
+static int ip6_maxfrags;
+static volatile u_int frag6_nfrags = 0;
+
+/* Maximum and current packets in per-VNET reassembly queue. */
+VNET_DEFINE_STATIC(int, ip6_maxfragpackets);
+VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
+#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
#define V_frag6_nfragpackets VNET(frag6_nfragpackets)
-#define V_ip6q VNET(ip6q)
-#define V_ip6q_hashseed VNET(ip6q_hashseed)
-#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
-#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
-#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
-#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
-#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
+/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
+VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize);
+VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket);
+#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
+#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
-static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
+/* Per-VNET reassembly queue buckets. */
+VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]);
+VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed);
+#define V_ip6qb VNET(ip6qb)
+#define V_ip6qb_hashseed VNET(ip6qb_hashseed)
+
+#define IP6QB_LOCK(_b) mtx_lock(&V_ip6qb[(_b)].lock)
+#define IP6QB_TRYLOCK(_b) mtx_trylock(&V_ip6qb[(_b)].lock)
+#define IP6QB_LOCK_ASSERT(_b) mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
+#define IP6QB_UNLOCK(_b) mtx_unlock(&V_ip6qb[(_b)].lock)
+#define IP6QB_HEAD(_b) (&V_ip6qb[(_b)].ip6q)
/*
* By default, limit the number of IP6 fragments across all reassembly
@@ -124,11 +146,14 @@ static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
#define IP6_MAXFRAGS (nmbclusters / 32)
#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
+
/*
- * Initialise reassembly queue and fragment identifier.
+ * Sysctls and helper function.
*/
-void
-frag6_set_bucketsize()
+SYSCTL_DECL(_net_inet6_ip6);
+
+static void
+frag6_set_bucketsize(void)
{
int i;
@@ -136,68 +161,140 @@ frag6_set_bucketsize()
V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
}
-static void
-frag6_change(void *tag)
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
+ CTLFLAG_RW, &ip6_maxfrags, 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments. "
+ "A value of 0 means no fragmented packets will be accepted, while "
+ "a value of -1 means no limit");
+
+static int
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
{
- VNET_ITERATOR_DECL(vnet_iter);
+ int error, val;
- ip6_maxfrags = IP6_MAXFRAGS;
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
- frag6_set_bucketsize();
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK_NOSLEEP();
+ val = V_ip6_maxfragpackets;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
+ return (error);
+ V_ip6_maxfragpackets = val;
+ frag6_set_bucketsize();
+ return (0);
}
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+ sysctl_ip6_maxfragpackets, "I",
+ "Default maximum number of outstanding fragmented IPv6 packets. "
+ "A value of 0 means no fragmented packets will be accepted, while "
+ "a value of -1 means no limit");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
+ "Maximum allowed number of fragments per packet");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
+ "Maximum number of reassembly queues per hash bucket");
-void
-frag6_init(void)
+
+/*
+ * Remove the IPv6 fragmentation header from the mbuf.
+ */
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
{
- struct ip6q *q6;
- int i;
+ struct ip6_hdr *ip6;
+ struct mbuf *t;
- V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
- frag6_set_bucketsize();
- for (i = 0; i < IP6REASS_NHASH; i++) {
- q6 = IP6Q_HEAD(i);
- q6->ip6q_next = q6->ip6q_prev = q6;
- mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
- V_ip6q[i].count = 0;
+ /* Delete frag6 header. */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+
+ /* This is the only possible case with !PULLDOWN_TEST. */
+ ip6 = mtod(m, struct ip6_hdr *);
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+ offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ } else {
+
+ /* This comes with no copy if the boundary is on cluster. */
+ if ((t = m_split(m, offset, wait)) == NULL)
+ return (ENOMEM);
+ m_adj(t, sizeof(struct ip6_frag));
+ m_cat(m, t);
}
- V_ip6q_hashseed = arc4random();
- V_ip6_maxfragsperpacket = 64;
- if (!IS_DEFAULT_VNET(curvnet))
- return;
- ip6_maxfrags = IP6_MAXFRAGS;
- EVENTHANDLER_REGISTER(nmbclusters_change,
- frag6_change, NULL, EVENTHANDLER_PRI_ANY);
+ m->m_flags |= M_FRAGMENTED;
+ return (0);
+}
+
+/*
+ * Free a fragment reassembly header and all associated datagrams.
+ */
+static void
+frag6_freef(struct ip6q *q6, uint32_t bucket)
+{
+ struct ip6_hdr *ip6;
+ struct ip6asfrag *af6, *down6;
+ struct mbuf *m;
+
+ IP6QB_LOCK_ASSERT(bucket);
+
+ for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
+ af6 = down6) {
+
+ m = IP6_REASS_MBUF(af6);
+ down6 = af6->ip6af_down;
+ frag6_deq(af6, bucket);
+
+ /*
+ * Return ICMP time exceeded error for the 1st fragment.
+ * Just free other fragments.
+ */
+ if (af6->ip6af_off == 0) {
+
+ /* Adjust pointer. */
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ /* Restore source and destination addresses. */
+ ip6->ip6_src = q6->ip6q_src;
+ ip6->ip6_dst = q6->ip6q_dst;
+
+ icmp6_error(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_REASSEMBLY, 0);
+ } else
+ m_freem(m);
+
+ free(af6, M_FRAG6);
+ }
+ frag6_remque(q6, bucket);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
+#ifdef MAC
+ mac_ip6q_destroy(q6);
+#endif
+ free(q6, M_FRAG6);
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
}
/*
- * In RFC2460, fragment and reassembly rule do not agree with each other,
- * in terms of next header field handling in fragment header.
+ * Like in RFC2460, in RFC8200, fragment and reassembly rules do not agree with
+ * each other, in terms of next header field handling in fragment header.
* While the sender will use the same value for all of the fragmented packets,
- * receiver is suggested not to check the consistency.
+ * receiver is suggested not to check for consistency.
*
- * fragment rule (p20):
- * (2) A Fragment header containing:
- * The Next Header value that identifies the first header of
- * the Fragmentable Part of the original packet.
+ * Fragment rules (p18,p19):
+ * (2) A Fragment header containing:
+ * The Next Header value that identifies the first header
+ * after the Per-Fragment headers of the original packet.
* -> next header field is same for all fragments
*
- * reassembly rule (p21):
- * The Next Header field of the last header of the Unfragmentable
- * Part is obtained from the Next Header field of the first
+ * Reassembly rule (p20):
+ * The Next Header field of the last header of the Per-Fragment
+ * headers is obtained from the Next Header field of the first
* fragment's Fragment header.
* -> should grab it from the first fragment only
*
* The following note also contradicts with fragment rule - no one is going to
* send different fragment with different next header field.
*
- * additional note (p22):
+ * Additional note (p22) [not an error]:
* The Next Header values in the Fragment headers of different
* fragments of the same original packet may differ. Only the value
* from the Offset zero fragment packet is used for reassembly.
@@ -206,33 +303,32 @@ frag6_init(void)
* There is no explicit reason given in the RFC. Historical reason maybe?
*/
/*
- * Fragment input
+ * Fragment input.
*/
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
- struct mbuf *m = *mp, *t;
+ struct ifnet *dstifp;
+ struct in6_ifaddr *ia6;
struct ip6_hdr *ip6;
struct ip6_frag *ip6f;
struct ip6q *head, *q6;
- struct ip6asfrag *af6, *ip6af, *af6dwn;
- struct in6_ifaddr *ia;
- int offset = *offp, nxt, i, next;
- int first_frag = 0;
- int fragoff, frgpartlen; /* must be larger than u_int16_t */
+ struct ip6asfrag *af6, *af6dwn, *ip6af;
+ struct mbuf *m, *t;
uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
- uint32_t hash, *hashkeyp;
- struct ifnet *dstifp;
- u_int8_t ecn, ecn0;
+ uint32_t bucket, *hashkeyp;
+ int fragoff, frgpartlen; /* Must be larger than uint16_t. */
+ int nxt, offset, plen;
+ uint8_t ecn, ecn0;
+ bool only_frag;
#ifdef RSS
- struct m_tag *mtag;
struct ip6_direct_ctx *ip6dc;
+ struct m_tag *mtag;
#endif
-#if 0
- char ip6buf[INET6_ADDRSTRLEN];
-#endif
+ m = *mp;
+ offset = *offp;
ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
@@ -245,22 +341,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
#endif
dstifp = NULL;
- /* find the destination interface of the packet. */
- ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
- if (ia != NULL) {
- dstifp = ia->ia_ifp;
- ifa_free(&ia->ia_ifa);
+ /* Find the destination interface of the packet. */
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia6 != NULL) {
+ dstifp = ia6->ia_ifp;
+ ifa_free(&ia6->ia_ifa);
}
- /* jumbo payload can't contain a fragment header */
+
+ /* Jumbo payload cannot contain a fragment header. */
if (ip6->ip6_plen == 0) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
/*
- * check whether fragment packet's fragment length is
- * multiple of 8 octets.
+ * Check whether fragment packet's fragment length is a
+ * multiple of 8 octets (unless it is the last one).
* sizeof(struct ip6_frag) == 8
* sizeof(struct ip6_hdr) = 40
*/
@@ -269,22 +366,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offsetof(struct ip6_hdr, ip6_plen));
in6_ifstat_inc(dstifp, ifs6_reass_fail);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
IP6STAT_INC(ip6s_fragments);
in6_ifstat_inc(dstifp, ifs6_reass_reqd);
- /* offset now points to data portion */
+ /* Offset now points to data portion. */
offset += sizeof(struct ip6_frag);
/*
- * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
- * upfront, unrelated to any reassembly. Just skip the fragment header.
+ * Handle "atomic" fragments (offset and m bit set to 0) upfront,
+ * unrelated to any reassembly. Still need to remove the frag hdr.
+ * See RFC 6946 and section 4.5 of RFC 8200.
*/
if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
- /* XXX-BZ we want dedicated counters for this. */
- IP6STAT_INC(ip6s_reassembled);
+ IP6STAT_INC(ip6s_atomicfrags);
+ /* XXX-BZ handle correctly. */
in6_ifstat_inc(dstifp, ifs6_reass_ok);
*offp = offset;
m->m_flags |= M_FRAGMENTED;
@@ -298,22 +396,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
offsetof(struct ip6_hdr, ip6_plen));
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
+ /* Generate a hash value for fragment bucket selection. */
hashkeyp = hashkey;
memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
*hashkeyp = ip6f->ip6f_ident;
- hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
- hash &= IP6REASS_HMASK;
- head = IP6Q_HEAD(hash);
- IP6Q_LOCK(hash);
+ bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
+ bucket &= IP6REASS_HMASK;
+ head = IP6QB_HEAD(bucket);
+ IP6QB_LOCK(bucket);
/*
- * Enforce upper bound on number of fragments.
+ * Enforce upper bound on number of fragments for the entire system.
* If maxfrag is 0, never accept fragments.
* If maxfrag is -1, accept all fragments without limitation.
*/
@@ -332,11 +431,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
)
break;
+ only_frag = false;
if (q6 == head) {
- /*
- * the first fragment to arrive, create a reassembly queue.
- */
- first_frag = 1;
+
+ /* A first fragment to arrive creates a reassembly queue. */
+ only_frag = true;
/*
* Enforce upper bound on number of fragmented packets
@@ -347,26 +446,27 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
*/
if (V_ip6_maxfragpackets < 0)
;
- else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
+ else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
atomic_load_int(&V_frag6_nfragpackets) >=
(u_int)V_ip6_maxfragpackets)
goto dropfrag;
atomic_add_int(&V_frag6_nfragpackets, 1);
- q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
- M_NOWAIT);
+
+ /* Allocate IPv6 fragment packet queue entry. */
+ q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
+ M_NOWAIT | M_ZERO);
if (q6 == NULL)
goto dropfrag;
- bzero(q6, sizeof(*q6));
#ifdef MAC
if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
- free(q6, M_FTABLE);
+ free(q6, M_FRAG6);
goto dropfrag;
}
mac_ip6q_create(m, q6);
#endif
- frag6_insque_head(q6, head, hash);
+ frag6_insque_head(q6, head, bucket);
- /* ip6q_nxt will be filled afterwards, from 1st fragment */
+ /* ip6q_nxt will be filled afterwards, from 1st fragment. */
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
#ifdef notyet
q6->ip6q_nxtp = (u_char *)nxtp;
@@ -383,7 +483,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
/*
- * If it's the 1st fragment, record the length of the
+ * If it is the 1st fragment, record the length of the
* unfragmentable part and the next header of the fragment header.
*/
fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
@@ -404,18 +504,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK(hash);
+ IP6QB_UNLOCK(bucket);
return (IPPROTO_DONE);
}
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK(hash);
+ IP6QB_UNLOCK(bucket);
return (IPPROTO_DONE);
}
/*
- * If it's the first fragment, do the above check for each
+ * If it is the first fragment, do the above check for each
* fragment already stored in the reassembly queue.
*/
if (fragoff == 0) {
@@ -425,15 +525,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
IPV6_MAXPACKET) {
- struct mbuf *merr = IP6_REASS_MBUF(af6);
struct ip6_hdr *ip6err;
- int erroff = af6->ip6af_offset;
+ struct mbuf *merr;
+ int erroff;
+
+ merr = IP6_REASS_MBUF(af6);
+ erroff = af6->ip6af_offset;
- /* dequeue the fragment. */
- frag6_deq(af6, hash);
- free(af6, M_FTABLE);
+ /* Dequeue the fragment. */
+ frag6_deq(af6, bucket);
+ free(af6, M_FRAG6);
- /* adjust pointer. */
+ /* Adjust pointer. */
ip6err = mtod(merr, struct ip6_hdr *);
/*
@@ -451,174 +554,113 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
}
- ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
- M_NOWAIT);
+ /* Allocate an IPv6 fragment queue entry for this fragmented part. */
+ ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
+ M_NOWAIT | M_ZERO);
if (ip6af == NULL)
goto dropfrag;
- bzero(ip6af, sizeof(*ip6af));
ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
ip6af->ip6af_off = fragoff;
ip6af->ip6af_frglen = frgpartlen;
ip6af->ip6af_offset = offset;
IP6_REASS_MBUF(ip6af) = m;
- if (first_frag) {
+ if (only_frag) {
af6 = (struct ip6asfrag *)q6;
goto insert;
}
+ /* Do duplicate, condition, and boundary checks. */
/*
* Handle ECN by comparing this segment with the first one;
* if CE is set, do not lose CE.
- * drop if CE and not-ECT are mixed for the same packet.
+ * Drop if CE and not-ECT are mixed for the same packet.
*/
ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
ecn0 = q6->ip6q_ecn;
if (ecn == IPTOS_ECN_CE) {
if (ecn0 == IPTOS_ECN_NOTECT) {
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
if (ecn0 != IPTOS_ECN_CE)
q6->ip6q_ecn = IPTOS_ECN_CE;
}
if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
- /*
- * Find a segment which begins after this one does.
- */
+ /* Find a fragmented part which begins after this one does. */
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down)
if (af6->ip6af_off > ip6af->ip6af_off)
break;
-#if 0
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
- */
- if (af6->ip6af_up != (struct ip6asfrag *)q6) {
- i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
- - ip6af->ip6af_off;
- if (i > 0) {
- if (i >= ip6af->ip6af_frglen)
- goto dropfrag;
- m_adj(IP6_REASS_MBUF(ip6af), i);
- ip6af->ip6af_off += i;
- ip6af->ip6af_frglen -= i;
- }
- }
-
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
- */
- while (af6 != (struct ip6asfrag *)q6 &&
- ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
- i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
- if (i < af6->ip6af_frglen) {
- af6->ip6af_frglen -= i;
- af6->ip6af_off += i;
- m_adj(IP6_REASS_MBUF(af6), i);
- break;
- }
- af6 = af6->ip6af_down;
- m_freem(IP6_REASS_MBUF(af6->ip6af_up));
- frag6_deq(af6->ip6af_up, hash);
- }
-#else
/*
* If the incoming framgent overlaps some existing fragments in
- * the reassembly queue, drop it, since it is dangerous to override
- * existing fragments from a security point of view.
- * We don't know which fragment is the bad guy - here we trust
- * fragment that came in earlier, with no real reason.
- *
- * Note: due to changes after disabling this part, mbuf passed to
- * m_adj() below now does not meet the requirement.
+ * the reassembly queue, drop the new fragment and keep the
+ * existing reassembly queue unchanged; exact duplicates are
+ * handled the same way. Note that RFC 8200, section 4.5,
+ * asks for the whole queue to be discarded on a real overlap,
+ * which the checks below do not do.
*/
if (af6->ip6af_up != (struct ip6asfrag *)q6) {
- i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
- - ip6af->ip6af_off;
- if (i > 0) {
-#if 0 /* suppress the noisy log */
- log(LOG_ERR, "%d bytes of a fragment from %s "
- "overlaps the previous fragment\n",
- i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
- free(ip6af, M_FTABLE);
+ if (af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen -
+ ip6af->ip6af_off > 0) {
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
}
if (af6 != (struct ip6asfrag *)q6) {
- i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
- if (i > 0) {
-#if 0 /* suppress the noisy log */
- log(LOG_ERR, "%d bytes of a fragment from %s "
- "overlaps the succeeding fragment",
- i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
- free(ip6af, M_FTABLE);
+ if (ip6af->ip6af_off + ip6af->ip6af_frglen -
+ af6->ip6af_off > 0) {
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
}
-#endif
insert:
#ifdef MAC
- if (!first_frag)
+ if (!only_frag)
mac_ip6q_update(m, q6);
#endif
/*
- * Stick new segment in its place;
- * check for complete reassembly.
- * If not complete, check fragment limit.
- * Move to front of packet queue, as we are
- * the most recently active fragmented packet.
+ * Stick new segment in its place; check for complete reassembly.
+ * If not complete, check fragment limit. Move to front of packet
+ * queue, as we are the most recently active fragmented packet.
*/
- frag6_enq(ip6af, af6->ip6af_up, hash);
+ frag6_enq(ip6af, af6->ip6af_up, bucket);
atomic_add_int(&frag6_nfrags, 1);
q6->ip6q_nfrag++;
-#if 0 /* xxx */
- if (q6 != head->ip6q_next) {
- frag6_remque(q6, hash);
- frag6_insque_head(q6, head, hash);
- }
-#endif
- next = 0;
+ plen = 0;
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down) {
- if (af6->ip6af_off != next) {
+ if (af6->ip6af_off != plen) {
if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
- IP6STAT_INC(ip6s_fragdropped);
- frag6_freef(q6, hash);
+ IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(hash);
- return IPPROTO_DONE;
+ IP6QB_UNLOCK(bucket);
+ return (IPPROTO_DONE);
}
- next += af6->ip6af_frglen;
+ plen += af6->ip6af_frglen;
}
if (af6->ip6af_up->ip6af_mff) {
if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
- IP6STAT_INC(ip6s_fragdropped);
- frag6_freef(q6, hash);
+ IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(hash);
- return IPPROTO_DONE;
+ IP6QB_UNLOCK(bucket);
+ return (IPPROTO_DONE);
}
- /*
- * Reassembly is complete; concatenate fragments.
- */
+ /* Reassembly is complete; concatenate fragments. */
ip6af = q6->ip6q_down;
t = m = IP6_REASS_MBUF(ip6af);
af6 = ip6af->ip6af_down;
- frag6_deq(ip6af, hash);
+ frag6_deq(ip6af, bucket);
while (af6 != (struct ip6asfrag *)q6) {
m->m_pkthdr.csum_flags &=
IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
@@ -626,13 +668,13 @@ insert:
IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
af6dwn = af6->ip6af_down;
- frag6_deq(af6, hash);
+ frag6_deq(af6, bucket);
while (t->m_next)
t = t->m_next;
m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
m_demote_pkthdr(IP6_REASS_MBUF(af6));
m_cat(t, IP6_REASS_MBUF(af6));
- free(af6, M_FTABLE);
+ free(af6, M_FRAG6);
af6 = af6dwn;
}
@@ -640,47 +682,43 @@ insert:
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
(m->m_pkthdr.csum_data >> 16);
- /* adjust offset to point where the original next header starts */
+ /* Adjust offset to point where the original next header starts. */
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
+ ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr));
if (q6->ip6q_ecn == IPTOS_ECN_CE)
ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
nxt = q6->ip6q_nxt;
-#ifdef notyet
- *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
-#endif
if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
- frag6_remque(q6, hash);
+ frag6_remque(q6, bucket);
atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
+ free(q6, M_FRAG6);
atomic_subtract_int(&V_frag6_nfragpackets, 1);
goto dropfrag;
}
- /*
- * Store NXT to the original.
- */
+ /* Set nxt(-hdr field value) to the original value. */
m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
(caddr_t)&nxt);
- frag6_remque(q6, hash);
+ frag6_remque(q6, bucket);
atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_reassemble(q6, m);
mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
+ free(q6, M_FRAG6);
atomic_subtract_int(&V_frag6_nfragpackets, 1);
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
- int plen = 0;
+
+ plen = 0;
for (t = m; t; t = t->m_next)
plen += t->m_len;
m->m_pkthdr.len = plen;
@@ -699,173 +737,64 @@ insert:
m_tag_prepend(m, mtag);
#endif
- IP6Q_UNLOCK(hash);
+ IP6QB_UNLOCK(bucket);
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
#ifdef RSS
- /*
- * Queue/dispatch for reprocessing.
- */
+ /* Queue/dispatch for reprocessing. */
netisr_dispatch(NETISR_IPV6_DIRECT, m);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
#endif
- /*
- * Tell launch routine the next header
- */
-
+ /* Tell launch routine the next header. */
*mp = m;
*offp = offset;
- return nxt;
+ return (nxt);
- dropfrag:
- IP6Q_UNLOCK(hash);
+dropfrag:
+ IP6QB_UNLOCK(bucket);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
m_freem(m);
- return IPPROTO_DONE;
-}
-
-/*
- * Free a fragment reassembly header and all
- * associated datagrams.
- */
-static void
-frag6_freef(struct ip6q *q6, uint32_t bucket)
-{
- struct ip6asfrag *af6, *down6;
-
- IP6Q_LOCK_ASSERT(bucket);
-
- for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
- af6 = down6) {
- struct mbuf *m = IP6_REASS_MBUF(af6);
-
- down6 = af6->ip6af_down;
- frag6_deq(af6, bucket);
-
- /*
- * Return ICMP time exceeded error for the 1st fragment.
- * Just free other fragments.
- */
- if (af6->ip6af_off == 0) {
- struct ip6_hdr *ip6;
-
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
-
- /* restore source and destination addresses */
- ip6->ip6_src = q6->ip6q_src;
- ip6->ip6_dst = q6->ip6q_dst;
-
- icmp6_error(m, ICMP6_TIME_EXCEEDED,
- ICMP6_TIME_EXCEED_REASSEMBLY, 0);
- } else
- m_freem(m);
- free(af6, M_FTABLE);
- }
- frag6_remque(q6, bucket);
- atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
-#ifdef MAC
- mac_ip6q_destroy(q6);
-#endif
- free(q6, M_FTABLE);
- atomic_subtract_int(&V_frag6_nfragpackets, 1);
-}
-
-/*
- * Put an ip fragment on a reassembly chain.
- * Like insque, but pointers in middle of structure.
- */
-static void
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
- uint32_t bucket __unused)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- af6->ip6af_up = up6;
- af6->ip6af_down = up6->ip6af_down;
- up6->ip6af_down->ip6af_up = af6;
- up6->ip6af_down = af6;
-}
-
-/*
- * To frag6_enq as remque is to insque.
- */
-static void
-frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- af6->ip6af_up->ip6af_down = af6->ip6af_down;
- af6->ip6af_down->ip6af_up = af6->ip6af_up;
-}
-
-static void
-frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
- KASSERT(IP6Q_HEAD(bucket) == old,
- ("%s: attempt to insert at head of wrong bucket"
- " (bucket=%u, old=%p)", __func__, bucket, old));
-
- new->ip6q_prev = old;
- new->ip6q_next = old->ip6q_next;
- old->ip6q_next->ip6q_prev= new;
- old->ip6q_next = new;
- V_ip6q[bucket].count++;
-}
-
-static void
-frag6_remque(struct ip6q *p6, uint32_t bucket)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- p6->ip6q_prev->ip6q_next = p6->ip6q_next;
- p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
- V_ip6q[bucket].count--;
+ return (IPPROTO_DONE);
}
/*
* IPv6 reassembling timer processing;
- * if a timer expires on a reassembly
- * queue, discard it.
+ * if a timer expires on a reassembly queue, discard it.
*/
void
frag6_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
struct ip6q *head, *q6;
- int i;
+ uint32_t bucket;
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- for (i = 0; i < IP6REASS_NHASH; i++) {
- IP6Q_LOCK(i);
- head = IP6Q_HEAD(i);
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ IP6QB_LOCK(bucket);
+ head = IP6QB_HEAD(bucket);
q6 = head->ip6q_next;
if (q6 == NULL) {
/*
* XXXJTL: This should never happen. This
* should turn into an assertion.
*/
- IP6Q_UNLOCK(i);
+ IP6QB_UNLOCK(bucket);
continue;
}
while (q6 != head) {
--q6->ip6q_ttl;
q6 = q6->ip6q_next;
if (q6->ip6q_prev->ip6q_ttl == 0) {
- IP6STAT_INC(ip6s_fragtimeout);
+ IP6STAT_ADD(ip6s_fragtimeout,
+ q6->ip6q_prev->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(q6->ip6q_prev, i);
+ frag6_freef(q6->ip6q_prev, bucket);
}
}
/*
@@ -874,36 +803,38 @@ frag6_slowtimo(void)
* enough to get down to the new limit.
* Note that we drain all reassembly queues if
* maxfragpackets is 0 (fragmentation is disabled),
- * and don't enforce a limit when maxfragpackets
+ * and do not enforce a limit when maxfragpackets
* is negative.
*/
while ((V_ip6_maxfragpackets == 0 ||
(V_ip6_maxfragpackets > 0 &&
- V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
+ V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) &&
head->ip6q_prev != head) {
- IP6STAT_INC(ip6s_fragoverflow);
+ IP6STAT_ADD(ip6s_fragoverflow,
+ head->ip6q_prev->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_prev, i);
+ frag6_freef(head->ip6q_prev, bucket);
}
- IP6Q_UNLOCK(i);
+ IP6QB_UNLOCK(bucket);
}
/*
* If we are still over the maximum number of fragmented
* packets, drain off enough to get down to the new limit.
*/
- i = 0;
+ bucket = 0;
while (V_ip6_maxfragpackets >= 0 &&
atomic_load_int(&V_frag6_nfragpackets) >
(u_int)V_ip6_maxfragpackets) {
- IP6Q_LOCK(i);
- head = IP6Q_HEAD(i);
+ IP6QB_LOCK(bucket);
+ head = IP6QB_HEAD(bucket);
if (head->ip6q_prev != head) {
- IP6STAT_INC(ip6s_fragoverflow);
+ IP6STAT_ADD(ip6s_fragoverflow,
+ head->ip6q_prev->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_prev, i);
+ frag6_freef(head->ip6q_prev, bucket);
}
- IP6Q_UNLOCK(i);
- i = (i + 1) % IP6REASS_NHASH;
+ IP6QB_UNLOCK(bucket);
+ bucket = (bucket + 1) % IP6REASS_NHASH;
}
CURVNET_RESTORE();
}
@@ -911,6 +842,52 @@ frag6_slowtimo(void)
}
/*
+ * Eventhandler to adjust limits in case nmbclusters change.
+ */
+static void
+frag6_change(void *tag)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ ip6_maxfrags = IP6_MAXFRAGS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Initialise reassembly queue and fragment identifier.
+ */
+void
+frag6_init(void)
+{
+ struct ip6q *q6;
+ uint32_t bucket;
+
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ q6 = IP6QB_HEAD(bucket);
+ q6->ip6q_next = q6->ip6q_prev = q6;
+ mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF);
+ V_ip6qb[bucket].count = 0;
+ }
+ V_ip6qb_hashseed = arc4random();
+ V_ip6_maxfragsperpacket = 64;
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+
+ ip6_maxfrags = IP6_MAXFRAGS;
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ frag6_change, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+/*
* Drain off all datagram fragments.
*/
void
@@ -918,48 +895,80 @@ frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
struct ip6q *head;
- int i;
+ uint32_t bucket;
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- for (i = 0; i < IP6REASS_NHASH; i++) {
- if (IP6Q_TRYLOCK(i) == 0)
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ if (IP6QB_TRYLOCK(bucket) == 0)
continue;
- head = IP6Q_HEAD(i);
+ head = IP6QB_HEAD(bucket);
while (head->ip6q_next != head) {
IP6STAT_INC(ip6s_fragdropped);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_next, i);
+ frag6_freef(head->ip6q_next, bucket);
}
- IP6Q_UNLOCK(i);
+ IP6QB_UNLOCK(bucket);
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
}
-int
-ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+/*
+ * Put an ip fragment on a reassembly chain.
+ * Like insque, but pointers in middle of structure.
+ */
+static void
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
+ uint32_t bucket __unused)
{
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct mbuf *t;
- /* Delete frag6 header. */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST. */
- bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* This comes with no copy if the boundary is on cluster. */
- if ((t = m_split(m, offset, wait)) == NULL)
- return (ENOMEM);
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
- }
+ IP6QB_LOCK_ASSERT(bucket);
- m->m_flags |= M_FRAGMENTED;
- return (0);
+ af6->ip6af_up = up6;
+ af6->ip6af_down = up6->ip6af_down;
+ up6->ip6af_down->ip6af_up = af6;
+ up6->ip6af_down = af6;
+}
+
+/*
+ * To frag6_enq as remque is to insque.
+ */
+static void
+frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
+{
+
+ IP6QB_LOCK_ASSERT(bucket);
+
+ af6->ip6af_up->ip6af_down = af6->ip6af_down;
+ af6->ip6af_down->ip6af_up = af6->ip6af_up;
+}
+
+static void
+frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
+{
+
+ IP6QB_LOCK_ASSERT(bucket);
+ KASSERT(IP6QB_HEAD(bucket) == old,
+ ("%s: attempt to insert at head of wrong bucket"
+ " (bucket=%u, old=%p)", __func__, bucket, old));
+
+ new->ip6q_prev = old;
+ new->ip6q_next = old->ip6q_next;
+ old->ip6q_next->ip6q_prev= new;
+ old->ip6q_next = new;
+ V_ip6qb[bucket].count++;
+}
+
+static void
+frag6_remque(struct ip6q *p6, uint32_t bucket)
+{
+
+ IP6QB_LOCK_ASSERT(bucket);
+
+ p6->ip6q_prev->ip6q_next = p6->ip6q_next;
+ p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
+ V_ip6qb[bucket].count--;
}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 6dd25e98..4a35eb8d 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -142,7 +142,7 @@ static int icmp6_rip6_input(struct mbuf **, int);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag(struct in6_addr *,
struct in6_addr *, struct in6_addr *);
-static struct mbuf *ni6_input(struct mbuf *, int);
+static struct mbuf *ni6_input(struct mbuf *, int, struct prison *);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
@@ -629,6 +629,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */
{
enum { WRU, FQDN } mode;
+ struct prison *pr;
if (!V_icmp6_nodeinfo)
break;
@@ -640,6 +641,18 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
else
goto badlen;
+#ifndef __rtems__
+ pr = NULL;
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list)
+ if (pr->pr_vnet == ifp->if_vnet)
+ break;
+ sx_sunlock(&allprison_lock);
+ if (pr == NULL)
+ pr = curthread->td_ucred->cr_prison;
+#else /* __rtems__ */
+ pr = &prison0;
+#endif /* __rtems__ */
if (mode == FQDN) {
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
@@ -647,11 +660,10 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
#endif
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n)
- n = ni6_input(n, off);
+ n = ni6_input(n, off, pr);
/* XXX meaningless if n == NULL */
noff = sizeof(struct ip6_hdr);
} else {
- struct prison *pr;
u_char *p;
int maxhlen, hlen;
@@ -685,17 +697,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
n = NULL;
break;
}
- maxhlen = M_TRAILINGSPACE(n) -
- (sizeof(*nip6) + sizeof(*nicmp6) + 4);
-#ifndef __rtems__
- pr = curthread->td_ucred->cr_prison;
-#else /* __rtems__ */
- pr = &prison0;
-#endif /* __rtems__ */
- mtx_lock(&pr->pr_mtx);
- hlen = strlen(pr->pr_hostname);
- if (maxhlen > hlen)
- maxhlen = hlen;
/*
* Copy IPv6 and ICMPv6 only.
*/
@@ -705,6 +706,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
p = (u_char *)(nicmp6 + 1);
bzero(p, 4);
+
+ maxhlen = M_TRAILINGSPACE(n) -
+ (sizeof(*nip6) + sizeof(*nicmp6) + 4);
+ mtx_lock(&pr->pr_mtx);
+ hlen = strlen(pr->pr_hostname);
+ if (maxhlen > hlen)
+ maxhlen = hlen;
/* meaningless TTL */
bcopy(pr->pr_hostname, p + 4, maxhlen);
mtx_unlock(&pr->pr_mtx);
@@ -1173,11 +1181,10 @@ icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
* with hostname changes by sethostname(3)
*/
static struct mbuf *
-ni6_input(struct mbuf *m, int off)
+ni6_input(struct mbuf *m, int off, struct prison *pr)
{
struct icmp6_nodeinfo *ni6, *nni6;
struct mbuf *n = NULL;
- struct prison *pr;
u_int16_t qtype;
int subjlen;
int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
@@ -1329,11 +1336,6 @@ ni6_input(struct mbuf *m, int off)
* wildcard match, if gethostname(3) side has
* truncated hostname.
*/
-#ifndef __rtems__
- pr = curthread->td_ucred->cr_prison;
-#else /* __rtems__ */
- pr = &prison0;
-#endif /* __rtems__ */
mtx_lock(&pr->pr_mtx);
n = ni6_nametodns(pr->pr_hostname,
strlen(pr->pr_hostname), 0);
@@ -1458,11 +1460,6 @@ ni6_input(struct mbuf *m, int off)
/*
* XXX do we really have FQDN in hostname?
*/
-#ifndef __rtems__
- pr = curthread->td_ucred->cr_prison;
-#else /* __rtems__ */
- pr = &prison0;
-#endif /* __rtems__ */
mtx_lock(&pr->pr_mtx);
n->m_next = ni6_nametodns(pr->pr_hostname,
strlen(pr->pr_hostname), oldfqdn);
@@ -1669,6 +1666,7 @@ static int
ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
struct in6_addr *subj)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
struct in6_ifaddr *ifa6;
struct ifaddr *ifa;
@@ -1690,10 +1688,9 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
}
}
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
addrsofif = 0;
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1744,16 +1741,15 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
}
addrsofif++; /* count the address */
}
- IF_ADDR_RUNLOCK(ifp);
if (iffound) {
*ifpp = ifp;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (addrsofif);
}
addrs += addrsofif;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (addrs);
}
@@ -1762,6 +1758,7 @@ static int
ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
struct ifnet *ifp0, int resid)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
struct in6_ifaddr *ifa6;
struct ifaddr *ifa;
@@ -1774,12 +1771,11 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
return (0); /* needless to copy */
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
ifp = ifp0 ? ifp0 : CK_STAILQ_FIRST(&V_ifnet);
again:
for (; ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) {
- IF_ADDR_RLOCK(ifp);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1834,13 +1830,12 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
/* now we can copy the address */
if (resid < sizeof(struct in6_addr) +
sizeof(u_int32_t)) {
- IF_ADDR_RUNLOCK(ifp);
/*
* We give up much more copy.
* Set the truncate flag and return.
*/
nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (copied);
}
@@ -1881,7 +1876,6 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
}
- IF_ADDR_RUNLOCK(ifp);
if (ifp0) /* we need search only on the specified IF */
break;
}
@@ -1893,7 +1887,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
goto again;
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
return (copied);
}
@@ -1906,7 +1900,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
{
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct inpcb *in6p;
+ struct inpcb *inp;
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
@@ -1938,25 +1932,25 @@ icmp6_rip6_input(struct mbuf **mp, int off)
}
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
+ if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
- if (in6p->inp_ip_p != IPPROTO_ICMPV6)
+ if (inp->inp_ip_p != IPPROTO_ICMPV6)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
continue;
- INP_RLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
- INP_RUNLOCK(in6p);
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
continue;
}
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
- in6p->in6p_icmp6filt)) {
- INP_RUNLOCK(in6p);
+ inp->in6p_icmp6filt)) {
+ INP_RUNLOCK(inp);
continue;
}
if (last != NULL) {
@@ -2017,7 +2011,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
}
INP_RUNLOCK(last);
}
- last = in6p;
+ last = inp;
}
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
@@ -2575,13 +2569,14 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
{
/* target lladdr option */
+ struct epoch_tracker et;
int len;
struct nd_opt_hdr *nd_opt;
char *lladdr;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(router_ll6, 0, ifp);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ln == NULL)
goto nolladdropt;
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index ef59203e..f3306bc3 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -1392,13 +1392,15 @@ in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
* if this is its first address,
*/
if (hostIsNew != 0) {
- IF_ADDR_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifacount++;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
if (ifacount <= 1 && ifp->if_ioctl) {
@@ -1476,9 +1478,10 @@ done:
struct in6_ifaddr *
in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1490,7 +1493,7 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return ((struct in6_ifaddr *)ifa);
}
@@ -1527,9 +1530,10 @@ in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
struct in6_ifaddr *
in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1538,7 +1542,7 @@ in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return ((struct in6_ifaddr *)ifa);
}
@@ -1549,12 +1553,13 @@ in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
struct in6_ifaddr *
in6ifa_llaonifp(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct sockaddr_in6 *sin6;
struct ifaddr *ifa;
if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
return (NULL);
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1564,7 +1569,7 @@ in6ifa_llaonifp(struct ifnet *ifp)
IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return ((struct in6_ifaddr *)ifa);
}
@@ -1701,6 +1706,7 @@ int
in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
{
struct in6_addr in6;
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct in6_ifaddr *ia6;
@@ -1709,17 +1715,17 @@ in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
return (0);
in6_setscope(&in6, ifp, NULL);
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ia6 = (struct in6_ifaddr *)ifa;
if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (1);
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -1823,6 +1829,7 @@ in6_prefixlen2mask(struct in6_addr *maskp, int len)
struct in6_ifaddr *
in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
+ struct epoch_tracker et;
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
struct in6_ifaddr *besta = NULL;
@@ -1836,7 +1843,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
* If two or more, return one which matches the dst longest.
* If none, return one of global addresses assigned other ifs.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1870,7 +1877,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
}
if (besta) {
ifa_ref(&besta->ia_ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (besta);
}
@@ -1891,23 +1898,23 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
if (ifa != NULL)
ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (struct in6_ifaddr *)ifa;
}
/* use the last-resort values, that are, deprecated addresses */
if (dep[0]) {
ifa_ref((struct ifaddr *)dep[0]);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return dep[0];
}
if (dep[1]) {
ifa_ref((struct ifaddr *)dep[1]);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return dep[1];
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return NULL;
}
@@ -1917,10 +1924,11 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
void
in6_if_up(struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct in6_ifaddr *ia;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1936,7 +1944,7 @@ in6_if_up(struct ifnet *ifp)
arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
/*
* special cases, like 6to4, are handled in in6_ifattach
@@ -1947,26 +1955,14 @@ in6_if_up(struct ifnet *ifp)
int
in6if_do_dad(struct ifnet *ifp)
{
+
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
-
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ||
- (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD))
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return (0);
+ if ((ND_IFINFO(ifp)->flags &
+ (ND6_IFF_IFDISABLED | ND6_IFF_NO_DAD)) != 0)
return (0);
-
- /*
- * Our DAD routine requires the interface up and running.
- * However, some interfaces can be up before the RUNNING
- * status. Additionally, users may try to assign addresses
- * before the interface becomes up (or running).
- * This function returns EAGAIN in that case.
- * The caller should mark "tentative" on the address instead of
- * performing DAD immediately.
- */
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)))
- return (EAGAIN);
-
return (1);
}
@@ -1977,10 +1973,11 @@ in6if_do_dad(struct ifnet *ifp)
void
in6_setmaxmtu(void)
{
+ struct epoch_tracker et;
unsigned long maxmtu = 0;
struct ifnet *ifp;
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
/* this function can be called during ifnet initialization */
if (!ifp->if_afdata[AF_INET6])
@@ -1989,7 +1986,7 @@ in6_setmaxmtu(void)
IN6_LINKMTU(ifp) > maxmtu)
maxmtu = IN6_LINKMTU(ifp);
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
if (maxmtu) /* update only when maxmtu is positive */
V_in6_maxmtu = maxmtu;
}
@@ -2167,18 +2164,19 @@ in6_lltable_rtcheck(struct ifnet *ifp,
fibnum = V_rt_add_addr_allfibs ? RT_DEFAULT_FIB : ifp->if_fib;
error = fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6);
if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) {
+ struct epoch_tracker et;
struct ifaddr *ifa;
/*
* Create an ND6 cache for an IPv6 neighbor
* that is not covered by our own prefix.
*/
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
ifa = ifaof_ifpforaddr(l3addr, ifp);
if (ifa != NULL) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return 0;
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
ip6_sprintf(ip6buf, &sin6->sin6_addr));
return EINVAL;
@@ -2319,16 +2317,13 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
+ KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) !=
+ (LLE_UNLOCKED | LLE_EXCLUSIVE),
+ ("wrong lle request flags: %#x", flags));
lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
-
if (lle == NULL)
return (NULL);
-
- KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
- (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
- flags));
-
if (flags & LLE_UNLOCKED)
return (lle);
@@ -2336,6 +2331,18 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
LLE_WLOCK(lle);
else
LLE_RLOCK(lle);
+
+ /*
+ * If the afdata lock is not held, the LLE may have been unlinked while
+ * we were blocked on the LLE lock. Check for this case.
+ */
+ if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(lle);
+ else
+ LLE_RUNLOCK(lle);
+ return (NULL);
+ }
return (lle);
}
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index 6af4b557..560b4255 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -246,6 +246,7 @@ generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret)
int
in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
u_int8_t *addr;
@@ -254,7 +255,7 @@ in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
static u_int8_t allone[8] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
@@ -266,7 +267,7 @@ in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
goto found;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
@@ -289,7 +290,7 @@ found:
/* look at IEEE802/EUI64 only */
if (addrlen != 8 && addrlen != 6) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
@@ -299,11 +300,11 @@ found:
* card insertion.
*/
if (bcmp(addr, allzero, addrlen) == 0) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
if (bcmp(addr, allone, addrlen) == 0) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
@@ -330,17 +331,25 @@ found:
* identifier source (can be renumbered).
* we don't do this.
*/
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
+ case IFT_INFINIBAND:
+ if (addrlen != 20) {
+ NET_EPOCH_EXIT(et);
+ return -1;
+ }
+ bcopy(addr + 12, &in6->s6_addr[8], 8);
+ break;
+
default:
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
/* sanity check: g bit must not indicate "group" */
if (EUI64_GROUP(in6)) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
@@ -353,11 +362,11 @@ found:
*/
if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
bcmp(&in6->s6_addr[9], allzero, 7) == 0) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return -1;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return 0;
}
@@ -372,6 +381,7 @@ static int
get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
struct in6_addr *in6)
{
+ struct epoch_tracker et;
struct ifnet *ifp;
/* first, try to get it from the interface itself */
@@ -389,7 +399,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
}
/* next, try to get it from some other hardware interface */
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp == ifp0)
continue;
@@ -404,11 +414,11 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
nd6log((LOG_DEBUG,
"%s: borrow interface identifier from %s\n",
if_name(ifp0), if_name(ifp)));
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
goto success;
}
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
/* last resort: get from random number source */
if (get_rand_ifid(ifp, in6) == 0) {
@@ -700,6 +710,7 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
* it is rather harmful to have one.
*/
ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
+ ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD;
break;
default:
break;
@@ -773,9 +784,11 @@ _in6_ifdetach(struct ifnet *ifp, int purgeulp)
in6_purgeaddr(ifa);
}
if (purgeulp) {
+ IN6_MULTI_LOCK();
in6_pcbpurgeif0(&V_udbinfo, ifp);
in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
in6_pcbpurgeif0(&V_ripcbinfo, ifp);
+ IN6_MULTI_UNLOCK();
}
/* leave from all multicast groups joined */
in6_purgemaddrs(ifp);
@@ -862,36 +875,22 @@ in6_tmpaddrtimer(void *arg)
static void
in6_purgemaddrs(struct ifnet *ifp)
{
- struct in6_multi_head purgeinms;
- struct in6_multi *inm;
- struct ifmultiaddr *ifma, *next;
+ struct in6_multi_head inmh;
- SLIST_INIT(&purgeinms);
+ SLIST_INIT(&inmh);
IN6_MULTI_LOCK();
IN6_MULTI_LIST_LOCK();
- IF_ADDR_WLOCK(ifp);
- /*
- * Extract list of in6_multi associated with the detaching ifp
- * which the PF_INET6 layer is about to release.
- */
- restart:
- CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
- continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
- in6m_disconnect(inm);
- in6m_rele_locked(&purgeinms, inm);
- if (__predict_false(ifma6_restart)) {
- ifma6_restart = false;
- goto restart;
- }
- }
- IF_ADDR_WUNLOCK(ifp);
- mld_ifdetach(ifp);
+ mld_ifdetach(ifp, &inmh);
IN6_MULTI_LIST_UNLOCK();
IN6_MULTI_UNLOCK();
- in6m_release_list_deferred(&purgeinms);
+ in6m_release_list_deferred(&inmh);
+
+ /*
+ * Make sure all multicast deletions invoking if_ioctl() are
+ * completed before returning. Else we risk accessing a freed
+ * ifnet structure pointer.
+ */
+ in6m_release_wait();
}
void
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index 3824645d..44d20612 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -104,7 +104,8 @@ RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
/*
* Locking:
- * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
+ * - Lock order is: Giant, IN6_MULTI_LOCK, INP_WLOCK,
+ * IN6_MULTI_LIST_LOCK, MLD_LOCK, IF_ADDR_LOCK.
* - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however
* it can be taken by code in net/if.c also.
* - ip6_moptions and in6_mfilter are covered by the INP_WLOCK.
@@ -136,12 +137,11 @@ static int im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
static void im6f_purge(struct in6_mfilter *);
static void im6f_rollback(struct in6_mfilter *);
static void im6f_reap(struct in6_mfilter *);
-static int im6o_grow(struct ip6_moptions *);
-static size_t im6o_match_group(const struct ip6_moptions *,
+static struct in6_mfilter *
+ im6o_match_group(const struct ip6_moptions *,
const struct ifnet *, const struct sockaddr *);
static struct in6_msource *
- im6o_match_source(const struct ip6_moptions *, const size_t,
- const struct sockaddr *);
+ im6o_match_source(struct in6_mfilter *, const struct sockaddr *);
static void im6s_merge(struct ip6_msource *ims,
const struct in6_msource *lims, const int rollback);
static int in6_getmulti(struct ifnet *, const struct in6_addr *,
@@ -192,7 +192,6 @@ static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
"Per-interface stack-wide source filters");
-int ifma6_restart = 0;
#ifdef KTR
/*
* Inline function which wraps assertions for a valid ifp.
@@ -231,55 +230,25 @@ im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
imf->im6f_st[1] = st1;
}
-/*
- * Resize the ip6_moptions vector to the next power-of-two minus 1.
- * May be called with locks held; do not sleep.
- */
-static int
-im6o_grow(struct ip6_moptions *imo)
+struct in6_mfilter *
+ip6_mfilter_alloc(const int mflags, const int st0, const int st1)
{
- struct in6_multi **nmships;
- struct in6_multi **omships;
- struct in6_mfilter *nmfilters;
- struct in6_mfilter *omfilters;
- size_t idx;
- size_t newmax;
- size_t oldmax;
-
- nmships = NULL;
- nmfilters = NULL;
- omships = imo->im6o_membership;
- omfilters = imo->im6o_mfilters;
- oldmax = imo->im6o_max_memberships;
- newmax = ((oldmax + 1) * 2) - 1;
-
- if (newmax <= IPV6_MAX_MEMBERSHIPS) {
- nmships = (struct in6_multi **)realloc(omships,
- sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT);
- nmfilters = (struct in6_mfilter *)realloc(omfilters,
- sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER,
- M_NOWAIT);
- if (nmships != NULL && nmfilters != NULL) {
- /* Initialize newly allocated source filter heads. */
- for (idx = oldmax; idx < newmax; idx++) {
- im6f_init(&nmfilters[idx], MCAST_UNDEFINED,
- MCAST_EXCLUDE);
- }
- imo->im6o_max_memberships = newmax;
- imo->im6o_membership = nmships;
- imo->im6o_mfilters = nmfilters;
- }
- }
+ struct in6_mfilter *imf;
- if (nmships == NULL || nmfilters == NULL) {
- if (nmships != NULL)
- free(nmships, M_IP6MOPTS);
- if (nmfilters != NULL)
- free(nmfilters, M_IN6MFILTER);
- return (ETOOMANYREFS);
- }
+ imf = malloc(sizeof(*imf), M_IN6MFILTER, mflags);
- return (0);
+ if (imf != NULL)
+ im6f_init(imf, st0, st1);
+
+ return (imf);
+}
+
+void
+ip6_mfilter_free(struct in6_mfilter *imf)
+{
+
+ im6f_purge(imf);
+ free(imf, M_IN6MFILTER);
}
/*
@@ -287,36 +256,27 @@ im6o_grow(struct ip6_moptions *imo)
* which matches the specified group, and optionally an interface.
* Return its index into the array, or -1 if not found.
*/
-static size_t
+static struct in6_mfilter *
im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
const struct sockaddr *group)
{
const struct sockaddr_in6 *gsin6;
- struct in6_multi **pinm;
- int idx;
- int nmships;
+ struct in6_mfilter *imf;
+ struct in6_multi *inm;
- gsin6 = (const struct sockaddr_in6 *)group;
+ gsin6 = (const struct sockaddr_in6 *)group;
- /* The im6o_membership array may be lazy allocated. */
- if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0)
- return (-1);
-
- nmships = imo->im6o_num_memberships;
- pinm = &imo->im6o_membership[0];
- for (idx = 0; idx < nmships; idx++, pinm++) {
- if (*pinm == NULL)
+ IP6_MFILTER_FOREACH(imf, &imo->im6o_head) {
+ inm = imf->im6f_in6m;
+ if (inm == NULL)
continue;
- if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) &&
- IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr,
+ if ((ifp == NULL || (inm->in6m_ifp == ifp)) &&
+ IN6_ARE_ADDR_EQUAL(&inm->in6m_addr,
&gsin6->sin6_addr)) {
break;
}
}
- if (idx >= nmships)
- idx = -1;
-
- return (idx);
+ return (imf);
}
/*
@@ -331,22 +291,13 @@ im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
* it exists, which may not be the desired behaviour.
*/
static struct in6_msource *
-im6o_match_source(const struct ip6_moptions *imo, const size_t gidx,
- const struct sockaddr *src)
+im6o_match_source(struct in6_mfilter *imf, const struct sockaddr *src)
{
struct ip6_msource find;
- struct in6_mfilter *imf;
struct ip6_msource *ims;
const sockunion_t *psa;
KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__));
- KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships,
- ("%s: invalid index %d\n", __func__, (int)gidx));
-
- /* The im6o_mfilters array may be lazy allocated. */
- if (imo->im6o_mfilters == NULL)
- return (NULL);
- imf = &imo->im6o_mfilters[gidx];
psa = (const sockunion_t *)src;
find.im6s_addr = psa->sin6.sin6_addr;
@@ -366,14 +317,14 @@ int
im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
const struct sockaddr *group, const struct sockaddr *src)
{
- size_t gidx;
+ struct in6_mfilter *imf;
struct in6_msource *ims;
int mode;
KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- gidx = im6o_match_group(imo, ifp, group);
- if (gidx == -1)
+ imf = im6o_match_group(imo, ifp, group);
+ if (imf == NULL)
return (MCAST_NOTGMEMBER);
/*
@@ -385,8 +336,8 @@ im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
* NOTE: We are comparing group state here at MLD t1 (now)
* with socket-layer t0 (since last downcall).
*/
- mode = imo->im6o_mfilters[gidx].im6f_st[1];
- ims = im6o_match_source(imo, gidx, src);
+ mode = imf->im6f_st[1];
+ ims = im6o_match_source(imf, src);
if ((ims == NULL && mode == MCAST_INCLUDE) ||
(ims != NULL && ims->im6sl_st[0] != mode))
@@ -407,6 +358,7 @@ static int
in6_getmulti(struct ifnet *ifp, const struct in6_addr *group,
struct in6_multi **pinm)
{
+ struct epoch_tracker et;
struct sockaddr_in6 gsin6;
struct ifmultiaddr *ifma;
struct in6_multi *inm;
@@ -422,7 +374,10 @@ in6_getmulti(struct ifnet *ifp, const struct in6_addr *group,
IN6_MULTI_LOCK_ASSERT();
IN6_MULTI_LIST_LOCK();
IF_ADDR_WLOCK(ifp);
+ NET_EPOCH_ENTER(et);
inm = in6m_lookup_locked(ifp, group);
+ NET_EPOCH_EXIT(et);
+
if (inm != NULL) {
/*
* If we already joined this group, just bump the
@@ -587,7 +542,15 @@ in6m_release_list_deferred(struct in6_multi_head *inmh)
}
void
-in6m_disconnect(struct in6_multi *inm)
+in6m_release_wait(void)
+{
+
+ /* Wait for all jobs to complete. */
+ gtaskqueue_drain_all(free_gtask.gt_taskqueue);
+}
+
+void
+in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -595,10 +558,12 @@ in6m_disconnect(struct in6_multi *inm)
struct in6_multi_mship *imm, *imm_tmp;
struct ifmultiaddr *ifma, *ll_ifma;
- ifp = inm->in6m_ifp;
+ IN6_MULTI_LIST_LOCK_ASSERT();
+ ifp = inm->in6m_ifp;
if (ifp == NULL)
- return;
+ return; /* already called */
+
inm->in6m_ifp = NULL;
IF_ADDR_WLOCK_ASSERT(ifp);
ifma = inm->in6m_ifma;
@@ -617,7 +582,6 @@ in6m_disconnect(struct in6_multi *inm)
MPASS(ll_ifma->ifma_llifma == NULL);
MPASS(ll_ifma->ifma_ifp == ifp);
if (--ll_ifma->ifma_refcount == 0) {
- ifma6_restart = true;
if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
@@ -635,28 +599,12 @@ in6m_disconnect(struct in6_multi *inm)
if (inm == imm->i6mm_maddr) {
LIST_REMOVE(imm, i6mm_chain);
free(imm, M_IP6MADDR);
+ in6m_rele_locked(inmh, inm);
}
}
}
}
-void
-in6m_release_deferred(struct in6_multi *inm)
-{
- struct in6_multi_head tmp;
-
- IN6_MULTI_LIST_LOCK_ASSERT();
- KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm));
- if (--inm->in6m_refcount == 0) {
- MPASS(inm->in6m_ifp == NULL);
- SLIST_INIT(&tmp);
- inm->in6m_ifma->ifma_protospec = NULL;
- MPASS(inm->in6m_ifma->ifma_llifma == NULL);
- SLIST_INSERT_HEAD(&tmp, inm, in6m_nrele);
- in6m_release_list_deferred(&tmp);
- }
-}
-
static void
in6m_release_task(void *arg __unused)
{
@@ -1256,6 +1204,7 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
/*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
const int delay)
{
+ struct in6_multi_head inmh;
struct in6_mfilter timf;
struct in6_multi *inm;
struct ifmultiaddr *ifma;
@@ -1264,7 +1213,6 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
char ip6tbuf[INET6_ADDRSTRLEN];
#endif
-#ifdef INVARIANTS
/*
* Sanity: Check scope zone ID was set for ifp, if and
* only if group is scoped to an interface.
@@ -1276,7 +1224,6 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
KASSERT(mcaddr->s6_addr16[1] != 0,
("%s: scope zone ID not set", __func__));
}
-#endif
IN6_MULTI_LOCK_ASSERT();
IN6_MULTI_LIST_UNLOCK_ASSERT();
@@ -1317,22 +1264,26 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
}
out_in6m_release:
+ SLIST_INIT(&inmh);
if (error) {
+ struct epoch_tracker et;
+
CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_protospec == inm) {
ifma->ifma_protospec = NULL;
break;
}
}
- in6m_disconnect(inm);
- in6m_release_deferred(inm);
- IF_ADDR_RUNLOCK(ifp);
+ in6m_disconnect_locked(&inmh, inm);
+ in6m_rele_locked(&inmh, inm);
+ NET_EPOCH_EXIT(et);
} else {
*pinm = inm;
}
IN6_MULTI_LIST_UNLOCK();
+ in6m_release_list_deferred(&inmh);
return (error);
}
@@ -1366,6 +1317,7 @@ in6_leavegroup(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
int
in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
{
+ struct in6_multi_head inmh;
struct in6_mfilter timf;
struct ifnet *ifp;
int error;
@@ -1415,13 +1367,15 @@ in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
if (ifp)
IF_ADDR_WLOCK(ifp);
- if (inm->in6m_refcount == 1 && inm->in6m_ifp != NULL)
- in6m_disconnect(inm);
- in6m_release_deferred(inm);
+
+ SLIST_INIT(&inmh);
+ if (inm->in6m_refcount == 1)
+ in6m_disconnect_locked(&inmh, inm);
+ in6m_rele_locked(&inmh, inm);
if (ifp)
IF_ADDR_WUNLOCK(ifp);
IN6_MULTI_LIST_UNLOCK();
-
+ in6m_release_list_deferred(&inmh);
return (error);
}
@@ -1447,7 +1401,6 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
struct ip6_moptions *imo;
struct in6_msource *ims;
struct in6_multi *inm;
- size_t idx;
uint16_t fmode;
int error, doblock;
#ifdef KTR
@@ -1504,16 +1457,12 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
* Check if we are actually a member of this group.
*/
imo = in6p_findmoptions(inp);
- idx = im6o_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->im6o_mfilters == NULL) {
+ imf = im6o_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_in6p_locked;
}
-
- KASSERT(imo->im6o_mfilters != NULL,
- ("%s: im6o_mfilters not allocated", __func__));
- imf = &imo->im6o_mfilters[idx];
- inm = imo->im6o_membership[idx];
+ inm = imf->im6f_in6m;
/*
* Attempting to use the delta-based API on an
@@ -1531,7 +1480,7 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
* Asked to unblock, but nothing to unblock.
* If adding a new block entry, allocate it.
*/
- ims = im6o_match_source(imo, idx, &ssa->sa);
+ ims = im6o_match_source(imf, &ssa->sa);
if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
CTR3(KTR_MLD, "%s: source %s %spresent", __func__,
ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
@@ -1601,9 +1550,6 @@ static struct ip6_moptions *
in6p_findmoptions(struct inpcb *inp)
{
struct ip6_moptions *imo;
- struct in6_multi **immp;
- struct in6_mfilter *imfp;
- size_t idx;
INP_WLOCK(inp);
if (inp->in6p_moptions != NULL)
@@ -1612,27 +1558,14 @@ in6p_findmoptions(struct inpcb *inp)
INP_WUNLOCK(inp);
imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK);
- immp = malloc(sizeof(*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS,
- M_WAITOK | M_ZERO);
- imfp = malloc(sizeof(struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS,
- M_IN6MFILTER, M_WAITOK);
imo->im6o_multicast_ifp = NULL;
imo->im6o_multicast_hlim = V_ip6_defmcasthlim;
imo->im6o_multicast_loop = in6_mcast_loop;
- imo->im6o_num_memberships = 0;
- imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
- imo->im6o_membership = immp;
-
- /* Initialize per-group source filters. */
- for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++)
- im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
- imo->im6o_mfilters = imfp;
+ STAILQ_INIT(&imo->im6o_head);
INP_WLOCK(inp);
if (inp->in6p_moptions != NULL) {
- free(imfp, M_IN6MFILTER);
- free(immp, M_IP6MOPTS);
free(imo, M_IP6MOPTS);
return (inp->in6p_moptions);
}
@@ -1652,33 +1585,26 @@ in6p_findmoptions(struct inpcb *inp)
static void
inp_gcmoptions(struct ip6_moptions *imo)
{
- struct in6_mfilter *imf;
+ struct in6_mfilter *imf;
struct in6_multi *inm;
struct ifnet *ifp;
- size_t idx, nmships;
-
- nmships = imo->im6o_num_memberships;
- for (idx = 0; idx < nmships; ++idx) {
- imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL;
- if (imf)
- im6f_leave(imf);
- inm = imo->im6o_membership[idx];
- ifp = inm->in6m_ifp;
- if (ifp != NULL) {
- CURVNET_SET(ifp->if_vnet);
- (void)in6_leavegroup(inm, imf);
- CURVNET_RESTORE();
- } else {
- (void)in6_leavegroup(inm, imf);
- }
- if (imf)
- im6f_purge(imf);
- }
- if (imo->im6o_mfilters)
- free(imo->im6o_mfilters, M_IN6MFILTER);
- free(imo->im6o_membership, M_IP6MOPTS);
- free(imo, M_IP6MOPTS);
+ while ((imf = ip6_mfilter_first(&imo->im6o_head)) != NULL) {
+ ip6_mfilter_remove(&imo->im6o_head, imf);
+
+ im6f_leave(imf);
+ if ((inm = imf->im6f_in6m) != NULL) {
+ if ((ifp = inm->in6m_ifp) != NULL) {
+ CURVNET_SET(ifp->if_vnet);
+ (void)in6_leavegroup(inm, imf);
+ CURVNET_RESTORE();
+ } else {
+ (void)in6_leavegroup(inm, imf);
+ }
+ }
+ ip6_mfilter_free(imf);
+ }
+ free(imo, M_IP6MOPTS);
}
void
@@ -1707,7 +1633,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct sockaddr_storage *ptss;
struct sockaddr_storage *tss;
int error;
- size_t idx, nsrcs, ncsrcs;
+ size_t nsrcs, ncsrcs;
INP_WLOCK_ASSERT(inp);
@@ -1741,12 +1667,11 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
/*
* Lookup group on the socket.
*/
- idx = im6o_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->im6o_mfilters == NULL) {
+ imf = im6o_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
INP_WUNLOCK(inp);
return (EADDRNOTAVAIL);
}
- imf = &imo->im6o_mfilters[idx];
/*
* Ignore memberships which are in limbo.
@@ -1905,7 +1830,7 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
* Returns NULL if no ifp could be found.
*/
static struct ifnet *
-in6p_lookup_mcast_ifp(const struct inpcb *in6p,
+in6p_lookup_mcast_ifp(const struct inpcb *inp,
const struct sockaddr_in6 *gsin6)
{
struct nhop6_basic nh6;
@@ -1913,13 +1838,13 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p,
uint32_t scopeid;
uint32_t fibnum;
- KASSERT(in6p->inp_vflag & INP_IPV6,
+ KASSERT(inp->inp_vflag & INP_IPV6,
("%s: not INP_IPV6 inpcb", __func__));
KASSERT(gsin6->sin6_family == AF_INET6,
("%s: not AF_INET6 group", __func__));
in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
- fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ fibnum = inp ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
return (NULL);
@@ -1935,6 +1860,7 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p,
static int
in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
{
+ struct in6_multi_head inmh;
struct group_source_req gsr;
sockunion_t *gsa, *ssa;
struct ifnet *ifp;
@@ -1942,14 +1868,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
struct ip6_moptions *imo;
struct in6_multi *inm;
struct in6_msource *lims;
- size_t idx;
int error, is_new;
+ SLIST_INIT(&inmh);
ifp = NULL;
- imf = NULL;
lims = NULL;
error = 0;
- is_new = 0;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -2050,13 +1974,25 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
*/
(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+ IN6_MULTI_LOCK();
+
+ /*
+ * Find the membership in the membership list.
+ */
imo = in6p_findmoptions(inp);
- idx = im6o_match_group(imo, ifp, &gsa->sa);
- if (idx == -1) {
+ imf = im6o_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
is_new = 1;
+ inm = NULL;
+
+ if (ip6_mfilter_count(&imo->im6o_head) >= IPV6_MAX_MEMBERSHIPS) {
+ error = ENOMEM;
+ goto out_in6p_locked;
+ }
} else {
- inm = imo->im6o_membership[idx];
- imf = &imo->im6o_mfilters[idx];
+ is_new = 0;
+ inm = imf->im6f_in6m;
+
if (ssa->ss.ss_family != AF_UNSPEC) {
/*
* MCAST_JOIN_SOURCE_GROUP on an exclusive membership
@@ -2083,7 +2019,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
* full-state SSM API with the delta-based API,
* which is discouraged in the relevant RFCs.
*/
- lims = im6o_match_source(imo, idx, &ssa->sa);
+ lims = im6o_match_source(imf, &ssa->sa);
if (lims != NULL /*&&
lims->im6sl_st[1] == MCAST_INCLUDE*/) {
error = EADDRNOTAVAIL;
@@ -2111,27 +2047,6 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
*/
INP_WLOCK_ASSERT(inp);
- if (is_new) {
- if (imo->im6o_num_memberships == imo->im6o_max_memberships) {
- error = im6o_grow(imo);
- if (error)
- goto out_in6p_locked;
- }
- /*
- * Allocate the new slot upfront so we can deal with
- * grafting the new source filter in same code path
- * as for join-source on existing membership.
- */
- idx = imo->im6o_num_memberships;
- imo->im6o_membership[idx] = NULL;
- imo->im6o_num_memberships++;
- KASSERT(imo->im6o_mfilters != NULL,
- ("%s: im6f_mfilters vector was not allocated", __func__));
- imf = &imo->im6o_mfilters[idx];
- KASSERT(RB_EMPTY(&imf->im6f_sources),
- ("%s: im6f_sources not empty", __func__));
- }
-
/*
* Graft new source into filter list for this inpcb's
* membership of the group. The in6_multi may not have
@@ -2147,7 +2062,11 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
/* Membership starts in IN mode */
if (is_new) {
CTR1(KTR_MLD, "%s: new join w/source", __func__);
- im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
+ imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
+ if (imf == NULL) {
+ error = ENOMEM;
+ goto out_in6p_locked;
+ }
} else {
CTR2(KTR_MLD, "%s: %s source", __func__, "allow");
}
@@ -2156,77 +2075,88 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
CTR1(KTR_MLD, "%s: merge imf state failed",
__func__);
error = ENOMEM;
- goto out_im6o_free;
+ goto out_in6p_locked;
}
} else {
/* No address specified; Membership starts in EX mode */
if (is_new) {
CTR1(KTR_MLD, "%s: new join w/o source", __func__);
- im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
+ if (imf == NULL) {
+ error = ENOMEM;
+ goto out_in6p_locked;
+ }
}
}
/*
* Begin state merge transaction at MLD layer.
*/
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- IN6_MULTI_LOCK();
-
if (is_new) {
+ in_pcbref(inp);
+ INP_WUNLOCK(inp);
+
error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf,
- &inm, 0);
+ &imf->im6f_in6m, 0);
+
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ error = ENXIO;
+ goto out_in6p_unlocked;
+ }
if (error) {
- IN6_MULTI_UNLOCK();
- goto out_im6o_free;
+ goto out_in6p_locked;
}
- in6m_acquire(inm);
- imo->im6o_membership[idx] = inm;
+ /*
+ * NOTE: Refcount from in6_joingroup_locked()
+ * is protecting membership.
+ */
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
IN6_MULTI_LIST_LOCK();
error = in6m_merge(inm, imf);
- if (error)
+ if (error) {
CTR1(KTR_MLD, "%s: failed to merge inm state",
__func__);
- else {
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error)
- CTR1(KTR_MLD, "%s: failed mld downcall",
- __func__);
+ IN6_MULTI_LIST_UNLOCK();
+ im6f_rollback(imf);
+ im6f_reap(imf);
+ goto out_in6p_locked;
}
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
IN6_MULTI_LIST_UNLOCK();
- }
- IN6_MULTI_UNLOCK();
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (ENXIO);
- if (error) {
- im6f_rollback(imf);
- if (is_new)
- im6f_purge(imf);
- else
+ if (error) {
+ CTR1(KTR_MLD, "%s: failed mld downcall",
+ __func__);
+ im6f_rollback(imf);
im6f_reap(imf);
- } else {
- im6f_commit(imf);
- }
-
-out_im6o_free:
- if (error && is_new) {
- inm = imo->im6o_membership[idx];
- if (inm != NULL) {
- IN6_MULTI_LIST_LOCK();
- in6m_release_deferred(inm);
- IN6_MULTI_LIST_UNLOCK();
+ goto out_in6p_locked;
}
- imo->im6o_membership[idx] = NULL;
- --imo->im6o_num_memberships;
}
+ if (is_new)
+ ip6_mfilter_insert(&imo->im6o_head, imf);
+
+ im6f_commit(imf);
+ imf = NULL;
+
out_in6p_locked:
INP_WUNLOCK(inp);
+out_in6p_unlocked:
+ IN6_MULTI_UNLOCK();
+
+ if (is_new && imf) {
+ if (imf->im6f_in6m != NULL) {
+ struct in6_multi_head inmh;
+
+ SLIST_INIT(&inmh);
+ SLIST_INSERT_HEAD(&inmh, imf->im6f_in6m, in6m_defer);
+ in6m_release_list_deferred(&inmh);
+ }
+ ip6_mfilter_free(imf);
+ }
return (error);
}
@@ -2245,8 +2175,8 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
struct in6_msource *ims;
struct in6_multi *inm;
uint32_t ifindex;
- size_t idx;
- int error, is_final;
+ int error;
+ bool is_final;
#ifdef KTR
char ip6tbuf[INET6_ADDRSTRLEN];
#endif
@@ -2254,7 +2184,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
ifp = NULL;
ifindex = 0;
error = 0;
- is_final = 1;
+ is_final = true;
memset(&gsr, 0, sizeof(struct group_source_req));
gsa = (sockunion_t *)&gsr.gsr_group;
@@ -2372,20 +2302,21 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp);
KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__));
+ IN6_MULTI_LOCK();
+
/*
- * Find the membership in the membership array.
+ * Find the membership in the membership list.
*/
imo = in6p_findmoptions(inp);
- idx = im6o_match_group(imo, ifp, &gsa->sa);
- if (idx == -1) {
+ imf = im6o_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_in6p_locked;
}
- inm = imo->im6o_membership[idx];
- imf = &imo->im6o_mfilters[idx];
+ inm = imf->im6f_in6m;
if (ssa->ss.ss_family != AF_UNSPEC)
- is_final = 0;
+ is_final = false;
/*
* Begin state merge transaction at socket layer.
@@ -2397,13 +2328,14 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
* MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
*/
if (is_final) {
+ ip6_mfilter_remove(&imo->im6o_head, imf);
im6f_leave(imf);
} else {
if (imf->im6f_st[0] == MCAST_EXCLUDE) {
error = EADDRNOTAVAIL;
goto out_in6p_locked;
}
- ims = im6o_match_source(imo, idx, &ssa->sa);
+ ims = im6o_match_source(imf, &ssa->sa);
if (ims == NULL) {
CTR3(KTR_MLD, "%s: source %p %spresent", __func__,
ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
@@ -2423,56 +2355,47 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
/*
* Begin state merge transaction at MLD layer.
*/
- in_pcbref(inp);
- INP_WUNLOCK(inp);
- IN6_MULTI_LOCK();
-
- if (is_final) {
- /*
- * Give up the multicast address record to which
- * the membership points.
- */
- (void)in6_leavegroup_locked(inm, imf);
- } else {
+ if (!is_final) {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
IN6_MULTI_LIST_LOCK();
error = in6m_merge(inm, imf);
- if (error)
+ if (error) {
CTR1(KTR_MLD, "%s: failed to merge inm state",
__func__);
- else {
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error)
- CTR1(KTR_MLD, "%s: failed mld downcall",
- __func__);
+ IN6_MULTI_LIST_UNLOCK();
+ im6f_rollback(imf);
+ im6f_reap(imf);
+ goto out_in6p_locked;
}
+
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
IN6_MULTI_LIST_UNLOCK();
+ if (error) {
+ CTR1(KTR_MLD, "%s: failed mld downcall",
+ __func__);
+ im6f_rollback(imf);
+ im6f_reap(imf);
+ goto out_in6p_locked;
+ }
}
- IN6_MULTI_UNLOCK();
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (ENXIO);
-
- if (error)
- im6f_rollback(imf);
- else
- im6f_commit(imf);
-
+ im6f_commit(imf);
im6f_reap(imf);
- if (is_final) {
- /* Remove the gap in the membership array. */
- for (++idx; idx < imo->im6o_num_memberships; ++idx) {
- imo->im6o_membership[idx-1] = imo->im6o_membership[idx];
- imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx];
- }
- imo->im6o_num_memberships--;
- }
-
out_in6p_locked:
INP_WUNLOCK(inp);
+
+ if (is_final && imf) {
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void)in6_leavegroup_locked(inm, imf);
+ ip6_mfilter_free(imf);
+ }
+
+ IN6_MULTI_UNLOCK();
return (error);
}
@@ -2530,7 +2453,6 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
struct in6_mfilter *imf;
struct ip6_moptions *imo;
struct in6_multi *inm;
- size_t idx;
int error;
error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
@@ -2567,13 +2489,12 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
* Check if this socket is a member of this group.
*/
imo = in6p_findmoptions(inp);
- idx = im6o_match_group(imo, ifp, &gsa->sa);
- if (idx == -1 || imo->im6o_mfilters == NULL) {
+ imf = im6o_match_group(imo, ifp, &gsa->sa);
+ if (imf == NULL) {
error = EADDRNOTAVAIL;
goto out_in6p_locked;
}
- inm = imo->im6o_membership[idx];
- imf = &imo->im6o_mfilters[idx];
+ inm = imf->im6f_in6m;
/*
* Begin state merge transaction at socket layer.
@@ -2814,6 +2735,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
{
struct in6_addr mcaddr;
struct in6_addr src;
+ struct epoch_tracker et;
struct ifnet *ifp;
struct ifmultiaddr *ifma;
struct in6_multi *inm;
@@ -2868,12 +2790,11 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
IN6_MULTI_LOCK();
IN6_MULTI_LIST_LOCK();
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
continue;
fmode = inm->in6m_st[1].iss_fmode;
@@ -2897,7 +2818,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
IN6_MULTI_LIST_UNLOCK();
IN6_MULTI_UNLOCK();
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index a6beba43..b66aa8a4 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -186,14 +186,15 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
(SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ struct epoch_tracker et;
struct ifaddr *ifa;
sin6->sin6_port = 0; /* yech... */
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
NULL &&
(inp->inp_flags & INP_BINDANY) == 0) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (EADDRNOTAVAIL);
}
@@ -206,10 +207,10 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
if (ifa != NULL &&
((struct in6_ifaddr *)ifa)->ia6_flags &
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (EADDRNOTAVAIL);
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
}
if (lport) {
struct inpcb *t;
@@ -814,19 +815,20 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
void
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
- struct inpcb *in6p;
+ struct inpcb *inp;
+ struct in6_multi *inm;
+ struct in6_mfilter *imf;
struct ip6_moptions *im6o;
- int i, gap;
INP_INFO_WLOCK(pcbinfo);
- CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
- INP_WLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
- INP_WUNLOCK(in6p);
+ CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
continue;
}
- im6o = in6p->in6p_moptions;
- if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
+ im6o = inp->in6p_moptions;
+ if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) {
/*
* Unselect the outgoing ifp for multicast if it
* is being detached.
@@ -837,20 +839,20 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
* Drop multicast group membership if we joined
* through the interface being detached.
*/
- gap = 0;
- for (i = 0; i < im6o->im6o_num_memberships; i++) {
- if (im6o->im6o_membership[i]->in6m_ifp ==
- ifp) {
- in6_leavegroup(im6o->im6o_membership[i], NULL);
- gap++;
- } else if (gap != 0) {
- im6o->im6o_membership[i - gap] =
- im6o->im6o_membership[i];
- }
+restart:
+ IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
+ if ((inm = imf->im6f_in6m) == NULL)
+ continue;
+ if (inm->in6m_ifp != ifp)
+ continue;
+ ip6_mfilter_remove(&im6o->im6o_head, imf);
+ IN6_MULTI_LOCK_ASSERT();
+ in6_leavegroup_locked(inm, NULL);
+ ip6_mfilter_free(imf);
+ goto restart;
}
- im6o->im6o_num_memberships -= gap;
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
}
INP_INFO_WUNLOCK(pcbinfo);
}
diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h
index 2c6bcdc6..56ea6eeb 100644
--- a/freebsd/sys/netinet6/in6_pcb.h
+++ b/freebsd/sys/netinet6/in6_pcb.h
@@ -113,7 +113,7 @@ int in6_getpeeraddr(struct socket *so, struct sockaddr **nam);
int in6_getsockaddr(struct socket *so, struct sockaddr **nam);
int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam);
int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam);
-int in6_selecthlim(struct in6pcb *, struct ifnet *);
+int in6_selecthlim(struct inpcb *, struct ifnet *);
int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *);
void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index cf62e60c..652d70b6 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -386,10 +386,6 @@ VNET_DEFINE(int, ip6_accept_rtadv) = 0;
VNET_DEFINE(int, ip6_no_radr) = 0;
VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
-VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
-int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
* process? */
@@ -476,20 +472,6 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
return (0);
}
-static int
-sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
-{
- int error, val;
-
- val = V_ip6_maxfragpackets;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error != 0 || !req->newptr)
- return (error);
- V_ip6_maxfragpackets = val;
- frag6_set_bucketsize();
- return (0);
-}
-
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
"Enable forwarding of IPv6 packets between interfaces");
@@ -502,12 +484,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
ip6stat,
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
-SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
- sysctl_ip6_maxfragpackets, "I",
- "Default maximum number of outstanding fragmented IPv6 packets. "
- "A value of 0 means no fragmented packets will be accepted, while a "
- "a value of -1 means no limit");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
"Default value of per-interface flag for accepting ICMPv6 RA messages");
@@ -577,17 +553,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
"Use the default scope zone when none is specified");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
- CTLFLAG_RW, &ip6_maxfrags, 0,
- "Maximum allowed number of outstanding IPv6 packet fragments. "
- "A value of 0 means no fragmented packets will be accepted, while a "
- "a value of -1 means no limit");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
- "Maximum number of reassembly queues per hash bucket");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
- "Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
"Enable path MTU discovery for multicast packets");
@@ -643,3 +608,10 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nd6_onlink_ns_rfc4861), 0,
"Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861");
+#ifdef EXPERIMENTAL
+SYSCTL_INT(_net_inet6_icmp6, OID_AUTO,
+ nd6_ignore_ipv6_only_ra, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(nd6_ignore_ipv6_only_ra), 0,
+ "Ignore the 'IPv6-Only flag' in RA messages in compliance with "
+ "draft-ietf-6man-ipv6only-flag");
+#endif
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index 1cb71b88..0bd8bba4 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -726,6 +726,10 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
if (ron->ro_rt == NULL ||
(ron->ro_rt->rt_flags & RTF_GATEWAY) != 0)
error = EHOSTUNREACH;
+ else {
+ rt = ron->ro_rt;
+ ifp = rt->rt_ifp;
+ }
goto done;
}
@@ -929,21 +933,21 @@ in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* 3. The system default hoplimit.
*/
int
-in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
+in6_selecthlim(struct inpcb *inp, struct ifnet *ifp)
{
- if (in6p && in6p->in6p_hops >= 0)
- return (in6p->in6p_hops);
+ if (inp && inp->in6p_hops >= 0)
+ return (inp->in6p_hops);
else if (ifp)
return (ND_IFINFO(ifp)->chlim);
- else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
+ else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
struct nhop6_basic nh6;
struct in6_addr dst;
uint32_t fibnum, scopeid;
int hlim;
- fibnum = in6p->inp_inc.inc_fibnum;
- in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid);
+ fibnum = inp->inp_inc.inc_fibnum;
+ in6_splitscope(&inp->in6p_faddr, &dst, &scopeid);
if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
return (hlim);
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 5ed0ae90..3e535310 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -602,9 +602,61 @@ struct in6_mfilter {
struct ip6_msource_tree im6f_sources; /* source list for (S,G) */
u_long im6f_nsrc; /* # of source entries */
uint8_t im6f_st[2]; /* state before/at commit */
+ struct in6_multi *im6f_in6m; /* associated multicast address */
+ STAILQ_ENTRY(in6_mfilter) im6f_entry; /* list entry */
};
/*
+ * Helper types and functions for IPv6 multicast filters.
+ */
+STAILQ_HEAD(ip6_mfilter_head, in6_mfilter);
+
+struct in6_mfilter *ip6_mfilter_alloc(int mflags, int st0, int st1);
+void ip6_mfilter_free(struct in6_mfilter *);
+
+static inline void
+ip6_mfilter_init(struct ip6_mfilter_head *head)
+{
+
+ STAILQ_INIT(head);
+}
+
+static inline struct in6_mfilter *
+ip6_mfilter_first(const struct ip6_mfilter_head *head)
+{
+
+ return (STAILQ_FIRST(head));
+}
+
+static inline void
+ip6_mfilter_insert(struct ip6_mfilter_head *head, struct in6_mfilter *imf)
+{
+
+ STAILQ_INSERT_TAIL(head, imf, im6f_entry);
+}
+
+static inline void
+ip6_mfilter_remove(struct ip6_mfilter_head *head, struct in6_mfilter *imf)
+{
+
+ STAILQ_REMOVE(head, imf, in6_mfilter, im6f_entry);
+}
+
+#define IP6_MFILTER_FOREACH(imf, head) \
+ STAILQ_FOREACH(imf, head, im6f_entry)
+
+static inline size_t
+ip6_mfilter_count(struct ip6_mfilter_head *head)
+{
+ struct in6_mfilter *imf;
+ size_t num = 0;
+
+ STAILQ_FOREACH(imf, head, im6f_entry)
+ num++;
+ return (num);
+}
+
+/*
* Legacy KAME IPv6 multicast membership descriptor.
*/
struct in6_multi_mship {
@@ -645,6 +697,7 @@ struct in6_multi {
/* New fields for MLDv2 follow. */
struct mld_ifsoftc *in6m_mli; /* MLD info */
SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */
+ SLIST_ENTRY(in6_multi) in6m_defer; /* deferred MLDv1 */
struct ip6_msource_tree in6m_srcs; /* tree of sources */
u_long in6m_nsrc; /* # of tree entries */
@@ -670,8 +723,8 @@ struct in6_multi {
} in6m_st[2]; /* state at t0, t1 */
};
-void in6m_disconnect(struct in6_multi *inm);
-extern int ifma6_restart;
+void in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm);
+
/*
* Helper function to derive the filter mode on a source entry
* from its internal counters. Predicates are:
@@ -713,13 +766,25 @@ extern struct sx in6_multi_sx;
#define IN6_MULTI_LOCK_ASSERT() sx_assert(&in6_multi_sx, SA_XLOCKED)
#define IN6_MULTI_UNLOCK_ASSERT() sx_assert(&in6_multi_sx, SA_XUNLOCKED)
+/*
+ * Get the in6_multi pointer from an ifmultiaddr.
+ * Returns NULL if ifmultiaddr is no longer valid.
+ */
+static __inline struct in6_multi *
+in6m_ifmultiaddr_get_inm(struct ifmultiaddr *ifma)
+{
+
+ NET_EPOCH_ASSERT();
+
+ return ((ifma->ifma_addr->sa_family != AF_INET6 ||
+ (ifma->ifma_flags & IFMA_F_ENQUEUED) == 0) ? NULL :
+ ifma->ifma_protospec);
+}
/*
* Look up an in6_multi record for an IPv6 multicast address
* on the interface ifp.
* If no record found, return NULL.
- *
- * SMPng: The IN6_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
*/
static __inline struct in6_multi *
in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr)
@@ -727,18 +792,14 @@ in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr)
struct ifmultiaddr *ifma;
struct in6_multi *inm;
- inm = NULL;
- CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
- if (ifma->ifma_addr->sa_family == AF_INET6) {
- inm = (struct in6_multi *)ifma->ifma_protospec;
- if (inm == NULL)
- continue;
- if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr))
- break;
- inm = NULL;
- }
+ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
+ continue;
+ if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr))
+ return (inm);
}
- return (inm);
+ return (NULL);
}
/*
@@ -749,12 +810,13 @@ in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr)
static __inline struct in6_multi *
in6m_lookup(struct ifnet *ifp, const struct in6_addr *mcaddr)
{
+ struct epoch_tracker et;
struct in6_multi *inm;
IN6_MULTI_LIST_LOCK();
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
inm = in6m_lookup_locked(ifp, mcaddr);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
IN6_MULTI_LIST_UNLOCK();
return (inm);
@@ -808,8 +870,8 @@ void in6m_clear_recorded(struct in6_multi *);
void in6m_commit(struct in6_multi *);
void in6m_print(const struct in6_multi *);
int in6m_record_source(struct in6_multi *, const struct in6_addr *);
-void in6m_release_deferred(struct in6_multi *);
void in6m_release_list_deferred(struct in6_multi_head *);
+void in6m_release_wait(void);
void ip6_freemoptions(struct ip6_moptions *);
int ip6_getmoptions(struct inpcb *, struct sockopt *);
int ip6_setmoptions(struct inpcb *, struct sockopt *);
diff --git a/freebsd/sys/netinet6/ip6_fastfwd.c b/freebsd/sys/netinet6/ip6_fastfwd.c
index f63c51bf..0c04200c 100644
--- a/freebsd/sys/netinet6/ip6_fastfwd.c
+++ b/freebsd/sys/netinet6/ip6_fastfwd.c
@@ -158,10 +158,10 @@ ip6_tryforward(struct mbuf *m)
/*
* Incoming packet firewall processing.
*/
- if (!PFIL_HOOKED(&V_inet6_pfil_hook))
+ if (!PFIL_HOOKED_IN(V_inet6_pfil_head))
goto passin;
- if (pfil_run_hooks(&V_inet6_pfil_hook, &m, rcvif, PFIL_IN, 0,
- NULL) != 0 || m == NULL)
+ if (pfil_run_hooks(V_inet6_pfil_head, &m, rcvif, PFIL_IN, NULL) !=
+ PFIL_PASS)
goto dropin;
/*
* If packet filter sets the M_FASTFWD_OURS flag, this means
@@ -197,7 +197,7 @@ passin:
in6_ifstat_inc(rcvif, ifs6_in_noroute);
goto dropin;
}
- if (!PFIL_HOOKED(&V_inet6_pfil_hook)) {
+ if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) {
if (m->m_pkthdr.len > nh.nh_mtu) {
in6_ifstat_inc(nh.nh_ifp, ifs6_in_toobig);
icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu);
@@ -210,8 +210,8 @@ passin:
/*
* Outgoing packet firewall processing.
*/
- if (pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_ifp, PFIL_OUT,
- PFIL_FWD, NULL) != 0 || m == NULL)
+ if (pfil_run_hooks(V_inet6_pfil_head, &m, nh.nh_ifp, PFIL_OUT |
+ PFIL_FWD, NULL) != PFIL_PASS)
goto dropout;
/*
diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c
index 80535efe..ca73977f 100644
--- a/freebsd/sys/netinet6/ip6_forward.c
+++ b/freebsd/sys/netinet6/ip6_forward.c
@@ -322,15 +322,14 @@ again2:
in6_clearscope(&ip6->ip6_dst);
/* Jump over all PFIL processing if hooks are not active. */
- if (!PFIL_HOOKED(&V_inet6_pfil_hook))
+ if (!PFIL_HOOKED_OUT(V_inet6_pfil_head))
goto pass;
odst = ip6->ip6_dst;
/* Run through list of hooks for forwarded packets. */
- error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT,
- PFIL_FWD, NULL);
- if (error != 0 || m == NULL)
- goto freecopy; /* consumed by filter */
+ if (pfil_run_hooks(V_inet6_pfil_head, &m, rt->rt_ifp, PFIL_OUT |
+ PFIL_FWD, NULL) != PFIL_PASS)
+ goto freecopy;
ip6 = mtod(m, struct ip6_hdr *);
/* See if destination IP address was changed by packet filter. */
diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c
index 0905ab3f..847dc403 100644
--- a/freebsd/sys/netinet6/ip6_id.c
+++ b/freebsd/sys/netinet6/ip6_id.c
@@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/random.h>
#include <sys/socket.h>
#include <sys/libkern.h>
@@ -260,5 +261,15 @@ u_int32_t
ip6_randomflowlabel(void)
{
+ /*
+ * It's ok to emit zero flow labels early, before random is available
+ * (seeded). RFC 6437:
+ *
+ * "A Flow Label of zero is used to indicate packets that have not been
+ * labeled."
+ */
+ if (__predict_false(!is_random_seeded()))
+ return (0);
+
return randomid(&randomtab_20) & 0xfffff;
}
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 25ab624c..c5c040f0 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -193,7 +193,7 @@ SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
#endif
-VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
+VNET_DEFINE(pfil_head_t, inet6_pfil_head);
VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat);
VNET_PCPUSTAT_SYSINIT(ip6stat);
@@ -216,6 +216,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
void
ip6_init(void)
{
+ struct pfil_head_args args;
struct protosw *pr;
int i;
@@ -229,11 +230,11 @@ ip6_init(void)
&V_in6_ifaddrhmask);
/* Initialize packet filter hooks. */
- V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
- V_inet6_pfil_hook.ph_af = AF_INET6;
- if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0)
- printf("%s: WARNING: unable to register pfil hook, "
- "error %d\n", __func__, i);
+ args.pa_version = PFIL_VERSION;
+ args.pa_flags = PFIL_IN | PFIL_OUT;
+ args.pa_type = PFIL_TYPE_IP6;
+ args.pa_headname = PFIL_INET6_NAME;
+ V_inet6_pfil_head = pfil_head_register(&args);
if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6,
&V_ipsec_hhh_in[HHOOK_IPSEC_INET6],
@@ -361,9 +362,7 @@ ip6_destroy(void *unused __unused)
#endif
netisr_unregister_vnet(&ip6_nh);
- if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
- printf("%s: WARNING: unable to unregister pfil hook, "
- "error %d\n", __func__, error);
+ pfil_head_unregister(V_inet6_pfil_head);
error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]);
if (error != 0) {
printf("%s: WARNING: unable to deregister input helper hook "
@@ -406,20 +405,22 @@ VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL);
#endif
static int
-ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
+ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off,
int *nxt, int *ours)
{
+ struct mbuf *m;
struct ip6_hdr *ip6;
struct ip6_hbh *hbh;
- if (ip6_hopopts_input(plen, rtalert, &m, off)) {
+ if (ip6_hopopts_input(plen, rtalert, mp, off)) {
#if 0 /*touches NULL pointer*/
- in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+ in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard);
#endif
goto out; /* m have already been freed */
}
/* adjust pointer */
+ m = *mp;
ip6 = mtod(m, struct ip6_hdr *);
/*
@@ -760,14 +761,12 @@ ip6_input(struct mbuf *m)
*/
/* Jump over all PFIL processing if hooks are not active. */
- if (!PFIL_HOOKED(&V_inet6_pfil_hook))
+ if (!PFIL_HOOKED_IN(V_inet6_pfil_head))
goto passin;
odst = ip6->ip6_dst;
- if (pfil_run_hooks(&V_inet6_pfil_hook, &m,
- m->m_pkthdr.rcvif, PFIL_IN, 0, NULL))
- return;
- if (m == NULL) /* consumed by filter */
+ if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN,
+ NULL) != PFIL_PASS)
return;
ip6 = mtod(m, struct ip6_hdr *);
srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
@@ -859,7 +858,7 @@ passin:
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
- if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0)
+ if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0)
return;
} else
nxt = ip6->ip6_nxt;
@@ -1409,12 +1408,12 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
}
void
-ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
+ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
int v4only = 0;
- mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
+ mp = ip6_savecontrol_v4(inp, m, mp, &v4only);
if (v4only)
return;
@@ -1425,7 +1424,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
* returned to normal user.
* See also RFC 2292 section 6 (or RFC 3542 section 8).
*/
- if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) {
+ if ((inp->inp_flags & IN6P_HOPOPTS) != 0) {
/*
* Check if a hop-by-hop options header is contatined in the
* received packet, and if so, store the options as ancillary
@@ -1467,7 +1466,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
* Note: this constraint is removed in RFC3542
*/
*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
- IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
+ IS2292(inp, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
@@ -1477,7 +1476,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
}
}
- if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
+ if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
/*
@@ -1538,22 +1537,22 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
switch (nxt) {
case IPPROTO_DSTOPTS:
- if (!(in6p->inp_flags & IN6P_DSTOPTS))
+ if (!(inp->inp_flags & IN6P_DSTOPTS))
break;
*mp = sbcreatecontrol((caddr_t)ip6e, elen,
- IS2292(in6p,
+ IS2292(inp,
IPV6_2292DSTOPTS, IPV6_DSTOPTS),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
break;
case IPPROTO_ROUTING:
- if (!(in6p->inp_flags & IN6P_RTHDR))
+ if (!(inp->inp_flags & IN6P_RTHDR))
break;
*mp = sbcreatecontrol((caddr_t)ip6e, elen,
- IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
+ IS2292(inp, IPV6_2292RTHDR, IPV6_RTHDR),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
@@ -1589,7 +1588,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
;
}
- if (in6p->inp_flags2 & INP_RECVFLOWID) {
+ if (inp->inp_flags2 & INP_RECVFLOWID) {
uint32_t flowid, flow_type;
flowid = m->m_pkthdr.flowid;
@@ -1610,7 +1609,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
}
#ifdef RSS
- if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) {
+ if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
uint32_t flowid, flow_type;
uint32_t rss_bucketid;
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index 0851bef8..483f17f0 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -69,14 +69,16 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ratelimit.h>
#include <rtems/bsd/local/opt_ipsec.h>
-#include <rtems/bsd/local/opt_sctp.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
+#include <rtems/bsd/local/opt_ratelimit.h>
#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/local/opt_rss.h>
+#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
@@ -230,7 +232,20 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
IP6STAT_INC(ip6s_odropped);
return (ENOBUFS);
}
- m->m_flags = m0->m_flags & M_COPYFLAGS;
+
+ /*
+ * Make sure the complete packet header gets copied
+ * from the originating mbuf to the newly created
+ * mbuf. This also ensures that existing firewall
+ * classification(s), VLAN tags and so on get copied
+ * to the resulting fragmented packet(s):
+ */
+ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
+ m_free(m);
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+
*mnext = m;
mnext = &m->m_nextpkt;
m->m_data += max_linkhdr;
@@ -255,8 +270,6 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
}
m_cat(m, m_frgpart);
m->m_pkthdr.len = fraglen + hlen + sizeof(*ip6f);
- m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
- m->m_pkthdr.rcvif = NULL;
ip6f->ip6f_reserved = 0;
ip6f->ip6f_ident = id;
ip6f->ip6f_nxt = nextproto;
@@ -267,6 +280,83 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
return (0);
}
+/*
+ * Send one IPv6 packet through nd6_output_ifp(), first stamping a send
+ * tag on the mbuf when one applies: the kernel TLS session's tag when
+ * the chain carries a TLS record (KERN_TLS), otherwise the inpcb's send
+ * tag (RATELIMIT).
+ *
+ * The ip6_output() call sites do not free "m" after this call, so any
+ * path that bails out before nd6_output_ifp() must free the mbuf chain
+ * here.
+ *
+ * Returns the nd6_output_ifp() result, or EAGAIN when the packet was
+ * dropped because the send tag is missing or bound to a different
+ * interface.  For a TLS record an EAGAIN result is replaced by the
+ * return value of ktls_output_eagain().
+ */
+static int
+ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
+ struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro)
+{
+#ifdef KERN_TLS
+ struct ktls_session *tls = NULL;
+#endif
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef KERN_TLS
+ /*
+ * If this is an unencrypted TLS record, save a reference to
+ * the record. This local reference is used to call
+ * ktls_output_eagain after the mbuf has been freed (thus
+ * dropping the mbuf's reference) in if_output.
+ */
+ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
+ tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls);
+ mst = tls->snd_tag;
+
+ /*
+ * If a TLS session doesn't have a valid tag, it must
+ * have had an earlier ifp mismatch, so drop this
+ * packet.
+ */
+ if (mst == NULL) {
+ /* Dropping means freeing: nobody else will release m. */
+ m_freem(m);
+ error = EAGAIN;
+ goto done;
+ }
+ }
+#endif
+#ifdef RATELIMIT
+ if (inp != NULL && mst == NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ /* Tag belongs to another interface: drop and free m. */
+ m_freem(m);
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef KERN_TLS
+ if (tls != NULL) {
+ if (error == EAGAIN)
+ error = ktls_output_eagain(inp, tls);
+ ktls_free(tls);
+ }
+#endif
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -324,6 +414,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
}
+#ifdef NUMA
+ m->m_pkthdr.numa_domain = inp->inp_numa_domain;
+#endif
}
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
@@ -573,52 +666,72 @@ again:
counter_u64_add(rt->rt_pksent, 1);
}
-
- /*
- * The outgoing interface must be in the zone of source and
- * destination addresses.
- */
- origifp = ifp;
-
+ /* Setup data structures for scope ID checks. */
src0 = ip6->ip6_src;
- if (in6_setscope(&src0, origifp, &zone))
- goto badscope;
bzero(&src_sa, sizeof(src_sa));
src_sa.sin6_family = AF_INET6;
src_sa.sin6_len = sizeof(src_sa);
src_sa.sin6_addr = ip6->ip6_src;
- if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
- goto badscope;
dst0 = ip6->ip6_dst;
- if (in6_setscope(&dst0, origifp, &zone))
- goto badscope;
/* re-initialize to be sure */
bzero(&dst_sa, sizeof(dst_sa));
dst_sa.sin6_family = AF_INET6;
dst_sa.sin6_len = sizeof(dst_sa);
dst_sa.sin6_addr = ip6->ip6_dst;
- if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
- goto badscope;
- }
-
- /* We should use ia_ifp to support the case of
- * sending packets to an address of our own.
- */
- if (ia != NULL && ia->ia_ifp)
- ifp = ia->ia_ifp;
- /* scope check is done. */
- goto routefound;
+ /* Check for valid scope ID. */
+ if (in6_setscope(&src0, ifp, &zone) == 0 &&
+ sa6_recoverscope(&src_sa) == 0 && zone == src_sa.sin6_scope_id &&
+ in6_setscope(&dst0, ifp, &zone) == 0 &&
+ sa6_recoverscope(&dst_sa) == 0 && zone == dst_sa.sin6_scope_id) {
+ /*
+ * The outgoing interface is in the zone of the source
+ * and destination addresses.
+ *
+ * Because the loopback interface cannot receive
+ * packets with a different scope ID than its own,
+ * there is a trick: pretend the outgoing packet
+ * was received by the real network interface, by
+ * setting "origifp" different from "ifp". This is
+ * only allowed when "ifp" is a loopback network
+ * interface. Refer to code in nd6_output_ifp() for
+ * more details.
+ */
+ origifp = ifp;
+
+ /*
+ * We should use ia_ifp to support the case of sending
+ * packets to an address of our own.
+ */
+ if (ia != NULL && ia->ia_ifp)
+ ifp = ia->ia_ifp;
+
+ } else if ((ifp->if_flags & IFF_LOOPBACK) == 0 ||
+ sa6_recoverscope(&src_sa) != 0 ||
+ sa6_recoverscope(&dst_sa) != 0 ||
+ dst_sa.sin6_scope_id == 0 ||
+ (src_sa.sin6_scope_id != 0 &&
+ src_sa.sin6_scope_id != dst_sa.sin6_scope_id) ||
+ (origifp = ifnet_byindex(dst_sa.sin6_scope_id)) == NULL) {
+ /*
+ * If the destination network interface is not a
+ * loopback interface, or the destination network
+ * address has no scope ID, or the source address has
+ * a scope ID set which is different from the
+ * destination address one, or there is no network
+ * interface representing this scope ID, the address
+ * pair is considered invalid.
+ */
+ IP6STAT_INC(ip6s_badscope);
+ in6_ifstat_inc(ifp, ifs6_out_discard);
+ if (error == 0)
+ error = EHOSTUNREACH; /* XXX */
+ goto bad;
+ }
- badscope:
- IP6STAT_INC(ip6s_badscope);
- in6_ifstat_inc(origifp, ifs6_out_discard);
- if (error == 0)
- error = EHOSTUNREACH; /* XXX */
- goto bad;
+ /* All scope ID checks are successful. */
- routefound:
if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (opt && opt->ip6po_nextroute.ro_rt) {
/*
@@ -774,16 +887,21 @@ again:
}
/* Jump over all PFIL processing if hooks are not active. */
- if (!PFIL_HOOKED(&V_inet6_pfil_hook))
+ if (!PFIL_HOOKED_OUT(V_inet6_pfil_head))
goto passout;
odst = ip6->ip6_dst;
/* Run through list of hooks for output packets. */
- error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, 0, inp);
- if (error != 0 || m == NULL)
+ switch (pfil_run_hooks(V_inet6_pfil_head, &m, ifp, PFIL_OUT, inp)) {
+ case PFIL_PASS:
+ ip6 = mtod(m, struct ip6_hdr *);
+ break;
+ case PFIL_DROPPED:
+ error = EPERM;
+ /* FALLTHROUGH */
+ case PFIL_CONSUMED:
goto done;
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
+ }
needfiblookup = 0;
/* See if destination IP address was changed by packet filter. */
@@ -881,11 +999,30 @@ passout:
*/
if (sw_csum & CSUM_DELAY_DATA_IPV6) {
sw_csum &= ~CSUM_DELAY_DATA_IPV6;
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ error = ENOBUFS;
+ IP6STAT_INC(ip6s_odropped);
+ goto bad;
+ }
in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
+ } else if ((ifp->if_capenable & IFCAP_NOMAP) == 0) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ error = ENOBUFS;
+ IP6STAT_INC(ip6s_odropped);
+ goto bad;
+ }
}
#ifdef SCTP
if (sw_csum & CSUM_SCTP_IPV6) {
sw_csum &= ~CSUM_SCTP_IPV6;
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ error = ENOBUFS;
+ IP6STAT_INC(ip6s_odropped);
+ goto bad;
+ }
sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
}
#endif
@@ -931,23 +1068,7 @@ passout:
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
goto done;
}
@@ -989,11 +1110,23 @@ passout:
* XXX-BZ handle the hw offloading case. Need flags.
*/
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ in6_ifstat_inc(ifp, ifs6_out_fragfail);
+ error = ENOBUFS;
+ goto bad;
+ }
in6_delayed_cksum(m, plen, hlen);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
}
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ m = mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ in6_ifstat_inc(ifp, ifs6_out_fragfail);
+ error = ENOBUFS;
+ goto bad;
+ }
sctp_delayed_cksum(m, hlen);
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
}
@@ -1046,23 +1179,7 @@ sendorfree:
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
} else
m_freem(m);
}
@@ -1390,7 +1507,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int optdatalen, uproto;
void *optdata;
- struct inpcb *in6p = sotoinpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error, optval;
int level, op, optname;
int optlen;
@@ -1425,43 +1542,43 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
sopt->sopt_dir == SOPT_SET) {
switch (sopt->sopt_name) {
case SO_REUSEADDR:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEADDR) != 0)
- in6p->inp_flags2 |= INP_REUSEADDR;
+ inp->inp_flags2 |= INP_REUSEADDR;
else
- in6p->inp_flags2 &= ~INP_REUSEADDR;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEADDR;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_REUSEPORT:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEPORT) != 0)
- in6p->inp_flags2 |= INP_REUSEPORT;
+ inp->inp_flags2 |= INP_REUSEPORT;
else
- in6p->inp_flags2 &= ~INP_REUSEPORT;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEPORT;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_REUSEPORT_LB:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEPORT_LB) != 0)
- in6p->inp_flags2 |= INP_REUSEPORT_LB;
+ inp->inp_flags2 |= INP_REUSEPORT_LB;
else
- in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_SETFIB:
- INP_WLOCK(in6p);
- in6p->inp_inc.inc_fibnum = so->so_fibnum;
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_MAX_PACING_RATE:
#ifdef RATELIMIT
- INP_WLOCK(in6p);
- in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ INP_WUNLOCK(inp);
error = 0;
#else
error = EOPNOTSUPP;
@@ -1495,7 +1612,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
error = soopt_mcopyin(sopt, m); /* XXX */
if (error != 0)
break;
- error = ip6_pcbopts(&in6p->in6p_outputopts,
+ error = ip6_pcbopts(&inp->in6p_outputopts,
m, so, sopt);
m_freem(m); /* XXX */
break;
@@ -1566,57 +1683,57 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
error = EINVAL;
else {
/* -1 = kernel default */
- in6p->in6p_hops = optval;
- if ((in6p->inp_vflag &
+ inp->in6p_hops = optval;
+ if ((inp->inp_vflag &
INP_IPV4) != 0)
- in6p->inp_ip_ttl = optval;
+ inp->inp_ip_ttl = optval;
}
break;
#define OPTSET(bit) \
do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
- INP_WUNLOCK(in6p); \
+ inp->inp_flags &= ~(bit); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
#define OPTSET2292(bit) \
do { \
- INP_WLOCK(in6p); \
- in6p->inp_flags |= IN6P_RFC2292; \
+ INP_WLOCK(inp); \
+ inp->inp_flags |= IN6P_RFC2292; \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
- INP_WUNLOCK(in6p); \
+ inp->inp_flags &= ~(bit); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
-#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
+#define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
#define OPTSET2_N(bit, val) do { \
if (val) \
- in6p->inp_flags2 |= bit; \
+ inp->inp_flags2 |= bit; \
else \
- in6p->inp_flags2 &= ~bit; \
+ inp->inp_flags2 &= ~bit; \
} while (0)
#define OPTSET2(bit, val) do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
OPTSET2_N(bit, val); \
- INP_WUNLOCK(in6p); \
+ INP_WUNLOCK(inp); \
} while (0)
-#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0)
+#define OPTBIT2(bit) (inp->inp_flags2 & (bit) ? 1 : 0)
#define OPTSET2292_EXCLUSIVE(bit) \
do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
if (OPTBIT(IN6P_RFC2292)) { \
error = EINVAL; \
} else { \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
+ inp->inp_flags &= ~(bit); \
} \
- INP_WUNLOCK(in6p); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
case IPV6_RECVPKTINFO:
@@ -1632,17 +1749,17 @@ do { \
error = EINVAL;
break;
}
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(IPV6_HOPLIMIT,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
@@ -1693,16 +1810,16 @@ do { \
* available only prior to bind(2).
* see ipng mailing list, Jun 22 2001.
*/
- if (in6p->inp_lport ||
- !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
+ if (inp->inp_lport ||
+ !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
error = EINVAL;
break;
}
OPTSET(IN6P_IPV6_V6ONLY);
if (optval)
- in6p->inp_vflag &= ~INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV4;
else
- in6p->inp_vflag |= INP_IPV4;
+ inp->inp_vflag |= INP_IPV4;
break;
case IPV6_RECVTCLASS:
/* cannot mix with RFC2292 XXX */
@@ -1726,10 +1843,10 @@ do { \
case IPV6_RSS_LISTEN_BUCKET:
if ((optval >= 0) &&
(optval < rss_getnumbuckets())) {
- INP_WLOCK(in6p);
- in6p->inp_rss_listen_bucket = optval;
+ INP_WLOCK(inp);
+ inp->inp_rss_listen_bucket = optval;
OPTSET2_N(INP_RSS_BUCKET_SET, 1);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
} else {
error = EINVAL;
}
@@ -1752,17 +1869,17 @@ do { \
break;
{
struct ip6_pktopts **optp;
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(optname,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
@@ -1844,16 +1961,16 @@ do { \
break;
optlen = sopt->sopt_valsize;
optbuf = optbuf_storage;
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(optname, optbuf, optlen,
optp, (td != NULL) ? td->td_ucred : NULL,
uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
#undef OPTSET
@@ -1870,7 +1987,7 @@ do { \
case MCAST_LEAVE_GROUP:
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
- error = ip6_setmoptions(in6p, sopt);
+ error = ip6_setmoptions(inp, sopt);
break;
case IPV6_PORTRANGE:
@@ -1879,34 +1996,34 @@ do { \
if (error)
break;
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
switch (optval) {
case IPV6_PORTRANGE_DEFAULT:
- in6p->inp_flags &= ~(INP_LOWPORT);
- in6p->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags &= ~(INP_HIGHPORT);
break;
case IPV6_PORTRANGE_HIGH:
- in6p->inp_flags &= ~(INP_LOWPORT);
- in6p->inp_flags |= INP_HIGHPORT;
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags |= INP_HIGHPORT;
break;
case IPV6_PORTRANGE_LOW:
- in6p->inp_flags &= ~(INP_HIGHPORT);
- in6p->inp_flags |= INP_LOWPORT;
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags |= INP_LOWPORT;
break;
default:
error = EINVAL;
break;
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
case IPV6_IPSEC_POLICY:
if (IPSEC_ENABLED(ipv6)) {
- error = IPSEC_PCBCTL(ipv6, in6p, sopt);
+ error = IPSEC_PCBCTL(ipv6, inp, sopt);
break;
}
/* FALLTHROUGH */
@@ -1974,7 +2091,7 @@ do { \
break;
case IPV6_UNICAST_HOPS:
- optval = in6p->in6p_hops;
+ optval = inp->in6p_hops;
break;
case IPV6_RECVPKTINFO:
@@ -2000,7 +2117,7 @@ do { \
case IPV6_PORTRANGE:
{
int flags;
- flags = in6p->inp_flags;
+ flags = inp->inp_flags;
if (flags & INP_HIGHPORT)
optval = IPV6_PORTRANGE_HIGH;
else if (flags & INP_LOWPORT)
@@ -2026,11 +2143,11 @@ do { \
break;
case IPV6_FLOWID:
- optval = in6p->inp_flowid;
+ optval = inp->inp_flowid;
break;
case IPV6_FLOWTYPE:
- optval = in6p->inp_flowtype;
+ optval = inp->inp_flowtype;
break;
case IPV6_RECVFLOWID:
@@ -2039,8 +2156,8 @@ do { \
#ifdef RSS
case IPV6_RSSBUCKETID:
retval =
- rss_hash2bucket(in6p->inp_flowid,
- in6p->inp_flowtype,
+ rss_hash2bucket(inp->inp_flowid,
+ inp->inp_flowtype,
&rss_bucket);
if (retval == 0)
optval = rss_bucket;
@@ -2076,12 +2193,12 @@ do { \
* XXX: we dot not consider the case of source
* routing, or optional information to specify
* the outgoing interface.
- * Copy faddr out of in6p to avoid holding lock
+ * Copy faddr out of inp to avoid holding lock
* on inp during route lookup.
*/
- INP_RLOCK(in6p);
- bcopy(&in6p->in6p_faddr, &addr, sizeof(addr));
- INP_RUNLOCK(in6p);
+ INP_RLOCK(inp);
+ bcopy(&inp->in6p_faddr, &addr, sizeof(addr));
+ INP_RUNLOCK(inp);
error = ip6_getpmtu_ctl(so->so_fibnum,
&addr, &pmtu);
if (error)
@@ -2133,20 +2250,20 @@ do { \
case IPV6_DONTFRAG:
case IPV6_USE_MIN_MTU:
case IPV6_PREFER_TEMPADDR:
- error = ip6_getpcbopt(in6p, optname, sopt);
+ error = ip6_getpcbopt(inp, optname, sopt);
break;
case IPV6_MULTICAST_IF:
case IPV6_MULTICAST_HOPS:
case IPV6_MULTICAST_LOOP:
case IPV6_MSFILTER:
- error = ip6_getmoptions(in6p, sopt);
+ error = ip6_getmoptions(inp, sopt);
break;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
case IPV6_IPSEC_POLICY:
if (IPSEC_ENABLED(ipv6)) {
- error = IPSEC_PCBCTL(ipv6, in6p, sopt);
+ error = IPSEC_PCBCTL(ipv6, inp, sopt);
break;
}
/* FALLTHROUGH */
@@ -2166,7 +2283,7 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0, optval, optlen;
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
- struct inpcb *in6p = sotoinpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int level, op, optname;
level = sopt->sopt_level;
@@ -2198,22 +2315,25 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
sizeof(optval));
if (error)
break;
- if ((optval % 2) != 0) {
- /* the API assumes even offset values */
+ if (optval < -1 || (optval % 2) != 0) {
+ /*
+ * The API assumes non-negative even offset
+ * values or -1 as a special value.
+ */
error = EINVAL;
} else if (so->so_proto->pr_protocol ==
IPPROTO_ICMPV6) {
if (optval != icmp6off)
error = EINVAL;
} else
- in6p->in6p_cksum = optval;
+ inp->in6p_cksum = optval;
break;
case SOPT_GET:
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
optval = icmp6off;
else
- optval = in6p->in6p_cksum;
+ optval = inp->in6p_cksum;
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
@@ -2312,16 +2432,16 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
#define GET_PKTOPT_VAR(field, lenexpr) do { \
if (pktopt && pktopt->field) { \
- INP_RUNLOCK(in6p); \
+ INP_RUNLOCK(inp); \
optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK); \
malloc_optdata = true; \
- INP_RLOCK(in6p); \
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
- INP_RUNLOCK(in6p); \
+ INP_RLOCK(inp); \
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
+ INP_RUNLOCK(inp); \
free(optdata, M_TEMP); \
return (ECONNRESET); \
} \
- pktopt = in6p->in6p_outputopts; \
+ pktopt = inp->in6p_outputopts; \
if (pktopt && pktopt->field) { \
optdatalen = min(lenexpr, sopt->sopt_valsize); \
bcopy(&pktopt->field, optdata, optdatalen); \
@@ -2340,7 +2460,7 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
pktopt->field->sa_len)
static int
-ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
+ip6_getpcbopt(struct inpcb *inp, int optname, struct sockopt *sopt)
{
void *optdata = NULL;
bool malloc_optdata = false;
@@ -2352,8 +2472,8 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
struct ip6_pktopts *pktopt;
- INP_RLOCK(in6p);
- pktopt = in6p->in6p_outputopts;
+ INP_RLOCK(inp);
+ pktopt = inp->in6p_outputopts;
switch (optname) {
case IPV6_PKTINFO:
@@ -2413,10 +2533,10 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
#ifdef DIAGNOSTIC
panic("ip6_getpcbopt: unexpected option\n");
#endif
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
return (ENOPROTOOPT);
}
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
error = sooptcopyout(sopt, optdata, optdatalen);
if (malloc_optdata)
@@ -3098,23 +3218,23 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
* Compute IPv6 extension header length.
*/
int
-ip6_optlen(struct inpcb *in6p)
+ip6_optlen(struct inpcb *inp)
{
int len;
- if (!in6p->in6p_outputopts)
+ if (!inp->in6p_outputopts)
return 0;
len = 0;
#define elen(x) \
(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
- len += elen(in6p->in6p_outputopts->ip6po_hbh);
- if (in6p->in6p_outputopts->ip6po_rthdr)
+ len += elen(inp->in6p_outputopts->ip6po_hbh);
+ if (inp->in6p_outputopts->ip6po_rthdr)
/* dest1 is valid with rthdr only */
- len += elen(in6p->in6p_outputopts->ip6po_dest1);
- len += elen(in6p->in6p_outputopts->ip6po_rthdr);
- len += elen(in6p->in6p_outputopts->ip6po_dest2);
+ len += elen(inp->in6p_outputopts->ip6po_dest1);
+ len += elen(inp->in6p_outputopts->ip6po_rthdr);
+ len += elen(inp->in6p_outputopts->ip6po_dest2);
return len;
#undef elen
}
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index f235572d..be748b31 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -68,6 +68,7 @@
#include <sys/epoch.h>
+struct ip6asfrag;
/*
* IP6 reassembly queue structure. Each fragment
* being reassembled is attached to one of these structures.
@@ -83,25 +84,10 @@ struct ip6q {
struct ip6q *ip6q_next;
struct ip6q *ip6q_prev;
int ip6q_unfrglen; /* len of unfragmentable part */
-#ifdef notyet
- u_char *ip6q_nxtp;
-#endif
int ip6q_nfrag; /* # of fragments */
struct label *ip6q_label;
};
-struct ip6asfrag {
- struct ip6asfrag *ip6af_down;
- struct ip6asfrag *ip6af_up;
- struct mbuf *ip6af_m;
- int ip6af_offset; /* offset in ip6af_m to next header */
- int ip6af_frglen; /* fragmentable part length */
- int ip6af_off; /* fragment offset */
- u_int16_t ip6af_mff; /* more fragment bit in frag off */
-};
-
-#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
-
/*
* IP6 reinjecting structure.
*/
@@ -110,6 +96,7 @@ struct ip6_direct_ctx {
uint32_t ip6dc_off; /* offset to next header */
};
+#if defined(_NETINET6_IN6_VAR_H_) && defined(_KERNEL)
/*
* Structure attached to inpcb.in6p_moptions and
* passed to ip6_output when IPv6 multicast options are in use.
@@ -119,13 +106,11 @@ struct ip6_moptions {
struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */
u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */
u_char im6o_multicast_loop; /* 1 >= hear sends if a member */
- u_short im6o_num_memberships; /* no. memberships this socket */
- u_short im6o_max_memberships; /* max memberships this socket */
- struct in6_multi **im6o_membership; /* group memberships */
- struct in6_mfilter *im6o_mfilters; /* source filters */
- struct epoch_context imo6_epoch_ctx;
+ struct ip6_mfilter_head im6o_head; /* group membership list */
};
-
+#else
+struct ip6_moptions;
+#endif
/*
* Control options for outgoing packets
*/
@@ -208,6 +193,7 @@ struct ip6stat {
uint64_t ip6s_localout; /* total ip packets generated here */
uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */
uint64_t ip6s_reassembled; /* total packets reassembled ok */
+ uint64_t ip6s_atomicfrags; /* atomic fragments */
uint64_t ip6s_fragmented; /* datagrams successfully fragmented */
uint64_t ip6s_ofragments; /* output fragments created */
uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
@@ -299,12 +285,6 @@ VNET_DECLARE(int, ip6_v6only);
VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */
VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */
-VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
- * queue */
-extern int ip6_maxfrags; /* Maximum fragments in reassembly
- * queue */
-VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */
-VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
@@ -318,9 +298,6 @@ VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension
VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_mrouter VNET(ip6_mrouter)
#define V_ip6_sendredirects VNET(ip6_sendredirects)
-#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
-#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
-#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
@@ -346,13 +323,20 @@ VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope
* zone when unspecified */
#define V_ip6_use_defzone VNET(ip6_use_defzone)
-VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */
-#define V_inet6_pfil_hook VNET(inet6_pfil_hook)
+VNET_DECLARE(struct pfil_head *, inet6_pfil_head);
+#define V_inet6_pfil_head VNET(inet6_pfil_head)
+#define PFIL_INET6_NAME "inet6"
+
#ifdef IPSTEALTH
VNET_DECLARE(int, ip6stealth);
#define V_ip6stealth VNET(ip6stealth)
#endif
+#ifdef EXPERIMENTAL
+VNET_DECLARE(int, nd6_ignore_ipv6_only_ra);
+#define V_nd6_ignore_ipv6_only_ra VNET(nd6_ignore_ipv6_only_ra)
+#endif
+
extern struct pr_usrreqs rip6_usrreqs;
struct sockopt;
@@ -407,7 +391,6 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
int route6_input(struct mbuf **, int *, int);
-void frag6_set_bucketsize(void);
void frag6_init(void);
int frag6_input(struct mbuf **, int *, int);
void frag6_slowtimo(void);
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index b00f03ef..cc946f67 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -112,7 +112,7 @@ static void mli_delete_locked(const struct ifnet *);
static void mld_dispatch_packet(struct mbuf *);
static void mld_dispatch_queue(struct mbufq *, int);
static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
-static void mld_fasttimo_vnet(void);
+static void mld_fasttimo_vnet(struct in6_multi_head *inmh);
static int mld_handle_state_change(struct in6_multi *,
struct mld_ifsoftc *);
static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
@@ -141,14 +141,15 @@ static int mld_v2_enqueue_group_record(struct mbufq *,
struct in6_multi *, const int, const int, const int,
const int);
static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
- struct mbuf *, const int, const int);
+ struct mbuf *, struct mldv2_query *, const int, const int);
static int mld_v2_merge_state_changes(struct in6_multi *,
struct mbufq *);
static void mld_v2_process_group_timers(struct in6_multi_head *,
struct mbufq *, struct mbufq *,
struct in6_multi *, const int);
static int mld_v2_process_group_query(struct in6_multi *,
- struct mld_ifsoftc *mli, int, struct mbuf *, const int);
+ struct mld_ifsoftc *mli, int, struct mbuf *,
+ struct mldv2_query *, const int);
static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
@@ -245,6 +246,10 @@ static int mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
&mld_v1enable, 0, "Enable fallback to MLDv1");
+static int mld_v2enable = 1;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
+ &mld_v2enable, 0, "Enable MLDv2");
+
static int mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
&mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
@@ -535,45 +540,48 @@ out:
* XXX This routine is also bitten by unlocked ifma_protospec access.
*/
void
-mld_ifdetach(struct ifnet *ifp)
+mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
{
+ struct epoch_tracker et;
struct mld_ifsoftc *mli;
- struct ifmultiaddr *ifma, *next;
+ struct ifmultiaddr *ifma;
struct in6_multi *inm;
- struct in6_multi_head inmh;
CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
if_name(ifp));
- SLIST_INIT(&inmh);
IN6_MULTI_LIST_LOCK_ASSERT();
MLD_LOCK();
mli = MLD_IFINFO(ifp);
- if (mli->mli_version == MLD_VERSION_2) {
- IF_ADDR_WLOCK(ifp);
- restart:
- CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
- continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
- if (inm->in6m_state == MLD_LEAVING_MEMBER) {
- in6m_disconnect(inm);
- in6m_rele_locked(&inmh, inm);
- ifma->ifma_protospec = NULL;
- }
+ IF_ADDR_WLOCK(ifp);
+ /*
+ * Extract list of in6_multi associated with the detaching ifp
+ * which the PF_INET6 layer is about to release.
+ */
+ NET_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
+ continue;
+ in6m_disconnect_locked(inmh, inm);
+
+ if (mli->mli_version == MLD_VERSION_2) {
in6m_clear_recorded(inm);
- if (__predict_false(ifma6_restart)) {
- ifma6_restart = false;
- goto restart;
+
+ /*
+ * We need to release the final reference held
+ * for issuing the INCLUDE {}.
+ */
+ if (inm->in6m_state == MLD_LEAVING_MEMBER) {
+ inm->in6m_state = MLD_NOT_MEMBER;
+ in6m_rele_locked(inmh, inm);
}
}
- IF_ADDR_WUNLOCK(ifp);
}
-
+ NET_EPOCH_EXIT(et);
+ IF_ADDR_WUNLOCK(ifp);
MLD_UNLOCK();
- in6m_release_list_deferred(&inmh);
}
/*
@@ -630,6 +638,7 @@ static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
/*const*/ struct mld_hdr *mld)
{
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
struct mld_ifsoftc *mli;
struct in6_multi *inm;
@@ -697,7 +706,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (timer == 0)
timer = 1;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
if (is_general_query) {
/*
* For each reporting group joined on this
@@ -706,10 +715,9 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
ifp, if_name(ifp));
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
mld_v1_update_group(inm, timer);
}
} else {
@@ -729,7 +737,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
in6_clearscope(&mld->mld_addr);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
MLD_UNLOCK();
IN6_MULTI_LIST_UNLOCK();
@@ -799,16 +807,16 @@ mld_v1_update_group(struct in6_multi *inm, const int timer)
* Process a received MLDv2 general, group-specific or
* group-and-source-specific query.
*
- * Assumes that the query header has been pulled up to sizeof(mldv2_query).
+ * Assumes that mld points to a struct mldv2_query which is stored in
+ * contiguous memory.
*
* Return 0 if successful, otherwise an appropriate error code is returned.
*/
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
- struct mbuf *m, const int off, const int icmp6len)
+ struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
{
struct mld_ifsoftc *mli;
- struct mldv2_query *mld;
struct in6_multi *inm;
uint32_t maxdelay, nsrc, qqi;
int is_general_query;
@@ -818,7 +826,12 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
char ip6tbuf[INET6_ADDRSTRLEN];
#endif
- is_general_query = 0;
+ if (!mld_v2enable) {
+ CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
+ ip6_sprintf(ip6tbuf, &ip6->ip6_src),
+ ifp, if_name(ifp));
+ return (0);
+ }
/*
* RFC3810 Section 6.2: MLD queries must originate from
@@ -831,9 +844,9 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
return (0);
}
- CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
+ is_general_query = 0;
- mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
+ CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
if (maxdelay >= 32768) {
@@ -926,6 +939,8 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
V_interface_timers_running6 = 1;
}
} else {
+ struct epoch_tracker et;
+
/*
* MLDv2 Group-specific or Group-and-source-specific Query.
*
@@ -934,10 +949,10 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* Queries for groups we are not a member of on this
* link are simply ignored.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
if (inm == NULL) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto out_locked;
}
if (nsrc > 0) {
@@ -945,7 +960,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
&V_mld_gsrdelay)) {
CTR1(KTR_MLD, "%s: GS query throttled.",
__func__);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
goto out_locked;
}
}
@@ -959,11 +974,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* group-specific or group-and-source query.
*/
if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
- mld_v2_process_group_query(inm, mli, timer, m, off);
+ mld_v2_process_group_query(inm, mli, timer, m, mld, off);
/* XXX Clear embedded scope ID as userland won't expect it. */
in6_clearscope(&mld->mld_addr);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
out_locked:
@@ -980,9 +995,8 @@ out_locked:
*/
static int
mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
- int timer, struct mbuf *m0, const int off)
+ int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
{
- struct mldv2_query *mld;
int retval;
uint16_t nsrc;
@@ -990,7 +1004,6 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
MLD_LOCK_ASSERT();
retval = 0;
- mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
switch (inm->in6m_state) {
case MLD_NOT_MEMBER:
@@ -1010,6 +1023,15 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
nsrc = ntohs(mld->mld_numsrc);
+ /* Length should be checked by calling function. */
+ KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
+ m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
+ nsrc * sizeof(struct in6_addr),
+ ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
+ m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
+ nsrc * sizeof(struct in6_addr), m0));
+
+
/*
* Deal with group-specific queries upfront.
* If any group query is already pending, purge any recorded
@@ -1051,28 +1073,20 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
* report for those sources.
*/
if (inm->in6m_nsrc > 0) {
- struct mbuf *m;
- uint8_t *sp;
+ struct in6_addr srcaddr;
int i, nrecorded;
int soff;
- m = m0;
soff = off + sizeof(struct mldv2_query);
nrecorded = 0;
for (i = 0; i < nsrc; i++) {
- sp = mtod(m, uint8_t *) + soff;
- retval = in6m_record_source(inm,
- (const struct in6_addr *)sp);
+ m_copydata(m0, soff, sizeof(struct in6_addr),
+ (caddr_t)&srcaddr);
+ retval = in6m_record_source(inm, &srcaddr);
if (retval < 0)
break;
nrecorded += retval;
soff += sizeof(struct in6_addr);
- if (soff >= m->m_len) {
- soff = soff - m->m_len;
- m = m->m_next;
- if (m == NULL)
- break;
- }
}
if (nrecorded > 0) {
CTR1(KTR_MLD,
@@ -1098,6 +1112,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
/*const*/ struct mld_hdr *mld)
{
struct in6_addr src, dst;
+ struct epoch_tracker et;
struct in6_ifaddr *ia;
struct in6_multi *inm;
#ifdef KTR
@@ -1173,7 +1188,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
IN6_MULTI_LIST_LOCK();
MLD_LOCK();
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
/*
* MLDv1 report suppression.
@@ -1221,7 +1236,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
}
out_locked:
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
MLD_UNLOCK();
IN6_MULTI_LIST_UNLOCK();
@@ -1281,8 +1296,8 @@ mld_input(struct mbuf *m, int off, int icmp6len)
if (mld_v1_input_query(ifp, ip6, mld) != 0)
return (0);
} else if (icmp6len >= sizeof(struct mldv2_query)) {
- if (mld_v2_input_query(ifp, ip6, m, off,
- icmp6len) != 0)
+ if (mld_v2_input_query(ifp, ip6, m,
+ (struct mldv2_query *)mld, off, icmp6len) != 0)
return (0);
}
break;
@@ -1311,15 +1326,19 @@ mld_input(struct mbuf *m, int off, int icmp6len)
void
mld_fasttimo(void)
{
+ struct in6_multi_head inmh;
VNET_ITERATOR_DECL(vnet_iter);
+ SLIST_INIT(&inmh);
+
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- mld_fasttimo_vnet();
+ mld_fasttimo_vnet(&inmh);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
+ in6m_release_list_deferred(&inmh);
}
/*
@@ -1328,15 +1347,15 @@ mld_fasttimo(void)
* VIMAGE: Assume caller has set up our curvnet.
*/
static void
-mld_fasttimo_vnet(void)
+mld_fasttimo_vnet(struct in6_multi_head *inmh)
{
+ struct epoch_tracker et;
struct mbufq scq; /* State-change packets */
struct mbufq qrq; /* Query response packets */
struct ifnet *ifp;
struct mld_ifsoftc *mli;
- struct ifmultiaddr *ifma, *next;
- struct in6_multi *inm, *tinm;
- struct in6_multi_head inmh;
+ struct ifmultiaddr *ifma;
+ struct in6_multi *inm;
int uri_fasthz;
uri_fasthz = 0;
@@ -1351,7 +1370,6 @@ mld_fasttimo_vnet(void)
!V_state_change_timers_running6)
return;
- SLIST_INIT(&inmh);
IN6_MULTI_LIST_LOCK();
MLD_LOCK();
@@ -1397,25 +1415,20 @@ mld_fasttimo_vnet(void)
}
IF_ADDR_WLOCK(ifp);
- restart:
- CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
+ NET_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
switch (mli->mli_version) {
case MLD_VERSION_1:
- mld_v1_process_group_timer(&inmh, inm);
+ mld_v1_process_group_timer(inmh, inm);
break;
case MLD_VERSION_2:
- mld_v2_process_group_timers(&inmh, &qrq,
+ mld_v2_process_group_timers(inmh, &qrq,
&scq, inm, uri_fasthz);
break;
}
- if (__predict_false(ifma6_restart)) {
- ifma6_restart = false;
- goto restart;
- }
}
IF_ADDR_WUNLOCK(ifp);
@@ -1429,9 +1442,8 @@ mld_fasttimo_vnet(void)
* IF_ADDR_LOCK internally as well as
* ip6_output() to transmit a packet.
*/
- SLIST_FOREACH_SAFE(inm, &inmh, in6m_nrele, tinm) {
- SLIST_REMOVE_HEAD(&inmh,
- in6m_nrele);
+ while ((inm = SLIST_FIRST(inmh)) != NULL) {
+ SLIST_REMOVE_HEAD(inmh, in6m_defer);
(void)mld_v1_transmit_report(inm,
MLD_LISTENER_REPORT);
}
@@ -1439,14 +1451,9 @@ mld_fasttimo_vnet(void)
case MLD_VERSION_2:
mld_dispatch_queue(&qrq, 0);
mld_dispatch_queue(&scq, 0);
-
- /*
- * Free the in_multi reference(s) for
- * this lifecycle.
- */
- in6m_release_list_deferred(&inmh);
break;
}
+ NET_EPOCH_EXIT(et);
}
out_locked:
@@ -1486,8 +1493,7 @@ mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
case MLD_REPORTING_MEMBER:
if (report_timer_expired) {
inm->in6m_state = MLD_IDLE_MEMBER;
- in6m_disconnect(inm);
- in6m_rele_locked(inmh, inm);
+ SLIST_INSERT_HEAD(inmh, inm, in6m_defer);
}
break;
case MLD_G_QUERY_PENDING_MEMBER:
@@ -1611,7 +1617,7 @@ mld_v2_process_group_timers(struct in6_multi_head *inmh,
if (inm->in6m_state == MLD_LEAVING_MEMBER &&
inm->in6m_scrv == 0) {
inm->in6m_state = MLD_NOT_MEMBER;
- in6m_disconnect(inm);
+ in6m_disconnect_locked(inmh, inm);
in6m_rele_locked(inmh, inm);
}
}
@@ -1656,10 +1662,11 @@ mld_set_version(struct mld_ifsoftc *mli, const int version)
static void
mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
{
- struct ifmultiaddr *ifma, *next;
+ struct epoch_tracker et;
+ struct in6_multi_head inmh;
+ struct ifmultiaddr *ifma;
struct ifnet *ifp;
struct in6_multi *inm;
- struct in6_multi_head inmh;
CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
mli->mli_ifp, if_name(mli->mli_ifp));
@@ -1682,12 +1689,11 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
ifp = mli->mli_ifp;
IF_ADDR_WLOCK(ifp);
- restart:
- CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
+ NET_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
continue;
- inm = (struct in6_multi *)ifma->ifma_protospec;
switch (inm->in6m_state) {
case MLD_NOT_MEMBER:
case MLD_SILENT_MEMBER:
@@ -1702,9 +1708,9 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
* version, we need to release the final
* reference held for issuing the INCLUDE {}.
*/
- in6m_disconnect(inm);
+ if (inm->in6m_refcount == 1)
+ in6m_disconnect_locked(&inmh, inm);
in6m_rele_locked(&inmh, inm);
- ifma->ifma_protospec = NULL;
/* FALLTHROUGH */
case MLD_G_QUERY_PENDING_MEMBER:
case MLD_SG_QUERY_PENDING_MEMBER:
@@ -1720,11 +1726,8 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
mbufq_drain(&inm->in6m_scq);
break;
}
- if (__predict_false(ifma6_restart)) {
- ifma6_restart = false;
- goto restart;
- }
}
+ NET_EPOCH_EXIT(et);
IF_ADDR_WUNLOCK(ifp);
in6m_release_list_deferred(&inmh);
}
@@ -1897,6 +1900,14 @@ mld_change_state(struct in6_multi *inm, const int delay)
error = 0;
/*
+ * Check if the in6_multi has already been disconnected.
+ */
+ if (inm->in6m_ifp == NULL) {
+ CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
+ return (0);
+ }
+
+ /*
* Try to detect if the upper layer just asked us to change state
* for an interface which has now gone away.
*/
@@ -2006,6 +2017,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
if (mli->mli_version == MLD_VERSION_2 &&
inm->in6m_state == MLD_LEAVING_MEMBER) {
inm->in6m_refcount--;
+ MPASS(inm->in6m_refcount > 0);
}
inm->in6m_state = MLD_REPORTING_MEMBER;
@@ -2985,6 +2997,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
static void
mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
{
+ struct epoch_tracker et;
struct ifmultiaddr *ifma;
struct ifnet *ifp;
struct in6_multi *inm;
@@ -3007,13 +3020,11 @@ mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
ifp = mli->mli_ifp;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_INET6 ||
- ifma->ifma_protospec == NULL)
+ inm = in6m_ifmultiaddr_get_inm(ifma);
+ if (inm == NULL)
continue;
-
- inm = (struct in6_multi *)ifma->ifma_protospec;
KASSERT(ifp == inm->in6m_ifp,
("%s: inconsistent ifp", __func__));
@@ -3038,7 +3049,7 @@ mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
send:
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h
index 166c2055..8dc2ffa4 100644
--- a/freebsd/sys/netinet6/mld6_var.h
+++ b/freebsd/sys/netinet6/mld6_var.h
@@ -160,12 +160,13 @@ struct mld_ifsoftc {
#define MLD_IFINFO(ifp) \
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo)
+struct in6_multi_head;
int mld_change_state(struct in6_multi *, const int);
struct mld_ifsoftc *
mld_domifattach(struct ifnet *);
void mld_domifdetach(struct ifnet *);
void mld_fasttimo(void);
-void mld_ifdetach(struct ifnet *);
+void mld_ifdetach(struct ifnet *, struct in6_multi_head *);
int mld_input(struct mbuf *, int, int);
void mld_slowtimo(void);
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index f065815c..201b4d40 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/eventhandler.h>
#include <sys/callout.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -115,7 +116,7 @@ VNET_DEFINE(int, nd6_debug) = 1;
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-static eventhandler_tag lle_event_eh, iflladdr_event_eh;
+static eventhandler_tag lle_event_eh, iflladdr_event_eh, ifnet_link_event_eh;
VNET_DEFINE(struct nd_drhead, nd_defrouter);
VNET_DEFINE(struct nd_prhead, nd_prefix);
@@ -235,6 +236,8 @@ nd6_init(void)
NULL, EVENTHANDLER_PRI_ANY);
iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ ifnet_link_event_eh = EVENTHANDLER_REGISTER(ifnet_link_event,
+ nd6_ifnet_link_event, NULL, EVENTHANDLER_PRI_ANY);
}
}
@@ -246,6 +249,7 @@ nd6_destroy()
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
if (IS_DEFAULT_VNET(curvnet)) {
+ EVENTHANDLER_DEREGISTER(ifnet_link_event, ifnet_link_event_eh);
EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
}
@@ -300,9 +304,10 @@ nd6_ifattach(struct ifnet *ifp)
void
nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
{
+ struct epoch_tracker et;
struct ifaddr *ifa, *next;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -310,7 +315,7 @@ nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
/* stop DAD processing */
nd6_dad_stop(ifa);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
free(nd, M_IP6NDP);
}
@@ -898,6 +903,7 @@ nd6_timer(void *arg)
struct nd_prhead prl;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
+ struct ifnet *ifp;
struct in6_ifaddr *ia6, *nia6;
uint64_t genid;
@@ -994,14 +1000,15 @@ nd6_timer(void *arg)
* Check status of the interface. If it is down,
* mark the address as tentative for future DAD.
*/
- if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
- (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
- == 0 ||
- (ND_IFINFO(ia6->ia_ifp)->flags &
- ND6_IFF_IFDISABLED) != 0) {
+ ifp = ia6->ia_ifp;
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0 &&
+ ((ifp->if_flags & IFF_UP) == 0 ||
+ (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)){
ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
ia6->ia6_flags |= IN6_IFF_TENTATIVE;
}
+
/*
* A new RA might have made a deprecated address
* preferred.
@@ -1064,12 +1071,13 @@ restart:
static int
regen_tmpaddr(struct in6_ifaddr *ia6)
{
+ struct epoch_tracker et;
struct ifaddr *ifa;
struct ifnet *ifp;
struct in6_ifaddr *public_ifa6 = NULL;
ifp = ia6->ia_ifa.ifa_ifp;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct in6_ifaddr *it6;
@@ -1110,7 +1118,7 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
}
if (public_ifa6 != NULL)
ifa_ref(&public_ifa6->ia_ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (public_ifa6 != NULL) {
int e;
@@ -1345,17 +1353,19 @@ restart:
* a p2p interface, the address should be a neighbor.
*/
if (ifp->if_flags & IFF_POINTOPOINT) {
- IF_ADDR_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sin6_family)
continue;
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) {
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return 1;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -1379,6 +1389,7 @@ restart:
int
nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
+ struct epoch_tracker et;
struct llentry *lle;
int rc = 0;
@@ -1390,12 +1401,12 @@ nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
* Even if the address matches none of our addresses, it might be
* in the neighbor cache.
*/
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
LLE_RUNLOCK(lle);
rc = 1;
}
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (rc);
}
@@ -1624,6 +1635,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
struct in6_ndireq *ndi = (struct in6_ndireq *)data;
struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
+ struct epoch_tracker et;
int error = 0;
if (ifp->if_afdata[AF_INET6] == NULL)
@@ -1688,7 +1700,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* do not clear ND6_IFF_IFDISABLED.
* See RFC 4862, Section 5.4.5.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1697,7 +1709,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ifa != NULL) {
/* LLA is duplicated. */
@@ -1718,7 +1730,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
if (V_ip6_dad_count > 0 &&
(ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead,
ifa_link) {
if (ifa->ifa_addr->sa_family !=
@@ -1727,7 +1739,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
ia = (struct in6_ifaddr *)ifa;
ia->ia6_flags |= IN6_IFF_TENTATIVE;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
}
@@ -1746,7 +1758,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* address is assigned, and IFF_UP, try to
* assign one.
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead,
ifa_link) {
if (ifa->ifa_addr->sa_family !=
@@ -1756,7 +1768,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ifa != NULL)
/* No LLA is configured. */
in6_ifattach(ifp, NULL);
@@ -1833,9 +1845,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
return (error);
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(&nb_addr, 0, ifp);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ln == NULL) {
error = EINVAL;
@@ -1960,6 +1972,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
int flags;
uint16_t router = 0;
struct sockaddr_in6 sin6;
+ struct epoch_tracker et;
struct mbuf *chain = NULL;
u_char linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
@@ -1984,9 +1997,9 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* description on it in NS section (RFC 2461 7.2.3).
*/
flags = lladdr ? LLE_EXCLUSIVE : 0;
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(from, flags, ifp);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
is_newentry = 0;
if (ln == NULL) {
flags |= LLE_EXCLUSIVE;
@@ -2128,13 +2141,14 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
static void
nd6_slowtimo(void *arg)
{
+ struct epoch_tracker et;
CURVNET_SET((struct vnet *) arg);
struct nd_ifinfo *nd6if;
struct ifnet *ifp;
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
- IFNET_RLOCK_NOSLEEP();
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp->if_afdata[AF_INET6] == NULL)
continue;
@@ -2151,7 +2165,7 @@ nd6_slowtimo(void *arg)
nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
}
}
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
@@ -2244,6 +2258,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
struct llentry **plle)
{
+ struct epoch_tracker et;
struct llentry *ln = NULL;
const struct sockaddr_in6 *dst6;
@@ -2272,7 +2287,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
}
}
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED,
ifp);
if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
@@ -2292,11 +2307,11 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
*plle = ln;
LLE_WUNLOCK(ln);
}
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
} else if (plle && ln)
LLE_WUNLOCK(ln);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle));
}
@@ -2330,9 +2345,11 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
* or an anycast address(i.e. not a multicast).
*/
if (lle == NULL) {
- IF_AFDATA_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
/*
* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h
index 7544d23c..ffc88cb5 100644
--- a/freebsd/sys/netinet6/nd6.h
+++ b/freebsd/sys/netinet6/nd6.h
@@ -91,7 +91,10 @@ struct nd_ifinfo {
#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
#define ND6_IFF_NO_DAD 0x100
#ifdef EXPERIMENTAL
+/* XXX: not related to ND. */
#define ND6_IFF_IPV6_ONLY 0x200 /* draft-ietf-6man-ipv6only-flag */
+#define ND6_IFF_IPV6_ONLY_MANUAL 0x400
+#define ND6_IFF_IPV6_ONLY_MASK (ND6_IFF_IPV6_ONLY|ND6_IFF_IPV6_ONLY_MANUAL)
#endif
#ifdef _KERNEL
@@ -473,6 +476,7 @@ void nd6_dad_stop(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
+void nd6_ifnet_link_event(void *, struct ifnet *, int);
void defrouter_reset(void);
void defrouter_select_fib(int fibnum);
void defrouter_select(void);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index 49810020..136fbecc 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/libkern.h>
#include <sys/lock.h>
@@ -615,6 +616,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
void
nd6_na_input(struct mbuf *m, int off, int icmp6len)
{
+ struct epoch_tracker et;
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_neighbor_advert *nd_na;
@@ -742,9 +744,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* If no neighbor cache entry is found, NA SHOULD silently be
* discarded.
*/
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ln == NULL) {
goto freeit;
}
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index 59868383..0ba1e416 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -108,6 +108,10 @@ VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
+#ifdef EXPERIMENTAL
+VNET_DEFINE(int, nd6_ignore_ipv6_only_ra) = 1;
+#endif
+
/* RTPREF_MEDIUM has to be 0! */
#define RTPREF_HIGH 1
#define RTPREF_MEDIUM 0
@@ -210,7 +214,7 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
/*
* An initial update routine for draft-ietf-6man-ipv6only-flag.
* We need to iterate over all default routers for the given
- * interface to see whether they are all advertising the "6"
+ * interface to see whether they are all advertising the "S"
* (IPv6-Only) flag. If they do set, otherwise unset, the
* interface flag we later use to filter on.
*/
@@ -218,7 +222,15 @@ static void
defrtr_ipv6_only_ifp(struct ifnet *ifp)
{
struct nd_defrouter *dr;
- bool ipv6_only;
+ bool ipv6_only, ipv6_only_old;
+#ifdef INET
+ struct epoch_tracker et;
+ struct ifaddr *ifa;
+ bool has_ipv4_addr;
+#endif
+
+ if (V_nd6_ignore_ipv6_only_ra != 0)
+ return;
ipv6_only = true;
ND6_RLOCK();
@@ -229,13 +241,78 @@ defrtr_ipv6_only_ifp(struct ifnet *ifp)
ND6_RUNLOCK();
IF_AFDATA_WLOCK(ifp);
+ ipv6_only_old = ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY;
+ IF_AFDATA_WUNLOCK(ifp);
+
+ /* If nothing changed, we have an early exit. */
+ if (ipv6_only == ipv6_only_old)
+ return;
+
+#ifdef INET
+ /*
+ * Should we want to set the IPV6-ONLY flag, check if the
+ * interface has a non-0/0 and non-link-local IPv4 address
+ * configured on it. If it has we will assume working
+ * IPv4 operations and will clear the interface flag.
+ */
+ has_ipv4_addr = false;
+ if (ipv6_only) {
+ NET_EPOCH_ENTER(et);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ if (in_canforward(
+ satosin(ifa->ifa_addr)->sin_addr)) {
+ has_ipv4_addr = true;
+ break;
+ }
+ }
+ NET_EPOCH_EXIT(et);
+ }
+ if (ipv6_only && has_ipv4_addr) {
+ log(LOG_NOTICE, "%s rcvd RA w/ IPv6-Only flag set but has IPv4 "
+ "configured, ignoring IPv6-Only flag.\n", ifp->if_xname);
+ ipv6_only = false;
+ }
+#endif
+
+ IF_AFDATA_WLOCK(ifp);
if (ipv6_only)
ND_IFINFO(ifp)->flags |= ND6_IFF_IPV6_ONLY;
else
ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY;
IF_AFDATA_WUNLOCK(ifp);
+
+#ifdef notyet
+ /* Send notification of flag change. */
+#endif
+}
+
+static void
+defrtr_ipv6_only_ipf_down(struct ifnet *ifp)
+{
+
+ IF_AFDATA_WLOCK(ifp);
+ ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY;
+ IF_AFDATA_WUNLOCK(ifp);
}
+#endif /* EXPERIMENTAL */
+
+void
+nd6_ifnet_link_event(void *arg __unused, struct ifnet *ifp, int linkstate)
+{
+
+ /*
+ * XXX-BZ we might want to trigger re-evaluation of our default router
+ * availability. E.g., on link down the default router might be
+ * unreachable but a different interface might still have connectivity.
+ */
+
+#ifdef EXPERIMENTAL
+ if (linkstate == LINK_STATE_DOWN)
+ defrtr_ipv6_only_ipf_down(ifp);
#endif
+}
/*
* Receive Router Advertisement Message.
@@ -513,11 +590,13 @@ nd6_rtmsg(int cmd, struct rtentry *rt)
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
ifp = rt->rt_ifp;
if (ifp != NULL) {
- IF_ADDR_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
info.rti_info[RTAX_IFP] = ifa->ifa_addr;
ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
} else
ifa = NULL;
@@ -791,6 +870,7 @@ defrouter_del(struct nd_defrouter *dr)
void
defrouter_select_fib(int fibnum)
{
+ struct epoch_tracker et;
struct nd_defrouter *dr, *selected_dr, *installed_dr;
struct llentry *ln = NULL;
@@ -817,14 +897,14 @@ defrouter_select_fib(int fibnum)
*/
selected_dr = installed_dr = NULL;
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- IF_AFDATA_RLOCK(dr->ifp);
+ NET_EPOCH_ENTER(et);
if (selected_dr == NULL && dr->ifp->if_fib == fibnum &&
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
selected_dr = dr;
defrouter_ref(selected_dr);
}
- IF_AFDATA_RUNLOCK(dr->ifp);
+ NET_EPOCH_EXIT(et);
if (ln != NULL) {
LLE_RUNLOCK(ln);
ln = NULL;
@@ -868,7 +948,7 @@ defrouter_select_fib(int fibnum)
}
}
} else if (installed_dr != NULL) {
- IF_AFDATA_RLOCK(installed_dr->ifp);
+ NET_EPOCH_ENTER(et);
if ((ln = nd6_lookup(&installed_dr->rtaddr, 0,
installed_dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln) &&
@@ -877,7 +957,7 @@ defrouter_select_fib(int fibnum)
defrouter_rele(selected_dr);
selected_dr = installed_dr;
}
- IF_AFDATA_RUNLOCK(installed_dr->ifp);
+ NET_EPOCH_EXIT(et);
if (ln != NULL)
LLE_RUNLOCK(ln);
}
@@ -1273,6 +1353,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
int auth;
struct in6_addrlifetime lt6_tmp;
char ip6buf[INET6_ADDRSTRLEN];
+ struct epoch_tracker et;
auth = 0;
if (m) {
@@ -1386,7 +1467,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
* consider autoconfigured addresses while RFC2462 simply said
* "address".
*/
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
struct in6_ifaddr *ifa6;
u_int32_t remaininglifetime;
@@ -1509,7 +1590,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
ifa6->ia6_lifetime = lt6_tmp;
ifa6->ia6_updatetime = time_uptime;
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
if (ia6_match == NULL && new->ndpr_vltime) {
int ifidlen;
@@ -1598,6 +1679,7 @@ end:
static struct nd_pfxrouter *
find_pfxlist_reachable_router(struct nd_prefix *pr)
{
+ struct epoch_tracker et;
struct nd_pfxrouter *pfxrtr;
struct llentry *ln;
int canreach;
@@ -1605,9 +1687,9 @@ find_pfxlist_reachable_router(struct nd_prefix *pr)
ND6_LOCK_ASSERT();
LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
- IF_AFDATA_RLOCK(pfxrtr->router->ifp);
+ NET_EPOCH_ENTER(et);
ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
- IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
+ NET_EPOCH_EXIT(et);
if (ln == NULL)
continue;
canreach = ND6_IS_LLINFO_PROBREACH(ln);
@@ -1814,8 +1896,7 @@ restart:
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
- static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
- struct rib_head *rnh;
+ struct sockaddr_dl sdl;
struct rtentry *rt;
struct sockaddr_in6 mask6;
u_long rtflags;
@@ -1830,6 +1911,12 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
mask6.sin6_addr = pr->ndpr_mask;
rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
+ bzero(&sdl, sizeof(struct sockaddr_dl));
+ sdl.sdl_len = sizeof(struct sockaddr_dl);
+ sdl.sdl_family = AF_LINK;
+ sdl.sdl_type = ifa->ifa_ifp->if_type;
+ sdl.sdl_index = ifa->ifa_ifp->if_index;
+
if(V_rt_add_addr_allfibs) {
fibnum = 0;
maxfib = rt_numfibs;
@@ -1842,26 +1929,13 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
rt = NULL;
error = in6_rtrequest(RTM_ADD,
- (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
+ (struct sockaddr *)&pr->ndpr_prefix, (struct sockaddr *)&sdl,
(struct sockaddr *)&mask6, rtflags, &rt, fibnum);
if (error == 0) {
KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
"error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
error, pr, ifa));
-
- rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
- /* XXX what if rhn == NULL? */
- RIB_WLOCK(rnh);
RT_LOCK(rt);
- if (rt_setgate(rt, rt_key(rt),
- (struct sockaddr *)&null_sdl) == 0) {
- struct sockaddr_dl *dl;
-
- dl = (struct sockaddr_dl *)rt->rt_gateway;
- dl->sdl_type = rt->rt_ifp->if_type;
- dl->sdl_index = rt->rt_ifp->if_index;
- }
- RIB_WUNLOCK(rnh);
nd6_rtmsg(RTM_ADD, rt);
RT_UNLOCK(rt);
pr->ndpr_stateflags |= NDPRF_ONLINK;
@@ -1946,15 +2020,17 @@ nd6_prefix_onlink(struct nd_prefix *pr)
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
if (ifa == NULL) {
+ struct epoch_tracker et;
+
/* XXX: freebsd does not have ifa_ifwithaf */
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family == AF_INET6) {
ifa_ref(ifa);
break;
}
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
/* should we care about ia6_flags? */
}
if (ifa == NULL) {
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index 73d0832a..b4aa9664 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -163,7 +163,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct ifnet *ifp;
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct inpcb *in6p;
+ struct inpcb *inp;
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
@@ -176,18 +176,18 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif;
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
/* XXX inp locking */
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
- if (in6p->inp_ip_p &&
- in6p->inp_ip_p != proto)
+ if (inp->inp_ip_p &&
+ inp->inp_ip_p != proto)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
continue;
if (last != NULL) {
struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
@@ -225,25 +225,32 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
INP_RUNLOCK(last);
last = NULL;
}
- INP_RLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED))
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
goto skip_2;
- if (jailed_without_vnet(in6p->inp_cred)) {
+ if (jailed_without_vnet(inp->inp_cred)) {
/*
* Allow raw socket in jail to receive multicast;
* assume process had PRIV_NETINET_RAW at attach,
* and fall through into normal filter path if so.
*/
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
- prison_check_ip6(in6p->inp_cred,
+ prison_check_ip6(inp->inp_cred,
&ip6->ip6_dst) != 0)
goto skip_2;
}
- if (in6p->in6p_cksum != -1) {
+ if (inp->in6p_cksum != -1) {
RIP6STAT_INC(rip6s_isum);
- if (in6_cksum(m, proto, *offp,
+ if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
+ in6_cksum(m, proto, *offp,
m->m_pkthdr.len - *offp)) {
RIP6STAT_INC(rip6s_badsum);
+ /*
+ * Drop the received message, don't send an
+ * ICMP6 message. Set proto to IPPROTO_NONE
+ * to achieve that.
+ */
+ proto = IPPROTO_NONE;
goto skip_2;
}
}
@@ -253,7 +260,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
* should receive it, as multicast filtering is now
* the responsibility of the transport layer.
*/
- if (in6p->in6p_moptions &&
+ if (inp->in6p_moptions &&
IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
/*
* If the incoming datagram is for MLD, allow it
@@ -283,7 +290,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
mcaddr.sin6_family = AF_INET6;
mcaddr.sin6_addr = ip6->ip6_dst;
- blocked = im6o_mc_filter(in6p->in6p_moptions,
+ blocked = im6o_mc_filter(inp->in6p_moptions,
ifp,
(struct sockaddr *)&mcaddr,
(struct sockaddr *)&fromsa);
@@ -293,10 +300,10 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
goto skip_2;
}
}
- last = in6p;
+ last = inp;
continue;
skip_2:
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
@@ -389,7 +396,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
struct m_tag *mtag;
struct sockaddr_in6 *dstsock;
struct ip6_hdr *ip6;
- struct inpcb *in6p;
+ struct inpcb *inp;
u_int plen = m->m_pkthdr.len;
int error = 0;
struct ip6_pktopts opt, *optp;
@@ -406,18 +413,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
control = va_arg(ap, struct mbuf *);
va_end(ap);
- in6p = sotoinpcb(so);
- INP_WLOCK(in6p);
+ inp = sotoinpcb(so);
+ INP_WLOCK(inp);
if (control != NULL) {
if ((error = ip6_setpktopts(control, &opt,
- in6p->in6p_outputopts, so->so_cred,
+ inp->in6p_outputopts, so->so_cred,
so->so_proto->pr_protocol)) != 0) {
goto bad;
}
optp = &opt;
} else
- optp = in6p->in6p_outputopts;
+ optp = inp->in6p_outputopts;
/*
* Check and convert scope zone ID into internal form.
@@ -460,12 +467,12 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
/*
* Source address selection.
*/
- error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred,
+ error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred,
scope_ambiguous, &in6a, &hlim);
if (error)
goto bad;
- error = prison_check_ip6(in6p->inp_cred, &in6a);
+ error = prison_check_ip6(inp->inp_cred, &in6a);
if (error != 0)
goto bad;
ip6->ip6_src = in6a;
@@ -476,18 +483,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
* Fill in the rest of the IPv6 header fields.
*/
ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
- (in6p->inp_flow & IPV6_FLOWINFO_MASK);
+ (inp->inp_flow & IPV6_FLOWINFO_MASK);
ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
(IPV6_VERSION & IPV6_VERSION_MASK);
/*
* ip6_plen will be filled in ip6_output, so not fill it here.
*/
- ip6->ip6_nxt = in6p->inp_ip_p;
+ ip6->ip6_nxt = inp->inp_ip_p;
ip6->ip6_hlim = hlim;
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
- in6p->in6p_cksum != -1) {
+ inp->in6p_cksum != -1) {
struct mbuf *n;
int off;
u_int16_t *p;
@@ -496,8 +503,8 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
off = offsetof(struct icmp6_hdr, icmp6_cksum);
else
- off = in6p->in6p_cksum;
- if (plen < off + 1) {
+ off = inp->in6p_cksum;
+ if (plen < off + 2) {
error = EINVAL;
goto bad;
}
@@ -532,7 +539,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
}
}
- error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p);
+ error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp);
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
if (oifp)
icmp6_ifoutstat_inc(oifp, type, code);
@@ -551,7 +558,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
ip6_clearpktopts(&opt, -1);
m_freem(control);
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
return (error);
}
@@ -729,6 +736,7 @@ rip6_disconnect(struct socket *so)
static int
rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+ struct epoch_tracker et;
struct inpcb *inp;
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
struct ifaddr *ifa = NULL;
@@ -746,20 +754,20 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
return (error);
- NET_EPOCH_ENTER();
+ NET_EPOCH_ENTER(et);
if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
(ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (EADDRNOTAVAIL);
}
if (ifa != NULL &&
((struct in6_ifaddr *)ifa)->ia6_flags &
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
return (EADDRNOTAVAIL);
}
- NET_EPOCH_EXIT();
+ NET_EPOCH_EXIT(et);
INP_INFO_WLOCK(&V_ripcbinfo);
INP_WLOCK(inp);
inp->in6p_laddr = addr->sin6_addr;
diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c
index 64b866dd..d556f3a4 100644
--- a/freebsd/sys/netinet6/scope6.c
+++ b/freebsd/sys/netinet6/scope6.c
@@ -211,19 +211,20 @@ scope6_set(struct ifnet *ifp, struct scope6_id *idlist)
static int
scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
{
+ struct epoch_tracker et;
struct scope6_id *sid;
/* We only need to lock the interface's afdata for SID() to work. */
- IF_AFDATA_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
sid = SID(ifp);
if (sid == NULL) { /* paranoid? */
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (EINVAL);
}
*idlist = *sid;
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
return (0);
}
@@ -420,10 +421,12 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
zoneid = ifp->if_index;
in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
} else if (scope != IPV6_ADDR_SCOPE_GLOBAL) {
- IF_AFDATA_RLOCK(ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
sid = SID(ifp);
zoneid = sid->s6id_list[scope];
- IF_AFDATA_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
}
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index 6a3391ee..dd320c32 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -522,7 +522,6 @@ sctp_must_try_again:
static int
sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED)
{
- struct in6pcb *inp6;
int error;
struct sctp_inpcb *inp;
uint32_t vrf_id = SCTP_DEFAULT_VRFID;
@@ -544,18 +543,17 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU
inp = (struct sctp_inpcb *)so->so_pcb;
SCTP_INP_WLOCK(inp);
inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */
- inp6 = (struct in6pcb *)inp;
- inp6->inp_vflag |= INP_IPV6;
- inp6->in6p_hops = -1; /* use kernel default */
- inp6->in6p_cksum = -1; /* just to be sure */
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ inp->ip_inp.inp.in6p_hops = -1; /* use kernel default */
+ inp->ip_inp.inp.in6p_cksum = -1; /* just to be sure */
#ifdef INET
/*
* XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6
* socket as well, because the socket may be bound to an IPv6
* wildcard address, which may match an IPv4-mapped IPv6 address.
*/
- inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
+ inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
#endif
SCTP_INP_WUNLOCK(inp);
return (0);
@@ -565,7 +563,6 @@ static int
sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
{
struct sctp_inpcb *inp;
- struct in6pcb *inp6;
int error;
inp = (struct sctp_inpcb *)so->so_pcb;
@@ -597,16 +594,15 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
return (EINVAL);
}
}
- inp6 = (struct in6pcb *)inp;
- inp6->inp_vflag &= ~INP_IPV4;
- inp6->inp_vflag |= INP_IPV6;
- if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) {
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV4;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp) == 0)) {
switch (addr->sa_family) {
#ifdef INET
case AF_INET:
/* binding v4 addr to v6 socket, so reset flags */
- inp6->inp_vflag |= INP_IPV4;
- inp6->inp_vflag &= ~INP_IPV6;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV6;
break;
#endif
#ifdef INET6
@@ -617,15 +613,15 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
sin6_p = (struct sockaddr_in6 *)addr;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) {
- inp6->inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
}
#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6_p);
- inp6->inp_vflag |= INP_IPV4;
- inp6->inp_vflag &= ~INP_IPV6;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV6;
error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p);
return (error);
}
@@ -687,7 +683,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *p)
{
struct sctp_inpcb *inp;
- struct in6pcb *inp6;
#ifdef INET
struct sockaddr_in6 *sin6;
@@ -704,7 +699,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
- inp6 = (struct in6pcb *)inp;
/*
* For the TCP model we may get a NULL addr, if we are a connected
* socket thats ok.
@@ -724,7 +718,7 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, we discard datagrams destined to a
* v4 addr or v4-mapped addr
@@ -793,14 +787,10 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
struct sctp_inpcb *inp;
struct sctp_tcb *stcb;
#ifdef INET
- struct in6pcb *inp6;
struct sockaddr_in6 *sin6;
union sctp_sockstore store;
#endif
-#ifdef INET
- inp6 = (struct in6pcb *)so->so_pcb;
-#endif
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET);
@@ -858,7 +848,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, ignore connections destined to a v4
* addr or v4-mapped addr
@@ -912,7 +902,8 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
inp->sctp_ep.pre_open_stream_count,
- inp->sctp_ep.port, p);
+ inp->sctp_ep.port, p,
+ SCTP_INITIALIZE_AUTH_PARAMS);
SCTP_ASOC_CREATE_UNLOCK(inp);
if (stcb == NULL) {
/* Gak! no memory */
@@ -925,10 +916,6 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
-
- /* initialize authentication parameters for the assoc */
- sctp_initialize_auth_params(inp, stcb);
-
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
return (error);
@@ -1103,10 +1090,10 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
- struct in6pcb *inp6 = sotoin6pcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error;
- if (inp6 == NULL) {
+ if (inp == NULL) {
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
@@ -1139,10 +1126,10 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
- struct in6pcb *inp6 = sotoin6pcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error;
- if (inp6 == NULL) {
+ if (inp == NULL) {
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index e0fcd06d..270b4880 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -744,9 +744,24 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
* - when we are not bound to an address and source port (it is
* in6_pcbsetport() which will require the write lock).
*/
+retry:
if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
inp->inp_lport == 0)) {
INP_WLOCK(inp);
+ /*
+ * In case we lost a race and another thread bound addr/port
+ * on the inp we cannot keep the wlock (which still would be
+ * fine) as further down, based on these values we make
+ * decisions for the pcbinfo lock. If the locks are not in
+ * synch the assertions on unlock will fire, hence we go for
+ * one retry loop.
+ */
+ if (sin6 != NULL &&
+ (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+ inp->inp_lport != 0)) {
+ INP_WUNLOCK(inp);
+ goto retry;
+ }
unlock_inp = UH_WLOCKED;
} else {
INP_RLOCK(inp);
diff --git a/freebsd/sys/netipsec/ipsec.c b/freebsd/sys/netipsec/ipsec.c
index 116557ed..f5c3967c 100644
--- a/freebsd/sys/netipsec/ipsec.c
+++ b/freebsd/sys/netipsec/ipsec.c
@@ -218,6 +218,11 @@ SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel,
SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat,
ipsec4stat, "IPsec IPv4 statistics.");
+struct timeval ipsec_warn_interval = { .tv_sec = 1, .tv_usec = 0 };
+SYSCTL_TIMEVAL_SEC(_net_inet_ipsec, OID_AUTO, crypto_warn_interval, CTLFLAG_RW,
+ &ipsec_warn_interval,
+ "Delay in seconds between warnings of deprecated IPsec crypto algorithms.");
+
#ifdef REGRESSION
/*
* When set to 1, IPsec will send packets with the same sequence number.
@@ -1320,6 +1325,8 @@ ok:
__func__, replay->overflow,
ipsec_sa2str(sav, buf, sizeof(buf))));
}
+
+ replay->count++;
return (0);
}
diff --git a/freebsd/sys/netipsec/ipsec.h b/freebsd/sys/netipsec/ipsec.h
index eed2d077..b9b6eca2 100644
--- a/freebsd/sys/netipsec/ipsec.h
+++ b/freebsd/sys/netipsec/ipsec.h
@@ -287,6 +287,8 @@ VNET_DECLARE(int, crypto_support);
VNET_DECLARE(int, async_crypto);
VNET_DECLARE(int, natt_cksum_policy);
+extern struct timeval ipsec_warn_interval;
+
#define IPSECSTAT_INC(name) \
VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1)
#define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev)
diff --git a/freebsd/sys/netipsec/key.c b/freebsd/sys/netipsec/key.c
index 4b79f881..062fbf28 100644
--- a/freebsd/sys/netipsec/key.c
+++ b/freebsd/sys/netipsec/key.c
@@ -286,7 +286,7 @@ key_addrprotohash(const union sockaddr_union *src,
#endif
default:
hval = 0;
- ipseclog((LOG_DEBUG, "%s: unknown address family %d",
+ ipseclog((LOG_DEBUG, "%s: unknown address family %d\n",
__func__, dst->sa.sa_family));
}
return (hval);
@@ -2041,8 +2041,8 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
key_freesp(&newsp);
} else {
key_freesp(&newsp);
- ipseclog((LOG_DEBUG, "%s: a SP entry exists already.",
- __func__));
+ ipseclog((LOG_DEBUG,
+ "%s: a SP entry exists already.\n", __func__));
return (key_senderror(so, m, EEXIST));
}
}
@@ -4762,34 +4762,10 @@ key_random()
{
u_long value;
- key_randomfill(&value, sizeof(value));
+ arc4random_buf(&value, sizeof(value));
return value;
}
-void
-key_randomfill(void *p, size_t l)
-{
- size_t n;
- u_long v;
- static int warn = 1;
-
- n = 0;
- n = (size_t)read_random(p, (u_int)l);
- /* last resort */
- while (n < l) {
- v = random();
- bcopy(&v, (u_int8_t *)p + n,
- l - n < sizeof(v) ? l - n : sizeof(v));
- n += sizeof(v);
-
- if (warn) {
- printf("WARNING: pseudo-random number generator "
- "used for IPsec processing\n");
- warn = 0;
- }
- }
-}
-
/*
* map SADB_SATYPE_* to IPPROTO_*.
* if satype == SADB_SATYPE then satype is mapped to ~0.
@@ -5435,7 +5411,7 @@ key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
}
/* saidx should match with SA. */
if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_MODE_REQID) == 0) {
- ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u",
+ ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u\n",
__func__, ntohl(sav->spi)));
key_freesav(&sav);
return key_senderror(so, m, ESRCH);
@@ -6911,14 +6887,14 @@ key_acqdone(const struct secasindex *saidx, uint32_t seq)
if (acq != NULL) {
if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY) == 0) {
ipseclog((LOG_DEBUG,
- "%s: Mismatched saidx for ACQ %u", __func__, seq));
+ "%s: Mismatched saidx for ACQ %u\n", __func__, seq));
acq = NULL;
} else {
acq->created = 0;
}
} else {
ipseclog((LOG_DEBUG,
- "%s: ACQ %u is not found.", __func__, seq));
+ "%s: ACQ %u is not found.\n", __func__, seq));
}
ACQ_UNLOCK();
if (acq == NULL)
@@ -7190,7 +7166,7 @@ key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
return key_senderror(so, m, ENOBUFS);
MGETHDR(n, M_NOWAIT, MT_DATA);
- if (len > MHLEN) {
+ if (n != NULL && len > MHLEN) {
if (!(MCLGET(n, M_NOWAIT))) {
m_freem(n);
n = NULL;
diff --git a/freebsd/sys/netipsec/key.h b/freebsd/sys/netipsec/key.h
index 7d7ae69f..2ee7c208 100644
--- a/freebsd/sys/netipsec/key.h
+++ b/freebsd/sys/netipsec/key.h
@@ -78,7 +78,6 @@ void key_unregister_ifnet(struct secpolicy **, u_int);
void key_delete_xform(const struct xformsw *);
extern u_long key_random(void);
-extern void key_randomfill(void *, size_t);
extern void key_freereg(struct socket *);
extern int key_parse(struct mbuf *, struct socket *);
extern void key_init(void);
diff --git a/freebsd/sys/netipsec/xform_ah.c b/freebsd/sys/netipsec/xform_ah.c
index 84ba6c16..618fbd9b 100644
--- a/freebsd/sys/netipsec/xform_ah.c
+++ b/freebsd/sys/netipsec/xform_ah.c
@@ -110,6 +110,7 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_ah, IPSECCTL_STATS, stats, struct ahstat,
#endif
static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */
+static struct timeval md5warn, ripewarn, kpdkmd5warn, kpdksha1warn;
static int ah_input_cb(struct cryptop*);
static int ah_output_cb(struct cryptop*);
@@ -186,6 +187,26 @@ ah_init0(struct secasvar *sav, struct xformsw *xsp, struct cryptoini *cria)
__func__, sav->alg_auth));
return EINVAL;
}
+
+ switch (sav->alg_auth) {
+ case SADB_AALG_MD5HMAC:
+ if (ratecheck(&md5warn, &ipsec_warn_interval))
+ gone_in(13, "MD5-HMAC authenticator for IPsec");
+ break;
+ case SADB_X_AALG_RIPEMD160HMAC:
+ if (ratecheck(&ripewarn, &ipsec_warn_interval))
+ gone_in(13, "RIPEMD160-HMAC authenticator for IPsec");
+ break;
+ case SADB_X_AALG_MD5:
+ if (ratecheck(&kpdkmd5warn, &ipsec_warn_interval))
+ gone_in(13, "Keyed-MD5 authenticator for IPsec");
+ break;
+ case SADB_X_AALG_SHA:
+ if (ratecheck(&kpdksha1warn, &ipsec_warn_interval))
+ gone_in(13, "Keyed-SHA1 authenticator for IPsec");
+ break;
+ }
+
/*
* Verify the replay state block allocation is consistent with
* the protocol type. We check here so we can make assumptions
diff --git a/freebsd/sys/netipsec/xform_esp.c b/freebsd/sys/netipsec/xform_esp.c
index f8473575..388fe499 100644
--- a/freebsd/sys/netipsec/xform_esp.c
+++ b/freebsd/sys/netipsec/xform_esp.c
@@ -96,6 +96,8 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_esp, IPSECCTL_STATS, stats,
struct espstat, espstat,
"ESP statistics (struct espstat, netipsec/esp_var.h");
+static struct timeval deswarn, blfwarn, castwarn, camelliawarn;
+
static int esp_input_cb(struct cryptop *op);
static int esp_output_cb(struct cryptop *crp);
@@ -158,6 +160,26 @@ esp_init(struct secasvar *sav, struct xformsw *xsp)
__func__));
return EINVAL;
}
+
+ switch (sav->alg_enc) {
+ case SADB_EALG_DESCBC:
+ if (ratecheck(&deswarn, &ipsec_warn_interval))
+ gone_in(13, "DES cipher for IPsec");
+ break;
+ case SADB_X_EALG_BLOWFISHCBC:
+ if (ratecheck(&blfwarn, &ipsec_warn_interval))
+ gone_in(13, "Blowfish cipher for IPsec");
+ break;
+ case SADB_X_EALG_CAST128CBC:
+ if (ratecheck(&castwarn, &ipsec_warn_interval))
+ gone_in(13, "CAST cipher for IPsec");
+ break;
+ case SADB_X_EALG_CAMELLIACBC:
+ if (ratecheck(&camelliawarn, &ipsec_warn_interval))
+ gone_in(13, "Camellia cipher for IPsec");
+ break;
+ }
+
/* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */
keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4;
if (txform->minkey > keylen || keylen > txform->maxkey) {
@@ -770,7 +792,7 @@ esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
*/
switch (sav->flags & SADB_X_EXT_PMASK) {
case SADB_X_EXT_PRAND:
- (void) read_random(pad, padding - 2);
+ arc4random_buf(pad, padding - 2);
break;
case SADB_X_EXT_PZERO:
bzero(pad, padding - 2);
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
index 7e966d0a..57fa7464 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_private.h
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
@@ -61,6 +61,7 @@ enum {
IP_FW_NGTEE,
IP_FW_NAT,
IP_FW_REASS,
+ IP_FW_NAT64,
};
/*
@@ -83,11 +84,20 @@ struct _ip6dn_args {
* efficient to pass variables around and extend the interface.
*/
struct ip_fw_args {
- struct mbuf *m; /* the mbuf chain */
- struct ifnet *oif; /* output interface */
- struct sockaddr_in *next_hop; /* forward address */
- struct sockaddr_in6 *next_hop6; /* ipv6 forward address */
-
+ uint32_t flags;
+#define IPFW_ARGS_ETHER 0x00010000 /* valid ethernet header */
+#define IPFW_ARGS_NH4 0x00020000 /* IPv4 next hop in hopstore */
+#define IPFW_ARGS_NH6 0x00040000 /* IPv6 next hop in hopstore */
+#define IPFW_ARGS_NH4PTR 0x00080000 /* IPv4 next hop in next_hop */
+#define IPFW_ARGS_NH6PTR 0x00100000 /* IPv6 next hop in next_hop6 */
+#define IPFW_ARGS_REF 0x00200000 /* valid ipfw_rule_ref */
+#define IPFW_ARGS_IN 0x00400000 /* called on input */
+#define IPFW_ARGS_OUT 0x00800000 /* called on output */
+#define IPFW_ARGS_IP4 0x01000000 /* belongs to v4 ISR */
+#define IPFW_ARGS_IP6 0x02000000 /* belongs to v6 ISR */
+#define IPFW_ARGS_DROP 0x04000000 /* drop it (dummynet) */
+#define IPFW_ARGS_LENMASK 0x0000ffff /* length of data in *mem */
+#define IPFW_ARGS_LENGTH(f) ((f) & IPFW_ARGS_LENMASK)
/*
* On return, it points to the matching rule.
* On entry, rule.slot > 0 means the info is valid and
@@ -95,45 +105,36 @@ struct ip_fw_args {
* If chain_id == chain->id && slot >0 then jump to that slot.
* Otherwise, we locate the first rule >= rulenum:rule_id
*/
- struct ipfw_rule_ref rule; /* match/restart info */
-
- struct ether_header *eh; /* for bridged packets */
-
- struct ipfw_flow_id f_id; /* grabbed from IP header */
- //uint32_t cookie; /* a cookie depending on rule action */
- struct inpcb *inp;
-
- struct _ip6dn_args dummypar; /* dummynet->ip6_output */
- union { /* store here if cannot use a pointer */
- struct sockaddr_in hopstore;
- struct sockaddr_in6 hopstore6;
+ struct ipfw_rule_ref rule; /* match/restart info */
+
+ struct ifnet *ifp; /* input/output interface */
+ struct inpcb *inp;
+ union {
+ /*
+ * next_hop[6] pointers can be used to point to next hop
+ * stored in rule's opcode to avoid copying into hopstore.
+ * Also, it is expected that all 0x1-0x10 flags are mutually
+ * exclusive.
+ */
+ struct sockaddr_in *next_hop;
+ struct sockaddr_in6 *next_hop6;
+ /* ipfw next hop storage */
+ struct sockaddr_in hopstore;
+ struct ip_fw_nh6 {
+ struct in6_addr sin6_addr;
+ uint32_t sin6_scope_id;
+ uint16_t sin6_port;
+ } hopstore6;
};
+ union {
+ struct mbuf *m; /* the mbuf chain */
+ void *mem; /* or memory pointer */
+ };
+ struct ipfw_flow_id f_id; /* grabbed from IP header */
};
MALLOC_DECLARE(M_IPFW);
-/*
- * Hooks sometime need to know the direction of the packet
- * (divert, dummynet, netgraph, ...)
- * We use a generic definition here, with bit0-1 indicating the
- * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the
- * specific protocol
- * indicating the protocol (if necessary)
- */
-enum {
- DIR_MASK = 0x3,
- DIR_OUT = 0,
- DIR_IN = 1,
- DIR_FWD = 2,
- DIR_DROP = 3,
- PROTO_LAYER2 = 0x4, /* set for layer 2 */
- /* PROTO_DEFAULT = 0, */
- PROTO_IPV4 = 0x08,
- PROTO_IPV6 = 0x10,
- PROTO_IFB = 0x0c, /* layer2 + ifbridge */
- /* PROTO_OLDBDG = 0x14, unused, old bridge */
-};
-
/* wrapper for freeing a packet, in case we need to do more work */
#ifndef FREE_PKT
#if defined(__linux__) || defined(_WIN32)
@@ -150,8 +151,8 @@ int ipfw_chk(struct ip_fw_args *args);
struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
u_int32_t, u_int32_t, int);
-/* attach (arg = 1) or detach (arg = 0) hooks */
-int ipfw_attach_hooks(int);
+int ipfw_attach_hooks(void);
+void ipfw_detach_hooks(void);
#ifdef NOTYET
void ipfw_nat_destroy(void);
#endif
@@ -162,10 +163,11 @@ struct ip_fw_chain;
void ipfw_bpf_init(int);
void ipfw_bpf_uninit(int);
+void ipfw_bpf_tap(u_char *, u_int);
+void ipfw_bpf_mtap(struct mbuf *);
void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
- struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
- u_short offset, uint32_t tablearg, struct ip *ip);
+ struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip);
VNET_DECLARE(u_int64_t, norule_counter);
#define V_norule_counter VNET(norule_counter)
VNET_DECLARE(int, verbose_limit);
@@ -296,6 +298,8 @@ struct ip_fw_chain {
void **srvstate; /* runtime service mappings */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t rwmtx;
+#else
+ struct rmlock rwmtx;
#endif
int static_len; /* total len of static rules (v0) */
uint32_t gencnt; /* NAT generation count */
@@ -436,23 +440,25 @@ struct ipfw_ifc {
#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p)
#else /* FreeBSD */
#define IPFW_LOCK_INIT(_chain) do { \
+ rm_init_flags(&(_chain)->rwmtx, "IPFW static rules", RM_RECURSE); \
rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
} while (0)
#define IPFW_LOCK_DESTROY(_chain) do { \
+ rm_destroy(&(_chain)->rwmtx); \
rw_destroy(&(_chain)->uh_lock); \
} while (0)
-#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED)
-#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED)
+#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED)
+#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED)
#define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker
-#define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker)
-#define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker)
-#define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock)
-#define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock)
-#define IPFW_PF_RLOCK(p)
-#define IPFW_PF_RUNLOCK(p)
+#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker)
+#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker)
+#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx)
+#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p)
+#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p)
#endif
#define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
@@ -659,6 +665,7 @@ struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
void ipfw_free_rule(struct ip_fw *rule);
int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx);
+ipfw_insn *ipfw_get_action(struct ip_fw *);
typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
ip_fw3_opheader *op3, struct sockopt_data *sd);
diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c
index 026d19a3..9d87cf67 100644
--- a/freebsd/sys/netpfil/pf/if_pfsync.c
+++ b/freebsd/sys/netpfil/pf/if_pfsync.c
@@ -266,7 +266,7 @@ static void pfsync_push(struct pfsync_bucket *);
static void pfsync_push_all(struct pfsync_softc *);
static void pfsyncintr(void *);
static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
- void *);
+ struct in_mfilter *imf);
static void pfsync_multicast_cleanup(struct pfsync_softc *);
static void pfsync_pointers_init(void);
static void pfsync_pointers_uninit(void);
@@ -337,6 +337,7 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
pfsync_buckets = mp_ncpus * 2;
sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
+ sc->sc_flags |= PFSYNCF_OK;
sc->sc_maxupdates = 128;
ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
@@ -364,7 +365,7 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
M_PFSYNC, M_ZERO | M_WAITOK);
for (c = 0; c < pfsync_buckets; c++) {
b = &sc->sc_buckets[c];
- mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF);
+ mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);
b->b_id = c;
b->b_sc = sc;
@@ -431,8 +432,7 @@ pfsync_clone_destroy(struct ifnet *ifp)
pfsync_drop(sc);
if_free(ifp);
- if (sc->sc_imo.imo_membership)
- pfsync_multicast_cleanup(sc);
+ pfsync_multicast_cleanup(sc);
mtx_destroy(&sc->sc_mtx);
mtx_destroy(&sc->sc_bulk_mtx);
@@ -1374,10 +1374,9 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSETPFSYNC:
{
- struct ip_moptions *imo = &sc->sc_imo;
+ struct in_mfilter *imf = NULL;
struct ifnet *sifp;
struct ip *ip;
- void *mship = NULL;
if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
return (error);
@@ -1397,8 +1396,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
pfsyncr.pfsyncr_syncpeer.s_addr ==
htonl(INADDR_PFSYNC_GROUP)))
- mship = malloc((sizeof(struct in_multi *) *
- IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
+ imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
PFSYNC_LOCK(sc);
if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
@@ -1420,8 +1418,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (sc->sc_sync_if)
if_rele(sc->sc_sync_if);
sc->sc_sync_if = NULL;
- if (imo->imo_membership)
- pfsync_multicast_cleanup(sc);
+ pfsync_multicast_cleanup(sc);
PFSYNC_UNLOCK(sc);
break;
}
@@ -1437,14 +1434,13 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
}
- if (imo->imo_membership)
- pfsync_multicast_cleanup(sc);
+ pfsync_multicast_cleanup(sc);
if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
- error = pfsync_multicast_setup(sc, sifp, mship);
+ error = pfsync_multicast_setup(sc, sifp, imf);
if (error) {
if_rele(sifp);
- free(mship, M_PFSYNC);
+ ip_mfilter_free(imf);
PFSYNC_UNLOCK(sc);
return (error);
}
@@ -2354,7 +2350,8 @@ pfsyncintr(void *arg)
}
static int
-pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
+pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
+ struct in_mfilter *imf)
{
struct ip_moptions *imo = &sc->sc_imo;
int error;
@@ -2362,16 +2359,14 @@ pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
if (!(ifp->if_flags & IFF_MULTICAST))
return (EADDRNOTAVAIL);
- imo->imo_membership = (struct in_multi **)mship;
- imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
imo->imo_multicast_vif = -1;
if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
- &imo->imo_membership[0])) != 0) {
- imo->imo_membership = NULL;
+ &imf->imf_inm)) != 0)
return (error);
- }
- imo->imo_num_memberships++;
+
+ ip_mfilter_init(&imo->imo_head);
+ ip_mfilter_insert(&imo->imo_head, imf);
imo->imo_multicast_ifp = ifp;
imo->imo_multicast_ttl = PFSYNC_DFLTTL;
imo->imo_multicast_loop = 0;
@@ -2383,10 +2378,13 @@ static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
struct ip_moptions *imo = &sc->sc_imo;
+ struct in_mfilter *imf;
- in_leavegroup(imo->imo_membership[0], NULL);
- free(imo->imo_membership, M_PFSYNC);
- imo->imo_membership = NULL;
+ while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
+ ip_mfilter_remove(&imo->imo_head, imf);
+ in_leavegroup(imf->imf_inm, NULL);
+ ip_mfilter_free(imf);
+ }
imo->imo_multicast_ifp = NULL;
}
@@ -2405,7 +2403,7 @@ pfsync_detach_ifnet(struct ifnet *ifp)
* is going away. We do need to ensure we don't try to do
* cleanup later.
*/
- sc->sc_imo.imo_membership = NULL;
+ ip_mfilter_init(&sc->sc_imo.imo_head);
sc->sc_imo.imo_multicast_ifp = NULL;
sc->sc_sync_if = NULL;
}
diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c
index 9b4653e2..c0f6459b 100644
--- a/freebsd/sys/netpfil/pf/pf.c
+++ b/freebsd/sys/netpfil/pf/pf.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
+#include <sys/gsb_crc32.h>
#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
@@ -93,8 +94,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
-
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -115,10 +114,12 @@ __FBSDID("$FreeBSD$");
*/
/* state tables */
-VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]);
+VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]);
VNET_DEFINE(struct pf_palist, pf_pabuf);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
+VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active);
VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
+VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive);
VNET_DEFINE(struct pf_kstatus, pf_status);
VNET_DEFINE(u_int32_t, ticket_altqs_active);
@@ -360,7 +361,7 @@ VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
counter_u64_add(s->rule.ptr->states_cur, -1); \
} while (0)
-static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
+MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
@@ -862,9 +863,13 @@ pf_initialize()
/* ALTQ */
TAILQ_INIT(&V_pf_altqs[0]);
TAILQ_INIT(&V_pf_altqs[1]);
+ TAILQ_INIT(&V_pf_altqs[2]);
+ TAILQ_INIT(&V_pf_altqs[3]);
TAILQ_INIT(&V_pf_pabuf);
V_pf_altqs_active = &V_pf_altqs[0];
- V_pf_altqs_inactive = &V_pf_altqs[1];
+ V_pf_altq_ifs_active = &V_pf_altqs[1];
+ V_pf_altqs_inactive = &V_pf_altqs[2];
+ V_pf_altq_ifs_inactive = &V_pf_altqs[3];
/* Send & overload+flush queues. */
STAILQ_INIT(&V_pf_sendqueue);
@@ -1560,7 +1565,7 @@ pf_state_expires(const struct pf_state *state)
if (!timeout)
timeout = V_pf_default_rule.timeout[state->timeout];
start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
- if (start) {
+ if (start && state->rule.ptr != &V_pf_default_rule) {
end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
states = counter_u64_fetch(state->rule.ptr->states_cur);
} else {
@@ -3210,7 +3215,7 @@ pf_tcp_iss(struct pf_pdesc *pd)
u_int32_t digest[4];
if (V_pf_tcp_secret_init == 0) {
- read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
+ arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
MD5Init(&V_pf_tcp_secret_ctx);
MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
sizeof(V_pf_tcp_secret));
@@ -4602,7 +4607,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
{
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
u_int16_t icmpid = 0, *icmpsum;
- u_int8_t icmptype;
+ u_int8_t icmptype, icmpcode;
int state_icmp = 0;
struct pf_state_key_cmp key;
@@ -4611,6 +4616,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
#ifdef INET
case IPPROTO_ICMP:
icmptype = pd->hdr.icmp->icmp_type;
+ icmpcode = pd->hdr.icmp->icmp_code;
icmpid = pd->hdr.icmp->icmp_id;
icmpsum = &pd->hdr.icmp->icmp_cksum;
@@ -4625,6 +4631,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
#ifdef INET6
case IPPROTO_ICMPV6:
icmptype = pd->hdr.icmp6->icmp6_type;
+ icmpcode = pd->hdr.icmp6->icmp6_code;
icmpid = pd->hdr.icmp6->icmp6_id;
icmpsum = &pd->hdr.icmp6->icmp6_cksum;
@@ -4823,6 +4830,23 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
#endif /* INET6 */
}
+ if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: BAD ICMP %d:%d outer dst: ",
+ icmptype, icmpcode);
+ pf_print_host(pd->src, 0, pd->af);
+ printf(" -> ");
+ pf_print_host(pd->dst, 0, pd->af);
+ printf(" inner src: ");
+ pf_print_host(pd2.src, 0, pd2.af);
+ printf(" -> ");
+ pf_print_host(pd2.dst, 0, pd2.af);
+ printf("\n");
+ }
+ REASON_SET(reason, PFRES_BADSTATE);
+ return (PF_DROP);
+ }
+
switch (pd2.proto) {
case IPPROTO_TCP: {
struct tcphdr th;
@@ -4879,7 +4903,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: BAD ICMP %d:%d ",
- icmptype, pd->hdr.icmp->icmp_code);
+ icmptype, icmpcode);
pf_print_host(pd->src, 0, pd->af);
printf(" -> ");
pf_print_host(pd->dst, 0, pd->af);
@@ -4892,7 +4916,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
} else {
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: OK ICMP %d:%d ",
- icmptype, pd->hdr.icmp->icmp_code);
+ icmptype, icmpcode);
pf_print_host(pd->src, 0, pd->af);
printf(" -> ");
pf_print_host(pd->dst, 0, pd->af);
@@ -5249,7 +5273,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
nk->addr[pd->didx].v4.s_addr,
0);
- break;
+ break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
@@ -6159,7 +6183,7 @@ done:
pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
(s->nat_rule.ptr->action == PF_RDR ||
s->nat_rule.ptr->action == PF_BINAT) &&
- (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+ IN_LOOPBACK(ntohl(pd.dst->v4.s_addr)))
m->m_flags |= M_SKIP_FIREWALL;
if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
@@ -6190,7 +6214,7 @@ done:
m->m_flags &= ~M_FASTFWD_OURS;
}
}
- ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT);
+ ip_divert_ptr(*m0, dir == PF_IN);
*m0 = NULL;
return (action);
@@ -6339,9 +6363,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
m = *m0; /* pf_normalize messes with m0 */
h = mtod(m, struct ip6_hdr *);
-#if 1
/*
- * we do not support jumbogram yet. if we keep going, zero ip6_plen
+ * we do not support jumbogram. if we keep going, zero ip6_plen
* will do something bad, so drop the packet for now.
*/
if (htons(h->ip6_plen) == 0) {
@@ -6349,7 +6372,6 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
REASON_SET(&reason, PFRES_NORM); /*XXX*/
goto done;
}
-#endif
pd.src = (struct pf_addr *)&h->ip6_src;
pd.dst = (struct pf_addr *)&h->ip6_dst;
diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c
index 4314bbce..44b6f7a3 100644
--- a/freebsd/sys/netpfil/pf/pf_if.c
+++ b/freebsd/sys/netpfil/pf/pf_if.c
@@ -302,13 +302,15 @@ pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
return (1);
if (rule_kif->pfik_group != NULL) {
- IF_ADDR_RLOCK(packet_kif->pfik_ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
if (p->ifgl_group == rule_kif->pfik_group) {
- IF_ADDR_RUNLOCK(packet_kif->pfik_ifp);
+ NET_EPOCH_EXIT(et);
return (1);
}
- IF_ADDR_RUNLOCK(packet_kif->pfik_ifp);
+ NET_EPOCH_EXIT(et);
}
@@ -475,11 +477,13 @@ pfi_kif_update(struct pfi_kif *kif)
/* again for all groups kif is member of */
if (kif->pfik_ifp != NULL) {
- IF_ADDR_RLOCK(kif->pfik_ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
pfi_kif_update((struct pfi_kif *)
ifgl->ifgl_group->ifg_pf_kif);
- IF_ADDR_RUNLOCK(kif->pfik_ifp);
+ NET_EPOCH_EXIT(et);
}
}
@@ -515,10 +519,12 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
if (kif->pfik_ifp != NULL)
pfi_instance_add(kif->pfik_ifp, net, flags);
else if (kif->pfik_group != NULL) {
- IFNET_RLOCK_NOSLEEP();
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
pfi_instance_add(ifgm->ifgm_ifp, net, flags);
- IFNET_RUNLOCK_NOSLEEP();
+ NET_EPOCH_EXIT(et);
}
if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
@@ -530,11 +536,12 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
static void
pfi_instance_add(struct ifnet *ifp, int net, int flags)
{
+ struct epoch_tracker et;
struct ifaddr *ia;
int got4 = 0, got6 = 0;
int net2, af;
- IF_ADDR_RLOCK(ifp);
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
if (ia->ifa_addr == NULL)
continue;
@@ -592,7 +599,7 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
else
pfi_address_add(ia->ifa_addr, af, net2);
}
- IF_ADDR_RUNLOCK(ifp);
+ NET_EPOCH_EXIT(et);
}
static void
@@ -760,15 +767,17 @@ pfi_skip_if(const char *filter, struct pfi_kif *p)
if (filter[n-1] >= '0' && filter[n-1] <= '9')
return (1); /* group names may not end in a digit */
if (p->pfik_ifp != NULL) {
- IF_ADDR_RLOCK(p->pfik_ifp);
+ struct epoch_tracker et;
+
+ NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(i, &p->pfik_ifp->if_groups, ifgl_next) {
if (!strncmp(i->ifgl_group->ifg_group, filter,
IFNAMSIZ)) {
- IF_ADDR_RUNLOCK(p->pfik_ifp);
+ NET_EPOCH_EXIT(et);
return (0); /* iface is in group "filter" */
}
}
- IF_ADDR_RUNLOCK(p->pfik_ifp);
+ NET_EPOCH_EXIT(et);
}
return (1);
}
diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c
index eaac7abc..06b308b5 100644
--- a/freebsd/sys/netpfil/pf/pf_ioctl.c
+++ b/freebsd/sys/netpfil/pf/pf_ioctl.c
@@ -48,11 +48,14 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_pf.h>
#include <sys/param.h>
+#include <sys/_bitset.h>
+#include <sys/bitset.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
+#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/jail.h>
#include <sys/kernel.h>
@@ -131,18 +134,40 @@ VNET_DEFINE_STATIC(int, pf_altq_running);
#define TAGID_MAX 50000
struct pf_tagname {
- TAILQ_ENTRY(pf_tagname) entries;
+ TAILQ_ENTRY(pf_tagname) namehash_entries;
+ TAILQ_ENTRY(pf_tagname) taghash_entries;
char name[PF_TAG_NAME_SIZE];
uint16_t tag;
int ref;
};
-TAILQ_HEAD(pf_tags, pf_tagname);
-#define V_pf_tags VNET(pf_tags)
-VNET_DEFINE(struct pf_tags, pf_tags);
-#define V_pf_qids VNET(pf_qids)
-VNET_DEFINE(struct pf_tags, pf_qids);
-static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names");
+struct pf_tagset {
+ TAILQ_HEAD(, pf_tagname) *namehash;
+ TAILQ_HEAD(, pf_tagname) *taghash;
+ unsigned int mask;
+ uint32_t seed;
+ BITSET_DEFINE(, TAGID_MAX) avail;
+};
+
+VNET_DEFINE(struct pf_tagset, pf_tags);
+#define V_pf_tags VNET(pf_tags)
+static unsigned int pf_rule_tag_hashsize;
+#define PF_RULE_TAG_HASH_SIZE_DEFAULT 128
+SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
+ &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
+ "Size of pf(4) rule tag hashtable");
+
+#ifdef ALTQ
+VNET_DEFINE(struct pf_tagset, pf_qids);
+#define V_pf_qids VNET(pf_qids)
+static unsigned int pf_queue_tag_hashsize;
+#define PF_QUEUE_TAG_HASH_SIZE_DEFAULT 128
+SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
+ &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
+ "Size of pf(4) queue tag hashtable");
+#endif
+VNET_DEFINE(uma_zone_t, pf_tag_z);
+#define V_pf_tag_z VNET(pf_tag_z)
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
@@ -150,9 +175,14 @@ static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif
-static u_int16_t tagname2tag(struct pf_tags *, char *);
+static void pf_init_tagset(struct pf_tagset *, unsigned int *,
+ unsigned int);
+static void pf_cleanup_tagset(struct pf_tagset *);
+static uint16_t tagname2hashindex(const struct pf_tagset *, const char *);
+static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t);
+static u_int16_t tagname2tag(struct pf_tagset *, char *);
static u_int16_t pf_tagname2tag(char *);
-static void tag_unref(struct pf_tags *, u_int16_t);
+static void tag_unref(struct pf_tagset *, u_int16_t);
#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
@@ -171,16 +201,16 @@ static void pf_tbladdr_copyout(struct pf_addr_wrap *);
* Wrapper functions for pfil(9) hooks
*/
#ifdef INET
-static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp);
-static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp);
+static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
+ int flags, void *ruleset __unused, struct inpcb *inp);
+static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
+ int flags, void *ruleset __unused, struct inpcb *inp);
#endif
#ifdef INET6
-static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp);
-static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, int flags, struct inpcb *inp);
+static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
+ int flags, void *ruleset __unused, struct inpcb *inp);
+static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
+ int flags, void *ruleset __unused, struct inpcb *inp);
#endif
static int hook_pf(void);
@@ -438,68 +468,141 @@ pf_free_rule(struct pf_rule *rule)
free(rule, M_PFRULE);
}
+static void
+pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
+ unsigned int default_size)
+{
+ unsigned int i;
+ unsigned int hashsize;
+
+ if (*tunable_size == 0 || !powerof2(*tunable_size))
+ *tunable_size = default_size;
+
+ hashsize = *tunable_size;
+ ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
+ M_WAITOK);
+ ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
+ M_WAITOK);
+ ts->mask = hashsize - 1;
+ ts->seed = arc4random();
+ for (i = 0; i < hashsize; i++) {
+ TAILQ_INIT(&ts->namehash[i]);
+ TAILQ_INIT(&ts->taghash[i]);
+ }
+ BIT_FILL(TAGID_MAX, &ts->avail);
+}
+
+static void
+pf_cleanup_tagset(struct pf_tagset *ts)
+{
+ unsigned int i;
+ unsigned int hashsize;
+ struct pf_tagname *t, *tmp;
+
+ /*
+ * Only need to clean up one of the hashes as each tag is hashed
+ * into each table.
+ */
+ hashsize = ts->mask + 1;
+ for (i = 0; i < hashsize; i++)
+ TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp)
+ uma_zfree(V_pf_tag_z, t);
+
+ free(ts->namehash, M_PFHASH);
+ free(ts->taghash, M_PFHASH);
+}
+
+static uint16_t
+tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
+{
+
+ return (murmur3_32_hash(tagname, strlen(tagname), ts->seed) & ts->mask);
+}
+
+static uint16_t
+tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
+{
+
+ return (tag & ts->mask);
+}
+
static u_int16_t
-tagname2tag(struct pf_tags *head, char *tagname)
+tagname2tag(struct pf_tagset *ts, char *tagname)
{
- struct pf_tagname *tag, *p = NULL;
- u_int16_t new_tagid = 1;
+ struct pf_tagname *tag;
+ u_int32_t index;
+ u_int16_t new_tagid;
PF_RULES_WASSERT();
- TAILQ_FOREACH(tag, head, entries)
+ index = tagname2hashindex(ts, tagname);
+ TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
if (strcmp(tagname, tag->name) == 0) {
tag->ref++;
return (tag->tag);
}
/*
+ * new entry
+ *
* to avoid fragmentation, we do a linear search from the beginning
- * and take the first free slot we find. if there is none or the list
- * is empty, append a new entry at the end.
+ * and take the first free slot we find.
*/
-
- /* new entry */
- if (!TAILQ_EMPTY(head))
- for (p = TAILQ_FIRST(head); p != NULL &&
- p->tag == new_tagid; p = TAILQ_NEXT(p, entries))
- new_tagid = p->tag + 1;
-
- if (new_tagid > TAGID_MAX)
+ new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
+ /*
+ * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
+ * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
+ * set. It may also return a bit number greater than TAGID_MAX due
+ * to rounding of the number of bits in the vector up to a multiple
+ * of the vector word size at declaration/allocation time.
+ */
+ if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
return (0);
+ /* Mark the tag as in use. Bits are 0-based for BIT_CLR() */
+ BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);
+
/* allocate and fill new struct pf_tagname */
- tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO);
+ tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
if (tag == NULL)
return (0);
strlcpy(tag->name, tagname, sizeof(tag->name));
tag->tag = new_tagid;
- tag->ref++;
+ tag->ref = 1;
- if (p != NULL) /* insert new entry before p */
- TAILQ_INSERT_BEFORE(p, tag, entries);
- else /* either list empty or no free slot in between */
- TAILQ_INSERT_TAIL(head, tag, entries);
+ /* Insert into namehash */
+ TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);
+ /* Insert into taghash */
+ index = tag2hashindex(ts, new_tagid);
+ TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);
+
return (tag->tag);
}
static void
-tag_unref(struct pf_tags *head, u_int16_t tag)
+tag_unref(struct pf_tagset *ts, u_int16_t tag)
{
- struct pf_tagname *p, *next;
-
+ struct pf_tagname *t;
+ uint16_t index;
+
PF_RULES_WASSERT();
- for (p = TAILQ_FIRST(head); p != NULL; p = next) {
- next = TAILQ_NEXT(p, entries);
- if (tag == p->tag) {
- if (--p->ref == 0) {
- TAILQ_REMOVE(head, p, entries);
- free(p, M_PFTAG);
+ index = tag2hashindex(ts, tag);
+ TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
+ if (tag == t->tag) {
+ if (--t->ref == 0) {
+ TAILQ_REMOVE(&ts->taghash[index], t,
+ taghash_entries);
+ index = tagname2hashindex(ts, t->name);
+ TAILQ_REMOVE(&ts->namehash[index], t,
+ namehash_entries);
+ /* Bits are 0-based for BIT_SET() */
+ BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
+ uma_zfree(V_pf_tag_z, t);
}
break;
}
- }
}
static u_int16_t
@@ -524,22 +627,25 @@ pf_qid_unref(u_int32_t qid)
static int
pf_begin_altq(u_int32_t *ticket)
{
- struct pf_altq *altq;
+ struct pf_altq *altq, *tmp;
int error = 0;
PF_RULES_WASSERT();
- /* Purge the old altq list */
- while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
- TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* Purge the old altq lists */
+ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
/* detach and destroy the discipline */
error = altq_remove(altq);
- } else
- pf_qid_unref(altq->qid);
+ }
+ free(altq, M_PFALTQ);
+ }
+ TAILQ_INIT(V_pf_altq_ifs_inactive);
+ TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
+ pf_qid_unref(altq->qid);
free(altq, M_PFALTQ);
}
+ TAILQ_INIT(V_pf_altqs_inactive);
if (error)
return (error);
*ticket = ++V_ticket_altqs_inactive;
@@ -550,24 +656,27 @@ pf_begin_altq(u_int32_t *ticket)
static int
pf_rollback_altq(u_int32_t ticket)
{
- struct pf_altq *altq;
+ struct pf_altq *altq, *tmp;
int error = 0;
PF_RULES_WASSERT();
if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
return (0);
- /* Purge the old altq list */
- while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
- TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* Purge the old altq lists */
+ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
/* detach and destroy the discipline */
error = altq_remove(altq);
- } else
- pf_qid_unref(altq->qid);
+ }
+ free(altq, M_PFALTQ);
+ }
+ TAILQ_INIT(V_pf_altq_ifs_inactive);
+ TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
+ pf_qid_unref(altq->qid);
free(altq, M_PFALTQ);
}
+ TAILQ_INIT(V_pf_altqs_inactive);
V_altqs_inactive_open = 0;
return (error);
}
@@ -575,8 +684,8 @@ pf_rollback_altq(u_int32_t ticket)
static int
pf_commit_altq(u_int32_t ticket)
{
- struct pf_altqqueue *old_altqs;
- struct pf_altq *altq;
+ struct pf_altqqueue *old_altqs, *old_altq_ifs;
+ struct pf_altq *altq, *tmp;
int err, error = 0;
PF_RULES_WASSERT();
@@ -586,14 +695,16 @@ pf_commit_altq(u_int32_t ticket)
/* swap altqs, keep the old. */
old_altqs = V_pf_altqs_active;
+ old_altq_ifs = V_pf_altq_ifs_active;
V_pf_altqs_active = V_pf_altqs_inactive;
+ V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
V_pf_altqs_inactive = old_altqs;
+ V_pf_altq_ifs_inactive = old_altq_ifs;
V_ticket_altqs_active = V_ticket_altqs_inactive;
/* Attach new disciplines */
- TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
/* attach the discipline */
error = altq_pfattach(altq);
if (error == 0 && V_pf_altq_running)
@@ -603,11 +714,9 @@ pf_commit_altq(u_int32_t ticket)
}
}
- /* Purge the old altq list */
- while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
- TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* Purge the old altq lists */
+ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
/* detach and destroy the discipline */
if (V_pf_altq_running)
error = pf_disable_altq(altq);
@@ -617,10 +726,15 @@ pf_commit_altq(u_int32_t ticket)
err = altq_remove(altq);
if (err != 0 && error == 0)
error = err;
- } else
- pf_qid_unref(altq->qid);
+ }
+ free(altq, M_PFALTQ);
+ }
+ TAILQ_INIT(V_pf_altq_ifs_inactive);
+ TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
+ pf_qid_unref(altq->qid);
free(altq, M_PFALTQ);
}
+ TAILQ_INIT(V_pf_altqs_inactive);
V_altqs_inactive_open = 0;
return (error);
@@ -677,14 +791,46 @@ pf_disable_altq(struct pf_altq *altq)
return (error);
}
+static int
+pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
+ struct pf_altq *altq)
+{
+ struct ifnet *ifp1;
+ int error = 0;
+
+ /* Deactivate the interface in question */
+ altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
+ if ((ifp1 = ifunit(altq->ifname)) == NULL ||
+ (remove && ifp1 == ifp)) {
+ altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
+ } else {
+ error = altq_add(ifp1, altq);
+
+ if (ticket != V_ticket_altqs_inactive)
+ error = EBUSY;
+
+ if (error)
+ free(altq, M_PFALTQ);
+ }
+
+ return (error);
+}
+
void
pf_altq_ifnet_event(struct ifnet *ifp, int remove)
{
- struct ifnet *ifp1;
struct pf_altq *a1, *a2, *a3;
u_int32_t ticket;
int error = 0;
+ /*
+ * No need to re-evaluate the configuration for events on interfaces
+ * that do not support ALTQ, as it's not possible for such
+ * interfaces to be part of the configuration.
+ */
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return;
+
/* Interrupt userland queue modifications */
if (V_altqs_inactive_open)
pf_rollback_altq(V_ticket_altqs_inactive);
@@ -694,7 +840,7 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove)
return;
/* Copy the current active set */
- TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
+ TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
if (a2 == NULL) {
error = ENOMEM;
@@ -702,41 +848,43 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove)
}
bcopy(a1, a2, sizeof(struct pf_altq));
- if (a2->qname[0] != 0) {
- if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
- error = EBUSY;
- free(a2, M_PFALTQ);
- break;
- }
- a2->altq_disc = NULL;
- TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) {
- if (strncmp(a3->ifname, a2->ifname,
- IFNAMSIZ) == 0 && a3->qname[0] == 0) {
- a2->altq_disc = a3->altq_disc;
- break;
- }
- }
- }
- /* Deactivate the interface in question */
- a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
- if ((ifp1 = ifunit(a2->ifname)) == NULL ||
- (remove && ifp1 == ifp)) {
- a2->local_flags |= PFALTQ_FLAG_IF_REMOVED;
- } else {
- error = altq_add(a2);
+ error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
+ if (error)
+ break;
- if (ticket != V_ticket_altqs_inactive)
- error = EBUSY;
+ TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
+ }
+ if (error)
+ goto out;
+ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
+ a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
+ if (a2 == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(a1, a2, sizeof(struct pf_altq));
- if (error) {
- free(a2, M_PFALTQ);
+ if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
+ error = EBUSY;
+ free(a2, M_PFALTQ);
+ break;
+ }
+ a2->altq_disc = NULL;
+ TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
+ if (strncmp(a3->ifname, a2->ifname,
+ IFNAMSIZ) == 0) {
+ a2->altq_disc = a3->altq_disc;
break;
}
}
+ error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
+ if (error)
+ break;
TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
}
+out:
if (error != 0)
pf_rollback_altq(ticket);
else
@@ -1214,6 +1362,28 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
return (0);
}
+
+static struct pf_altq *
+pf_altq_get_nth_active(u_int32_t n)
+{
+ struct pf_altq *altq;
+ u_int32_t nr;
+
+ nr = 0;
+ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
+ if (nr == n)
+ return (altq);
+ nr++;
+ }
+
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
+ if (nr == n)
+ return (altq);
+ nr++;
+ }
+
+ return (NULL);
+}
#endif /* ALTQ */
static int
@@ -2011,7 +2181,7 @@ relock_DIOCKILLSTATES:
break;
}
- p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK);
+ p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK | M_ZERO);
nr = 0;
for (i = 0; i <= pf_hashmask; i++) {
@@ -2273,9 +2443,8 @@ DIOCGETSTATES_full:
PF_RULES_WLOCK();
/* enable all altq interfaces on active list */
- TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
- if (altq->qname[0] == 0 && (altq->local_flags &
- PFALTQ_FLAG_IF_REMOVED) == 0) {
+ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
error = pf_enable_altq(altq);
if (error != 0)
break;
@@ -2293,9 +2462,8 @@ DIOCGETSTATES_full:
PF_RULES_WLOCK();
/* disable all altq interfaces on active list */
- TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
- if (altq->qname[0] == 0 && (altq->local_flags &
- PFALTQ_FLAG_IF_REMOVED) == 0) {
+ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
error = pf_disable_altq(altq);
if (error != 0)
break;
@@ -2340,9 +2508,9 @@ DIOCGETSTATES_full:
break;
}
altq->altq_disc = NULL;
- TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) {
+ TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
if (strncmp(a->ifname, altq->ifname,
- IFNAMSIZ) == 0 && a->qname[0] == 0) {
+ IFNAMSIZ) == 0) {
altq->altq_disc = a->altq_disc;
break;
}
@@ -2352,7 +2520,7 @@ DIOCGETSTATES_full:
if ((ifp = ifunit(altq->ifname)) == NULL)
altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
else
- error = altq_add(altq);
+ error = altq_add(ifp, altq);
if (error) {
PF_RULES_WUNLOCK();
@@ -2360,7 +2528,10 @@ DIOCGETSTATES_full:
break;
}
- TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
+ if (altq->qname[0] != 0)
+ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
+ else
+ TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
/* version error check done on import above */
pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
PF_RULES_WUNLOCK();
@@ -2374,6 +2545,8 @@ DIOCGETSTATES_full:
PF_RULES_RLOCK();
pa->nr = 0;
+ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
+ pa->nr++;
TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
pa->nr++;
pa->ticket = V_ticket_altqs_active;
@@ -2385,7 +2558,6 @@ DIOCGETSTATES_full:
case DIOCGETALTQV1: {
struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr;
struct pf_altq *altq;
- u_int32_t nr;
PF_RULES_RLOCK();
if (pa->ticket != V_ticket_altqs_active) {
@@ -2393,12 +2565,7 @@ DIOCGETSTATES_full:
error = EBUSY;
break;
}
- nr = 0;
- altq = TAILQ_FIRST(V_pf_altqs_active);
- while ((altq != NULL) && (nr < pa->nr)) {
- altq = TAILQ_NEXT(altq, entries);
- nr++;
- }
+ altq = pf_altq_get_nth_active(pa->nr);
if (altq == NULL) {
PF_RULES_RUNLOCK();
error = EBUSY;
@@ -2419,7 +2586,6 @@ DIOCGETSTATES_full:
case DIOCGETQSTATSV1: {
struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr;
struct pf_altq *altq;
- u_int32_t nr;
int nbytes;
u_int32_t version;
@@ -2430,12 +2596,7 @@ DIOCGETSTATES_full:
break;
}
nbytes = pq->nbytes;
- nr = 0;
- altq = TAILQ_FIRST(V_pf_altqs_active);
- while ((altq != NULL) && (nr < pq->nr)) {
- altq = TAILQ_NEXT(altq, entries);
- nr++;
- }
+ altq = pf_altq_get_nth_active(pq->nr);
if (altq == NULL) {
PF_RULES_RUNLOCK();
error = EBUSY;
@@ -2954,24 +3115,20 @@ DIOCCHANGEADDR_error:
break;
}
- PF_RULES_WLOCK();
+ PF_RULES_RLOCK();
n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
io->pfrio_size = min(io->pfrio_size, n);
+ PF_RULES_RUNLOCK();
totlen = io->pfrio_size * sizeof(struct pfr_table);
pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
- M_TEMP, M_NOWAIT);
- if (pfrts == NULL) {
- error = ENOMEM;
- PF_RULES_WUNLOCK();
- break;
- }
+ M_TEMP, M_WAITOK);
error = copyin(io->pfrio_buffer, pfrts, totlen);
if (error) {
free(pfrts, M_TEMP);
- PF_RULES_WUNLOCK();
break;
}
+ PF_RULES_WLOCK();
error = pfr_set_tflags(pfrts, io->pfrio_size,
io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
&io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
@@ -3589,19 +3746,25 @@ DIOCCHANGEADDR_error:
struct pf_src_node *n, *p, *pstore;
uint32_t i, nr = 0;
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
+ i++, sh++) {
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(n, &sh->nodes, entry)
+ nr++;
+ PF_HASHROW_UNLOCK(sh);
+ }
+
+ psn->psn_len = min(psn->psn_len,
+ sizeof(struct pf_src_node) * nr);
+
if (psn->psn_len == 0) {
- for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
- i++, sh++) {
- PF_HASHROW_LOCK(sh);
- LIST_FOREACH(n, &sh->nodes, entry)
- nr++;
- PF_HASHROW_UNLOCK(sh);
- }
psn->psn_len = sizeof(struct pf_src_node) * nr;
break;
}
- p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK);
+ nr = 0;
+
+ p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
i++, sh++) {
PF_HASHROW_LOCK(sh);
@@ -3997,65 +4160,59 @@ shutdown_pf(void)
/* status does not use malloced mem so no need to cleanup */
/* fingerprints and interfaces have their own cleanup code */
-
- /* Free counters last as we updated them during shutdown. */
- counter_u64_free(V_pf_default_rule.states_cur);
- counter_u64_free(V_pf_default_rule.states_tot);
- counter_u64_free(V_pf_default_rule.src_nodes);
-
- for (int i = 0; i < PFRES_MAX; i++)
- counter_u64_free(V_pf_status.counters[i]);
- for (int i = 0; i < LCNT_MAX; i++)
- counter_u64_free(V_pf_status.lcounters[i]);
- for (int i = 0; i < FCNT_MAX; i++)
- counter_u64_free(V_pf_status.fcounters[i]);
- for (int i = 0; i < SCNT_MAX; i++)
- counter_u64_free(V_pf_status.scounters[i]);
} while(0);
return (error);
}
+static pfil_return_t
+pf_check_return(int chk, struct mbuf **m) /* translate the pf_test()/pf_test6() verdict chk into a pfil return code */
+{
+
+ switch (chk) {
+ case PF_PASS:
+ if (*m == NULL) /* verdict is pass, but no mbuf is left in *m */
+ return (PFIL_CONSUMED);
+ else
+ return (PFIL_PASS);
+ break; /* not reached: both branches above return */
+ default: /* every verdict other than PF_PASS */
+ if (*m != NULL) {
+ m_freem(*m); /* free the packet here ... */
+ *m = NULL; /* ... and clear the caller's pointer to it */
+ }
+ return (PFIL_DROPPED);
+ }
+}
+
#ifdef INET
-static int
-pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags,
- struct inpcb *inp)
+static pfil_return_t
+pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
+ void *ruleset __unused, struct inpcb *inp)
{
int chk;
chk = pf_test(PF_IN, flags, ifp, m, inp);
- if (chk && *m) {
- m_freem(*m);
- *m = NULL;
- }
- if (chk != PF_PASS)
- return (EACCES);
- return (0);
+ return (pf_check_return(chk, m));
}
-static int
-pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags,
- struct inpcb *inp)
+static pfil_return_t
+pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
+ void *ruleset __unused, struct inpcb *inp)
{
int chk;
chk = pf_test(PF_OUT, flags, ifp, m, inp);
- if (chk && *m) {
- m_freem(*m);
- *m = NULL;
- }
- if (chk != PF_PASS)
- return (EACCES);
- return (0);
+ return (pf_check_return(chk, m));
}
#endif
#ifdef INET6
-static int
-pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags,
- struct inpcb *inp)
+static pfil_return_t
+pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
+ void *ruleset __unused, struct inpcb *inp)
{
int chk;
@@ -4067,67 +4224,89 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags,
CURVNET_SET(ifp->if_vnet);
chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp);
CURVNET_RESTORE();
- if (chk && *m) {
- m_freem(*m);
- *m = NULL;
- }
- if (chk != PF_PASS)
- return (EACCES);
- return (0);
+
+ return (pf_check_return(chk, m));
}
-static int
-pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags,
- struct inpcb *inp)
+static pfil_return_t
+pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
+ void *ruleset __unused, struct inpcb *inp)
{
int chk;
CURVNET_SET(ifp->if_vnet);
chk = pf_test6(PF_OUT, flags, ifp, m, inp);
CURVNET_RESTORE();
- if (chk && *m) {
- m_freem(*m);
- *m = NULL;
- }
- if (chk != PF_PASS)
- return (EACCES);
- return (0);
+
+ return (pf_check_return(chk, m));
}
#endif /* INET6 */
-static int
-hook_pf(void)
-{
#ifdef INET
- struct pfil_head *pfh_inet;
+VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
+VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
+#define V_pf_ip4_in_hook VNET(pf_ip4_in_hook)
+#define V_pf_ip4_out_hook VNET(pf_ip4_out_hook)
#endif
#ifdef INET6
- struct pfil_head *pfh_inet6;
+VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
+VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
+#define V_pf_ip6_in_hook VNET(pf_ip6_in_hook)
+#define V_pf_ip6_out_hook VNET(pf_ip6_out_hook)
#endif
+static int
+hook_pf(void)
+{
+ struct pfil_hook_args pha;
+ struct pfil_link_args pla;
+
if (V_pf_pfil_hooked)
return (0);
+ pha.pa_version = PFIL_VERSION;
+ pha.pa_modname = "pf";
+ pha.pa_ruleset = NULL;
+
+ pla.pa_version = PFIL_VERSION;
+
#ifdef INET
- pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
- if (pfh_inet == NULL)
- return (ESRCH); /* XXX */
- pfil_add_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
- pfil_add_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+ pha.pa_type = PFIL_TYPE_IP4;
+ pha.pa_func = pf_check_in;
+ pha.pa_flags = PFIL_IN;
+ pha.pa_rulname = "default-in";
+ V_pf_ip4_in_hook = pfil_add_hook(&pha);
+ pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
+ pla.pa_head = V_inet_pfil_head;
+ pla.pa_hook = V_pf_ip4_in_hook;
+ (void)pfil_link(&pla);
+ pha.pa_func = pf_check_out;
+ pha.pa_flags = PFIL_OUT;
+ pha.pa_rulname = "default-out";
+ V_pf_ip4_out_hook = pfil_add_hook(&pha);
+ pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
+ pla.pa_head = V_inet_pfil_head;
+ pla.pa_hook = V_pf_ip4_out_hook;
+ (void)pfil_link(&pla);
#endif
#ifdef INET6
- pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
- if (pfh_inet6 == NULL) {
-#ifdef INET
- pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
- pfh_inet);
- pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
- pfh_inet);
-#endif
- return (ESRCH); /* XXX */
- }
- pfil_add_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
- pfil_add_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+ pha.pa_type = PFIL_TYPE_IP6;
+ pha.pa_func = pf_check6_in;
+ pha.pa_flags = PFIL_IN;
+ pha.pa_rulname = "default-in6";
+ V_pf_ip6_in_hook = pfil_add_hook(&pha);
+ pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
+ pla.pa_head = V_inet6_pfil_head;
+ pla.pa_hook = V_pf_ip6_in_hook;
+ (void)pfil_link(&pla);
+ pha.pa_func = pf_check6_out;
+ pha.pa_rulname = "default-out6";
+ pha.pa_flags = PFIL_OUT;
+ V_pf_ip6_out_hook = pfil_add_hook(&pha);
+ pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
+ pla.pa_head = V_inet6_pfil_head;
+ pla.pa_hook = V_pf_ip6_out_hook;
+ (void)pfil_link(&pla);
#endif
V_pf_pfil_hooked = 1;
@@ -4137,33 +4316,17 @@ hook_pf(void)
static int
dehook_pf(void)
{
-#ifdef INET
- struct pfil_head *pfh_inet;
-#endif
-#ifdef INET6
- struct pfil_head *pfh_inet6;
-#endif
if (V_pf_pfil_hooked == 0)
return (0);
#ifdef INET
- pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
- if (pfh_inet == NULL)
- return (ESRCH); /* XXX */
- pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
- pfh_inet);
- pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
- pfh_inet);
+ pfil_remove_hook(V_pf_ip4_in_hook);
+ pfil_remove_hook(V_pf_ip4_out_hook);
#endif
#ifdef INET6
- pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
- if (pfh_inet6 == NULL)
- return (ESRCH); /* XXX */
- pfil_remove_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
- pfh_inet6);
- pfil_remove_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
- pfh_inet6);
+ pfil_remove_hook(V_pf_ip6_in_hook);
+ pfil_remove_hook(V_pf_ip6_out_hook);
#endif
V_pf_pfil_hooked = 0;
@@ -4173,8 +4336,15 @@ dehook_pf(void)
static void
pf_load_vnet(void)
{
- TAILQ_INIT(&V_pf_tags);
- TAILQ_INIT(&V_pf_qids);
+ V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+ pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
+ PF_RULE_TAG_HASH_SIZE_DEFAULT);
+#ifdef ALTQ
+ pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
+ PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
+#endif
pfattach_vnet();
V_pf_vnet_active = 1;
@@ -4191,7 +4361,7 @@ pf_load(void)
pf_mtag_initialize();
- pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
+ pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
if (pf_dev == NULL)
return (ENOMEM);
@@ -4241,6 +4411,26 @@ pf_unload_vnet(void)
pf_cleanup();
if (IS_DEFAULT_VNET(curvnet))
pf_mtag_cleanup();
+
+ pf_cleanup_tagset(&V_pf_tags);
+#ifdef ALTQ
+ pf_cleanup_tagset(&V_pf_qids);
+#endif
+ uma_zdestroy(V_pf_tag_z);
+
+ /* Free counters last as we updated them during shutdown. */
+ counter_u64_free(V_pf_default_rule.states_cur);
+ counter_u64_free(V_pf_default_rule.states_tot);
+ counter_u64_free(V_pf_default_rule.src_nodes);
+
+ for (int i = 0; i < PFRES_MAX; i++)
+ counter_u64_free(V_pf_status.counters[i]);
+ for (int i = 0; i < LCNT_MAX; i++)
+ counter_u64_free(V_pf_status.lcounters[i]);
+ for (int i = 0; i < FCNT_MAX; i++)
+ counter_u64_free(V_pf_status.fcounters[i]);
+ for (int i = 0; i < SCNT_MAX; i++)
+ counter_u64_free(V_pf_status.scounters[i]);
}
#endif /* __rtems__ */
diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c
index 9538e97c..eb25bbc8 100644
--- a/freebsd/sys/netpfil/pf/pf_norm.c
+++ b/freebsd/sys/netpfil/pf/pf_norm.c
@@ -838,11 +838,11 @@ pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
}
/* We have all the data. */
+ frent = TAILQ_FIRST(&frag->fr_queue);
+ KASSERT(frent != NULL, ("frent != NULL"));
extoff = frent->fe_extoff;
maxlen = frag->fr_maxlen;
frag_id = frag->fr_id;
- frent = TAILQ_FIRST(&frag->fr_queue);
- KASSERT(frent != NULL, ("frent != NULL"));
total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
@@ -1141,9 +1141,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
int off;
struct ip6_ext ext;
struct ip6_opt opt;
- struct ip6_opt_jumbo jumbo;
struct ip6_frag frag;
- u_int32_t jumbolen = 0, plen;
+ u_int32_t plen;
int optend;
int ooff;
u_int8_t proto;
@@ -1187,6 +1186,11 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
goto drop;
+ plen = ntohs(h->ip6_plen);
+ /* jumbo payload option not supported */
+ if (plen == 0)
+ goto drop;
+
extoff = 0;
off = sizeof(struct ip6_hdr);
proto = h->ip6_nxt;
@@ -1230,26 +1234,8 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
goto shortpkt;
if (ooff + sizeof(opt) + opt.ip6o_len > optend)
goto drop;
- switch (opt.ip6o_type) {
- case IP6OPT_JUMBO:
- if (h->ip6_plen != 0)
- goto drop;
- if (!pf_pull_hdr(m, ooff, &jumbo,
- sizeof(jumbo), NULL, NULL,
- AF_INET6))
- goto shortpkt;
- memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
- sizeof(jumbolen));
- jumbolen = ntohl(jumbolen);
- if (jumbolen <= IPV6_MAXPACKET)
- goto drop;
- if (sizeof(struct ip6_hdr) + jumbolen !=
- m->m_pkthdr.len)
- goto drop;
- break;
- default:
- break;
- }
+ if (opt.ip6o_type == IP6OPT_JUMBO)
+ goto drop;
ooff += sizeof(opt) + opt.ip6o_len;
} while (ooff < optend);
@@ -1262,13 +1248,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
}
} while (!terminal);
- /* jumbo payload option must be present, or plen > 0 */
- if (ntohs(h->ip6_plen) == 0)
- plen = jumbolen;
- else
- plen = ntohs(h->ip6_plen);
- if (plen == 0)
- goto drop;
if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
goto shortpkt;
@@ -1277,10 +1256,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
return (PF_PASS);
fragment:
- /* Jumbo payload packets cannot be fragmented. */
- plen = ntohs(h->ip6_plen);
- if (plen == 0 || jumbolen)
- goto drop;
if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
goto shortpkt;
diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c
index 3f15fb0e..96ed849c 100644
--- a/freebsd/sys/netpfil/pf/pf_table.c
+++ b/freebsd/sys/netpfil/pf/pf_table.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <net/pfvar.h>
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+
#define ACCEPT_FLAGS(flags, oklist) \
do { \
if ((flags & ~(oklist)) & \
@@ -113,6 +115,7 @@ struct pfr_walktree {
struct pfi_dynaddr *pfrw1_dyn;
} pfrw_1;
int pfrw_free;
+ int pfrw_flags;
};
#define pfrw_addr pfrw_1.pfrw1_addr
#define pfrw_astats pfrw_1.pfrw1_astats
@@ -126,15 +129,16 @@ struct pfr_walktree {
static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures");
VNET_DEFINE_STATIC(uma_zone_t, pfr_kentry_z);
#define V_pfr_kentry_z VNET(pfr_kentry_z)
-VNET_DEFINE_STATIC(uma_zone_t, pfr_kcounters_z);
-#define V_pfr_kcounters_z VNET(pfr_kcounters_z)
static struct pf_addr pfr_ffaddr = {
.addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
};
+static void pfr_copyout_astats(struct pfr_astats *,
+ const struct pfr_kentry *,
+ const struct pfr_walktree *);
static void pfr_copyout_addr(struct pfr_addr *,
- struct pfr_kentry *ke);
+ const struct pfr_kentry *ke);
static int pfr_validate_addr(struct pfr_addr *);
static void pfr_enqueue_addrs(struct pfr_ktable *,
struct pfr_kentryworkq *, int *, int);
@@ -142,8 +146,12 @@ static void pfr_mark_addrs(struct pfr_ktable *);
static struct pfr_kentry
*pfr_lookup_addr(struct pfr_ktable *,
struct pfr_addr *, int);
+static bool pfr_create_kentry_counter(struct pfr_kcounters *,
+ int, int);
static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *);
static void pfr_destroy_kentries(struct pfr_kentryworkq *);
+static void pfr_destroy_kentry_counter(struct pfr_kcounters *,
+ int, int);
static void pfr_destroy_kentry(struct pfr_kentry *);
static void pfr_insert_kentries(struct pfr_ktable *,
struct pfr_kentryworkq *, long);
@@ -202,9 +210,6 @@ pfr_initialize(void)
V_pfr_kentry_z = uma_zcreate("pf table entries",
sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
0);
- V_pfr_kcounters_z = uma_zcreate("pf table counters",
- sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z;
V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
}
@@ -214,7 +219,6 @@ pfr_cleanup(void)
{
uma_zdestroy(V_pfr_kentry_z);
- uma_zdestroy(V_pfr_kcounters_z);
}
int
@@ -608,6 +612,13 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
w.pfrw_op = PFRW_GET_ASTATS;
w.pfrw_astats = addr;
w.pfrw_free = kt->pfrkt_cnt;
+ /*
+ * Flags below are for backward compatibility. It was possible to have
+ * a table without per-entry counters. Now they are always allocated,
+ * we just discard data when reading it if table is not configured to
+ * have counters.
+ */
+ w.pfrw_flags = kt->pfrkt_flags;
rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
if (!rv)
rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh,
@@ -774,10 +785,30 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
return (ke);
}
+static bool
+pfr_create_kentry_counter(struct pfr_kcounters *kc, int pfr_dir, int pfr_op) /* allocate the packet and byte counters of slot [pfr_dir][pfr_op]; returns false if either allocation fails */
+{
+ kc->pfrkc_packets[pfr_dir][pfr_op] = counter_u64_alloc(M_NOWAIT);
+ if (! kc->pfrkc_packets[pfr_dir][pfr_op])
+ return (false);
+
+ kc->pfrkc_bytes[pfr_dir][pfr_op] = counter_u64_alloc(M_NOWAIT);
+ if (! kc->pfrkc_bytes[pfr_dir][pfr_op]) {
+ /* Previous allocation will be freed through
+ * pfr_destroy_kentry() */
+ return (false);
+ }
+
+ kc->pfrkc_tzero = 0; /* written on every successful call, i.e. once per slot rather than once per entry */
+
+ return (true);
+}
+
static struct pfr_kentry *
pfr_create_kentry(struct pfr_addr *ad)
{
struct pfr_kentry *ke;
+ int pfr_dir, pfr_op;
ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO);
if (ke == NULL)
@@ -790,6 +821,14 @@ pfr_create_kentry(struct pfr_addr *ad)
ke->pfrke_af = ad->pfra_af;
ke->pfrke_net = ad->pfra_net;
ke->pfrke_not = ad->pfra_not;
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++)
+ for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++) {
+ if (! pfr_create_kentry_counter(&ke->pfrke_counters,
+ pfr_dir, pfr_op)) {
+ pfr_destroy_kentry(ke);
+ return (NULL);
+ }
+ }
return (ke);
}
@@ -805,10 +844,22 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq)
}
static void
+pfr_destroy_kentry_counter(struct pfr_kcounters *kc, int pfr_dir, int pfr_op) /* free the packet and byte counters of slot [pfr_dir][pfr_op] */
+{
+ counter_u64_free(kc->pfrkc_packets[pfr_dir][pfr_op]);
+ counter_u64_free(kc->pfrkc_bytes[pfr_dir][pfr_op]); /* NOTE(review): pfr_create_kentry() calls pfr_destroy_kentry() after a partial allocation, so zeroed (never allocated) slots reach these calls -- confirm counter_u64_free() accepts NULL */
+}
+
+static void
pfr_destroy_kentry(struct pfr_kentry *ke)
{
- if (ke->pfrke_counters)
- uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters);
+ int pfr_dir, pfr_op;
+
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++)
+ for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++)
+ pfr_destroy_kentry_counter(&ke->pfrke_counters,
+ pfr_dir, pfr_op);
+
uma_zfree(V_pfr_kentry_z, ke);
}
@@ -826,7 +877,7 @@ pfr_insert_kentries(struct pfr_ktable *kt,
"(code=%d).\n", rv);
break;
}
- p->pfrke_tzero = tzero;
+ p->pfrke_counters.pfrkc_tzero = tzero;
n++;
}
kt->pfrkt_cnt += n;
@@ -849,7 +900,7 @@ pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero)
if (rv)
return (rv);
- p->pfrke_tzero = tzero;
+ p->pfrke_counters.pfrkc_tzero = tzero;
kt->pfrkt_cnt++;
return (0);
@@ -884,15 +935,20 @@ static void
pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
{
struct pfr_kentry *p;
+ int pfr_dir, pfr_op;
SLIST_FOREACH(p, workq, pfrke_workq) {
if (negchange)
p->pfrke_not = !p->pfrke_not;
- if (p->pfrke_counters) {
- uma_zfree(V_pfr_kcounters_z, p->pfrke_counters);
- p->pfrke_counters = NULL;
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+ for (pfr_op = 0; pfr_op < PFR_OP_ADDR_MAX; pfr_op ++) {
+ counter_u64_zero(p->pfrke_counters.
+ pfrkc_packets[pfr_dir][pfr_op]);
+ counter_u64_zero(p->pfrke_counters.
+ pfrkc_bytes[pfr_dir][pfr_op]);
+ }
}
- p->pfrke_tzero = tzero;
+ p->pfrke_counters.pfrkc_tzero = tzero;
}
}
@@ -981,7 +1037,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
}
static void
-pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
+pfr_copyout_addr(struct pfr_addr *ad, const struct pfr_kentry *ke)
{
bzero(ad, sizeof(*ad));
if (ke == NULL)
@@ -995,6 +1051,33 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
}
+static void
+pfr_copyout_astats(struct pfr_astats *as, const struct pfr_kentry *ke,
+ const struct pfr_walktree *w) /* fill *as from table entry ke; w->pfrw_flags decides whether counter values are reported */
+{
+ int dir, op;
+ const struct pfr_kcounters *kc = &ke->pfrke_counters;
+
+ pfr_copyout_addr(&as->pfras_a, ke); /* address part is copied in both cases below */
+ as->pfras_tzero = kc->pfrkc_tzero;
+
+ if (! (w->pfrw_flags & PFR_TFLAG_COUNTERS)) { /* PFR_TFLAG_COUNTERS not set: report zeros and flag the entry as uncounted */
+ bzero(as->pfras_packets, sizeof(as->pfras_packets));
+ bzero(as->pfras_bytes, sizeof(as->pfras_bytes));
+ as->pfras_a.pfra_fback = PFR_FB_NOCOUNT;
+ return;
+ }
+
+ for (dir = 0; dir < PFR_DIR_MAX; dir ++) {
+ for (op = 0; op < PFR_OP_ADDR_MAX; op ++) { /* read every counter with counter_u64_fetch() into the output arrays */
+ as->pfras_packets[dir][op] =
+ counter_u64_fetch(kc->pfrkc_packets[dir][op]);
+ as->pfras_bytes[dir][op] =
+ counter_u64_fetch(kc->pfrkc_bytes[dir][op]);
+ }
+ }
+}
+
static int
pfr_walktree(struct radix_node *rn, void *arg)
{
@@ -1023,19 +1106,7 @@ pfr_walktree(struct radix_node *rn, void *arg)
if (w->pfrw_free-- > 0) {
struct pfr_astats as;
- pfr_copyout_addr(&as.pfras_a, ke);
-
- if (ke->pfrke_counters) {
- bcopy(ke->pfrke_counters->pfrkc_packets,
- as.pfras_packets, sizeof(as.pfras_packets));
- bcopy(ke->pfrke_counters->pfrkc_bytes,
- as.pfras_bytes, sizeof(as.pfras_bytes));
- } else {
- bzero(as.pfras_packets, sizeof(as.pfras_packets));
- bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
- as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
- }
- as.pfras_tzero = ke->pfrke_tzero;
+ pfr_copyout_astats(&as, ke, w);
bcopy(&as, w->pfrw_astats, sizeof(as));
w->pfrw_astats++;
@@ -1260,6 +1331,7 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
struct pfr_ktableworkq workq;
int n, nn;
long tzero = time_second;
+ int pfr_dir, pfr_op;
/* XXX PFR_FLAG_CLSTATS disabled */
ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
@@ -1278,7 +1350,25 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
continue;
if (n-- <= 0)
continue;
- bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl));
+ bcopy(&p->pfrkt_kts.pfrts_t, &tbl->pfrts_t,
+ sizeof(struct pfr_table));
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+ for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+ tbl->pfrts_packets[pfr_dir][pfr_op] =
+ counter_u64_fetch(
+ p->pfrkt_packets[pfr_dir][pfr_op]);
+ tbl->pfrts_bytes[pfr_dir][pfr_op] =
+ counter_u64_fetch(
+ p->pfrkt_bytes[pfr_dir][pfr_op]);
+ }
+ }
+ tbl->pfrts_match = counter_u64_fetch(p->pfrkt_match);
+ tbl->pfrts_nomatch = counter_u64_fetch(p->pfrkt_nomatch);
+ tbl->pfrts_tzero = p->pfrkt_tzero;
+ tbl->pfrts_cnt = p->pfrkt_cnt;
+ for (pfr_op = 0; pfr_op < PFR_REFCNT_MAX; pfr_op++)
+ tbl->pfrts_refcnt[pfr_op] = p->pfrkt_refcnt[pfr_op];
+ tbl++;
SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
}
if (flags & PFR_FLAG_CLSTATS)
@@ -1612,7 +1702,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, long tzero)
q->pfrke_mark = 1;
SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
} else {
- p->pfrke_tzero = tzero;
+ p->pfrke_counters.pfrkc_tzero = tzero;
SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
}
}
@@ -1796,14 +1886,20 @@ static void
pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
{
struct pfr_kentryworkq addrq;
+ int pfr_dir, pfr_op;
if (recurse) {
pfr_enqueue_addrs(kt, &addrq, NULL, 0);
pfr_clstats_kentries(&addrq, tzero, 0);
}
- bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
- bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
- kt->pfrkt_match = kt->pfrkt_nomatch = 0;
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+ for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+ counter_u64_zero(kt->pfrkt_packets[pfr_dir][pfr_op]);
+ counter_u64_zero(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+ }
+ }
+ counter_u64_zero(kt->pfrkt_match);
+ counter_u64_zero(kt->pfrkt_nomatch);
kt->pfrkt_tzero = tzero;
}
@@ -1812,6 +1908,7 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
{
struct pfr_ktable *kt;
struct pf_ruleset *rs;
+ int pfr_dir, pfr_op;
PF_RULES_WASSERT();
@@ -1830,6 +1927,34 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
rs->tables++;
}
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+ for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+ kt->pfrkt_packets[pfr_dir][pfr_op] =
+ counter_u64_alloc(M_NOWAIT);
+ if (! kt->pfrkt_packets[pfr_dir][pfr_op]) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ kt->pfrkt_bytes[pfr_dir][pfr_op] =
+ counter_u64_alloc(M_NOWAIT);
+ if (! kt->pfrkt_bytes[pfr_dir][pfr_op]) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ }
+ }
+ kt->pfrkt_match = counter_u64_alloc(M_NOWAIT);
+ if (! kt->pfrkt_match) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+
+ kt->pfrkt_nomatch = counter_u64_alloc(M_NOWAIT);
+ if (! kt->pfrkt_nomatch) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+
if (!rn_inithead((void **)&kt->pfrkt_ip4,
offsetof(struct sockaddr_in, sin_addr) * 8) ||
!rn_inithead((void **)&kt->pfrkt_ip6,
@@ -1857,6 +1982,7 @@ static void
pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
{
struct pfr_kentryworkq addrq;
+ int pfr_dir, pfr_op;
if (flushaddr) {
pfr_enqueue_addrs(kt, &addrq, NULL, 0);
@@ -1873,6 +1999,15 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
kt->pfrkt_rs->tables--;
pf_remove_if_empty_ruleset(kt->pfrkt_rs);
}
+ for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
+ for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
+ counter_u64_free(kt->pfrkt_packets[pfr_dir][pfr_op]);
+ counter_u64_free(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+ }
+ }
+ counter_u64_free(kt->pfrkt_match);
+ counter_u64_free(kt->pfrkt_nomatch);
+
free(kt, M_PFTABLE);
}
@@ -1941,9 +2076,9 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
}
match = (ke && !ke->pfrke_not);
if (match)
- kt->pfrkt_match++;
+ counter_u64_add(kt->pfrkt_match, 1);
else
- kt->pfrkt_nomatch++;
+ counter_u64_add(kt->pfrkt_nomatch, 1);
return (match);
}
@@ -1994,20 +2129,18 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
}
if ((ke == NULL || ke->pfrke_not) != notrule) {
if (op_pass != PFR_OP_PASS)
- printf("pfr_update_stats: assertion failed.\n");
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pfr_update_stats: assertion failed.\n"));
op_pass = PFR_OP_XPASS;
}
- kt->pfrkt_packets[dir_out][op_pass]++;
- kt->pfrkt_bytes[dir_out][op_pass] += len;
+ counter_u64_add(kt->pfrkt_packets[dir_out][op_pass], 1);
+ counter_u64_add(kt->pfrkt_bytes[dir_out][op_pass], len);
if (ke != NULL && op_pass != PFR_OP_XPASS &&
(kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
- if (ke->pfrke_counters == NULL)
- ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z,
- M_NOWAIT | M_ZERO);
- if (ke->pfrke_counters != NULL) {
- ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++;
- ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len;
- }
+ counter_u64_add(ke->pfrke_counters.
+ pfrkc_packets[dir_out][op_pass], 1);
+ counter_u64_add(ke->pfrke_counters.
+ pfrkc_bytes[dir_out][op_pass], len);
}
}
@@ -2097,7 +2230,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
_next_block:
ke = pfr_kentry_byidx(kt, idx, af);
if (ke == NULL) {
- kt->pfrkt_nomatch++;
+ counter_u64_add(kt->pfrkt_nomatch, 1);
return (1);
}
pfr_prepare_network(&umask, af, ke->pfrke_net);
@@ -2122,7 +2255,7 @@ _next_block:
/* this is a single IP address - no possible nested block */
PF_ACPY(counter, addr, af);
*pidx = idx;
- kt->pfrkt_match++;
+ counter_u64_add(kt->pfrkt_match, 1);
return (0);
}
for (;;) {
@@ -2142,7 +2275,7 @@ _next_block:
/* lookup return the same block - perfect */
PF_ACPY(counter, addr, af);
*pidx = idx;
- kt->pfrkt_match++;
+ counter_u64_add(kt->pfrkt_match, 1);
return (0);
}
diff --git a/freebsd/sys/opencrypto/cast.c b/freebsd/sys/opencrypto/cast.c
index 1fb62f20..8031dabe 100644
--- a/freebsd/sys/opencrypto/cast.c
+++ b/freebsd/sys/opencrypto/cast.c
@@ -131,7 +131,7 @@ u_int32_t t, l, r;
/***** Key Schedule *****/
-void cast_setkey(cast_key* key, u_int8_t* rawkey, int keybytes)
+void cast_setkey(cast_key* key, const u_int8_t* rawkey, int keybytes)
{
u_int32_t t[4] = {0, 0, 0, 0}, z[4] = {0, 0, 0, 0}, x[4];
int i;
diff --git a/freebsd/sys/opencrypto/cast.h b/freebsd/sys/opencrypto/cast.h
index 8e2d0d19..2aca9340 100644
--- a/freebsd/sys/opencrypto/cast.h
+++ b/freebsd/sys/opencrypto/cast.h
@@ -16,7 +16,7 @@ typedef struct {
int rounds; /* Number of rounds to use, 12 or 16 */
} cast_key;
-void cast_setkey(cast_key * key, u_int8_t * rawkey, int keybytes);
+void cast_setkey(cast_key * key, const u_int8_t * rawkey, int keybytes);
void cast_encrypt(cast_key * key, u_int8_t * inblock, u_int8_t * outblock);
void cast_decrypt(cast_key * key, u_int8_t * inblock, u_int8_t * outblock);
diff --git a/freebsd/sys/opencrypto/cbc_mac.c b/freebsd/sys/opencrypto/cbc_mac.c
new file mode 100644
index 00000000..1bcf356a
--- /dev/null
+++ b/freebsd/sys/opencrypto/cbc_mac.c
@@ -0,0 +1,267 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2018-2019 iXsystems Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <opencrypto/cbc_mac.h>
+#include <opencrypto/xform_auth.h>
+
+/*
+ * XOR the CCM_CBC_BLOCK_LEN-byte block at src with the block at dst,
+ * AES-encrypt the result with the context's key schedule, and store the
+ * ciphertext back into dst.
+ *
+ * Both inputs are uint8_t arrays and carry no 64-bit alignment guarantee,
+ * so they are copied into aligned locals before being combined a word at
+ * a time, instead of being read through casted uint64_t pointers.
+ */
+static void
+xor_and_encrypt(struct aes_cbc_mac_ctx *ctx,
+    const uint8_t *src, uint8_t *dst)
+{
+	uint64_t b1[CCM_CBC_BLOCK_LEN/sizeof(uint64_t)];
+	uint64_t b2[CCM_CBC_BLOCK_LEN/sizeof(uint64_t)];
+	uint64_t temp_block[CCM_CBC_BLOCK_LEN/sizeof(uint64_t)];
+
+	bcopy(src, b1, sizeof(b1));
+	bcopy(dst, b2, sizeof(b2));
+
+	for (size_t count = 0;
+	    count < CCM_CBC_BLOCK_LEN/sizeof(uint64_t);
+	    count++) {
+		temp_block[count] = b1[count] ^ b2[count];
+	}
+	rijndaelEncrypt(ctx->keysched, ctx->rounds, (void*)temp_block, dst);
+}
+
+void
+AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *ctx)
+{
+	/* A fresh context is all zeroes: no key, no nonce, no counts. */
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+void
+AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *ctx, const uint8_t *key, uint16_t klen)
+{
+	/*
+	 * The key setup routine is handed klen * 8, i.e. klen is presumably
+	 * a byte count that is converted to bits here.
+	 */
+	const int keybits = klen * 8;
+
+	/* Expand the key and remember the round count for later encryptions. */
+	ctx->rounds = rijndaelKeySetupEnc(ctx->keysched, key, keybits);
+}
+
+/*
+ * This is called to set the nonce, aka IV.
+ * Before this call, the authDataLength and cryptDataLength fields
+ * MUST have been set.  Sadly, there's no way to return an error.
+ *
+ * The CBC-MAC algorithm requires that the first block contain the
+ * nonce, as well as information about the sizes and lengths involved.
+ *
+ * On return ctx->block holds the encryption of that first block, all
+ * running counters have been reset and, when there is auth data, the
+ * staging block has been primed with the encoded auth data length.
+ * authDataLength is increased by the size of that encoding, so the
+ * caller has to store the raw auth data length again before each call.
+ * The nonce is not copied: ctx keeps the caller's pointer, which is read
+ * again by AES_CBC_MAC_Final().
+ */
+void
+AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *ctx, const uint8_t *nonce, uint16_t nonceLen)
+{
+	uint8_t b0[CCM_CBC_BLOCK_LEN];
+	uint8_t *bp = b0, flags = 0;
+	uint8_t L = 0;
+	uint64_t dataLength = ctx->cryptDataLength;
+
+	KASSERT(nonceLen >= 7 && nonceLen <= 13,
+	    ("nonceLen must be between 7 and 13 bytes"));
+
+	ctx->nonce = nonce;
+	ctx->nonceLength = nonceLen;
+
+	/*
+	 * Restart every running counter, the payload one included, so that
+	 * a context which already processed a message starts from scratch.
+	 */
+	ctx->authDataCount = 0;
+	ctx->cryptDataCount = 0;
+	ctx->blockIndex = 0;
+	explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block));
+
+	/*
+	 * Need to determine the L field value.  This is the number of
+	 * bytes needed to specify the length of the message; the length
+	 * is whatever is left in the 16 bytes after specifying flags and
+	 * the nonce.
+	 */
+	L = 15 - nonceLen;
+
+	/*
+	 * Flags byte: bit 6 is set when there is auth data, bits 3-5 hold
+	 * (tag length - 2) / 2 and the low bits hold L - 1.
+	 */
+	flags = ((ctx->authDataLength > 0) << 6) +
+	    (((AES_CBC_MAC_HASH_LEN - 2) / 2) << 3) +
+	    L - 1;
+	/*
+	 * Now we need to set up the first block, which has flags, nonce,
+	 * and the message length.
+	 */
+	b0[0] = flags;
+	bcopy(nonce, b0 + 1, nonceLen);
+	bp = b0 + 1 + nonceLen;
+
+	/*
+	 * Store cryptDataLength, most significant byte first, in the L
+	 * bytes that follow the nonce.  Bits that do not fit into L bytes
+	 * are silently dropped.
+	 */
+	for (uint8_t *dst = b0 + sizeof(b0) - 1; dst >= bp; dst--) {
+		*dst = dataLength;
+		dataLength >>= 8;
+	}
+	/* Now need to encrypt b0 */
+	rijndaelEncrypt(ctx->keysched, ctx->rounds, b0, ctx->block);
+	/* If there is auth data, we need to set up the staging block */
+	if (ctx->authDataLength) {
+		size_t addLength;
+
+		/*
+		 * The auth data is prefixed with its own length.  Short
+		 * lengths use two bytes; longer ones use a two byte marker
+		 * (0xff 0xfe or 0xff 0xff) followed by a 32 or 64 bit
+		 * big-endian length.
+		 */
+		if (ctx->authDataLength < ((1<<16) - (1<<8))) {
+			uint16_t sizeVal = htobe16(ctx->authDataLength);
+			bcopy(&sizeVal, ctx->staging_block, sizeof(sizeVal));
+			addLength = sizeof(sizeVal);
+		} else if (ctx->authDataLength < (1ULL<<32)) {
+			uint32_t sizeVal = htobe32(ctx->authDataLength);
+			ctx->staging_block[0] = 0xff;
+			ctx->staging_block[1] = 0xfe;
+			bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal));
+			addLength = 2 + sizeof(sizeVal);
+		} else {
+			uint64_t sizeVal = htobe64(ctx->authDataLength);
+			ctx->staging_block[0] = 0xff;
+			ctx->staging_block[1] = 0xff;
+			bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal));
+			addLength = 2 + sizeof(sizeVal);
+		}
+		ctx->blockIndex = addLength;
+		/*
+		 * The length descriptor goes into the AAD buffer, so we
+		 * need to account for it.
+		 */
+		ctx->authDataLength += addLength;
+		ctx->authDataCount = addLength;
+	}
+}
+
+/*
+ * Feed `length' bytes at `data' into the MAC.  While authDataCount is
+ * below authDataLength the bytes are consumed as authentication data;
+ * otherwise they are consumed as payload.  In both phases input is
+ * gathered in ctx->staging_block and folded into ctx->block with
+ * xor_and_encrypt().  Always returns 0.
+ */
+int
+AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *ctx, const uint8_t *data,
+ uint16_t length)
+{
+ size_t copy_amt;
+
+ /*
+ * This will be called in one of two phases:
+ * (1) Applying authentication data, or
+ * (2) Applying the payload data.
+ *
+ * Because CBC-MAC puts the authentication data size before the
+ * data, subsequent calls won't be block-size-aligned. Which
+ * complicates things a fair bit.
+ *
+ * The payload data doesn't have that problem.
+ */
+
+ if (ctx->authDataCount < ctx->authDataLength) {
+ /*
+ * We need to process data as authentication data.
+ * Since we may be out of sync, we may also need
+ * to pad out the staging block.
+ */
+ const uint8_t *ptr = data;
+ while (length > 0) {
+
+ /*
+ * Limited only by the room left in the staging block, not by
+ * the auth data still expected, so authDataCount may end up
+ * larger than authDataLength.
+ */
+ copy_amt = MIN(length,
+ sizeof(ctx->staging_block) - ctx->blockIndex);
+
+ bcopy(ptr, ctx->staging_block + ctx->blockIndex,
+ copy_amt);
+ ptr += copy_amt;
+ length -= copy_amt;
+ ctx->authDataCount += copy_amt;
+ ctx->blockIndex += copy_amt;
+ /* Wraps to 0 exactly when the staging block has been filled. */
+ ctx->blockIndex %= sizeof(ctx->staging_block);
+
+ if (ctx->blockIndex == 0 ||
+ ctx->authDataCount == ctx->authDataLength) {
+ /*
+ * We're done with this block, so we
+ * xor staging_block with block, and then
+ * encrypt it.
+ */
+ xor_and_encrypt(ctx, ctx->staging_block, ctx->block);
+ bzero(ctx->staging_block, sizeof(ctx->staging_block));
+ ctx->blockIndex = 0;
+ /* All auth data seen: ignore whatever is left of this call. */
+ if (ctx->authDataCount >= ctx->authDataLength)
+ break;
+ }
+ }
+ /*
+ * We'd like to be able to check length == 0 and return
+ * here, but the way OCF calls us, length is always
+ * blksize (16, in this case). So we have to count on
+ * the fact that OCF calls us separately for the AAD and
+ * for the real data.
+ */
+ return (0);
+ }
+ /*
+ * If we're here, then we're encoding payload data.
+ * This is marginally easier, except that _Update can
+ * be called with non-aligned update lengths. As a result,
+ * we still need to use the staging block.
+ */
+ KASSERT((length + ctx->cryptDataCount) <= ctx->cryptDataLength,
+ ("More encryption data than allowed"));
+
+ while (length) {
+ uint8_t *ptr;
+
+ copy_amt = MIN(sizeof(ctx->staging_block) - ctx->blockIndex,
+ length);
+ ptr = ctx->staging_block + ctx->blockIndex;
+ bcopy(data, ptr, copy_amt);
+ data += copy_amt;
+ ctx->blockIndex += copy_amt;
+ ctx->cryptDataCount += copy_amt;
+ length -= copy_amt;
+ /* A partial block stays staged for a later Update or for Final. */
+ if (ctx->blockIndex == sizeof(ctx->staging_block)) {
+ /* We've got a full block */
+ xor_and_encrypt(ctx, ctx->staging_block, ctx->block);
+ ctx->blockIndex = 0;
+ bzero(ctx->staging_block, sizeof(ctx->staging_block));
+ }
+ }
+ return (0);
+}
+
+/*
+ * Finish the MAC computation and write AES_CBC_MAC_HASH_LEN bytes of tag
+ * to buf.  A partially filled staging block is folded in first; the
+ * running MAC is then XORed with the encryption of a block built from
+ * the flags byte and the nonce, with every remaining byte zero.
+ */
+void
+AES_CBC_MAC_Final(uint8_t *buf, struct aes_cbc_mac_ctx *ctx)
+{
+	uint8_t mask[CCM_CBC_BLOCK_LEN] = { 0 };
+	size_t pos;
+
+	/* Flush input that never completed a full block. */
+	if (ctx->blockIndex != 0) {
+		xor_and_encrypt(ctx, ctx->staging_block, ctx->block);
+		ctx->cryptDataCount += ctx->blockIndex;
+		ctx->blockIndex = 0;
+		explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block));
+	}
+
+	/* Flags byte followed by the nonce; the remaining bytes stay zero. */
+	mask[0] = (15 - ctx->nonceLength) - 1;
+	memcpy(&mask[1], ctx->nonce, ctx->nonceLength);
+	rijndaelEncrypt(ctx->keysched, ctx->rounds, mask, mask);
+
+	pos = 0;
+	while (pos < AES_CBC_MAC_HASH_LEN) {
+		buf[pos] = ctx->block[pos] ^ mask[pos];
+		pos++;
+	}
+
+	/* Do not leave the encrypted mask block on the stack. */
+	explicit_bzero(mask, sizeof(mask));
+}
diff --git a/freebsd/sys/opencrypto/cbc_mac.h b/freebsd/sys/opencrypto/cbc_mac.h
new file mode 100644
index 00000000..33e61cc1
--- /dev/null
+++ b/freebsd/sys/opencrypto/cbc_mac.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * Copyright (c) 2018, iXsystems Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Sean Eric Fagan, with lots of references
+ * to existing AES-CCM (gmac) code.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _CBC_CCM_H
+# define _CBC_CCM_H
+
+# include <sys/types.h>
+# include <crypto/rijndael/rijndael.h>
+
+# define CCM_CBC_BLOCK_LEN 16 /* 128 bits */
+# define CCM_CBC_MAX_DIGEST_LEN 16
+# define CCM_CBC_MIN_DIGEST_LEN 4
+
+/*
+ * This is the authentication context structure;
+ * the encryption one is similar.
+ */
+struct aes_cbc_mac_ctx {
+ /*
+ * authDataLength is stored by the caller before AES_CBC_MAC_Reinit(),
+ * which then adds the size of the encoded length prefix to it;
+ * authDataCount is the number of auth bytes consumed so far.
+ */
+ uint64_t authDataLength, authDataCount;
+ /* Payload length (encoded into the first block) and bytes consumed. */
+ uint64_t cryptDataLength, cryptDataCount;
+ /* Number of bytes currently held in staging_block. */
+ int blockIndex;
+ /* Input gathered until a whole block can be folded into the MAC. */
+ uint8_t staging_block[CCM_CBC_BLOCK_LEN];
+ /* Running CBC-MAC value. */
+ uint8_t block[CCM_CBC_BLOCK_LEN];
+ /* Caller-owned nonce; the pointer is stored, the bytes are not copied. */
+ const uint8_t *nonce;
+ int nonceLength; /* This one is in bytes, not bits! */
+ /* AES state data */
+ int rounds;
+ uint32_t keysched[4*(RIJNDAEL_MAXNR+1)];
+};
+
+/*
+ * CBC-MAC entry points.  Per message, the authDataLength and
+ * cryptDataLength fields of the context have to be stored before
+ * AES_CBC_MAC_Reinit() is called with the nonce and its length in bytes.
+ * AES_CBC_MAC_Update() consumes a data pointer and a length;
+ * AES_CBC_MAC_Final() takes the output buffer first and the context second.
+ */
+void AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *);
+void AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t);
+void AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t);
+int AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t);
+void AES_CBC_MAC_Final(uint8_t *, struct aes_cbc_mac_ctx *);
+
+#endif /* _CBC_CCM_H */
diff --git a/freebsd/sys/opencrypto/cryptodeflate.c b/freebsd/sys/opencrypto/cryptodeflate.c
index 8ab063f4..30d0844a 100644
--- a/freebsd/sys/opencrypto/cryptodeflate.c
+++ b/freebsd/sys/opencrypto/cryptodeflate.c
@@ -31,7 +31,7 @@
/*
* This file contains a wrapper around the deflate algo compression
- * functions using the zlib library (see libkern/zlib.c and sys/zlib.h})
+ * functions using the zlib library (see sys/contrib/zlib)
*/
#include <sys/cdefs.h>
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/sdt.h>
#include <sys/systm.h>
-#include <sys/zlib.h>
+#include <contrib/zlib/zlib.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/deflate.h>
@@ -52,16 +52,32 @@ __FBSDID("$FreeBSD$");
SDT_PROVIDER_DECLARE(opencrypto);
SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, entry,
"int", "u_int32_t");
-SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, bad,
- "int", "int", "int", "int", "int");
-SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, iter,
- "int", "int", "int", "int", "int");
+SDT_PROBE_DEFINE6(opencrypto, deflate, deflate_global, bad,
+ "int", "int", "int", "int", "int", "int");
+SDT_PROBE_DEFINE6(opencrypto, deflate, deflate_global, iter,
+ "int", "int", "int", "int", "int", "int");
SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, return,
"int", "u_int32_t");
int window_inflate = -1 * MAX_WBITS;
int window_deflate = -12;
+/*
+ * zlib allocation hook, installed as zbuf.zalloc: return room for `type'
+ * items of `size' bytes each, or NULL when the request cannot be met.
+ * The product is checked first, because a wrapped u_int multiplication
+ * would hand zlib a buffer smaller than the one it asked for.  `nil' is
+ * the opaque cookie and is not used.
+ */
+static void *
+crypto_zalloc(void *nil, u_int type, u_int size)
+{
+
+	if (size != 0 && type > ~0U / size)
+		return (NULL);
+	return (malloc(type * size, M_CRYPTO_DATA, M_NOWAIT));
+}
+
+/*
+ * zlib release hook, installed as zbuf.zfree: gives memory back to the
+ * M_CRYPTO_DATA malloc type used by crypto_zalloc().  `nil' is not used.
+ */
+static void
+crypto_zfree(void *nil, void *ptr)
+{
+
+ free(ptr, M_CRYPTO_DATA);
+}
+
/*
* This function takes a block of data and (de)compress it using the deflate
* algorithm
@@ -107,16 +123,16 @@ deflate_global(data, size, decomp, out)
bufh = bufp = malloc(sizeof(*bufp) + (size_t)(size * i),
M_CRYPTO_DATA, M_NOWAIT);
if (bufp == NULL) {
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, 0, __LINE__, 0, 0);
+ SDT_PROBE6(opencrypto, deflate, deflate_global, bad,
+ decomp, 0, __LINE__, 0, 0, 0);
goto bad2;
}
bufp->next = NULL;
bufp->size = size * i;
bzero(&zbuf, sizeof(z_stream));
- zbuf.zalloc = z_alloc;
- zbuf.zfree = z_free;
+ zbuf.zalloc = crypto_zalloc;
+ zbuf.zfree = crypto_zfree;
zbuf.opaque = Z_NULL;
zbuf.next_in = data; /* Data that is going to be processed. */
zbuf.avail_in = size; /* Total length of data to be processed. */
@@ -127,8 +143,8 @@ deflate_global(data, size, decomp, out)
deflateInit2(&zbuf, Z_DEFAULT_COMPRESSION, Z_METHOD,
window_deflate, Z_MEMLEVEL, Z_DEFAULT_STRATEGY);
if (error != Z_OK) {
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, error, __LINE__, 0, 0);
+ SDT_PROBE6(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__, 0, 0, 0);
goto bad;
}
@@ -136,24 +152,14 @@ deflate_global(data, size, decomp, out)
error = decomp ? inflate(&zbuf, Z_SYNC_FLUSH) :
deflate(&zbuf, Z_FINISH);
if (error != Z_OK && error != Z_STREAM_END) {
- /*
- * Unfortunately we are limited to 5 arguments,
- * thus use two probes.
- */
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, error, __LINE__,
- zbuf.avail_in, zbuf.avail_out);
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ SDT_PROBE6(opencrypto, deflate, deflate_global, bad,
decomp, error, __LINE__,
- zbuf.state->dummy, zbuf.total_out);
+ zbuf.avail_in, zbuf.avail_out, zbuf.total_out);
goto bad;
}
- SDT_PROBE5(opencrypto, deflate, deflate_global, iter,
- decomp, error, __LINE__,
- zbuf.avail_in, zbuf.avail_out);
- SDT_PROBE5(opencrypto, deflate, deflate_global, iter,
+ SDT_PROBE6(opencrypto, deflate, deflate_global, iter,
decomp, error, __LINE__,
- zbuf.state->dummy, zbuf.total_out);
+ zbuf.avail_in, zbuf.avail_out, zbuf.total_out);
if (decomp && zbuf.avail_in == 0 && error == Z_STREAM_END) {
/* Done. */
break;
@@ -167,8 +173,8 @@ deflate_global(data, size, decomp, out)
p = malloc(sizeof(*p) + (size_t)(size * i),
M_CRYPTO_DATA, M_NOWAIT);
if (p == NULL) {
- SDT_PROBE5(opencrypto, deflate, deflate_global,
- bad, decomp, 0, __LINE__, 0, 0);
+ SDT_PROBE6(opencrypto, deflate, deflate_global,
+ bad, decomp, 0, __LINE__, 0, 0, 0);
goto bad;
}
p->next = NULL;
@@ -179,16 +185,9 @@ deflate_global(data, size, decomp, out)
zbuf.avail_out = bufp->size;
} else {
/* Unexpect result. */
- /*
- * Unfortunately we are limited to 5 arguments,
- * thus, again, use two probes.
- */
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, error, __LINE__,
- zbuf.avail_in, zbuf.avail_out);
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, error, __LINE__,
- zbuf.state->dummy, zbuf.total_out);
+ SDT_PROBE6(opencrypto, deflate, deflate_global,
+ bad, decomp, error, __LINE__,
+ zbuf.avail_in, zbuf.avail_out, zbuf.total_out);
goto bad;
}
}
@@ -197,8 +196,8 @@ deflate_global(data, size, decomp, out)
*out = malloc(result, M_CRYPTO_DATA, M_NOWAIT);
if (*out == NULL) {
- SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
- decomp, 0, __LINE__, 0, 0);
+ SDT_PROBE6(opencrypto, deflate, deflate_global, bad,
+ decomp, 0, __LINE__, 0, 0, 0);
goto bad;
}
if (decomp)
@@ -245,21 +244,3 @@ bad2:
*out = NULL;
return 0;
}
-
-void *
-z_alloc(nil, type, size)
- void *nil;
- u_int type, size;
-{
- void *ptr;
-
- ptr = malloc(type *size, M_CRYPTO_DATA, M_NOWAIT);
- return ptr;
-}
-
-void
-z_free(nil, ptr)
- void *nil, *ptr;
-{
- free(ptr, M_CRYPTO_DATA);
-}
diff --git a/freebsd/sys/opencrypto/cryptodev.c b/freebsd/sys/opencrypto/cryptodev.c
index b569cbf7..575142f2 100644
--- a/freebsd/sys/opencrypto/cryptodev.c
+++ b/freebsd/sys/opencrypto/cryptodev.c
@@ -296,6 +296,11 @@ struct fcrypt {
int sesn;
};
+/*
+ * Interval handed to ratecheck() by cryptodev_warn(); it defaults to 60
+ * seconds and is exported read-write as kern.cryptodev_warn_interval.
+ */
+static struct timeval warninterval = { .tv_sec = 60, .tv_usec = 0 };
+SYSCTL_TIMEVAL_SEC(_kern, OID_AUTO, cryptodev_warn_interval, CTLFLAG_RW,
+ &warninterval,
+ "Delay in seconds between warnings of deprecated /dev/crypto algorithms");
+
#ifndef __rtems__
static int cryptof_ioctl(struct file *, u_long, void *,
struct ucred *, struct thread *);
@@ -450,6 +455,9 @@ cryptof_ioctl(
case CRYPTO_CHACHA20:
txform = &enc_xform_chacha20;
break;
+ case CRYPTO_AES_CCM_16:
+ txform = &enc_xform_ccm;
+ break;
default:
CRYPTDEB("invalid cipher");
@@ -494,6 +502,25 @@ cryptof_ioctl(
thash = &auth_hash_nist_gmac_aes_256;
break;
+ case CRYPTO_AES_CCM_CBC_MAC:
+ switch (sop->keylen) {
+ case 16:
+ thash = &auth_hash_ccm_cbc_mac_128;
+ break;
+ case 24:
+ thash = &auth_hash_ccm_cbc_mac_192;
+ break;
+ case 32:
+ thash = &auth_hash_ccm_cbc_mac_256;
+ break;
+ default:
+ CRYPTDEB("Invalid CBC MAC key size %d",
+ sop->keylen);
+ SDT_PROBE1(opencrypto, dev, ioctl,
+ error, __LINE__);
+ return (EINVAL);
+ }
+ break;
#ifdef notdef
case CRYPTO_MD5:
thash = &auth_hash_md5;
@@ -798,6 +825,47 @@ cod_free(struct cryptop_data *cod)
free(cod, M_XDATA);
}
+/*
+ * Issue a rate-limited gone_in() notice when the session uses a cipher or
+ * authenticator that is deprecated for use through /dev/crypto.  Every
+ * algorithm has its own ratecheck() timestamp, so a warning for one
+ * algorithm never suppresses the warning for another.
+ */
+static void
+cryptodev_warn(struct csession *cse)
+{
+	static struct timeval arc4warn, blfwarn, castwarn, deswarn, md5warn;
+	static struct timeval skipwarn, tdeswarn;
+	struct timeval *lastwarn = NULL;
+	const char *what = NULL;
+
+	/* Select the timestamp and message of a deprecated cipher, if any. */
+	switch (cse->cipher) {
+	case CRYPTO_DES_CBC:
+		lastwarn = &deswarn;
+		what = "DES cipher via /dev/crypto";
+		break;
+	case CRYPTO_3DES_CBC:
+		lastwarn = &tdeswarn;
+		what = "3DES cipher via /dev/crypto";
+		break;
+	case CRYPTO_BLF_CBC:
+		lastwarn = &blfwarn;
+		what = "Blowfish cipher via /dev/crypto";
+		break;
+	case CRYPTO_CAST_CBC:
+		lastwarn = &castwarn;
+		what = "CAST128 cipher via /dev/crypto";
+		break;
+	case CRYPTO_SKIPJACK_CBC:
+		lastwarn = &skipwarn;
+		what = "Skipjack cipher via /dev/crypto";
+		break;
+	case CRYPTO_ARC4:
+		lastwarn = &arc4warn;
+		what = "ARC4 cipher via /dev/crypto";
+		break;
+	}
+	if (lastwarn != NULL && ratecheck(lastwarn, &warninterval))
+		gone_in(13, what);
+
+	/* The authenticator is checked independently of the cipher. */
+	if (cse->mac == CRYPTO_MD5_HMAC &&
+	    ratecheck(&md5warn, &warninterval))
+		gone_in(13, "MD5-HMAC authenticator via /dev/crypto");
+}
+
static int
cryptodev_op(
struct csession *cse,
@@ -920,6 +988,7 @@ cryptodev_op(
error = EINVAL;
goto bail;
}
+ cryptodev_warn(cse);
again:
/*
@@ -1030,12 +1099,13 @@ cryptodev_aead(
}
/*
- * For GCM, crd_len covers only the AAD. For other ciphers
+ * For GCM/CCM, crd_len covers only the AAD. For other ciphers
* chained with an HMAC, crd_len covers both the AAD and the
* cipher text.
*/
crda->crd_skip = 0;
- if (cse->cipher == CRYPTO_AES_NIST_GCM_16)
+ if (cse->cipher == CRYPTO_AES_NIST_GCM_16 ||
+ cse->cipher == CRYPTO_AES_CCM_16)
crda->crd_len = caead->aadlen;
else
crda->crd_len = caead->aadlen + caead->len;
@@ -1088,6 +1158,7 @@ cryptodev_aead(
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);
goto bail;
}
+ cryptodev_warn(cse);
again:
/*
* Let the dispatch run unlocked, then, interlock against the
diff --git a/freebsd/sys/opencrypto/cryptodev.h b/freebsd/sys/opencrypto/cryptodev.h
index 6431e6d8..bd71e518 100644
--- a/freebsd/sys/opencrypto/cryptodev.h
+++ b/freebsd/sys/opencrypto/cryptodev.h
@@ -63,10 +63,10 @@
#define _CRYPTO_CRYPTO_H_
#include <sys/ioccom.h>
-#include <sys/_task.h>
#ifdef _KERNEL
#include <opencrypto/_cryptodev.h>
+#include <sys/_task.h>
#endif
/* Some initial values */
@@ -86,6 +86,7 @@
#define SHA1_KPDK_HASH_LEN 20
#define AES_GMAC_HASH_LEN 16
#define POLY1305_HASH_LEN 16
+#define AES_CBC_MAC_HASH_LEN 16
/* Maximum hash algorithm result length */
#define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */
@@ -107,6 +108,9 @@
#define AES_128_GMAC_KEY_LEN 16
#define AES_192_GMAC_KEY_LEN 24
#define AES_256_GMAC_KEY_LEN 32
+#define AES_128_CBC_MAC_KEY_LEN 16
+#define AES_192_CBC_MAC_KEY_LEN 24
+#define AES_256_CBC_MAC_KEY_LEN 32
#define POLY1305_KEY_LEN 32
@@ -129,6 +133,7 @@
#define ARC4_IV_LEN 1
#define AES_GCM_IV_LEN 12
+#define AES_CCM_IV_LEN 12
#define AES_XTS_IV_LEN 8
#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
@@ -199,7 +204,9 @@
#define CRYPTO_SHA2_384 36
#define CRYPTO_SHA2_512 37
#define CRYPTO_POLY1305 38
-#define CRYPTO_ALGORITHM_MAX 38 /* Keep updated - see below */
+#define CRYPTO_AES_CCM_CBC_MAC 39 /* auth side */
+#define CRYPTO_AES_CCM_16 40 /* cipher side */
+#define CRYPTO_ALGORITHM_MAX 40 /* Keep updated - see below */
#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \
(x) <= CRYPTO_ALGORITHM_MAX)
diff --git a/freebsd/sys/opencrypto/cryptosoft.c b/freebsd/sys/opencrypto/cryptosoft.c
index 43455b48..5e63167a 100644
--- a/freebsd/sys/opencrypto/cryptosoft.c
+++ b/freebsd/sys/opencrypto/cryptosoft.c
@@ -64,6 +64,9 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <rtems/bsd/local/cryptodev_if.h>
+/* swcr_authenc() uses a single ivlen for both AES-GCM and AES-CCM. */
+_Static_assert(AES_CCM_IV_LEN == AES_GCM_IV_LEN,
+ "AES_GCM_IV_LEN must currently be the same as AES_CCM_IV_LEN");
+
static int32_t swcr_id;
u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN];
@@ -508,6 +511,7 @@ swcr_authenc(struct cryptop *crp)
caddr_t buf = (caddr_t)crp->crp_buf;
uint32_t *blkp;
int aadlen, blksz, i, ivlen, len, iskip, oskip, r;
+ int isccm = 0;
ivlen = blksz = iskip = oskip = 0;
@@ -522,13 +526,18 @@ swcr_authenc(struct cryptop *crp)
sw = &ses->swcr_algorithms[i];
switch (sw->sw_alg) {
+ case CRYPTO_AES_CCM_16:
case CRYPTO_AES_NIST_GCM_16:
case CRYPTO_AES_NIST_GMAC:
swe = sw;
crde = crd;
exf = swe->sw_exf;
- ivlen = 12;
+ /* AES_CCM_IV_LEN and AES_GCM_IV_LEN are both 12 */
+ ivlen = AES_CCM_IV_LEN;
break;
+ case CRYPTO_AES_CCM_CBC_MAC:
+ isccm = 1;
+ /* FALLTHROUGH */
case CRYPTO_AES_128_NIST_GMAC:
case CRYPTO_AES_192_NIST_GMAC:
case CRYPTO_AES_256_NIST_GMAC:
@@ -546,8 +555,26 @@ swcr_authenc(struct cryptop *crp)
}
if (crde == NULL || crda == NULL)
return (EINVAL);
+ /*
+ * We need to make sure that the auth algorithm matches the
+ * encr algorithm. Specifically, AES-GCM must go with
+ * AES NIST GMAC, and AES-CCM must go with CBC-MAC.
+ */
+ if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16) {
+ switch (crda->crd_alg) {
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ break; /* Good! */
+ default:
+ return (EINVAL); /* Not good! */
+ }
+ } else if (crde->crd_alg == CRYPTO_AES_CCM_16 &&
+ crda->crd_alg != CRYPTO_AES_CCM_CBC_MAC)
+ return (EINVAL);
- if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 &&
+ if ((crde->crd_alg == CRYPTO_AES_NIST_GCM_16 ||
+ crde->crd_alg == CRYPTO_AES_CCM_16) &&
(crde->crd_flags & CRD_F_IV_EXPLICIT) == 0)
return (EINVAL);
@@ -578,6 +605,15 @@ swcr_authenc(struct cryptop *crp)
}
}
+ if (swa->sw_alg == CRYPTO_AES_CCM_CBC_MAC) {
+ /*
+ * AES CCM-CBC needs to know the length of
+ * both the auth data, and payload data, before
+ * doing the auth computation.
+ */
+ ctx.aes_cbc_mac_ctx.authDataLength = crda->crd_len;
+ ctx.aes_cbc_mac_ctx.cryptDataLength = crde->crd_len;
+ }
/* Supply MAC with IV */
if (axf->Reinit)
axf->Reinit(&ctx, iv, ivlen);
@@ -612,16 +648,30 @@ swcr_authenc(struct cryptop *crp)
bzero(blk, blksz);
crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len,
blk);
+ /*
+ * One of the problems with CCM+CBC is that the authentication
+ * is done on the unencrypted data. As a result, we have
+ * to do the authentication update at different times,
+ * depending on whether it's CCM or not.
+ */
if (crde->crd_flags & CRD_F_ENCRYPT) {
+ if (isccm)
+ axf->Update(&ctx, blk, len);
if (exf->encrypt_multi != NULL)
exf->encrypt_multi(swe->sw_kschedule, blk,
len);
else
exf->encrypt(swe->sw_kschedule, blk);
- axf->Update(&ctx, blk, len);
+ if (!isccm)
+ axf->Update(&ctx, blk, len);
crypto_copyback(crp->crp_flags, buf,
crde->crd_skip + i, len, blk);
} else {
+ if (isccm) {
+ KASSERT(exf->encrypt_multi == NULL,
+ ("assume CCM is single-block only"));
+ exf->decrypt(swe->sw_kschedule, blk);
+ }
axf->Update(&ctx, blk, len);
}
}
@@ -652,6 +702,11 @@ swcr_authenc(struct cryptop *crp)
r = timingsafe_bcmp(aalg, uaalg, axf->hashsize);
if (r == 0) {
/* tag matches, decrypt data */
+ if (isccm) {
+ KASSERT(exf->reinit != NULL,
+ ("AES-CCM reinit function must be set"));
+ exf->reinit(swe->sw_kschedule, iv);
+ }
for (i = 0; i < crde->crd_len; i += blksz) {
len = MIN(crde->crd_len - i, blksz);
if (len < blksz)
@@ -801,6 +856,9 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri)
case CRYPTO_AES_NIST_GCM_16:
txf = &enc_xform_aes_nist_gcm;
goto enccommon;
+ case CRYPTO_AES_CCM_16:
+ txf = &enc_xform_ccm;
+ goto enccommon;
case CRYPTO_AES_NIST_GMAC:
txf = &enc_xform_aes_nist_gmac;
swd->sw_exf = txf;
@@ -945,6 +1003,22 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri)
swd->sw_axf = axf;
break;
+ case CRYPTO_AES_CCM_CBC_MAC:
+ switch (cri->cri_klen) {
+ case 128:
+ axf = &auth_hash_ccm_cbc_mac_128;
+ break;
+ case 192:
+ axf = &auth_hash_ccm_cbc_mac_192;
+ break;
+ case 256:
+ axf = &auth_hash_ccm_cbc_mac_256;
+ break;
+ default:
+ swcr_freesession(dev, cses);
+ return EINVAL;
+ }
+ goto auth4common;
case CRYPTO_AES_128_NIST_GMAC:
axf = &auth_hash_nist_gmac_aes_128;
goto auth4common;
@@ -1044,6 +1118,7 @@ swcr_freesession(device_t dev, crypto_session_t cses)
case CRYPTO_CAMELLIA_CBC:
case CRYPTO_NULL_CBC:
case CRYPTO_CHACHA20:
+ case CRYPTO_AES_CCM_16:
txf = swd->sw_exf;
if (swd->sw_kschedule)
@@ -1058,6 +1133,7 @@ swcr_freesession(device_t dev, crypto_session_t cses)
case CRYPTO_SHA2_512_HMAC:
case CRYPTO_RIPEMD160_HMAC:
case CRYPTO_NULL_HMAC:
+ case CRYPTO_AES_CCM_CBC_MAC:
axf = swd->sw_axf;
if (swd->sw_ictx) {
@@ -1203,6 +1279,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
case CRYPTO_AES_128_NIST_GMAC:
case CRYPTO_AES_192_NIST_GMAC:
case CRYPTO_AES_256_NIST_GMAC:
+ case CRYPTO_AES_CCM_16:
+ case CRYPTO_AES_CCM_CBC_MAC:
crp->crp_etype = swcr_authenc(crp);
goto done;
@@ -1293,6 +1371,8 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_BLAKE2B);
REGISTER(CRYPTO_BLAKE2S);
REGISTER(CRYPTO_CHACHA20);
+ REGISTER(CRYPTO_AES_CCM_16);
+ REGISTER(CRYPTO_AES_CCM_CBC_MAC);
REGISTER(CRYPTO_POLY1305);
#undef REGISTER
diff --git a/freebsd/sys/opencrypto/deflate.h b/freebsd/sys/opencrypto/deflate.h
index d31a3bf2..1be746d7 100644
--- a/freebsd/sys/opencrypto/deflate.h
+++ b/freebsd/sys/opencrypto/deflate.h
@@ -36,16 +36,12 @@
#ifndef _CRYPTO_DEFLATE_H_
#define _CRYPTO_DEFLATE_H_
-#include <sys/zlib.h>
-
#define Z_METHOD 8
#define Z_MEMLEVEL 8
#define MINCOMP 2 /* won't be used, but must be defined */
#define ZBUF 10
u_int32_t deflate_global(u_int8_t *, u_int32_t, int, u_int8_t **);
-void *z_alloc(void *, u_int, u_int);
-void z_free(void *, void *);
/*
* We are going to use a combined allocation to hold the metadata
diff --git a/freebsd/sys/opencrypto/skipjack.c b/freebsd/sys/opencrypto/skipjack.c
index 047cf642..455d360d 100644
--- a/freebsd/sys/opencrypto/skipjack.c
+++ b/freebsd/sys/opencrypto/skipjack.c
@@ -67,7 +67,7 @@ static const u_int8_t ftable[0x100] =
*/
void
-subkey_table_gen (u_int8_t *key, u_int8_t **key_tables)
+subkey_table_gen (const u_int8_t *key, u_int8_t **key_tables)
{
int i, k;
diff --git a/freebsd/sys/opencrypto/skipjack.h b/freebsd/sys/opencrypto/skipjack.h
index 80367ea4..95b0b9e4 100644
--- a/freebsd/sys/opencrypto/skipjack.h
+++ b/freebsd/sys/opencrypto/skipjack.h
@@ -19,6 +19,6 @@
extern void skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key);
extern void skipjack_backwards(u_int8_t *cipher, u_int8_t *plain, u_int8_t **key);
-extern void subkey_table_gen(u_int8_t *key, u_int8_t **key_tables);
+extern void subkey_table_gen(const u_int8_t *key, u_int8_t **key_tables);
#endif
diff --git a/freebsd/sys/opencrypto/xform_aes_icm.c b/freebsd/sys/opencrypto/xform_aes_icm.c
index 8d3694fa..ba3eca0a 100644
--- a/freebsd/sys/opencrypto/xform_aes_icm.c
+++ b/freebsd/sys/opencrypto/xform_aes_icm.c
@@ -52,11 +52,12 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/xform_enc.h>
-static int aes_icm_setkey(u_int8_t **, u_int8_t *, int);
+static int aes_icm_setkey(u_int8_t **, const u_int8_t *, int);
static void aes_icm_crypt(caddr_t, u_int8_t *);
static void aes_icm_zerokey(u_int8_t **);
-static void aes_icm_reinit(caddr_t, u_int8_t *);
-static void aes_gcm_reinit(caddr_t, u_int8_t *);
+static void aes_icm_reinit(caddr_t, const u_int8_t *);
+static void aes_gcm_reinit(caddr_t, const u_int8_t *);
+static void aes_ccm_reinit(caddr_t, const u_int8_t *);
/* Encryption instances */
struct enc_xform enc_xform_aes_icm = {
@@ -79,11 +80,23 @@ struct enc_xform enc_xform_aes_nist_gcm = {
aes_gcm_reinit,
};
+/*
+ * Cipher side of AES-CCM.  Encryption and decryption both go through
+ * aes_icm_crypt, and key handling reuses the aes_icm_* helpers; only the
+ * reinit hook (aes_ccm_reinit) is specific to CCM.
+ */
+struct enc_xform enc_xform_ccm = {
+ .type = CRYPTO_AES_CCM_16,
+ .name = "AES-CCM",
+ .blocksize = AES_ICM_BLOCK_LEN, .ivsize = AES_CCM_IV_LEN,
+ .minkey = AES_MIN_KEY, .maxkey = AES_MAX_KEY,
+ .encrypt = aes_icm_crypt,
+ .decrypt = aes_icm_crypt,
+ .setkey = aes_icm_setkey,
+ .zerokey = aes_icm_zerokey,
+ .reinit = aes_ccm_reinit,
+};
+
/*
* Encryption wrapper routines.
*/
static void
-aes_icm_reinit(caddr_t key, u_int8_t *iv)
+aes_icm_reinit(caddr_t key, const u_int8_t *iv)
{
struct aes_icm_ctx *ctx;
@@ -92,7 +105,7 @@ aes_icm_reinit(caddr_t key, u_int8_t *iv)
}
static void
-aes_gcm_reinit(caddr_t key, u_int8_t *iv)
+aes_gcm_reinit(caddr_t key, const u_int8_t *iv)
{
struct aes_icm_ctx *ctx;
@@ -105,6 +118,21 @@ aes_gcm_reinit(caddr_t key, u_int8_t *iv)
}
static void
+aes_ccm_reinit(caddr_t key, const u_int8_t *iv)
+{
+	struct aes_icm_ctx *icm = (struct aes_icm_ctx *)key;
+
+	/*
+	 * Build the initial counter block: a flags byte, AES_CCM_IV_LEN
+	 * bytes of IV, and a counter in the remaining bytes that starts
+	 * at 1.
+	 */
+	memset(icm->ac_block, 0, sizeof(icm->ac_block));
+
+	/* With a 12 byte nonce, 3 bytes are left for the length field. */
+	icm->ac_block[0] = (15 - AES_CCM_IV_LEN) - 1;
+	memcpy(icm->ac_block + 1, iv, AES_CCM_IV_LEN);
+	icm->ac_block[AESICM_BLOCKSIZE - 1] = 1;
+}
+
+static void
aes_icm_crypt(caddr_t key, u_int8_t *data)
{
struct aes_icm_ctx *ctx;
@@ -125,7 +153,7 @@ aes_icm_crypt(caddr_t key, u_int8_t *data)
}
static int
-aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len)
+aes_icm_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
struct aes_icm_ctx *ctx;
@@ -138,7 +166,7 @@ aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len)
return ENOMEM;
ctx = (struct aes_icm_ctx *)*sched;
- ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8);
+ ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, key, len * 8);
return 0;
}
diff --git a/freebsd/sys/opencrypto/xform_aes_xts.c b/freebsd/sys/opencrypto/xform_aes_xts.c
index dedbe627..33f66a5d 100644
--- a/freebsd/sys/opencrypto/xform_aes_xts.c
+++ b/freebsd/sys/opencrypto/xform_aes_xts.c
@@ -52,11 +52,11 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/xform_enc.h>
-static int aes_xts_setkey(u_int8_t **, u_int8_t *, int);
+static int aes_xts_setkey(u_int8_t **, const u_int8_t *, int);
static void aes_xts_encrypt(caddr_t, u_int8_t *);
static void aes_xts_decrypt(caddr_t, u_int8_t *);
static void aes_xts_zerokey(u_int8_t **);
-static void aes_xts_reinit(caddr_t, u_int8_t *);
+static void aes_xts_reinit(caddr_t, const u_int8_t *);
/* Encryption instances */
struct enc_xform enc_xform_aes_xts = {
@@ -73,7 +73,7 @@ struct enc_xform enc_xform_aes_xts = {
* Encryption wrapper routines.
*/
static void
-aes_xts_reinit(caddr_t key, u_int8_t *iv)
+aes_xts_reinit(caddr_t key, const u_int8_t *iv)
{
struct aes_xts_ctx *ctx = (struct aes_xts_ctx *)key;
u_int64_t blocknum;
@@ -136,7 +136,7 @@ aes_xts_decrypt(caddr_t key, u_int8_t *data)
}
static int
-aes_xts_setkey(u_int8_t **sched, u_int8_t *key, int len)
+aes_xts_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
struct aes_xts_ctx *ctx;
diff --git a/freebsd/sys/opencrypto/xform_auth.h b/freebsd/sys/opencrypto/xform_auth.h
index 9af0f8e6..9b072625 100644
--- a/freebsd/sys/opencrypto/xform_auth.h
+++ b/freebsd/sys/opencrypto/xform_auth.h
@@ -42,6 +42,7 @@
#include <crypto/sha2/sha512.h>
#include <opencrypto/rmd160.h>
#include <opencrypto/gmac.h>
+#include <opencrypto/cbc_mac.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/xform_userland.h>
@@ -85,6 +86,9 @@ extern struct auth_hash auth_hash_nist_gmac_aes_256;
extern struct auth_hash auth_hash_blake2b;
extern struct auth_hash auth_hash_blake2s;
extern struct auth_hash auth_hash_poly1305;
+extern struct auth_hash auth_hash_ccm_cbc_mac_128;
+extern struct auth_hash auth_hash_ccm_cbc_mac_192;
+extern struct auth_hash auth_hash_ccm_cbc_mac_256;
union authctx {
MD5_CTX md5ctx;
@@ -95,6 +99,7 @@ union authctx {
SHA384_CTX sha384ctx;
SHA512_CTX sha512ctx;
struct aes_gmac_ctx aes_gmac_ctx;
+ struct aes_cbc_mac_ctx aes_cbc_mac_ctx;
};
#endif /* _CRYPTO_XFORM_AUTH_H_ */
diff --git a/freebsd/sys/opencrypto/xform_blf.c b/freebsd/sys/opencrypto/xform_blf.c
index b4be5f8d..d0432c99 100644
--- a/freebsd/sys/opencrypto/xform_blf.c
+++ b/freebsd/sys/opencrypto/xform_blf.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <crypto/blowfish/blowfish.h>
#include <opencrypto/xform_enc.h>
-static int blf_setkey(u_int8_t **, u_int8_t *, int);
+static int blf_setkey(u_int8_t **, const u_int8_t *, int);
static void blf_encrypt(caddr_t, u_int8_t *);
static void blf_decrypt(caddr_t, u_int8_t *);
static void blf_zerokey(u_int8_t **);
@@ -104,7 +104,7 @@ blf_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-blf_setkey(u_int8_t **sched, u_int8_t *key, int len)
+blf_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
int err;
diff --git a/freebsd/sys/opencrypto/xform_cast5.c b/freebsd/sys/opencrypto/xform_cast5.c
index 85b346eb..f4d9472d 100644
--- a/freebsd/sys/opencrypto/xform_cast5.c
+++ b/freebsd/sys/opencrypto/xform_cast5.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/cast.h>
#include <opencrypto/xform_enc.h>
-static int cast5_setkey(u_int8_t **, u_int8_t *, int);
+static int cast5_setkey(u_int8_t **, const u_int8_t *, int);
static void cast5_encrypt(caddr_t, u_int8_t *);
static void cast5_decrypt(caddr_t, u_int8_t *);
static void cast5_zerokey(u_int8_t **);
@@ -85,7 +85,7 @@ cast5_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-cast5_setkey(u_int8_t **sched, u_int8_t *key, int len)
+cast5_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
int err;
diff --git a/freebsd/sys/opencrypto/xform_cbc_mac.c b/freebsd/sys/opencrypto/xform_cbc_mac.c
new file mode 100644
index 00000000..1de2e976
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_cbc_mac.c
@@ -0,0 +1,57 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/cbc_mac.h>
+#include <opencrypto/xform_auth.h>
+
+/* Authentication instances */
+struct auth_hash auth_hash_ccm_cbc_mac_128 = {
+ .type = CRYPTO_AES_CCM_CBC_MAC,
+ .name = "CBC-CCM-AES-128",
+ .keysize = AES_128_CBC_MAC_KEY_LEN,
+ .hashsize = AES_CBC_MAC_HASH_LEN,
+ .ctxsize = sizeof(struct aes_cbc_mac_ctx),
+ .blocksize = CCM_CBC_BLOCK_LEN,
+ .Init = (void (*)(void *)) AES_CBC_MAC_Init,
+ .Setkey =
+ (void (*)(void *, const u_int8_t *, u_int16_t))AES_CBC_MAC_Setkey,
+ .Reinit =
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit,
+ .Update =
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update,
+ .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final,
+};
+struct auth_hash auth_hash_ccm_cbc_mac_192 = {
+ .type = CRYPTO_AES_CCM_CBC_MAC,
+ .name = "CBC-CCM-AES-192",
+ .keysize = AES_192_CBC_MAC_KEY_LEN,
+ .hashsize = AES_CBC_MAC_HASH_LEN,
+ .ctxsize = sizeof(struct aes_cbc_mac_ctx),
+ .blocksize = CCM_CBC_BLOCK_LEN,
+ .Init = (void (*)(void *)) AES_CBC_MAC_Init,
+ .Setkey =
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey,
+ .Reinit =
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit,
+ .Update =
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update,
+ .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final,
+};
+struct auth_hash auth_hash_ccm_cbc_mac_256 = {
+ .type = CRYPTO_AES_CCM_CBC_MAC,
+ .name = "CBC-CCM-AES-256",
+ .keysize = AES_256_CBC_MAC_KEY_LEN,
+ .hashsize = AES_CBC_MAC_HASH_LEN,
+ .ctxsize = sizeof(struct aes_cbc_mac_ctx),
+ .blocksize = CCM_CBC_BLOCK_LEN,
+ .Init = (void (*)(void *)) AES_CBC_MAC_Init,
+ .Setkey =
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey,
+ .Reinit =
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit,
+ .Update =
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update,
+ .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final,
+};
diff --git a/freebsd/sys/opencrypto/xform_cml.c b/freebsd/sys/opencrypto/xform_cml.c
index c807fa97..2f857fe6 100644
--- a/freebsd/sys/opencrypto/xform_cml.c
+++ b/freebsd/sys/opencrypto/xform_cml.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <crypto/camellia/camellia.h>
#include <opencrypto/xform_enc.h>
-static int cml_setkey(u_int8_t **, u_int8_t *, int);
+static int cml_setkey(u_int8_t **, const u_int8_t *, int);
static void cml_encrypt(caddr_t, u_int8_t *);
static void cml_decrypt(caddr_t, u_int8_t *);
static void cml_zerokey(u_int8_t **);
@@ -87,7 +87,7 @@ cml_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-cml_setkey(u_int8_t **sched, u_int8_t *key, int len)
+cml_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
int err;
@@ -96,7 +96,7 @@ cml_setkey(u_int8_t **sched, u_int8_t *key, int len)
*sched = KMALLOC(sizeof(camellia_ctx), M_CRYPTO_DATA,
M_NOWAIT|M_ZERO);
if (*sched != NULL) {
- camellia_set_key((camellia_ctx *) *sched, (u_char *) key,
+ camellia_set_key((camellia_ctx *) *sched, key,
len * 8);
err = 0;
} else
diff --git a/freebsd/sys/opencrypto/xform_des1.c b/freebsd/sys/opencrypto/xform_des1.c
index cbce5e29..0a778eef 100644
--- a/freebsd/sys/opencrypto/xform_des1.c
+++ b/freebsd/sys/opencrypto/xform_des1.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <crypto/des/des.h>
#include <opencrypto/xform_enc.h>
-static int des1_setkey(u_int8_t **, u_int8_t *, int);
+static int des1_setkey(u_int8_t **, const u_int8_t *, int);
static void des1_encrypt(caddr_t, u_int8_t *);
static void des1_decrypt(caddr_t, u_int8_t *);
static void des1_zerokey(u_int8_t **);
@@ -75,23 +75,21 @@ struct enc_xform enc_xform_des = {
static void
des1_encrypt(caddr_t key, u_int8_t *blk)
{
- des_cblock *cb = (des_cblock *) blk;
des_key_schedule *p = (des_key_schedule *) key;
- des_ecb_encrypt(cb, cb, p[0], DES_ENCRYPT);
+ des_ecb_encrypt(blk, blk, p[0], DES_ENCRYPT);
}
static void
des1_decrypt(caddr_t key, u_int8_t *blk)
{
- des_cblock *cb = (des_cblock *) blk;
des_key_schedule *p = (des_key_schedule *) key;
- des_ecb_encrypt(cb, cb, p[0], DES_DECRYPT);
+ des_ecb_encrypt(blk, blk, p[0], DES_DECRYPT);
}
static int
-des1_setkey(u_int8_t **sched, u_int8_t *key, int len)
+des1_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
des_key_schedule *p;
int err;
@@ -99,7 +97,7 @@ des1_setkey(u_int8_t **sched, u_int8_t *key, int len)
p = KMALLOC(sizeof (des_key_schedule),
M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
if (p != NULL) {
- des_set_key((des_cblock *) key, p[0]);
+ des_set_key(key, p[0]);
err = 0;
} else
err = ENOMEM;
diff --git a/freebsd/sys/opencrypto/xform_des3.c b/freebsd/sys/opencrypto/xform_des3.c
index 1b26b622..ea32a1ab 100644
--- a/freebsd/sys/opencrypto/xform_des3.c
+++ b/freebsd/sys/opencrypto/xform_des3.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <crypto/des/des.h>
#include <opencrypto/xform_enc.h>
-static int des3_setkey(u_int8_t **, u_int8_t *, int);
+static int des3_setkey(u_int8_t **, const u_int8_t *, int);
static void des3_encrypt(caddr_t, u_int8_t *);
static void des3_decrypt(caddr_t, u_int8_t *);
static void des3_zerokey(u_int8_t **);
@@ -76,23 +76,21 @@ struct enc_xform enc_xform_3des = {
static void
des3_encrypt(caddr_t key, u_int8_t *blk)
{
- des_cblock *cb = (des_cblock *) blk;
des_key_schedule *p = (des_key_schedule *) key;
- des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_ENCRYPT);
+ des_ecb3_encrypt(blk, blk, p[0], p[1], p[2], DES_ENCRYPT);
}
static void
des3_decrypt(caddr_t key, u_int8_t *blk)
{
- des_cblock *cb = (des_cblock *) blk;
des_key_schedule *p = (des_key_schedule *) key;
- des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_DECRYPT);
+ des_ecb3_encrypt(blk, blk, p[0], p[1], p[2], DES_DECRYPT);
}
static int
-des3_setkey(u_int8_t **sched, u_int8_t *key, int len)
+des3_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
des_key_schedule *p;
int err;
@@ -100,9 +98,9 @@ des3_setkey(u_int8_t **sched, u_int8_t *key, int len)
p = KMALLOC(3*sizeof (des_key_schedule),
M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
if (p != NULL) {
- des_set_key((des_cblock *)(key + 0), p[0]);
- des_set_key((des_cblock *)(key + 8), p[1]);
- des_set_key((des_cblock *)(key + 16), p[2]);
+ des_set_key(key + 0, p[0]);
+ des_set_key(key + 8, p[1]);
+ des_set_key(key + 16, p[2]);
err = 0;
} else
err = ENOMEM;
diff --git a/freebsd/sys/opencrypto/xform_enc.h b/freebsd/sys/opencrypto/xform_enc.h
index 545e0ec2..e2b87f5c 100644
--- a/freebsd/sys/opencrypto/xform_enc.h
+++ b/freebsd/sys/opencrypto/xform_enc.h
@@ -56,9 +56,9 @@ struct enc_xform {
u_int16_t minkey, maxkey;
void (*encrypt) (caddr_t, u_int8_t *);
void (*decrypt) (caddr_t, u_int8_t *);
- int (*setkey) (u_int8_t **, u_int8_t *, int len);
+ int (*setkey) (u_int8_t **, const u_int8_t *, int len);
void (*zerokey) (u_int8_t **);
- void (*reinit) (caddr_t, u_int8_t *);
+ void (*reinit) (caddr_t, const u_int8_t *);
/*
* Encrypt/decrypt 1+ blocks of input -- total size is 'len' bytes.
* Len is guaranteed to be a multiple of the defined 'blocksize'.
@@ -84,6 +84,7 @@ extern struct enc_xform enc_xform_aes_xts;
extern struct enc_xform enc_xform_arc4;
extern struct enc_xform enc_xform_camellia;
extern struct enc_xform enc_xform_chacha20;
+extern struct enc_xform enc_xform_ccm;
struct aes_icm_ctx {
u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)];
diff --git a/freebsd/sys/opencrypto/xform_null.c b/freebsd/sys/opencrypto/xform_null.c
index 3c499b31..28f20bdf 100644
--- a/freebsd/sys/opencrypto/xform_null.c
+++ b/freebsd/sys/opencrypto/xform_null.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/xform_auth.h>
#include <opencrypto/xform_enc.h>
-static int null_setkey(u_int8_t **, u_int8_t *, int);
+static int null_setkey(u_int8_t **, const u_int8_t *, int);
static void null_encrypt(caddr_t, u_int8_t *);
static void null_decrypt(caddr_t, u_int8_t *);
static void null_zerokey(u_int8_t **);
@@ -104,7 +104,7 @@ null_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-null_setkey(u_int8_t **sched, u_int8_t *key, int len)
+null_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
*sched = NULL;
return 0;
diff --git a/freebsd/sys/opencrypto/xform_rijndael.c b/freebsd/sys/opencrypto/xform_rijndael.c
index 2c974f3d..378e86c0 100644
--- a/freebsd/sys/opencrypto/xform_rijndael.c
+++ b/freebsd/sys/opencrypto/xform_rijndael.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <crypto/rijndael/rijndael.h>
#include <opencrypto/xform_enc.h>
-static int rijndael128_setkey(u_int8_t **, u_int8_t *, int);
+static int rijndael128_setkey(u_int8_t **, const u_int8_t *, int);
static void rijndael128_encrypt(caddr_t, u_int8_t *);
static void rijndael128_decrypt(caddr_t, u_int8_t *);
static void rijndael128_zerokey(u_int8_t **);
@@ -87,7 +87,7 @@ rijndael128_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len)
+rijndael128_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
int err;
@@ -96,7 +96,7 @@ rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len)
*sched = KMALLOC(sizeof(rijndael_ctx), M_CRYPTO_DATA,
M_NOWAIT|M_ZERO);
if (*sched != NULL) {
- rijndael_set_key((rijndael_ctx *) *sched, (u_char *) key,
+ rijndael_set_key((rijndael_ctx *) *sched, key,
len * 8);
err = 0;
} else
diff --git a/freebsd/sys/opencrypto/xform_skipjack.c b/freebsd/sys/opencrypto/xform_skipjack.c
index 94090d0d..22d74b36 100644
--- a/freebsd/sys/opencrypto/xform_skipjack.c
+++ b/freebsd/sys/opencrypto/xform_skipjack.c
@@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/skipjack.h>
#include <opencrypto/xform_enc.h>
-static int skipjack_setkey(u_int8_t **, u_int8_t *, int);
+static int skipjack_setkey(u_int8_t **, const u_int8_t *, int);
static void skipjack_encrypt(caddr_t, u_int8_t *);
static void skipjack_decrypt(caddr_t, u_int8_t *);
static void skipjack_zerokey(u_int8_t **);
@@ -85,7 +85,7 @@ skipjack_decrypt(caddr_t key, u_int8_t *blk)
}
static int
-skipjack_setkey(u_int8_t **sched, u_int8_t *key, int len)
+skipjack_setkey(u_int8_t **sched, const u_int8_t *key, int len)
{
int err;
diff --git a/freebsd/sys/powerpc/include/machine/cpufunc.h b/freebsd/sys/powerpc/include/machine/cpufunc.h
index 204c4801..bad2042b 100644
--- a/freebsd/sys/powerpc/include/machine/cpufunc.h
+++ b/freebsd/sys/powerpc/include/machine/cpufunc.h
@@ -212,6 +212,43 @@ get_pcpu(void)
return (ret);
}
+/* "NOP" operations to signify priorities to the kernel. */
+static __inline void
+nop_prio_vlow(void)
+{
+ __asm __volatile("or 31,31,31");
+}
+
+static __inline void
+nop_prio_low(void)
+{
+ __asm __volatile("or 1,1,1");
+}
+
+static __inline void
+nop_prio_mlow(void)
+{
+ __asm __volatile("or 6,6,6");
+}
+
+static __inline void
+nop_prio_medium(void)
+{
+ __asm __volatile("or 2,2,2");
+}
+
+static __inline void
+nop_prio_mhigh(void)
+{
+ __asm __volatile("or 5,5,5");
+}
+
+static __inline void
+nop_prio_high(void)
+{
+ __asm __volatile("or 3,3,3");
+}
+
#endif /* _KERNEL */
#endif /* !_MACHINE_CPUFUNC_H_ */
diff --git a/freebsd/sys/powerpc/include/machine/intr_machdep.h b/freebsd/sys/powerpc/include/machine/intr_machdep.h
index 6ece0fa8..7ac54253 100644
--- a/freebsd/sys/powerpc/include/machine/intr_machdep.h
+++ b/freebsd/sys/powerpc/include/machine/intr_machdep.h
@@ -54,7 +54,7 @@ u_int powerpc_get_irq(uint32_t, u_int);
void powerpc_dispatch_intr(u_int, struct trapframe *);
int powerpc_enable_intr(void);
int powerpc_setup_intr(const char *, u_int, driver_filter_t, driver_intr_t,
- void *, enum intr_type, void **);
+ void *, enum intr_type, void **, int);
int powerpc_teardown_intr(void *);
int powerpc_bind_intr(u_int irq, u_char cpu);
int powerpc_config_intr(int, enum intr_trigger, enum intr_polarity);
diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h
index 8fa828e1..807b1a0d 100644
--- a/freebsd/sys/powerpc/include/machine/spr.h
+++ b/freebsd/sys/powerpc/include/machine/spr.h
@@ -93,11 +93,12 @@
#define SPR_MQ 0x000 /* .6. 601 MQ register */
#define SPR_XER 0x001 /* 468 Fixed Point Exception Register */
+#define SPR_DSCR 0x003 /* .6. Data Stream Control Register (Unprivileged) */
#define SPR_RTCU_R 0x004 /* .6. 601 RTC Upper - Read */
#define SPR_RTCL_R 0x005 /* .6. 601 RTC Lower - Read */
#define SPR_LR 0x008 /* 468 Link Register */
#define SPR_CTR 0x009 /* 468 Count Register */
-#define SPR_DSCR 0x011 /* Data Stream Control Register */
+#define SPR_DSCRP 0x011 /* Data Stream Control Register (Privileged) */
#define SPR_DSISR 0x012 /* .68 DSI exception source */
#define DSISR_DIRECT 0x80000000 /* Direct-store error exception */
#define DSISR_NOTFOUND 0x40000000 /* Translation not found */
@@ -122,19 +123,27 @@
#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
#define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */
#define SPR_FSCR 0x099 /* Facility Status and Control Register */
-#define FSCR_IC_MASK 0xFF00000000000000ULL /* FSCR[0:7] is Interrupt Cause */
-#define FSCR_IC_FP 0x0000000000000000ULL /* FP unavailable */
-#define FSCR_IC_VSX 0x0100000000000000ULL /* VSX unavailable */
-#define FSCR_IC_DSCR 0x0200000000000000ULL /* Access to the DSCR at SPRs 3 or 17 */
-#define FSCR_IC_PM 0x0300000000000000ULL /* Read or write access of a Performance Monitor SPR in group A */
-#define FSCR_IC_BHRB 0x0400000000000000ULL /* Execution of a BHRB Instruction */
-#define FSCR_IC_HTM 0x0500000000000000ULL /* Access to a Transactional Memory */
+#define FSCR_IC_MASK 0xFF00000000000000ULL /* FSCR[0:7] is Interrupt Cause */
+#define FSCR_IC_FP 0x0000000000000000ULL /* FP unavailable */
+#define FSCR_IC_VSX 0x0100000000000000ULL /* VSX unavailable */
+#define FSCR_IC_DSCR 0x0200000000000000ULL /* Access to the DSCR at SPRs 3 or 17 */
+#define FSCR_IC_PM 0x0300000000000000ULL /* Read or write access of a Performance Monitor SPR in group A */
+#define FSCR_IC_BHRB 0x0400000000000000ULL /* Execution of a BHRB Instruction */
+#define FSCR_IC_HTM 0x0500000000000000ULL /* Access to a Transactional Memory */
/* Reserved 0x0600000000000000ULL */
-#define FSCR_IC_EBB 0x0700000000000000ULL /* Access to Event-Based Branch */
-#define FSCR_IC_TAR 0x0800000000000000ULL /* Access to Target Address Register */
-#define FSCR_IC_STOP 0x0900000000000000ULL /* Access to the 'stop' instruction in privileged non-hypervisor state */
-#define FSCR_IC_MSG 0x0A00000000000000ULL /* Access to 'msgsndp' or 'msgclrp' instructions */
-#define FSCR_IC_SCV 0x0C00000000000000ULL /* Execution of a 'scv' instruction */
+#define FSCR_IC_EBB 0x0700000000000000ULL /* Access to Event-Based Branch */
+#define FSCR_IC_TAR 0x0800000000000000ULL /* Access to Target Address Register */
+#define FSCR_IC_STOP 0x0900000000000000ULL /* Access to the 'stop' instruction in privileged non-hypervisor state */
+#define FSCR_IC_MSG 0x0A00000000000000ULL /* Access to 'msgsndp' or 'msgclrp' instructions */
+#define FSCR_IC_LM 0x0B00000000000000ULL /* Access to load monitored facility (cause 0x0B, distinct from FSCR_IC_MSG) */
+#define FSCR_IC_SCV 0x0C00000000000000ULL /* Execution of a 'scv' instruction */
+#define FSCR_SCV 0x0000000000001000 /* scv instruction available */
+#define FSCR_LM 0x0000000000000800 /* Load monitored facilities available */
+#define FSCR_MSGP 0x0000000000000400 /* msgsndp and SPRs available */
+#define FSCR_TAR 0x0000000000000100 /* TAR register available */
+#define FSCR_EBB 0x0000000000000080 /* Event-based branch available */
+#define FSCR_DSCR 0x0000000000000004 /* DSCR available in PR state */
+#define SPR_DPDES 0x0b0 /* .6. Directed Privileged Doorbell Exception State Register */
#define SPR_USPRG0 0x100 /* 4.. User SPR General 0 */
#define SPR_VRSAVE 0x100 /* .6. AltiVec VRSAVE */
#define SPR_SPRG0 0x110 /* 468 SPR General 0 */
@@ -188,6 +197,7 @@
#define IBMPOWERPCA2 0x0049
#define IBMPOWER7PLUS 0x004a
#define IBMPOWER8E 0x004b
+#define IBMPOWER8NVL 0x004c
#define IBMPOWER8 0x004d
#define IBMPOWER9 0x004e
#define MPC860 0x0050
@@ -242,7 +252,10 @@
#define LPCR_PECE_ME (1ULL << 12) /* Machine Check and Hypervisor */
/* Maintenance exceptions */
#define SPR_LPID 0x13f /* Logical Partitioning Control */
+#define SPR_HMER 0x150 /* Hypervisor Maintenance Exception Register */
+#define SPR_HMEER 0x151 /* Hypervisor Maintenance Exception Enable Register */
+#define SPR_TIR 0x1be /* .6. Thread Identification Register */
#define SPR_PTCR 0x1d0 /* Partition Table Control Register */
#define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */
#define SPEFSCR_SOVH 0x80000000
@@ -403,6 +416,16 @@
#define SPR_MD_TWC 0x31d /* ..8 DMMU tablewalk control */
#define SPR_MD_RPN 0x31e /* ..8 DMMU real (phys) page number */
#define SPR_MD_TW 0x31f /* ..8 MMU tablewalk scratch */
+#define SPR_BESCRS 0x320 /* .6. Branch Event Status and Control Set Register */
+#define SPR_BESCRSU 0x321 /* .6. Branch Event Status and Control Set Register (upper 32-bit) */
+#define SPR_BESCRR 0x322 /* .6. Branch Event Status and Control Reset Register */
+#define SPR_BESCRRU 0x323 /* .6. Branch Event Status and Control Reset Register (upper 32-bit) */
+#define SPR_EBBHR 0x324 /* .6. Event-based Branch Handler Register */
+#define SPR_EBBRR 0x325 /* .6. Event-based Branch Return Register */
+#define SPR_BESCR 0x326 /* .6. Branch Event Status and Control Register */
+#define SPR_LMRR 0x32d /* .6. Load Monitored Region Register */
+#define SPR_LMSER 0x32e /* .6. Load Monitored Section Enable Register */
+#define SPR_TAR 0x32f /* .6. Branch Target Address Register */
#define SPR_MI_CAM 0x330 /* ..8 IMMU CAM entry read */
#define SPR_MI_RAM0 0x331 /* ..8 IMMU RAM entry read reg 0 */
#define SPR_MI_RAM1 0x332 /* ..8 IMMU RAM entry read reg 1 */
@@ -410,6 +433,19 @@
#define SPR_MD_RAM0 0x339 /* ..8 IMMU RAM entry read reg 0 */
#define SPR_MD_RAM1 0x33a /* ..8 IMMU RAM entry read reg 1 */
#define SPR_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
+#define PSSCR_PLS_S 60
+#define PSSCR_PLS_M (0xfULL << PSSCR_PLS_S) /* 64-bit constant: a shift by 60 overflows int */
+#define PSSCR_SD (1 << 22)
+#define PSSCR_ESL (1 << 21)
+#define PSSCR_EC (1 << 20)
+#define PSSCR_PSLL_S 16
+#define PSSCR_PSLL_M (0xf << PSSCR_PSLL_S)
+#define PSSCR_TR_S 8
+#define PSSCR_TR_M (0x3 << PSSCR_TR_S)
+#define PSSCR_MTL_S 4
+#define PSSCR_MTL_M (0xf << PSSCR_MTL_S)
+#define PSSCR_RL_S 0
+#define PSSCR_RL_M (0xf << PSSCR_RL_S)
#define SPR_PMCR 0x374 /* Processor Management Control Register */
#define SPR_UMMCR2 0x3a0 /* .6. User Monitor Mode Control Register 2 */
#define SPR_UMMCR0 0x3a8 /* .6. User Monitor Mode Control Register 0 */
diff --git a/freebsd/sys/sys/_eventhandler.h b/freebsd/sys/sys/_eventhandler.h
new file mode 100644
index 00000000..f8f24d2b
--- /dev/null
+++ b/freebsd/sys/sys/_eventhandler.h
@@ -0,0 +1,144 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 1999 Michael Smith <msmith@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__EVENTHANDLER_H_
+#define _SYS__EVENTHANDLER_H_
+
+#include <sys/queue.h>
+
+struct eventhandler_entry {
+ TAILQ_ENTRY(eventhandler_entry) ee_link;
+ int ee_priority;
+#define EHE_DEAD_PRIORITY (-1)
+ void *ee_arg;
+};
+
+typedef struct eventhandler_entry *eventhandler_tag;
+
+/*
+ * You can optionally use the EVENTHANDLER_LIST and EVENTHANDLER_DIRECT macros
+ * to pre-define a symbol for the eventhandler list. This symbol can be used by
+ * EVENTHANDLER_DIRECT_INVOKE, which has the advantage of not needing to do a
+ * locked search of the global list of eventhandler lists. At least
+ * EVENTHANDLER_LIST_DEFINE must be used for EVENTHANDLER_DIRECT_INVOKE to
+ * work. EVENTHANDLER_LIST_DECLARE is only needed if the call to
+ * EVENTHANDLER_DIRECT_INVOKE is in a different compilation unit from
+ * EVENTHANDLER_LIST_DEFINE. If the events are even relatively high frequency
+ * it is suggested that you directly define a list for them.
+ */
+struct eventhandler_list;
+#define EVENTHANDLER_LIST_DECLARE(name) \
+extern struct eventhandler_list *_eventhandler_list_ ## name \
+
+/*
+ * Event handlers need to be declared, but do not need to be defined. The
+ * declaration must be in scope wherever the handler is to be invoked.
+ */
+#define EVENTHANDLER_DECLARE(name, type) \
+struct eventhandler_entry_ ## name \
+{ \
+ struct eventhandler_entry ee; \
+ type eh_func; \
+}; \
+struct __hack
+
+#endif
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 1999 Michael Smith <msmith@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__EVENTHANDLER_H_
+#define _SYS__EVENTHANDLER_H_
+
+#include <sys/queue.h>
+
+struct eventhandler_entry {
+ TAILQ_ENTRY(eventhandler_entry) ee_link;
+ int ee_priority;
+#define EHE_DEAD_PRIORITY (-1)
+ void *ee_arg;
+};
+
+typedef struct eventhandler_entry *eventhandler_tag;
+
+/*
+ * You can optionally use the EVENTHANDLER_LIST and EVENTHANDLER_DIRECT macros
+ * to pre-define a symbol for the eventhandler list. This symbol can be used by
+ * EVENTHANDLER_DIRECT_INVOKE, which has the advantage of not needing to do a
+ * locked search of the global list of eventhandler lists. At least
+ * EVENTHANDLER_LIST_DEFINE must be used for EVENTHANDLER_DIRECT_INVOKE to
+ * work. EVENTHANDLER_LIST_DECLARE is only needed if the call to
+ * EVENTHANDLER_DIRECT_INVOKE is in a different compilation unit from
+ * EVENTHANDLER_LIST_DEFINE. If the events are even relatively high frequency
+ * it is suggested that you directly define a list for them.
+ */
+struct eventhandler_list;
+#define EVENTHANDLER_LIST_DECLARE(name) \
+extern struct eventhandler_list *_eventhandler_list_ ## name \
+
+/*
+ * Event handlers need to be declared, but do not need to be defined. The
+ * declaration must be in scope wherever the handler is to be invoked.
+ */
+#define EVENTHANDLER_DECLARE(name, type) \
+struct eventhandler_entry_ ## name \
+{ \
+ struct eventhandler_entry ee; \
+ type eh_func; \
+}; \
+struct __hack
+
+#endif
diff --git a/freebsd/sys/sys/_lock.h b/freebsd/sys/sys/_lock.h
index ae10254c..f5609101 100644
--- a/freebsd/sys/sys/_lock.h
+++ b/freebsd/sys/sys/_lock.h
@@ -44,4 +44,38 @@ struct lock_object {
#endif /* __rtems__ */
};
+#ifdef _KERNEL
+/*
+ * If any of WITNESS, INVARIANTS, or KTR_LOCK KTR tracing has been enabled,
+ * then turn on LOCK_DEBUG. When this option is on, extra debugging
+ * facilities such as tracking the file and line number of lock operations
+ * are enabled. Also, mutex locking operations are not inlined to avoid
+ * bloat from all the extra debugging code. We also have to turn on all the
+ * calling conventions for this debugging code in modules so that modules can
+ * work with both debug and non-debug kernels.
+ */
+#if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(WITNESS) || defined(INVARIANTS) || \
+ defined(LOCK_PROFILING) || defined(KTR)
+#define LOCK_DEBUG 1
+#else
+#define LOCK_DEBUG 0
+#endif
+
+/*
+ * In the LOCK_DEBUG case, use the filename and line numbers for debugging
+ * operations. Otherwise, use default values to avoid the unneeded bloat.
+ */
+#if LOCK_DEBUG > 0
+#define LOCK_FILE_LINE_ARG_DEF , const char *file, int line
+#define LOCK_FILE_LINE_ARG , file, line
+#define LOCK_FILE __FILE__
+#define LOCK_LINE __LINE__
+#else
+#define LOCK_FILE_LINE_ARG_DEF
+#define LOCK_FILE_LINE_ARG
+#define LOCK_FILE NULL
+#define LOCK_LINE 0
+#endif
+#endif /* _KERNEL */
+
#endif /* !_SYS__LOCK_H_ */
diff --git a/freebsd/sys/sys/_rwlock.h b/freebsd/sys/sys/_rwlock.h
index 318592d5..6530cca8 100644
--- a/freebsd/sys/sys/_rwlock.h
+++ b/freebsd/sys/sys/_rwlock.h
@@ -2,7 +2,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
diff --git a/freebsd/sys/sys/_task.h b/freebsd/sys/sys/_task.h
index 392dc874..6ee48800 100644
--- a/freebsd/sys/sys/_task.h
+++ b/freebsd/sys/sys/_task.h
@@ -39,12 +39,11 @@
* field of struct task and the second argument is a count of how many
* times the task was enqueued before the call to taskqueue_run().
*
- * List of locks
- * (c) const after init
+ * List of locks
+ * (c) const after init
* (q) taskqueue lock
*/
typedef void task_fn_t(void *context, int pending);
-typedef void gtask_fn_t(void *context);
struct task {
STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
@@ -54,6 +53,10 @@ struct task {
void *ta_context; /* (c) argument for handler */
};
+#ifdef _KERNEL
+
+typedef void gtask_fn_t(void *context);
+
struct gtask {
STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */
uint16_t ta_flags; /* (q) state flags */
@@ -62,15 +65,6 @@ struct gtask {
void *ta_context; /* (c) argument for handler */
};
-struct grouptask {
- struct gtask gt_task;
- void *gt_taskqueue;
- LIST_ENTRY(grouptask) gt_list;
- void *gt_uniq;
-#define GROUPTASK_NAMELEN 32
- char gt_name[GROUPTASK_NAMELEN];
- int16_t gt_irq;
- int16_t gt_cpu;
-};
+#endif /* _KERNEL */
#endif /* !_SYS__TASK_H_ */
diff --git a/freebsd/sys/sys/ata.h b/freebsd/sys/sys/ata.h
index f8a332c3..22edb557 100644
--- a/freebsd/sys/sys/ata.h
+++ b/freebsd/sys/sys/ata.h
@@ -66,7 +66,8 @@ struct ata_params {
/*023*/ u_int8_t revision[8]; /* firmware revision */
/*027*/ u_int8_t model[40]; /* model name */
/*047*/ u_int16_t sectors_intr; /* sectors per interrupt */
-/*048*/ u_int16_t usedmovsd; /* double word read/write? */
+/*048*/ u_int16_t tcg; /* Trusted Computing Group */
+#define ATA_SUPPORT_TCG 0x0001
/*049*/ u_int16_t capabilities1;
#define ATA_SUPPORT_DMA 0x0100
#define ATA_SUPPORT_LBA 0x0200
@@ -92,6 +93,12 @@ struct ata_params {
/*057*/ u_int16_t current_size_1;
/*058*/ u_int16_t current_size_2;
/*059*/ u_int16_t multi;
+#define ATA_SUPPORT_BLOCK_ERASE_EXT 0x8000
+#define ATA_SUPPORT_OVERWRITE_EXT 0x4000
+#define ATA_SUPPORT_CRYPTO_SCRAMBLE_EXT 0x2000
+#define ATA_SUPPORT_SANITIZE 0x1000
+#define ATA_SUPPORT_SANITIZE_ALLOWED 0x0800
+#define ATA_SUPPORT_ANTIFREEZE_LOCK_EXT 0x0400
#define ATA_MULTI_VALID 0x0100
/*060*/ u_int16_t lba_size_1;
@@ -107,6 +114,7 @@ struct ata_params {
/*069*/ u_int16_t support3;
#define ATA_SUPPORT_RZAT 0x0020
#define ATA_SUPPORT_DRAT 0x4000
+#define ATA_ENCRYPTS_ALL_USER_DATA 0x0010 /* Self-encrypting drive */
#define ATA_SUPPORT_ZONE_MASK 0x0003
#define ATA_SUPPORT_ZONE_NR 0x0000
#define ATA_SUPPORT_ZONE_HOST_AWARE 0x0001
@@ -135,7 +143,8 @@ struct ata_params {
/*77*/ u_int16_t satacapabilities2;
#define ATA_SATA_CURR_GEN_MASK 0x0006
#define ATA_SUPPORT_NCQ_STREAM 0x0010
-#define ATA_SUPPORT_NCQ_QMANAGEMENT 0x0020
+#define ATA_SUPPORT_NCQ_NON_DATA 0x0020
+#define ATA_SUPPORT_NCQ_QMANAGEMENT ATA_SUPPORT_NCQ_NON_DATA
#define ATA_SUPPORT_RCVSND_FPDMA_QUEUED 0x0040
/*78*/ u_int16_t satasupport;
#define ATA_SUPPORT_NONZERO 0x0002
@@ -144,6 +153,7 @@ struct ata_params {
#define ATA_SUPPORT_INORDERDATA 0x0010
#define ATA_SUPPORT_ASYNCNOTIF 0x0020
#define ATA_SUPPORT_SOFTSETPRESERVE 0x0040
+#define ATA_SUPPORT_NCQ_AUTOSENSE 0x0080
/*79*/ u_int16_t sataenabled;
#define ATA_ENABLED_DAPST 0x0080
@@ -236,12 +246,15 @@ struct ata_params {
#define ATA_SUPPORT_FREEFALL 0x0020
#define ATA_SUPPORT_SENSE_REPORT 0x0040
#define ATA_SUPPORT_EPC 0x0080
+#define ATA_SUPPORT_AMAX_ADDR 0x0100
+#define ATA_SUPPORT_DSN 0x0200
/*120*/ u_int16_t enabled2;
#define ATA_ENABLED_WRITEREADVERIFY 0x0002
#define ATA_ENABLED_WRITEUNCORREXT 0x0004
#define ATA_ENABLED_FREEFALL 0x0020
#define ATA_ENABLED_SENSE_REPORT 0x0040
#define ATA_ENABLED_EPC 0x0080
+#define ATA_ENABLED_DSN 0x0200
u_int16_t reserved121[6];
/*127*/ u_int16_t removable_status;
/*128*/ u_int16_t security_status;
@@ -259,10 +272,23 @@ struct ata_params {
/*162*/ u_int16_t cfa_kms_support;
/*163*/ u_int16_t cfa_trueide_modes;
/*164*/ u_int16_t cfa_memory_modes;
- u_int16_t reserved165[4];
+ u_int16_t reserved165[3];
+/*168*/ u_int16_t form_factor;
+#define ATA_FORM_FACTOR_MASK 0x000f
+#define ATA_FORM_FACTOR_NOT_REPORTED 0x0000
+#define ATA_FORM_FACTOR_5_25 0x0001
+#define ATA_FORM_FACTOR_3_5 0x0002
+#define ATA_FORM_FACTOR_2_5 0x0003
+#define ATA_FORM_FACTOR_1_8 0x0004
+#define ATA_FORM_FACTOR_SUB_1_8 0x0005
+#define ATA_FORM_FACTOR_MSATA 0x0006
+#define ATA_FORM_FACTOR_M_2 0x0007
+#define ATA_FORM_FACTOR_MICRO_SSD 0x0008
+#define ATA_FORM_FACTOR_C_FAST 0x0009
/*169*/ u_int16_t support_dsm;
#define ATA_SUPPORT_DSM_TRIM 0x0001
- u_int16_t reserved170[6];
+/*170*/ u_int8_t product_id[8]; /* Additional Product Identifier */
+ u_int16_t reserved174[2];
/*176*/ u_int8_t media_serial[60];
/*206*/ u_int16_t sct;
u_int16_t reserved207[2];
@@ -393,6 +419,12 @@ struct ata_params {
#define ATA_READ_LOG_DMA_EXT 0x47 /* read log DMA ext - PIO Data-In */
#define ATA_ZAC_MANAGEMENT_IN 0x4a /* ZAC management in */
#define ATA_ZM_REPORT_ZONES 0x00 /* report zones */
+#define ATA_WRITE_LOG_DMA_EXT 0x57 /* WRITE LOG DMA EXT */
+#define ATA_TRUSTED_NON_DATA 0x5b /* TRUSTED NON-DATA */
+#define ATA_TRUSTED_RECEIVE 0x5c /* TRUSTED RECEIVE */
+#define ATA_TRUSTED_RECEIVE_DMA 0x5d /* TRUSTED RECEIVE DMA */
+#define ATA_TRUSTED_SEND 0x5e /* TRUSTED SEND */
+#define ATA_TRUSTED_SEND_DMA 0x5f /* TRUSTED SEND DMA */
#define ATA_READ_FPDMA_QUEUED 0x60 /* read DMA NCQ */
#define ATA_WRITE_FPDMA_QUEUED 0x61 /* write DMA NCQ */
#define ATA_NCQ_NON_DATA 0x63 /* NCQ non-data command */
@@ -412,15 +444,22 @@ struct ata_params {
#define ATA_RFPDMA_ZAC_MGMT_IN 0x02 /* NCQ ZAC mgmt in w/data */
#define ATA_SEP_ATTN 0x67 /* SEP request */
#define ATA_SEEK 0x70 /* seek */
+#define ATA_AMAX_ADDR 0x78 /* Accessible Max Address */
+#define ATA_AMAX_ADDR_GET 0x00 /* GET NATIVE MAX ADDRESS EXT */
+#define ATA_AMAX_ADDR_SET 0x01 /* SET ACCESSIBLE MAX ADDRESS EXT */
+#define ATA_AMAX_ADDR_FREEZE 0x02 /* FREEZE ACCESSIBLE MAX ADDRESS EXT */
#define ATA_ZAC_MANAGEMENT_OUT 0x9f /* ZAC management out */
#define ATA_ZM_CLOSE_ZONE 0x01 /* close zone */
#define ATA_ZM_FINISH_ZONE 0x02 /* finish zone */
#define ATA_ZM_OPEN_ZONE 0x03 /* open zone */
#define ATA_ZM_RWP 0x04 /* reset write pointer */
+#define ATA_DOWNLOAD_MICROCODE 0x92 /* DOWNLOAD MICROCODE */
+#define ATA_DOWNLOAD_MICROCODE_DMA 0x93 /* DOWNLOAD MICROCODE DMA */
#define ATA_PACKET_CMD 0xa0 /* packet command */
#define ATA_ATAPI_IDENTIFY 0xa1 /* get ATAPI params*/
#define ATA_SERVICE 0xa2 /* service command */
#define ATA_SMART_CMD 0xb0 /* SMART command */
+#define ATA_SANITIZE 0xb4 /* sanitize device */
#define ATA_CFA_ERASE 0xc0 /* CFA erase */
#define ATA_READ_MUL 0xc4 /* read multi */
#define ATA_WRITE_MUL 0xc5 /* write multi */
@@ -439,8 +478,11 @@ struct ata_params {
#define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */
#define ATA_SLEEP 0xe6 /* sleep */
#define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */
+#define ATA_WRITE_BUFFER 0xe8 /* write buffer */
#define ATA_WRITE_PM 0xe8 /* write portmultiplier */
+#define ATA_READ_BUFFER_DMA 0xe9 /* read buffer DMA */
#define ATA_FLUSHCACHE48 0xea /* flush cache to disk */
+#define ATA_WRITE_BUFFER_DMA 0xeb /* write buffer DMA */
#define ATA_ATA_IDENTIFY 0xec /* get ATA params */
#define ATA_SETFEATURES 0xef /* features command */
#define ATA_SF_ENAB_WCACHE 0x02 /* enable write cache */
diff --git a/freebsd/sys/sys/blist.h b/freebsd/sys/sys/blist.h
index 1e6deb52..87f492c8 100644
--- a/freebsd/sys/sys/blist.h
+++ b/freebsd/sys/sys/blist.h
@@ -33,7 +33,7 @@
* Usage:
* blist = blist_create(blocks, flags)
* (void) blist_destroy(blist)
- * blkno = blist_alloc(blist, count)
+ * blkno = blist_alloc(blist, &count, maxcount)
* (void) blist_free(blist, blkno, count)
* nblks = blist_fill(blist, blkno, count)
* (void) blist_resize(&blist, count, freeextra, flags)
@@ -73,17 +73,17 @@ typedef unsigned long u_daddr_t; /* unsigned disk address */
#define SWAPBLK_NONE ((daddr_t)((u_daddr_t)SWAPBLK_MASK + 1))/* flag */
/*
- * Both blmeta and bmu_bitmap MUST be a power of 2 in size.
+ * Both blmeta and bm_bitmap MUST be a power of 2 in size.
*/
typedef struct blmeta {
- u_daddr_t bm_bitmap; /* bitmap if we are a leaf */
+ u_daddr_t bm_bitmap; /* marking unfilled block sets */
daddr_t bm_bighint; /* biggest contiguous block hint*/
} blmeta_t;
typedef struct blist {
daddr_t bl_blocks; /* area of coverage */
- daddr_t bl_avail; /* # available blocks */
+ daddr_t bl_avail; /* # available blocks */
u_daddr_t bl_radix; /* coverage radix */
daddr_t bl_cursor; /* next-fit search starts at */
blmeta_t bl_root[1]; /* root of radix tree */
@@ -96,7 +96,7 @@ typedef struct blist {
struct sbuf;
-daddr_t blist_alloc(blist_t blist, daddr_t count);
+daddr_t blist_alloc(blist_t blist, int *count, int maxcount);
daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags);
void blist_destroy(blist_t blist);
diff --git a/freebsd/sys/sys/buf.h b/freebsd/sys/sys/buf.h
index a099a972..f419617a 100644
--- a/freebsd/sys/sys/buf.h
+++ b/freebsd/sys/sys/buf.h
@@ -44,6 +44,7 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/lockmgr.h>
+#include <vm/uma.h>
struct bio;
struct buf;
@@ -195,6 +196,11 @@ struct buf {
* may not be used with the stage 1 data write under NFS
* but may be used for the commit rpc portion.
*
+ * B_INVALONERR This flag is set on dirty buffers. It specifies that a
+ * write error should forcibly invalidate the buffer
+ * contents. This flag should be used with caution, as it
+ * discards data. It is incompatible with B_ASYNC.
+ *
* B_VMIO Indicates that the buffer is tied into an VM object.
* The buffer's data is always PAGE_SIZE aligned even
* if b_bufsize and b_bcount are not. ( b_bufsize is
@@ -225,7 +231,7 @@ struct buf {
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
-#define B_00040000 0x00040000 /* Available flag. */
+#define B_INVALONERR 0x00040000 /* Invalidate on write error. */
#define B_00080000 0x00080000 /* Available flag. */
#define B_00100000 0x00100000 /* Available flag. */
#define B_00200000 0x00200000 /* Available flag. */
@@ -242,7 +248,7 @@ struct buf {
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
- "\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
+ "\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
@@ -275,6 +281,11 @@ struct buf {
#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
#ifdef _KERNEL
+
+#ifndef NSWBUF_MIN
+#define NSWBUF_MIN 16
+#endif
+
/*
* Buffer locking
*/
@@ -287,7 +298,7 @@ extern const char *buf_wmesg; /* Default buffer lock message */
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp) \
- lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
+ lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_NEW)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@@ -353,15 +364,11 @@ extern const char *buf_wmesg; /* Default buffer lock message */
_lockmgr_assert(&(bp)->b_lock, KA_XLOCKED, LOCK_FILE, LOCK_LINE)
#define BUF_ASSERT_UNLOCKED(bp) \
_lockmgr_assert(&(bp)->b_lock, KA_UNLOCKED, LOCK_FILE, LOCK_LINE)
-#define BUF_ASSERT_HELD(bp)
-#define BUF_ASSERT_UNHELD(bp)
#else
#define BUF_ASSERT_LOCKED(bp)
#define BUF_ASSERT_SLOCKED(bp)
#define BUF_ASSERT_XLOCKED(bp)
#define BUF_ASSERT_UNLOCKED(bp)
-#define BUF_ASSERT_HELD(bp)
-#define BUF_ASSERT_UNHELD(bp)
#endif
#ifdef _SYS_PROC_H_ /* Avoid #include <sys/proc.h> pollution */
@@ -493,10 +500,6 @@ extern int bdwriteskip;
extern int dirtybufferflushes;
extern int altbufferflushes;
extern int nswbuf; /* Number of swap I/O buffer headers. */
-extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
-extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
-extern int vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
- asynchronous reads */
extern caddr_t unmapped_buf; /* Data address for unmapped buffers. */
static inline int
@@ -537,7 +540,6 @@ void brelse(struct buf *);
void bqrelse(struct buf *);
int vfs_bio_awrite(struct buf *);
void vfs_drain_busy_pages(struct buf *bp);
-struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
@@ -549,6 +551,9 @@ int bufwrite(struct buf *);
void bufdone(struct buf *);
void bd_speedup(void);
+extern uma_zone_t pbuf_zone;
+uma_zone_t pbuf_zsecond_create(char *name, int max);
+
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, int, struct buf **);
int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
@@ -562,7 +567,6 @@ void vfs_busy_pages(struct buf *, int clear_modify);
void vfs_unbusy_pages(struct buf *);
int vmapbuf(struct buf *, int);
void vunmapbuf(struct buf *);
-void relpbuf(struct buf *, int *);
void brelvp(struct buf *);
void bgetvp(struct vnode *, struct buf *);
void pbgetbo(struct bufobj *bo, struct buf *bp);
@@ -571,7 +575,6 @@ void pbrelbo(struct buf *);
void pbrelvp(struct buf *);
int allocbuf(struct buf *bp, int size);
void reassignbuf(struct buf *);
-struct buf *trypbuf(int *);
void bwait(struct buf *, u_char, const char *);
void bdone(struct buf *);
diff --git a/freebsd/sys/sys/buf_ring.h b/freebsd/sys/sys/buf_ring.h
index e8c69341..b8b136bd 100644
--- a/freebsd/sys/sys/buf_ring.h
+++ b/freebsd/sys/sys/buf_ring.h
@@ -310,15 +310,24 @@ buf_ring_peek_clear_sc(struct buf_ring *br)
if (!mtx_owned(br->br_lock))
panic("lock not held on single consumer dequeue");
#endif
- /*
- * I believe it is safe to not have a memory barrier
- * here because we control cons and tail is worst case
- * a lagging indicator so we worst case we might
- * return NULL immediately after a buffer has been enqueued
- */
+
if (br->br_cons_head == br->br_prod_tail)
return (NULL);
+#if defined(__arm__) || defined(__aarch64__)
+ /*
+ * The barrier is required here on ARM and ARM64 to ensure that
+ * br->br_ring[br->br_cons_head] is not fetched before the above
+ * condition is checked.
+ * Without the barrier, the ring slot may be fetched before the
+ * enqueue has stored the mbuf into br.  If the enqueue then updates
+ * the array and br_prod_tail in the meantime, the emptiness check
+ * above sees a non-empty ring and we return the previously fetched
+ * (and invalid) buffer.
+ */
+ atomic_thread_fence_acq();
+#endif
+
#ifdef DEBUG_BUFRING
/*
* Single consumer, i.e. cons_head will not move while we are
diff --git a/freebsd/sys/sys/bufobj.h b/freebsd/sys/sys/bufobj.h
index b02d4276..b075644d 100644
--- a/freebsd/sys/sys/bufobj.h
+++ b/freebsd/sys/sys/bufobj.h
@@ -127,7 +127,7 @@ struct bufobj {
#define ASSERT_BO_LOCKED(bo) rw_assert(BO_LOCKPTR((bo)), RA_LOCKED)
#define ASSERT_BO_UNLOCKED(bo) rw_assert(BO_LOCKPTR((bo)), RA_UNLOCKED)
-void bufobj_init(struct bufobj *bo, void *private);
+void bufobj_init(struct bufobj *bo, void *priv);
void bufobj_wdrop(struct bufobj *bo);
void bufobj_wref(struct bufobj *bo);
void bufobj_wrefl(struct bufobj *bo);
diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h
index 8e1ba763..26acd639 100644
--- a/freebsd/sys/sys/bus.h
+++ b/freebsd/sys/sys/bus.h
@@ -130,6 +130,7 @@ struct devreq {
#define DEV_DELETE _IOW('D', 10, struct devreq)
#define DEV_FREEZE _IOW('D', 11, struct devreq)
#define DEV_THAW _IOW('D', 12, struct devreq)
+#define DEV_RESET _IOW('D', 13, struct devreq)
/* Flags for DEV_DETACH and DEV_DISABLE. */
#define DEVF_FORCE_DETACH 0x0000001
@@ -143,10 +144,15 @@ struct devreq {
/* Flags for DEV_DELETE. */
#define DEVF_FORCE_DELETE 0x0000001
+/* Flags for DEV_RESET */
+#define DEVF_RESET_DETACH 0x0000001 /* Detach drivers vs suspend
+ device */
+
#ifdef _KERNEL
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
#include <sys/kobj.h>
+#include <sys/systm.h>
/**
* devctl hooks. Typically one should use the devctl_notify
@@ -414,6 +420,8 @@ void root_bus_configure(void);
* Useful functions for implementing buses.
*/
+struct _cpuset;
+
int bus_generic_activate_resource(device_t dev, device_t child, int type,
int rid, struct resource *r);
device_t
@@ -426,6 +434,8 @@ struct resource *
bus_generic_alloc_resource(device_t bus, device_t child, int type,
int *rid, rman_res_t start, rman_res_t end,
rman_res_t count, u_int flags);
+int bus_generic_translate_resource(device_t dev, int type, rman_res_t start,
+ rman_res_t *newstart);
int bus_generic_attach(device_t dev);
int bus_generic_bind_intr(device_t dev, device_t child,
struct resource *irq, int cpu);
@@ -494,6 +504,8 @@ int bus_generic_unmap_resource(device_t dev, device_t child, int type,
struct resource_map *map);
int bus_generic_write_ivar(device_t dev, device_t child, int which,
uintptr_t value);
+int bus_helper_reset_post(device_t dev, int flags);
+int bus_helper_reset_prepare(device_t dev, int flags);
int bus_null_rescan(device_t dev);
/*
@@ -802,16 +814,24 @@ DECLARE_MODULE(name##_##busname, name##_##busname##_mod, \
static __inline type varp ## _get_ ## var(device_t dev) \
{ \
uintptr_t v; \
- BUS_READ_IVAR(device_get_parent(dev), dev, \
+ int e; \
+ e = BUS_READ_IVAR(device_get_parent(dev), dev, \
ivarp ## _IVAR_ ## ivar, &v); \
+ KASSERT(e == 0, ("%s failed for %s on bus %s, error = %d", \
+ __func__, device_get_nameunit(dev), \
+ device_get_nameunit(device_get_parent(dev)), e)); \
return ((type) v); \
} \
\
static __inline void varp ## _set_ ## var(device_t dev, type t) \
{ \
uintptr_t v = (uintptr_t) t; \
- BUS_WRITE_IVAR(device_get_parent(dev), dev, \
+ int e; \
+ e = BUS_WRITE_IVAR(device_get_parent(dev), dev, \
ivarp ## _IVAR_ ## ivar, v); \
+ KASSERT(e == 0, ("%s failed for %s on bus %s, error = %d", \
+ __func__, device_get_nameunit(dev), \
+ device_get_nameunit(device_get_parent(dev)), e)); \
}
#else /* __rtems__ */
#define __BUS_ACCESSOR(varp, var, ivarp, ivar, type) \
diff --git a/freebsd/sys/sys/bus_dma.h b/freebsd/sys/sys/bus_dma.h
index eb2bc42d..e99d8ece 100644
--- a/freebsd/sys/sys/bus_dma.h
+++ b/freebsd/sys/sys/bus_dma.h
@@ -67,7 +67,9 @@
#ifndef _BUS_DMA_H_
#define _BUS_DMA_H_
+#ifdef _KERNEL
#include <sys/_bus_dma.h>
+#endif
/*
* Machine independent interface for mapping physical addresses to peripheral
@@ -133,6 +135,7 @@ typedef struct bus_dma_segment {
bus_size_t ds_len; /* length of transfer */
} bus_dma_segment_t;
+#ifdef _KERNEL
/*
* A function that returns 1 if the address cannot be accessed by
* a device and 0 if it can be.
@@ -302,5 +305,6 @@ BUS_DMAMAP_OP void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t dmamap, bus_
BUS_DMAMAP_OP void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t dmamap);
#undef BUS_DMAMAP_OP
+#endif /* _KERNEL */
#endif /* _BUS_DMA_H_ */
diff --git a/freebsd/sys/sys/capsicum.h b/freebsd/sys/sys/capsicum.h
index d40b8572..ee5e4267 100644
--- a/freebsd/sys/sys/capsicum.h
+++ b/freebsd/sys/sys/capsicum.h
@@ -246,7 +246,12 @@
/* Process management via process descriptors. */
/* Allows for pdgetpid(2). */
#define CAP_PDGETPID CAPRIGHT(1, 0x0000000000000200ULL)
-/* Allows for pdwait4(2). */
+/*
+ * Allows for pdwait4(2).
+ *
+ * XXX: this constant was imported unused, but is targeted to be implemented
+ * in the future (bug 235871).
+ */
#define CAP_PDWAIT CAPRIGHT(1, 0x0000000000000400ULL)
/* Allows for pdkill(2). */
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
diff --git a/freebsd/sys/sys/conf.h b/freebsd/sys/sys/conf.h
index c0a66442..caa34dea 100644
--- a/freebsd/sys/sys/conf.h
+++ b/freebsd/sys/sys/conf.h
@@ -43,7 +43,7 @@
#define _SYS_CONF_H_
#ifdef _KERNEL
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
#else
#include <sys/queue.h>
#endif
@@ -186,7 +186,8 @@ typedef int dumper_hdr_t(struct dumperinfo *di, struct kerneldumpheader *kdh,
#define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */
#define D_VERSION_02 0x28042009 /* Add d_mmap_single */
#define D_VERSION_03 0x17122009 /* d_mmap takes memattr,vm_ooffset_t */
-#define D_VERSION D_VERSION_03
+#define D_VERSION_04 0x5c48c353 /* SPECNAMELEN bumped to MAXNAMLEN */
+#define D_VERSION D_VERSION_04
/*
* Flags used for internal housekeeping
@@ -379,6 +380,10 @@ struct dumperinfo {
off_t origdumpoff; /* Starting dump offset. */
struct kerneldumpcrypto *kdcrypto; /* Kernel dump crypto. */
struct kerneldumpcomp *kdcomp; /* Kernel dump compression. */
+
+ TAILQ_ENTRY(dumperinfo) di_next;
+
+ char di_devname[];
};
#ifndef __rtems__
@@ -388,10 +393,10 @@ extern int dumping; /* system is dumping */
#endif /* __rtems__ */
int doadump(boolean_t);
-int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td,
- uint8_t compression, uint8_t encryption, const uint8_t *key,
- uint32_t encryptedkeysize, const uint8_t *encryptedkey);
-int clear_dumper(struct thread *td);
+struct diocskerneldump_arg;
+int dumper_insert(const struct dumperinfo *di_template, const char *devname,
+ const struct diocskerneldump_arg *kda);
+int dumper_remove(const char *devname, const struct diocskerneldump_arg *kda);
int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh);
int dump_append(struct dumperinfo *, void *, vm_offset_t, size_t);
diff --git a/freebsd/sys/sys/counter.h b/freebsd/sys/sys/counter.h
index 418141a5..9960a2c3 100644
--- a/freebsd/sys/sys/counter.h
+++ b/freebsd/sys/sys/counter.h
@@ -43,23 +43,23 @@ void counter_u64_zero(counter_u64_t);
uint64_t counter_u64_fetch(counter_u64_t);
#define COUNTER_ARRAY_ALLOC(a, n, wait) do { \
- for (int i = 0; i < (n); i++) \
- (a)[i] = counter_u64_alloc(wait); \
+ for (int _i = 0; _i < (n); _i++) \
+ (a)[_i] = counter_u64_alloc(wait); \
} while (0)
#define COUNTER_ARRAY_FREE(a, n) do { \
- for (int i = 0; i < (n); i++) \
- counter_u64_free((a)[i]); \
+ for (int _i = 0; _i < (n); _i++) \
+ counter_u64_free((a)[_i]); \
} while (0)
#define COUNTER_ARRAY_COPY(a, dstp, n) do { \
- for (int i = 0; i < (n); i++) \
- ((uint64_t *)(dstp))[i] = counter_u64_fetch((a)[i]);\
+ for (int _i = 0; _i < (n); _i++) \
+ ((uint64_t *)(dstp))[_i] = counter_u64_fetch((a)[_i]);\
} while (0)
#define COUNTER_ARRAY_ZERO(a, n) do { \
- for (int i = 0; i < (n); i++) \
- counter_u64_zero((a)[i]); \
+ for (int _i = 0; _i < (n); _i++) \
+ counter_u64_zero((a)[_i]); \
} while (0)
/*
diff --git a/freebsd/sys/sys/cpu.h b/freebsd/sys/sys/cpu.h
index 8a74e470..7ec7dc9e 100644
--- a/freebsd/sys/sys/cpu.h
+++ b/freebsd/sys/sys/cpu.h
@@ -31,7 +31,7 @@
#ifndef _SYS_CPU_H_
#define _SYS_CPU_H_
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
/*
* CPU device support.
diff --git a/freebsd/sys/sys/ctype.h b/freebsd/sys/sys/ctype.h
index b2a1fa93..d542e45a 100644
--- a/freebsd/sys/sys/ctype.h
+++ b/freebsd/sys/sys/ctype.h
@@ -41,19 +41,65 @@
#ifdef _KERNEL
-#define isspace(c) ((c) == ' ' || ((c) >= '\t' && (c) <= '\r'))
-#define isascii(c) (((c) & ~0x7f) == 0)
-#define isupper(c) ((c) >= 'A' && (c) <= 'Z')
-#define islower(c) ((c) >= 'a' && (c) <= 'z')
-#define isalpha(c) (isupper(c) || islower(c))
-#define isdigit(c) ((c) >= '0' && (c) <= '9')
-#define isxdigit(c) (isdigit(c) \
- || ((c) >= 'A' && (c) <= 'F') \
- || ((c) >= 'a' && (c) <= 'f'))
-#define isprint(c) ((c) >= ' ' && (c) <= '~')
-
-#define toupper(c) ((c) - 0x20 * (((c) >= 'a') && ((c) <= 'z')))
-#define tolower(c) ((c) + 0x20 * (((c) >= 'A') && ((c) <= 'Z')))
+/*
+ * Return non-zero when c is a blank (' ') or lies in the inclusive range
+ * '\t'..'\r'.
+ */
+static __inline int
+isspace(int c)
+{
+ return (c == ' ' || (c >= '\t' && c <= '\r'));
+}
+
+/*
+ * Return non-zero when c has no bits set outside the low seven, i.e. when
+ * c is in 0..0x7f.  Negative values have high bits set and yield zero.
+ */
+static __inline int
+isascii(int c)
+{
+ return ((c & ~0x7f) == 0);
+}
+
+/* Return non-zero when c is in the inclusive range 'A'..'Z'. */
+static __inline int
+isupper(int c)
+{
+ return (c >= 'A' && c <= 'Z');
+}
+
+/* Return non-zero when c is in the inclusive range 'a'..'z'. */
+static __inline int
+islower(int c)
+{
+ return (c >= 'a' && c <= 'z');
+}
+
+/* Return non-zero when c satisfies isupper() or islower(). */
+static __inline int
+isalpha(int c)
+{
+ return (isupper(c) || islower(c));
+}
+
+/* Return non-zero when c is in the inclusive range '0'..'9'. */
+static __inline int
+isdigit(int c)
+{
+ return (c >= '0' && c <= '9');
+}
+
+/*
+ * Return non-zero when c is a hexadecimal digit: a decimal digit per
+ * isdigit(), or a letter in 'A'..'F' or 'a'..'f'.
+ */
+static __inline int
+isxdigit(int c)
+{
+ return (isdigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'));
+}
+
+/* Return non-zero when c is in the inclusive range ' '..'~'. */
+static __inline int
+isprint(int c)
+{
+ return (c >= ' ' && c <= '~');
+}
+
+/*
+ * Map 'a'..'z' to 'A'..'Z'; any other value is returned unchanged.
+ * The range test evaluates to 0 or 1 and scales the 0x20 offset that is
+ * subtracted from c.
+ */
+static __inline int
+toupper(int c)
+{
+ return (c - 0x20 * ((c >= 'a') && (c <= 'z')));
+}
+
+/*
+ * Map 'A'..'Z' to 'a'..'z'; any other value is returned unchanged.
+ * The range test evaluates to 0 or 1 and scales the 0x20 offset that is
+ * added to c.
+ */
+static __inline int
+tolower(int c)
+{
+ return (c + 0x20 * ((c >= 'A') && (c <= 'Z')));
+}
#endif
#endif /* !_SYS_CTYPE_H_ */
diff --git a/freebsd/sys/sys/disk.h b/freebsd/sys/sys/disk.h
index 020626e2..79ce947f 100644
--- a/freebsd/sys/sys/disk.h
+++ b/freebsd/sys/sys/disk.h
@@ -19,6 +19,7 @@
#include <sys/kerneldump.h>
#include <sys/types.h>
#include <sys/disk_zone.h>
+#include <sys/socket.h>
#ifdef _KERNEL
@@ -144,18 +145,53 @@ struct diocgattr_arg {
#define DIOCZONECMD _IOWR('d', 143, struct disk_zone_args)
#ifndef __rtems__
-struct diocskerneldump_arg {
- uint8_t kda_enable;
- uint8_t kda_compression;
- uint8_t kda_encryption;
- uint8_t kda_key[KERNELDUMP_KEY_MAX_SIZE];
- uint32_t kda_encryptedkeysize;
- uint8_t *kda_encryptedkey;
+/*
+ * Argument structure of the DIOCSKERNELDUMP_FREEBSD12 ioctl ('d', 144).
+ * The key buffer holds KERNELDUMP_KEY_MAX_SIZE bytes; the encrypted key is
+ * passed by pointer together with its size.
+ * NOTE(review): field semantics (enable flag, compression/encryption
+ * selectors) are inferred from the names only -- confirm against the
+ * ioctl handler.
+ */
+struct diocskerneldump_arg_freebsd12 {
+ uint8_t kda12_enable;
+ uint8_t kda12_compression;
+ uint8_t kda12_encryption;
+ uint8_t kda12_key[KERNELDUMP_KEY_MAX_SIZE];
+ uint32_t kda12_encryptedkeysize;
+ uint8_t *kda12_encryptedkey;
};
-#define DIOCSKERNELDUMP _IOW('d', 144, struct diocskerneldump_arg)
+#define DIOCSKERNELDUMP_FREEBSD12 \
+ _IOW('d', 144, struct diocskerneldump_arg_freebsd12)
+
+#ifndef WITHOUT_NETDUMP
+#include <net/if.h>
+#include <netinet/in.h>
+
+/* Storage for one IP address, either IPv4 (in4) or IPv6 (in6). */
+union kd_ip {
+ struct in_addr in4;
+ struct in6_addr in6;
+};
+
+/*
+ * Sentinel values for kda_index.
+ *
+ * If kda_index is KDA_REMOVE_ALL, all dump configurations are cleared.
+ *
+ * If kda_index is KDA_REMOVE_DEV, all dump configurations for the specified
+ * device are cleared.
+ *
+ * If kda_index is KDA_REMOVE, only the specified dump configuration for the
+ * given device is removed from the list of fallback dump configurations.
+ *
+ * If kda_index is KDA_APPEND, the dump configuration is added after all
+ * existing dump configurations.
+ *
+ * Otherwise, the new configuration is inserted into the fallback dump list at
+ * index 'kda_index'.
+ */
+#define KDA_REMOVE UINT8_MAX
+#define KDA_REMOVE_ALL (UINT8_MAX - 1)
+#define KDA_REMOVE_DEV (UINT8_MAX - 2)
+#define KDA_APPEND (UINT8_MAX - 3)
+
+#define DIOCGKERNELDUMP _IOWR('d', 146, struct diocskerneldump_arg)
/*
- * Enable/Disable the device for kernel core dumps.
+ * Get current kernel netdump configuration details for a given index.
*/
+#endif
#endif /* __rtems__ */
#endif /* _SYS_DISK_H_ */
diff --git a/freebsd/sys/sys/eventhandler.h b/freebsd/sys/sys/eventhandler.h
index cc423752..9e3ff019 100644
--- a/freebsd/sys/sys/eventhandler.h
+++ b/freebsd/sys/sys/eventhandler.h
@@ -31,18 +31,12 @@
#ifndef _SYS_EVENTHANDLER_H_
#define _SYS_EVENTHANDLER_H_
+#include <sys/_eventhandler.h>
#include <sys/lock.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/queue.h>
-struct eventhandler_entry {
- TAILQ_ENTRY(eventhandler_entry) ee_link;
- int ee_priority;
-#define EHE_DEAD_PRIORITY (-1)
- void *ee_arg;
-};
-
#ifdef VIMAGE
struct eventhandler_entry_vimage {
void (* func)(void); /* Original function registered. */
@@ -60,8 +54,6 @@ struct eventhandler_list {
TAILQ_HEAD(,eventhandler_entry) el_entries;
};
-typedef struct eventhandler_entry *eventhandler_tag;
-
#define EHL_LOCK(p) mtx_lock(&(p)->el_lock)
#define EHL_UNLOCK(p) mtx_unlock(&(p)->el_lock)
#define EHL_LOCK_ASSERT(p, x) mtx_assert(&(p)->el_lock, x)
@@ -107,9 +99,6 @@ typedef struct eventhandler_entry *eventhandler_tag;
* EVENTHANDLER_LIST_DEFINE. If the events are even relatively high frequency
* it is suggested that you directly define a list for them.
*/
-#define EVENTHANDLER_LIST_DECLARE(name) \
-extern struct eventhandler_list *_eventhandler_list_ ## name \
-
#define EVENTHANDLER_LIST_DEFINE(name) \
struct eventhandler_list *_eventhandler_list_ ## name ; \
static void _ehl_init_ ## name (void * ctx __unused) \
@@ -130,18 +119,6 @@ SYSINIT(name ## _ehl_init, SI_SUB_EVENTHANDLER, SI_ORDER_ANY, \
} \
} while (0)
-/*
- * Event handlers need to be declared, but do not need to be defined. The
- * declaration must be in scope wherever the handler is to be invoked.
- */
-#define EVENTHANDLER_DECLARE(name, type) \
-struct eventhandler_entry_ ## name \
-{ \
- struct eventhandler_entry ee; \
- type eh_func; \
-}; \
-struct __hack
-
#define EVENTHANDLER_DEFINE(name, func, arg, priority) \
static eventhandler_tag name ## _tag; \
static void name ## _evh_init(void *ctx) \
diff --git a/freebsd/sys/sys/fail.h b/freebsd/sys/sys/fail.h
index 41e07bae..66150eb7 100644
--- a/freebsd/sys/sys/fail.h
+++ b/freebsd/sys/sys/fail.h
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2009 Isilon Inc http://www.isilon.com/
+ * Copyright (c) 2009-2019 Dell EMC Isilon http://www.isilon.com/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -191,11 +191,13 @@ fail_point_eval(struct fail_point *fp, int *ret)
__END_DECLS
/* Declare a fail_point and its sysctl in a function. */
+#define KFAIL_POINT_DECLARE(name) \
+ extern struct fail_point _FAIL_POINT_NAME(name)
#define _FAIL_POINT_NAME(name) _fail_point_##name
#define _FAIL_POINT_LOCATION() "(" __FILE__ ":" __XSTRING(__LINE__) ")"
#ifndef __rtems__
-#define _FAIL_POINT_INIT(parent, name, flags) \
- static struct fail_point _FAIL_POINT_NAME(name) = { \
+#define KFAIL_POINT_DEFINE(parent, name, flags) \
+ struct fail_point _FAIL_POINT_NAME(name) = { \
.fp_name = #name, \
.fp_location = _FAIL_POINT_LOCATION(), \
.fp_ref_cnt = 0, \
@@ -214,6 +216,9 @@ __END_DECLS
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, \
&_FAIL_POINT_NAME(name), 0, \
fail_point_sysctl_status, "A", "");
+
+#define _FAIL_POINT_INIT(parent, name, flags) \
+ static KFAIL_POINT_DEFINE(parent, name, flags)
#define _FAIL_POINT_EVAL(name, cond, code...) \
int RETURN_VALUE; \
\
@@ -223,6 +228,8 @@ __END_DECLS
code; \
\
}
+#define KFAIL_POINT_EVAL(name, code...) \
+ _FAIL_POINT_EVAL(name, true, code)
#else /* __rtems__ */
#define _FAIL_POINT_INIT(parent, name, flags) (void)0;
#define _FAIL_POINT_EVAL(name, cond, code...) (void)0;
diff --git a/freebsd/sys/sys/file.h b/freebsd/sys/sys/file.h
index 20beac22..0e7c296a 100644
--- a/freebsd/sys/sys/file.h
+++ b/freebsd/sys/sys/file.h
@@ -184,7 +184,10 @@ struct file {
/*
* DTYPE_VNODE specific fields.
*/
- int f_seqcount; /* (a) Count of sequential accesses. */
+ union {
+ int16_t f_seqcount; /* (a) Count of sequential accesses. */
+ int f_pipegen;
+ };
off_t f_nextoff; /* next expected read/write offset. */
union {
struct cdev_privdata *fvn_cdevpriv;
@@ -407,8 +410,14 @@ _fnoop(void)
return (0);
}
-#define fhold(fp) \
- (refcount_acquire(&(fp)->f_count))
+#ifndef __rtems__
+/*
+ * Take an additional reference on fp by calling refcount_acquire_checked()
+ * on fp->f_count, and hand its boolean result back to the caller.
+ * The function is marked __result_use_check, so the return value is not
+ * meant to be ignored.
+ * NOTE(review): presumably false means the reference was not taken
+ * (e.g. the counter would overflow) -- confirm in sys/refcount.h.
+ * This definition is compiled out when __rtems__ is defined.
+ */
+static __inline __result_use_check bool
+fhold(struct file *fp)
+{
+ return (refcount_acquire_checked(&fp->f_count));
+}
+#endif /* __rtems__ */
+
#ifndef __rtems__
#define fdrop(fp, td) \
(refcount_release(&(fp)->f_count) ? _fdrop((fp), (td)) : _fnoop())
diff --git a/freebsd/sys/sys/filedesc.h b/freebsd/sys/sys/filedesc.h
index 857d1fc9..5fafffb5 100644
--- a/freebsd/sys/sys/filedesc.h
+++ b/freebsd/sys/sys/filedesc.h
@@ -40,7 +40,7 @@
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/priority.h>
-#include <sys/seq.h>
+#include <sys/seqc.h>
#include <sys/sx.h>
#include <machine/_limits.h>
@@ -56,19 +56,19 @@ struct filedescent {
struct file *fde_file; /* file structure for open file */
struct filecaps fde_caps; /* per-descriptor rights */
uint8_t fde_flags; /* per-process open file flags */
- seq_t fde_seq; /* keep file and caps in sync */
+ seqc_t fde_seqc; /* keep file and caps in sync */
};
#define fde_rights fde_caps.fc_rights
#define fde_fcntls fde_caps.fc_fcntls
#define fde_ioctls fde_caps.fc_ioctls
#define fde_nioctls fde_caps.fc_nioctls
-#define fde_change_size (offsetof(struct filedescent, fde_seq))
+#define fde_change_size (offsetof(struct filedescent, fde_seqc))
struct fdescenttbl {
int fdt_nfiles; /* number of open files allocated */
struct filedescent fdt_ofiles[0]; /* open files */
};
-#define fd_seq(fdt, fd) (&(fdt)->fdt_ofiles[(fd)].fde_seq)
+#define fd_seqc(fdt, fd) (&(fdt)->fdt_ofiles[(fd)].fde_seqc)
/*
* This structure is used for the management of descriptors. It may be
@@ -269,10 +269,10 @@ int fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp,
/* Return a referenced file from an unlocked descriptor. */
#ifndef __rtems__
int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
- struct file **fpp, seq_t *seqp);
+ struct file **fpp, seqc_t *seqp);
#else /* __rtems__ */
static inline int
-do_fget_unlocked(struct filedesc *fdp, int fd, struct file **fpp, seq_t *seqp)
+do_fget_unlocked(struct filedesc *fdp, int fd, struct file **fpp, seqc_t *seqp)
{
struct file *fp;
@@ -320,10 +320,10 @@ fdeget_locked(struct filedesc *fdp, int fd)
#ifdef CAPABILITIES
static __inline bool
-fd_modified(struct filedesc *fdp, int fd, seq_t seq)
+fd_modified(struct filedesc *fdp, int fd, seqc_t seqc)
{
- return (!seq_consistent(fd_seq(fdp->fd_files, fd), seq));
+ return (!seqc_consistent(fd_seqc(fdp->fd_files, fd), seqc));
}
#endif
#endif /* __rtems__ */
diff --git a/freebsd/sys/sys/gsb_crc32.h b/freebsd/sys/sys/gsb_crc32.h
new file mode 100644
index 00000000..c5a42d3d
--- /dev/null
+++ b/freebsd/sys/sys/gsb_crc32.h
@@ -0,0 +1,47 @@
+/*-
+ * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
+ * code or tables extracted from it, as desired without restriction.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_GSB_CRC32_H_
+#define _SYS_GSB_CRC32_H_
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+
+extern const uint32_t crc32_tab[];
+
+/*
+ * Fold size bytes starting at buf into the running CRC value crc and
+ * return the updated value.
+ *
+ * Each input byte is XORed with the low eight bits of crc to select an
+ * entry of crc32_tab, which is then XORed with crc shifted right by eight.
+ * No initial or final inversion is applied here: the caller provides the
+ * starting value and receives the raw register contents.  With size == 0
+ * crc is returned unchanged.
+ */
+static __inline uint32_t
+crc32_raw(const void *buf, size_t size, uint32_t crc)
+{
+ const uint8_t *p = (const uint8_t *)buf;
+
+ while (size--)
+ crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
+ return (crc);
+}
+
+/*
+ * Compute the CRC of size bytes at buf in one shot: crc32_raw() is seeded
+ * with ~0U and its result is inverted (XOR with ~0U) before being returned.
+ */
+static __inline uint32_t
+crc32(const void *buf, size_t size)
+{
+ uint32_t crc;
+
+ crc = crc32_raw(buf, size, ~0U);
+ return (crc ^ ~0U);
+}
+
+uint32_t calculate_crc32c(uint32_t crc32c, const unsigned char *buffer,
+ unsigned int length);
+#endif
+
+#if defined(__amd64__) || defined(__i386__)
+uint32_t sse42_crc32c(uint32_t, const unsigned char *, unsigned);
+#endif
+#if defined(__aarch64__)
+uint32_t armv8_crc32c(uint32_t, const unsigned char *, unsigned int);
+#endif
+
+#endif /* !_SYS_GSB_CRC32_H_ */
diff --git a/freebsd/sys/sys/gtaskqueue.h b/freebsd/sys/sys/gtaskqueue.h
index a36c770a..82307c8a 100644
--- a/freebsd/sys/sys/gtaskqueue.h
+++ b/freebsd/sys/sys/gtaskqueue.h
@@ -31,20 +31,37 @@
#ifndef _SYS_GTASKQUEUE_H_
#define _SYS_GTASKQUEUE_H_
-#include <sys/taskqueue.h>
#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif
+#include <sys/_task.h>
+#include <sys/bus.h>
+#include <sys/taskqueue.h>
+#include <sys/types.h>
+
struct gtaskqueue;
-typedef void (*gtaskqueue_enqueue_fn)(void *context);
/*
* Taskqueue groups. Manages dynamic thread groups and irq binding for
* device and other tasks.
*/
+struct grouptask {
+ struct gtask gt_task;
+ void *gt_taskqueue;
+ LIST_ENTRY(grouptask) gt_list;
+ void *gt_uniq;
+#define GROUPTASK_NAMELEN 32
+ char gt_name[GROUPTASK_NAMELEN];
+#ifndef __rtems__
+ device_t gt_dev;
+ struct resource *gt_irq;
+ int gt_cpu;
+#endif /* __rtems__ */
+};
+
void gtaskqueue_block(struct gtaskqueue *queue);
void gtaskqueue_unblock(struct gtaskqueue *queue);
@@ -55,28 +72,29 @@ void gtaskqueue_drain_all(struct gtaskqueue *queue);
void grouptask_block(struct grouptask *grouptask);
void grouptask_unblock(struct grouptask *grouptask);
int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task);
+
void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask,
- void *uniq, int irq, const char *name);
-int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask,
- void *uniq, int cpu, int irq, const char *name);
+ void *uniq, device_t dev, struct resource *irq, const char *name);
+int taskqgroup_attach_cpu(struct taskqgroup *qgroup,
+ struct grouptask *grptask, void *uniq, int cpu, device_t dev,
+ struct resource *irq, const char *name);
void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
struct taskqgroup *taskqgroup_create(const char *name);
void taskqgroup_destroy(struct taskqgroup *qgroup);
int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
-void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
- const char *name);
+void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask,
+ gtask_fn_t *fn, const char *name);
void taskqgroup_config_gtask_deinit(struct grouptask *gtask);
#define TASK_ENQUEUED 0x1
#define TASK_SKIP_WAKEUP 0x2
#define TASK_NOENQUEUE 0x4
-
-#define GTASK_INIT(task, flags, priority, func, context) do { \
- (task)->ta_flags = flags; \
- (task)->ta_priority = (priority); \
- (task)->ta_func = (func); \
- (task)->ta_context = (context); \
+#define GTASK_INIT(gtask, flags, priority, func, context) do { \
+ (gtask)->ta_flags = flags; \
+ (gtask)->ta_priority = (priority); \
+ (gtask)->ta_func = (func); \
+ (gtask)->ta_context = (context); \
} while (0)
#define GROUPTASK_INIT(gtask, priority, func, context) \
diff --git a/freebsd/sys/sys/interrupt.h b/freebsd/sys/sys/interrupt.h
index 5c634054..cf8b7a01 100644
--- a/freebsd/sys/sys/interrupt.h
+++ b/freebsd/sys/sys/interrupt.h
@@ -156,7 +156,7 @@ extern struct intr_event *clk_intr_event;
extern void *vm_ih;
/* Counts and names for statistics (defined in MD code). */
-#if defined(__amd64__) || defined(__i386__)
+#if defined(__amd64__) || defined(__i386__) || defined(__powerpc__)
extern u_long *intrcnt; /* counts for for each device and stray */
extern char *intrnames; /* string table containing device names */
#else
@@ -176,6 +176,9 @@ int intr_event_add_handler(struct intr_event *ie, const char *name,
int intr_event_bind(struct intr_event *ie, int cpu);
int intr_event_bind_irqonly(struct intr_event *ie, int cpu);
int intr_event_bind_ithread(struct intr_event *ie, int cpu);
+struct _cpuset;
+int intr_event_bind_ithread_cpuset(struct intr_event *ie,
+ struct _cpuset *mask);
int intr_event_create(struct intr_event **event, void *source,
int flags, int irq, void (*pre_ithread)(void *),
void (*post_ithread)(void *), void (*post_filter)(void *),
diff --git a/freebsd/sys/sys/ktls.h b/freebsd/sys/sys/ktls.h
new file mode 100644
index 00000000..079d4448
--- /dev/null
+++ b/freebsd/sys/sys/ktls.h
@@ -0,0 +1,194 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014-2019 Netflix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _SYS_KTLS_H_
+#define _SYS_KTLS_H_
+
+#include <sys/refcount.h>
+#include <sys/_task.h>
+
+struct tls_record_layer {
+ uint8_t tls_type;
+ uint8_t tls_vmajor;
+ uint8_t tls_vminor;
+ uint16_t tls_length;
+ uint8_t tls_data[0];
+} __attribute__ ((packed));
+
+#define TLS_MAX_MSG_SIZE_V10_2 16384
+#define TLS_MAX_PARAM_SIZE 1024 /* Max key/mac/iv in sockopt */
+#define TLS_AEAD_GCM_LEN 4
+#define TLS_CBC_IMPLICIT_IV_LEN 16
+
+/* Type values for the record layer */
+#define TLS_RLTYPE_APP 23
+
+/*
+ * Nonce for GCM.
+ */
+struct tls_nonce_data {
+ uint8_t fixed[TLS_AEAD_GCM_LEN];
+ uint64_t seq;
+} __packed;
+
+/*
+ * AEAD additional data format per RFC.
+ */
+struct tls_aead_data {
+ uint64_t seq; /* In network order */
+ uint8_t type;
+ uint8_t tls_vmajor;
+ uint8_t tls_vminor;
+ uint16_t tls_length;
+} __packed;
+
+/*
+ * Stream Cipher MAC additional data input. This does not match the
+ * exact data on the wire (the sequence number is not placed on the
+ * wire, and any explicit IV after the record header is not covered by
+ * the MAC).
+ */
+struct tls_mac_data {
+ uint64_t seq;
+ uint8_t type;
+ uint8_t tls_vmajor;
+ uint8_t tls_vminor;
+ uint16_t tls_length;
+} __packed;
+
+#define TLS_MAJOR_VER_ONE 3
+#define TLS_MINOR_VER_ZERO 1 /* 3, 1 */
+#define TLS_MINOR_VER_ONE 2 /* 3, 2 */
+#define TLS_MINOR_VER_TWO 3 /* 3, 3 */
+
+/* For TCP_TXTLS_ENABLE */
+struct tls_enable {
+ const uint8_t *cipher_key;
+ const uint8_t *iv; /* Implicit IV. */
+ const uint8_t *auth_key;
+ int cipher_algorithm; /* e.g. CRYPTO_AES_CBC */
+ int cipher_key_len;
+ int iv_len;
+ int auth_algorithm; /* e.g. CRYPTO_SHA2_256_HMAC */
+ int auth_key_len;
+ int flags;
+ uint8_t tls_vmajor;
+ uint8_t tls_vminor;
+};
+
+struct tls_session_params {
+ uint8_t *cipher_key;
+ uint8_t *auth_key;
+ uint8_t iv[TLS_CBC_IMPLICIT_IV_LEN];
+ int cipher_algorithm;
+ int auth_algorithm;
+ uint16_t cipher_key_len;
+ uint16_t iv_len;
+ uint16_t auth_key_len;
+ uint16_t max_frame_len;
+ uint8_t tls_vmajor;
+ uint8_t tls_vminor;
+ uint8_t tls_hlen;
+ uint8_t tls_tlen;
+ uint8_t tls_bs;
+ uint8_t flags;
+};
+
+#ifdef _KERNEL
+
+#define KTLS_API_VERSION 5
+
+struct iovec;
+struct ktls_session;
+struct m_snd_tag;
+struct mbuf;
+struct mbuf_ext_pgs;
+struct sockbuf;
+struct socket;
+
+struct ktls_crypto_backend {
+ LIST_ENTRY(ktls_crypto_backend) next;
+ int (*try)(struct socket *so, struct ktls_session *tls);
+ int prio;
+ int api_version;
+ int use_count;
+ const char *name;
+};
+
+struct ktls_session {
+ int (*sw_encrypt)(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, uint8_t *trailer,
+ struct iovec *src, struct iovec *dst, int iovcnt,
+ uint64_t seqno);
+ union {
+ void *cipher;
+ struct m_snd_tag *snd_tag;
+ };
+ struct ktls_crypto_backend *be;
+ void (*free)(struct ktls_session *tls);
+ struct tls_session_params params;
+ u_int wq_index;
+ volatile u_int refcount;
+
+ struct task reset_tag_task;
+ struct inpcb *inp;
+ bool reset_pending;
+} __aligned(CACHE_LINE_SIZE);
+
+int ktls_crypto_backend_register(struct ktls_crypto_backend *be);
+int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be);
+int ktls_enable_tx(struct socket *so, struct tls_enable *en);
+void ktls_destroy(struct ktls_session *tls);
+int ktls_frame(struct mbuf *m, struct ktls_session *tls, int *enqueue_cnt,
+ uint8_t record_type);
+void ktls_seq(struct sockbuf *sb, struct mbuf *m);
+void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count);
+void ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs);
+int ktls_set_tx_mode(struct socket *so, int mode);
+int ktls_get_tx_mode(struct socket *so);
+int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
+
+/*
+ * Acquire a reference on `tls' via refcount_acquire() and return the
+ * same pointer.  A NULL session is passed through without being touched.
+ */
+static inline struct ktls_session *
+ktls_hold(struct ktls_session *tls)
+{
+
+	if (tls == NULL)
+		return (NULL);
+	refcount_acquire(&tls->refcount);
+	return (tls);
+}
+
+/*
+ * Release a reference on `tls'; ktls_destroy() is called when
+ * refcount_release() returns true.  Unlike ktls_hold(), `tls' is
+ * dereferenced unconditionally, so it must not be NULL.
+ */
+static inline void
+ktls_free(struct ktls_session *tls)
+{
+
+	if (!refcount_release(&tls->refcount))
+		return;
+	ktls_destroy(tls);
+}
+
+#endif /* _KERNEL */
+#endif /* !_SYS_KTLS_H_ */
diff --git a/freebsd/sys/sys/libkern.h b/freebsd/sys/sys/libkern.h
index 28da25ca..8a906fb2 100644
--- a/freebsd/sys/sys/libkern.h
+++ b/freebsd/sys/sys/libkern.h
@@ -227,9 +227,11 @@ void _bsd_srandom(u_long);
int strcasecmp(const char *, const char *);
char *strcat(char * __restrict, const char * __restrict);
char *strchr(const char *, int);
+char *strchrnul(const char *, int);
int strcmp(const char *, const char *);
char *strcpy(char * __restrict, const char * __restrict);
size_t strcspn(const char * __restrict, const char * __restrict) __pure;
+char *strdup_flags(const char *__restrict, struct malloc_type *, int);
#ifdef __rtems__
#include <string.h>
#define strdup _bsd_strdup
@@ -251,39 +253,6 @@ size_t strspn(const char *, const char *);
char *strstr(const char *, const char *);
int strvalid(const char *, size_t);
-extern const uint32_t crc32_tab[];
-
-static __inline uint32_t
-crc32_raw(const void *buf, size_t size, uint32_t crc)
-{
- const uint8_t *p = (const uint8_t *)buf;
-
- while (size--)
- crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
- return (crc);
-}
-
-static __inline uint32_t
-crc32(const void *buf, size_t size)
-{
- uint32_t crc;
-
- crc = crc32_raw(buf, size, ~0U);
- return (crc ^ ~0U);
-}
-
-uint32_t
-calculate_crc32c(uint32_t crc32c, const unsigned char *buffer,
- unsigned int length);
-#ifdef _KERNEL
-#if defined(__amd64__) || defined(__i386__)
-uint32_t sse42_crc32c(uint32_t, const unsigned char *, unsigned);
-#endif
-#if defined(__aarch64__)
-uint32_t armv8_crc32c(uint32_t, const unsigned char *, unsigned int);
-#endif
-#endif
-
#ifndef __rtems__
static __inline char *
index(const char *p, int ch)
diff --git a/freebsd/sys/sys/lockmgr.h b/freebsd/sys/sys/lockmgr.h
index 03ae6f9e..d2a14230 100644
--- a/freebsd/sys/sys/lockmgr.h
+++ b/freebsd/sys/sys/lockmgr.h
@@ -143,7 +143,7 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
/*
* Flags for lockinit().
*/
-#define LK_INIT_MASK 0x0000FF
+#define LK_INIT_MASK 0x0001FF
#define LK_CANRECURSE 0x000001
#define LK_NODUP 0x000002
#define LK_NOPROFILE 0x000004
@@ -152,6 +152,7 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define LK_QUIET 0x000020
#define LK_ADAPTIVE 0x000040
#define LK_IS_VNODE 0x000080 /* Tell WITNESS about a VNODE lock */
+#define LK_NEW 0x000100
/*
* Additional attributes to be used in lockmgr().
@@ -163,7 +164,6 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define LK_SLEEPFAIL 0x000800
#define LK_TIMELOCK 0x001000
#define LK_NODDLKTREAT 0x002000
-#define LK_VNHELD 0x004000
/*
* Operations for lockmgr().
diff --git a/freebsd/sys/sys/lockstat.h b/freebsd/sys/sys/lockstat.h
index 9a6674fa..0526f4fb 100644
--- a/freebsd/sys/sys/lockstat.h
+++ b/freebsd/sys/sys/lockstat.h
@@ -65,6 +65,13 @@ SDT_PROBE_DECLARE(lockstat, , , sx__spin);
SDT_PROBE_DECLARE(lockstat, , , sx__upgrade);
SDT_PROBE_DECLARE(lockstat, , , sx__downgrade);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__acquire);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__release);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__disown);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__block);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__upgrade);
+SDT_PROBE_DECLARE(lockstat, , , lockmgr__downgrade);
+
SDT_PROBE_DECLARE(lockstat, , , thread__spin);
#define LOCKSTAT_WRITER 0
diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h
index ca81a198..83510329 100644
--- a/freebsd/sys/sys/malloc.h
+++ b/freebsd/sys/sys/malloc.h
@@ -57,9 +57,10 @@
#define M_NOVM 0x0200 /* don't ask VM for pages */
#define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */
#define M_NODUMP 0x0800 /* don't dump pages in this allocation */
-#define M_FIRSTFIT 0x1000 /* Only for vmem, fast fit. */
-#define M_BESTFIT 0x2000 /* Only for vmem, low fragmentation. */
-#define M_EXEC 0x4000 /* allocate executable space. */
+#define M_FIRSTFIT 0x1000 /* only for vmem, fast fit */
+#define M_BESTFIT 0x2000 /* only for vmem, low fragmentation */
+#define M_EXEC 0x4000 /* allocate executable space */
+#define M_NEXTFIT 0x8000 /* only for vmem, follow cursor */
#define M_MAGIC 877983977 /* time when first defined :-) */
@@ -182,7 +183,7 @@ void *contigmalloc(unsigned long size, struct malloc_type *type, int flags,
void *contigmalloc_domainset(unsigned long size, struct malloc_type *type,
struct domainset *ds, int flags, vm_paddr_t low, vm_paddr_t high,
unsigned long alignment, vm_paddr_t boundary)
- __malloc_like __result_use_check __alloc_size(1) __alloc_align(6);
+ __malloc_like __result_use_check __alloc_size(1) __alloc_align(7);
void free(void *addr, struct malloc_type *type);
void free_domain(void *addr, struct malloc_type *type);
#ifndef __rtems__
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index 634f7d9e..ba2e1873 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -40,6 +40,7 @@
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
+#include <sys/refcount.h>
#include <vm/uma.h>
#ifdef WITNESS
#include <sys/lock.h>
@@ -98,6 +99,7 @@ struct mbuf;
#define MLEN ((int)(MSIZE - MHSIZE))
#define MHLEN ((int)(MSIZE - MPKTHSIZE))
#define MINCLSIZE (MHLEN + 1)
+#define M_NODOM 255
#ifdef _KERNEL
/*-
@@ -137,6 +139,7 @@ struct m_tag {
*/
struct m_snd_tag {
struct ifnet *ifp; /* network interface tag belongs to */
+ volatile u_int refcount;
};
/*
@@ -158,7 +161,7 @@ struct pkthdr {
uint32_t flowid; /* packet's 4-tuple system */
uint32_t csum_flags; /* checksum and offload features */
uint16_t fibnum; /* this packet should use this fib */
- uint8_t cosqos; /* class/quality of service */
+ uint8_t numa_domain; /* NUMA domain of recvd pkt */
uint8_t rsstype; /* hash type */
union {
uint64_t rcv_tstmp; /* timestamp in ns */
@@ -196,6 +199,8 @@ struct pkthdr {
#define lro_nsegs tso_segsz
#define csum_phsum PH_per.sixteen[2]
#define csum_data PH_per.thirtytwo[1]
+#define lro_len PH_per.sixteen[0] /* inbound during LRO */
+#define lro_csum PH_per.sixteen[1] /* inbound during LRO */
#define pace_thoff PH_loc.sixteen[0]
#define pace_tlen PH_loc.sixteen[1]
#define pace_drphdrlen PH_loc.sixteen[2]
@@ -224,7 +229,15 @@ struct m_ext {
volatile u_int ext_count;
volatile u_int *ext_cnt;
};
- char *ext_buf; /* start of buffer */
+ union {
+ /*
+ * If ext_type == EXT_PGS, 'ext_pgs' points to a
+ * structure describing the buffer. Otherwise,
+ * 'ext_buf' points to the start of the buffer.
+ */
+ struct mbuf_ext_pgs *ext_pgs;
+ char *ext_buf;
+ };
uint32_t ext_size; /* size of buffer, for ext_free */
uint32_t ext_type:8, /* type of external storage */
ext_flags:24; /* external storage mbuf flags */
@@ -290,10 +303,98 @@ struct mbuf {
};
};
+struct ktls_session;
+struct socket;
+
+/*
+ * TLS records for TLS 1.0-1.2 can have the following header lengths:
+ * - 5 (AES-CBC with implicit IV)
+ * - 21 (AES-CBC with explicit IV)
+ * - 13 (AES-GCM with 8 byte explicit IV)
+ */
+#define MBUF_PEXT_HDR_LEN 24
+
+/*
+ * TLS records for TLS 1.0-1.2 can have the following maximum trailer
+ * lengths:
+ * - 16 (AES-GCM)
+ * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding)
+ * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding)
+ * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding)
+ */
+#define MBUF_PEXT_TRAIL_LEN 64
+
+#ifdef __LP64__
+#define MBUF_PEXT_MAX_PGS (152 / sizeof(vm_paddr_t))
+#else
+#define MBUF_PEXT_MAX_PGS (156 / sizeof(vm_paddr_t))
+#endif
+
+#define MBUF_PEXT_MAX_BYTES \
+ (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN)
+
+/*
+ * This struct is 256 bytes in size and is arranged so that the most
+ * common case (accessing the first 4 pages of a 16KB TLS record) will
+ * fit in a single 64 byte cacheline.
+ */
+struct mbuf_ext_pgs {
+ uint8_t npgs; /* Number of attached pages */
+ uint8_t nrdy; /* Pages with I/O pending */
+ uint8_t hdr_len; /* TLS header length */
+ uint8_t trail_len; /* TLS trailer length */
+ uint16_t first_pg_off; /* Offset into 1st page */
+ uint16_t last_pg_len; /* Length of last page */
+ vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */
+ char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */
+ struct ktls_session *tls; /* TLS session */
+#if defined(__i386__) || \
+ (defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE))
+ /*
+ * i386 and Book-E PowerPC have 64-bit vm_paddr_t, so there is
+ * a 4 byte remainder from the space allocated for pa[].
+ */
+ uint32_t pad;
+#endif
+ union {
+ char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */
+ struct {
+ struct socket *so;
+ struct mbuf *mbuf;
+ uint64_t seqno;
+ STAILQ_ENTRY(mbuf_ext_pgs) stailq;
+ int enc_cnt;
+ };
+ };
+};
+
+#ifdef _KERNEL
+/*
+ * Length of the data in page `pidx' of `ext_pgs'.  The page whose index
+ * is npgs - 1 yields last_pg_len; any other page yields PAGE_SIZE minus
+ * `pgoff'.  The assertion requires `pgoff' to be zero unless `pidx' is 0.
+ */
+static inline int
+mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff)
+{
+	KASSERT(pgoff == 0 || pidx == 0,
+	    ("page %d with non-zero offset %d in %p", pidx, pgoff, ext_pgs));
+	if (pidx != ext_pgs->npgs - 1)
+		return (PAGE_SIZE - pgoff);
+	return (ext_pgs->last_pg_len);
+}
+
+#ifdef INVARIANT_SUPPORT
+void mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs);
+#endif
+#ifdef INVARIANTS
+#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) mb_ext_pgs_check((ext_pgs))
+#else
+#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs)
+#endif
+#endif
+
/*
* mbuf flags of global significance and layer crossing.
* Those of only protocol/layer specific significance are to be mapped
- * to M_PROTO[1-12] and cleared at layer handoff boundaries.
+ * to M_PROTO[1-11] and cleared at layer handoff boundaries.
* NB: Limited to the lower 24 bits.
*/
#define M_EXT 0x00000001 /* has associated external storage */
@@ -304,25 +405,29 @@ struct mbuf {
#define M_MCAST 0x00000020 /* send/received as link-level multicast */
#define M_PROMISC 0x00000040 /* packet was not for us */
#define M_VLANTAG 0x00000080 /* ether_vtag is valid */
-#define M_NOMAP 0x00000100 /* mbuf data is unmapped (soon from Drew) */
+#ifndef __rtems__
+#define M_NOMAP 0x00000100 /* mbuf data is unmapped */
+#else /* __rtems__ */
+#define M_NOMAP 0x00000000 /* disable unmapped mbuf data */
+#endif /* __rtems__ */
#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */
#define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */
#define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically
hw-stamped on port (useful for IEEE 1588
and 802.1AS) */
-
-#define M_PROTO1 0x00001000 /* protocol-specific */
-#define M_PROTO2 0x00002000 /* protocol-specific */
-#define M_PROTO3 0x00004000 /* protocol-specific */
-#define M_PROTO4 0x00008000 /* protocol-specific */
-#define M_PROTO5 0x00010000 /* protocol-specific */
-#define M_PROTO6 0x00020000 /* protocol-specific */
-#define M_PROTO7 0x00040000 /* protocol-specific */
-#define M_PROTO8 0x00080000 /* protocol-specific */
-#define M_PROTO9 0x00100000 /* protocol-specific */
-#define M_PROTO10 0x00200000 /* protocol-specific */
-#define M_PROTO11 0x00400000 /* protocol-specific */
-#define M_PROTO12 0x00800000 /* protocol-specific */
+#define M_TSTMP_LRO 0x00001000 /* Time LRO pushed in pkt is valid in (PH_loc) */
+
+#define M_PROTO1 0x00002000 /* protocol-specific */
+#define M_PROTO2 0x00004000 /* protocol-specific */
+#define M_PROTO3 0x00008000 /* protocol-specific */
+#define M_PROTO4 0x00010000 /* protocol-specific */
+#define M_PROTO5 0x00020000 /* protocol-specific */
+#define M_PROTO6 0x00040000 /* protocol-specific */
+#define M_PROTO7 0x00080000 /* protocol-specific */
+#define M_PROTO8 0x00100000 /* protocol-specific */
+#define M_PROTO9 0x00200000 /* protocol-specific */
+#define M_PROTO10 0x00400000 /* protocol-specific */
+#define M_PROTO11 0x00800000 /* protocol-specific */
#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */
@@ -331,7 +436,7 @@ struct mbuf {
*/
#define M_PROTOFLAGS \
(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
- M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
+ M_PROTO9|M_PROTO10|M_PROTO11)
/*
* Flags preserved when copying m_pkthdr.
@@ -345,11 +450,11 @@ struct mbuf {
*/
#define M_FLAG_BITS \
"\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
- "\7M_PROMISC\10M_VLANTAG\13M_TSTMP\14M_TSTMP_HPREC"
+ "\7M_PROMISC\10M_VLANTAG\11M_NOMAP\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC"
#define M_FLAG_PROTOBITS \
"\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
"\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
- "\27M_PROTO11\30M_PROTO12"
+ "\27M_PROTO11"
#define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS)
/*
@@ -407,33 +512,6 @@ struct mbuf {
#define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP)
/*
- * COS/QOS class and quality of service tags.
- * It uses DSCP code points as base.
- */
-#define QOS_DSCP_CS0 0x00
-#define QOS_DSCP_DEF QOS_DSCP_CS0
-#define QOS_DSCP_CS1 0x20
-#define QOS_DSCP_AF11 0x28
-#define QOS_DSCP_AF12 0x30
-#define QOS_DSCP_AF13 0x38
-#define QOS_DSCP_CS2 0x40
-#define QOS_DSCP_AF21 0x48
-#define QOS_DSCP_AF22 0x50
-#define QOS_DSCP_AF23 0x58
-#define QOS_DSCP_CS3 0x60
-#define QOS_DSCP_AF31 0x68
-#define QOS_DSCP_AF32 0x70
-#define QOS_DSCP_AF33 0x78
-#define QOS_DSCP_CS4 0x80
-#define QOS_DSCP_AF41 0x88
-#define QOS_DSCP_AF42 0x90
-#define QOS_DSCP_AF43 0x98
-#define QOS_DSCP_CS5 0xa0
-#define QOS_DSCP_EF 0xb8
-#define QOS_DSCP_CS6 0xc0
-#define QOS_DSCP_CS7 0xe0
-
-/*
* External mbuf storage buffer types.
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
@@ -445,6 +523,10 @@ struct mbuf {
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference */
+#define EXT_RXRING 8 /* data in NIC receive ring */
+#ifndef __rtems__
+#define EXT_PGS 9 /* array of unmapped pages */
+#endif /* __rtems__ */
#define EXT_VENDOR1 224 /* for vendor-internal use */
#define EXT_VENDOR2 225 /* for vendor-internal use */
@@ -489,6 +571,11 @@ struct mbuf {
"\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
"\30EXT_FLAG_EXP4"
+#define MBUF_EXT_PGS_ASSERT(m) \
+ KASSERT((((m)->m_flags & M_EXT) != 0) && \
+ ((m)->m_ext.ext_type == EXT_PGS), \
+ ("%s: m %p !M_EXT or !EXT_PGS", __func__, m))
+
/*
* Flags indicating checksum, segmentation and other offload work to be
* done, or already done, by hardware or lower layers. It is split into
@@ -521,6 +608,8 @@ struct mbuf {
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
+#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
+
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@@ -530,7 +619,7 @@ struct mbuf {
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
- "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
@@ -589,6 +678,7 @@ struct mbuf {
#define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k"
#define MBUF_TAG_MEM_NAME "mbuf_tag"
#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt"
+#define MBUF_EXTPGS_MEM_NAME "mbuf_extpgs"
#ifdef _KERNEL
@@ -613,9 +703,25 @@ extern uma_zone_t zone_pack;
extern uma_zone_t zone_jumbop;
extern uma_zone_t zone_jumbo9;
extern uma_zone_t zone_jumbo16;
+extern uma_zone_t zone_extpgs;
void mb_dupcl(struct mbuf *, struct mbuf *);
void mb_free_ext(struct mbuf *);
+void mb_free_mext_pgs(struct mbuf *);
+struct mbuf *mb_alloc_ext_pgs(int, bool, m_ext_free_t);
+int mb_unmapped_compress(struct mbuf *m);
+#ifndef __rtems__
+struct mbuf *mb_unmapped_to_ext(struct mbuf *m);
+#else /* __rtems__ */
+static __inline struct mbuf *
+mb_unmapped_to_ext(struct mbuf *m)
+{
+ /* __rtems__ variant: the mbuf passed in is returned as is. */
+ return (m);
+}
+
+#endif /* __rtems__ */
+void mb_free_notready(struct mbuf *m, int count);
void m_adj(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
int (*)(void *, void *, u_int), void *);
@@ -650,6 +756,7 @@ struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
struct mbuf *m_getptr(struct mbuf *, int, int *);
u_int m_length(struct mbuf *, struct mbuf **);
int m_mbuftouio(struct uio *, const struct mbuf *, int);
+int m_unmappedtouio(const struct mbuf *, int, struct uio *, int);
void m_move_pkthdr(struct mbuf *, struct mbuf *);
int m_pkthdr_init(struct mbuf *, int);
struct mbuf *m_prepend(struct mbuf *, int, int);
@@ -660,6 +767,8 @@ int m_sanity(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int);
+void m_snd_tag_init(struct m_snd_tag *, struct ifnet *);
+void m_snd_tag_destroy(struct m_snd_tag *);
static __inline int
m_gettype(int size)
@@ -903,7 +1012,7 @@ m_extrefcnt(struct mbuf *m)
* be both the local data payload, or an external buffer area, depending on
* whether M_EXT is set).
*/
-#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
+#define M_WRITABLE(m) (((m)->m_flags & (M_RDONLY | M_NOMAP)) == 0 && \
(!(((m)->m_flags & M_EXT)) || \
(m_extrefcnt(m) == 1)))
@@ -926,7 +1035,8 @@ m_extrefcnt(struct mbuf *m)
* handling external storage, packet-header mbufs, and regular data mbufs.
*/
#define M_START(m) \
- (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
+ (((m)->m_flags & M_NOMAP) ? NULL : \
+ ((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \
&(m)->m_dat[0])
@@ -1023,6 +1133,17 @@ m_align(struct mbuf *m, int len)
*/
#define MCHTYPE(m, t) m_chtype((m), (t))
+/*
+ * Receive interface of a packet-header mbuf.  NULL is returned instead
+ * when CSUM_SND_TAG is set in the header's csum_flags.
+ */
+static __inline struct ifnet *
+m_rcvif(struct mbuf *m)
+{
+
+	M_ASSERTPKTHDR(m);
+	return ((m->m_pkthdr.csum_flags & CSUM_SND_TAG) != 0 ?
+	    NULL : m->m_pkthdr.rcvif);
+}
+
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000
@@ -1031,6 +1152,7 @@ extern int max_hdr; /* Largest link + protocol header */
extern int max_linkhdr; /* Largest link-level header */
extern int max_protohdr; /* Largest protocol header */
extern int nmbclusters; /* Maximum number of clusters */
+extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */
/*-
* Network packets may have annotations attached by affixing a list of
@@ -1213,6 +1335,22 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
+static inline struct m_snd_tag *
+m_snd_tag_ref(struct m_snd_tag *mst)
+{
+ /* Calls refcount_acquire() on the tag's refcount; returns the same pointer. */
+ refcount_acquire(&mst->refcount);
+ return (mst);
+}
+
+/*
+ * Release a reference on the send tag; m_snd_tag_destroy() is called
+ * when refcount_release() returns true.
+ */
+static inline void
+m_snd_tag_rele(struct m_snd_tag *mst)
+{
+
+	if (!refcount_release(&mst->refcount))
+		return;
+	m_snd_tag_destroy(mst);
+}
+
static __inline struct mbuf *
m_free(struct mbuf *m)
{
@@ -1221,6 +1359,8 @@ m_free(struct mbuf *m)
MBUF_PROBE1(m__free, m);
if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_rele(m->m_pkthdr.snd_tag);
if (m->m_flags & M_EXT)
mb_free_ext(m);
else if ((m->m_flags & M_NOFREE) == 0)
@@ -1314,7 +1454,7 @@ static inline int
mbufq_full(const struct mbufq *mq)
{
- return (mq->mq_len >= mq->mq_maxlen);
+ return (mq->mq_maxlen > 0 && mq->mq_len >= mq->mq_maxlen);
}
static inline int
@@ -1388,5 +1528,20 @@ void netdump_mbuf_dump(void);
void netdump_mbuf_reinit(int nmbuf, int nclust, int clsize);
#endif
+/*
+ * Report whether `m' is an M_NOMAP mbuf whose ext_pgs descriptor has a
+ * non-NULL tls pointer.  When __rtems__ is defined the check is compiled
+ * out and the answer is always false.
+ */
+static inline bool
+mbuf_has_tls_session(struct mbuf *m)
+{
+
+#ifndef __rtems__
+	if ((m->m_flags & M_NOMAP) == 0)
+		return (false);
+	MBUF_EXT_PGS_ASSERT(m);
+	return (m->m_ext.ext_pgs->tls != NULL);
+#else /* __rtems__ */
+	return (false);
+#endif /* __rtems__ */
+}
+
#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */
diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h
index 2a5d4cff..4b60055c 100644
--- a/freebsd/sys/sys/mount.h
+++ b/freebsd/sys/sys/mount.h
@@ -226,6 +226,11 @@ struct mount {
struct lock mnt_explock; /* vfs_export walkers lock */
TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */
TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
+ int __aligned(CACHE_LINE_SIZE) mnt_vfs_ops;/* (i) pending vfs ops */
+ int *mnt_thread_in_ops_pcpu;
+ int *mnt_ref_pcpu;
+ int *mnt_lockref_pcpu;
+ int *mnt_writeopcount_pcpu;
};
/*
@@ -265,11 +270,17 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
#define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx)
#define MNT_MTX(mp) (&(mp)->mnt_mtx)
-#define MNT_REF(mp) (mp)->mnt_ref++
+
+/*
+ * Increment the mount's mnt_ref counter.  mtx_assert() is invoked with
+ * MA_OWNED on MNT_MTX(mp) first.  The argument is parenthesized at every
+ * use so that an expression such as *mpp expands to (*mpp)->mnt_ref++
+ * rather than *mpp->mnt_ref++.
+ */
+#define	MNT_REF(mp)	do {						\
+	mtx_assert(MNT_MTX(mp), MA_OWNED);				\
+	(mp)->mnt_ref++;						\
+} while (0)
#define MNT_REL(mp) do { \
- KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \
+ mtx_assert(MNT_MTX(mp), MA_OWNED); \
(mp)->mnt_ref--; \
- if ((mp)->mnt_ref == 0) \
+ if ((mp)->mnt_vfs_ops && (mp)->mnt_ref < 0) \
+ vfs_dump_mount_counters(mp); \
+ if ((mp)->mnt_ref == 0 && (mp)->mnt_vfs_ops) \
wakeup((mp)); \
} while (0)
@@ -296,6 +307,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */
#define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */
#define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */
+#define MNT_UNTRUSTED 0x0000000800000000ULL /* filesys metadata untrusted */
/*
* NFS export related mount flags.
@@ -333,7 +345,8 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \
MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \
- MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED)
+ MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED | \
+ MNT_UNTRUSTED)
/* Mask of flags that can be updated. */
#define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \
@@ -342,7 +355,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
MNT_NOSYMFOLLOW | MNT_IGNORE | \
MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \
MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \
- MNT_AUTOMOUNTED)
+ MNT_AUTOMOUNTED | MNT_UNTRUSTED)
/*
* External filesystem command modifier flags.
@@ -360,29 +373,28 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */
#define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */
#define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */
+#define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */
+#define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */
#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \
- MNT_BYFSID)
+ MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR)
/*
* Internal filesystem control flags stored in mnt_kern_flag.
*
- * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed
- * past the mount point. This keeps the subtree stable during mounts
- * and unmounts.
+ * MNTK_UNMOUNT locks the mount entry so that name lookup cannot
+ * proceed past the mount point. This keeps the subtree stable during
+ * mounts and unmounts. When non-forced unmount flushes all vnodes
+ * from the mp queue, the MNTK_UNMOUNT flag prevents insmntque() from
+ * queueing new vnodes.
*
* MNTK_UNMOUNTF permits filesystems to detect a forced unmount while
* dounmount() is still waiting to lock the mountpoint. This allows
* the filesystem to cancel operations that might otherwise deadlock
* with the unmount attempt (used by NFS).
- *
- * MNTK_NOINSMNTQ is strict subset of MNTK_UNMOUNT. They are separated
- * to allow for failed unmount attempt to restore the syncer vnode for
- * the mount.
*/
#define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */
#define MNTK_ASYNC 0x00000002 /* filtered async flag */
#define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */
-#define MNTK_NOINSMNTQ 0x00000008 /* insmntque is not allowed */
#define MNTK_DRAINING 0x00000010 /* lock draining is happening */
#define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */
#define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */
@@ -396,6 +408,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNTK_MARKER 0x00001000
#define MNTK_UNMAPPED_BUFS 0x00002000
#define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */
+#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */
#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
@@ -647,6 +660,18 @@ struct nameidata;
struct sysctl_req;
struct mntarg;
+/*
+ * N.B., vfs_cmount is the ancient vfsop invoked by the old mount(2) syscall.
+ * The new way is vfs_mount.
+ *
+ * vfs_cmount implementations typically translate arguments from their
+ * respective old per-FS structures into the key-value list supported by
+ * nmount(2), then use kernel_mount(9) to mimic nmount(2) from kernelspace.
+ *
+ * Filesystems with mounters that use nmount(2) do not need to and should not
+ * implement vfs_cmount. Hopefully a future cleanup can remove vfs_cmount and
+ * mount(2) entirely.
+ */
typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags);
typedef int vfs_unmount_t(struct mount *mp, int mntflags);
typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp);
@@ -925,6 +950,74 @@ vfs_sysctl_t vfs_stdsysctl;
void syncer_suspend(void);
void syncer_resume(void);
+void vfs_op_barrier_wait(struct mount *);
+void vfs_op_enter(struct mount *);
+void vfs_op_exit_locked(struct mount *);
+void vfs_op_exit(struct mount *);
+
+#ifdef DIAGNOSTIC
+void vfs_assert_mount_counters(struct mount *);
+void vfs_dump_mount_counters(struct mount *);
+#else
+#define vfs_assert_mount_counters(mp) do { } while (0)
+#define vfs_dump_mount_counters(mp) do { } while (0)
+#endif
+
+enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT };
+int vfs_mount_fetch_counter(struct mount *, enum mount_counter);
+
+/*
+ * We mark ourselves as entering the section and post a sequentially consistent
+ * fence, meaning the store is completed before we get into the section and
+ * mnt_vfs_ops is only read afterwards.
+ *
+ * Any thread transitioning the ops counter 0->1 does things in the opposite
+ * order - first bumps the count, posts a sequentially consistent fence and
+ * observes all CPUs not executing within the section.
+ *
+ * This provides an invariant that by the time the last CPU is observed not
+ * executing, everyone else entering will see the counter > 0 and exit.
+ *
+ * Note there is no barrier between vfs_ops and the rest of the code in the
+ * section. It is not necessary as the writer has to wait for everyone to drain
+ * before making any changes or only make changes safe while the section is
+ * executed.
+ */
+#define vfs_op_thread_entered(mp) ({ /* true iff this CPU's in-ops flag is 1 */ \
+	MPASS(curthread->td_critnest > 0); /* only valid inside a critical section */ \
+	*(int *)zpcpu_get((mp)->mnt_thread_in_ops_pcpu) == 1; \
+})
+
+#define vfs_op_thread_enter(mp) ({ /* evaluates to false when mnt_vfs_ops > 0 */ \
+	bool _retval = true; \
+	critical_enter(); /* stays entered on success, until vfs_op_thread_exit() */ \
+	MPASS(!vfs_op_thread_entered(mp)); \
+	*(int *)zpcpu_get((mp)->mnt_thread_in_ops_pcpu) = 1; \
+	atomic_thread_fence_seq_cst(); /* flag store completes before mnt_vfs_ops is read */ \
+	if (__predict_false((mp)->mnt_vfs_ops > 0)) { \
+		vfs_op_thread_exit(mp); /* clears the flag and leaves the critical section */ \
+		_retval = false; \
+	} \
+	_retval; \
+})
+
+#define vfs_op_thread_exit(mp) do { /* clear this CPU's in-ops flag, then leave the critical section */ \
+	MPASS(vfs_op_thread_entered(mp)); \
+	atomic_thread_fence_rel(); /* release fence ahead of the flag clear */ \
+	*(int *)zpcpu_get((mp)->mnt_thread_in_ops_pcpu) = 0; \
+	critical_exit(); \
+} while (0)
+
+#define vfs_mp_count_add_pcpu(mp, count, val) do { /* this CPU's mnt_<count>_pcpu += val */ \
+	MPASS(vfs_op_thread_entered(mp)); /* caller must already be inside the section */ \
+	(*(int *)zpcpu_get((mp)->mnt_##count##_pcpu)) += (val); \
+} while (0)
+
+#define vfs_mp_count_sub_pcpu(mp, count, val) do { /* this CPU's mnt_<count>_pcpu -= val */ \
+	MPASS(vfs_op_thread_entered(mp)); /* caller must already be inside the section */ \
+	(*(int *)zpcpu_get((mp)->mnt_##count##_pcpu)) -= (val); \
+} while (0)
+
#else /* !_KERNEL */
#include <sys/cdefs.h>
diff --git a/freebsd/sys/sys/mouse.h b/freebsd/sys/sys/mouse.h
index e6ea68bc..a23e09ab 100644
--- a/freebsd/sys/sys/mouse.h
+++ b/freebsd/sys/sys/mouse.h
@@ -136,6 +136,7 @@ typedef struct synapticshw {
int infoXupmm;
int infoYupmm;
int forcePad;
+ int topButtonPad;
} synapticshw_t;
/* iftype */
diff --git a/freebsd/sys/sys/pcpu.h b/freebsd/sys/sys/pcpu.h
index 0ce30af7..2e1cdde3 100644
--- a/freebsd/sys/sys/pcpu.h
+++ b/freebsd/sys/sys/pcpu.h
@@ -39,6 +39,7 @@
#error "no assembler-serviceable parts inside"
#endif
+#include <sys/param.h>
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -84,7 +85,8 @@ extern uintptr_t dpcpu_off[];
/* struct _hack is to stop this from being used with the static keyword. */
#define DPCPU_DEFINE(t, n) \
struct _hack; t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
-#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv))
+#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv) \
+ || defined(__powerpc64__))
/*
* On some architectures the compiler will use PC-relative load to
* find the address of DPCPU data with the static keyword. We then
@@ -183,6 +185,7 @@ struct pcpu {
struct thread *pc_fpcurthread; /* Fp state owner */
struct thread *pc_deadthread; /* Zombie thread or NULL */
struct pcb *pc_curpcb; /* Current pcb */
+ void *pc_sched; /* Scheduler state */
uint64_t pc_switchtime; /* cpu_ticks() at last csw */
int pc_switchticks; /* `ticks' at last csw */
u_int pc_cpuid; /* This cpu number */
@@ -221,10 +224,6 @@ extern struct cpuhead cpuhead;
extern struct pcpu *cpuid_to_pcpu[];
#define curcpu PCPU_GET(cpuid)
-#define curproc (curthread->td_proc)
-#ifndef curthread
-#define curthread PCPU_GET(curthread)
-#endif
#define curvidata PCPU_GET(vidata)
#ifndef __rtems__
@@ -233,20 +232,12 @@ extern struct pcpu *cpuid_to_pcpu[];
#define UMA_PCPU_ALLOC_SIZE (PAGE_SIZE / 32)
#endif /* __rtems__ */
-#ifndef __rtems__
-#ifdef CTASSERT
-#if defined(__i386__) || defined(__amd64__)
-/* Required for counters(9) to work on x86. */
-CTASSERT(sizeof(struct pcpu) == UMA_PCPU_ALLOC_SIZE);
-#else
-/*
- * To minimize memory waste in per-cpu UMA zones, size of struct pcpu
- * should be denominator of PAGE_SIZE.
- */
-CTASSERT((PAGE_SIZE / sizeof(struct pcpu)) * sizeof(struct pcpu) == PAGE_SIZE);
-#endif /* UMA_PCPU_ALLOC_SIZE && x86 */
-#endif /* CTASSERT */
-#endif /* __rtems__ */
+#include <machine/pcpu_aux.h>
+
+#ifndef curthread
+#define curthread PCPU_GET(curthread)
+#endif
+#define curproc (curthread->td_proc)
/* Accessor to elements allocated via UMA_ZONE_PCPU zone. */
static inline void *
@@ -268,6 +259,18 @@ zpcpu_get_cpu(void *base, int cpu)
}
/*
+ * This operation is NOT atomic and does not post any barriers.
+ * If you use this the assumption is that the target CPU will not
+ * be modifying this variable.
+ * If you need atomicity use xchg.
+ */
+#define zpcpu_replace_cpu(base, val, cpu) ({ /* stores val in cpu's slot, yields the old value */ \
+	__typeof(val) _old = *(__typeof(val) *)zpcpu_get_cpu(base, cpu); \
+	*(__typeof(val) *)zpcpu_get_cpu(base, cpu) = (val); /* plain store, not atomic */ \
+	_old; \
+})
+
+/*
* Machine dependent callouts. cpu_pcpu_init() is responsible for
* initializing machine dependent fields of struct pcpu, and
* db_show_mdpcpu() is responsible for handling machine dependent
diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h
index 6c352059..592a8ef1 100644
--- a/freebsd/sys/sys/proc.h
+++ b/freebsd/sys/sys/proc.h
@@ -42,6 +42,9 @@
#include <sys/callout.h> /* For struct callout. */
#include <sys/event.h> /* For struct klist. */
+#ifdef _KERNEL
+#include <sys/_eventhandler.h>
+#endif
#include <sys/condvar.h>
#ifndef _KERNEL
#include <sys/filedesc.h>
@@ -178,6 +181,7 @@ struct filecaps;
struct filemon;
struct kaioinfo;
struct kaudit_record;
+struct kcov_info;
struct kdtrace_proc;
struct kdtrace_thread;
struct mqueue_notifier;
@@ -334,7 +338,9 @@ struct thread {
void *td_su; /* (k) FFS SU private */
sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
int td_rtcgen; /* (s) rtc_generation of abs. sleep */
+ int td_errno; /* (k) Error from last syscall. */
size_t td_vslock_sz; /* (k) amount of vslock-ed space */
+ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
@@ -359,7 +365,7 @@ struct thread {
* or already have been set in the allocator, constructor, etc.
*/
struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */
- enum {
+ enum td_states {
TDS_INACTIVE = 0x0,
TDS_INHIBITED,
TDS_CAN_RUN,
@@ -387,8 +393,6 @@ struct thread {
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */
- int td_errno; /* Error returned by last syscall. */
- /* LP64 hole */
struct vnet *td_vnet; /* (k) Effective vnet. */
const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */
struct trapframe *td_intr_frame;/* (k) Frame of the current irq */
@@ -620,7 +624,7 @@ struct proc {
int p_flag; /* (c) P_* flags. */
int p_flag2; /* (c) P2_* flags. */
- enum {
+ enum p_states {
PRS_NEW = 0, /* In creation */
PRS_NORMAL, /* threads can be run. */
PRS_ZOMBIE
@@ -664,7 +668,6 @@ struct proc {
struct sigiolst p_sigiolst; /* (c) List of sigio sources. */
int p_sigparent; /* (c) Signal to parent on exit. */
int p_sig; /* (n) For core dump/debugger XXX. */
- u_long p_code; /* (n) For core dump/debugger XXX. */
u_int p_stops; /* (c) Stop event bitmask. */
u_int p_stype; /* (c) Stop event type. */
char p_step; /* (c) Process is stopped. */
@@ -804,6 +807,13 @@ struct proc {
#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
#define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */
+#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */
+#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */
+#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */
+#define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */
+#define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX. */
+#define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */
+#define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */
/* Flags protected by proctree_lock, kept in p_treeflags. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
@@ -1014,7 +1024,6 @@ extern u_long pgrphash;
extern struct sx allproc_lock;
extern int allproc_gen;
-extern struct sx zombproc_lock;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
extern struct mtx procid_lock;
@@ -1032,15 +1041,16 @@ LIST_HEAD(proclist, proc);
TAILQ_HEAD(procqueue, proc);
TAILQ_HEAD(threadqueue, thread);
extern struct proclist allproc; /* List of all processes. */
-extern struct proclist zombproc; /* List of zombie processes. */
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
extern struct uma_zone *proc_zone;
struct proc *pfind(pid_t); /* Find process by id. */
struct proc *pfind_any(pid_t); /* Find (zombie) process by id. */
+struct proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */
struct pgrp *pgfind(pid_t); /* Find process group by id. */
-struct proc *zpfind(pid_t); /* Find zombie process by id. */
+void pidhash_slockall(void); /* Shared lock all pid hash lists. */
+void pidhash_sunlockall(void); /* Shared unlock all pid hash lists. */
struct fork_req {
int fr_flags;
@@ -1123,11 +1133,13 @@ void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid);
+void proc_add_orphan(struct proc *child, struct proc *parent);
void proc_set_traced(struct proc *p, bool stop);
void proc_wkilled(struct proc *p);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
+void proc_clear_orphan(struct proc *p);
void reaper_abandon_children(struct proc *p, bool exiting);
#ifndef __rtems__
int securelevel_ge(struct ucred *cr, int level);
@@ -1158,9 +1170,12 @@ void userret(struct thread *, struct trapframe *);
void cpu_exit(struct thread *);
void exit1(struct thread *, int, int) __dead2;
void cpu_copy_thread(struct thread *td, struct thread *td0);
+bool cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
int cpu_fetch_syscall_args(struct thread *td);
void cpu_fork(struct thread *, struct proc *, struct thread *, int);
void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
+int cpu_procctl(struct thread *td, int idtype, id_t id, int com,
+ void *data);
void cpu_set_syscall_retval(struct thread *, int);
#ifndef __rtems__
void cpu_set_upcall(struct thread *, void (*)(void *), void *,
@@ -1250,6 +1265,18 @@ void proc_id_set(int type, pid_t id);
void proc_id_set_cond(int type, pid_t id);
void proc_id_clear(int type, pid_t id);
+EVENTHANDLER_LIST_DECLARE(process_ctor);
+EVENTHANDLER_LIST_DECLARE(process_dtor);
+EVENTHANDLER_LIST_DECLARE(process_init);
+EVENTHANDLER_LIST_DECLARE(process_fini);
+EVENTHANDLER_LIST_DECLARE(process_exit);
+EVENTHANDLER_LIST_DECLARE(process_fork);
+EVENTHANDLER_LIST_DECLARE(process_exec);
+
+EVENTHANDLER_LIST_DECLARE(thread_ctor);
+EVENTHANDLER_LIST_DECLARE(thread_dtor);
+EVENTHANDLER_LIST_DECLARE(thread_init);
+
#endif /* _KERNEL */
#endif /* !_SYS_PROC_H_ */
diff --git a/freebsd/sys/sys/random.h b/freebsd/sys/sys/random.h
index aa6f6458..5dd7ee0b 100644
--- a/freebsd/sys/sys/random.h
+++ b/freebsd/sys/sys/random.h
@@ -37,31 +37,29 @@
struct uio;
-#if defined(DEV_RANDOM)
-u_int read_random(void *, u_int);
-int read_random_uio(struct uio *, bool);
-#else
-static __inline int
-read_random_uio(void *a __unused, u_int b __unused)
-{
- return (0);
-}
#ifndef __rtems__
-static __inline u_int
-read_random(void *a __unused, u_int b __unused)
-{
- return (0);
-}
+void read_random(void *, u_int);
#else /* __rtems__ */
#include <unistd.h>
static __inline u_int
read_random(void *ptr, u_int n)
{
+
getentropy(ptr, n);
return (n);
}
#endif /* __rtems__ */
-#endif
+int read_random_uio(struct uio *, bool);
+#ifndef __rtems__
+bool is_random_seeded(void);
+#else /* __rtems__ */
+static __inline bool
+is_random_seeded(void)
+{
+ /* The __rtems__ variant unconditionally reports the generator as seeded. */
+ return (true);
+}
+#endif /* __rtems__ */
/*
* Note: if you add or remove members of random_entropy_source, remember to
@@ -97,6 +95,7 @@ enum random_entropy_source {
RANDOM_PURE_BROADCOM,
RANDOM_PURE_CCP,
RANDOM_PURE_DARN,
+ RANDOM_PURE_TPM,
ENTROPYSOURCE
};
_Static_assert(ENTROPYSOURCE <= 32,
@@ -104,9 +103,8 @@ _Static_assert(ENTROPYSOURCE <= 32,
#define RANDOM_LEGACY_BOOT_ENTROPY_MODULE "/boot/entropy"
#define RANDOM_CACHED_BOOT_ENTROPY_MODULE "boot_entropy_cache"
-#define RANDOM_CACHED_SKIP_START 256
-#if defined(DEV_RANDOM)
+#ifndef __rtems__
extern u_int hc_source_mask;
void random_harvest_queue_(const void *, u_int, enum random_entropy_source);
void random_harvest_fast_(const void *, u_int);
@@ -163,6 +161,9 @@ void random_harvest_deregister_source(enum random_entropy_source);
#define GRND_NONBLOCK 0x1
#define GRND_RANDOM 0x2
+
+__BEGIN_DECLS
ssize_t getrandom(void *buf, size_t buflen, unsigned int flags);
+__END_DECLS
#endif /* _SYS_RANDOM_H_ */
diff --git a/freebsd/sys/sys/refcount.h b/freebsd/sys/sys/refcount.h
index 0cc4eb41..c21a0dca 100644
--- a/freebsd/sys/sys/refcount.h
+++ b/freebsd/sys/sys/refcount.h
@@ -2,7 +2,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2005 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,82 +30,150 @@
#ifndef __SYS_REFCOUNT_H__
#define __SYS_REFCOUNT_H__
-#include <sys/limits.h>
#include <machine/atomic.h>
#ifdef _KERNEL
#include <sys/systm.h>
#else
+#include <stdbool.h>
#define KASSERT(exp, msg) /* */
#endif
+#define REFCOUNT_WAITER (1U << 31) /* Refcount has waiter. */
+#define REFCOUNT_SATURATION_VALUE (3U << 29)
+
+#define REFCOUNT_SATURATED(val) (((val) & (1U << 30)) != 0)
+#define REFCOUNT_COUNT(x) ((x) & ~REFCOUNT_WAITER)
+
+bool refcount_release_last(volatile u_int *count, u_int n, u_int old);
+void refcount_sleep(volatile u_int *count, const char *wmesg, int prio);
+
+/*
+ * Attempt to handle reference count overflow and underflow. Force the counter
+ * to stay at the saturation value so that a counter overflow cannot trigger
+ * destruction of the containing object and instead leads to a less harmful
+ * memory leak.
+ */
static __inline void
-refcount_init(volatile u_int *count, u_int value)
+_refcount_update_saturated(volatile u_int *count)
{
+#ifdef INVARIANTS
+ panic("refcount %p wraparound", count);
+#else
+ atomic_store_int((volatile int *)count, REFCOUNT_SATURATION_VALUE);
+#endif
+}
+static __inline void
+refcount_init(volatile u_int *count, u_int value)
+{
+ KASSERT(!REFCOUNT_SATURATED(value),
+ ("invalid initial refcount value %u", value));
*count = value;
}
static __inline void
refcount_acquire(volatile u_int *count)
{
+ u_int old;
- KASSERT(*count < UINT_MAX, ("refcount %p overflowed", count));
- atomic_add_int((volatile int *)count, 1);
+ old = atomic_fetchadd_int((volatile int *)count, 1);
+ if (__predict_false(REFCOUNT_SATURATED(old)))
+ _refcount_update_saturated(count);
}
-static __inline int
-refcount_release(volatile u_int *count)
+static __inline void
+refcount_acquiren(volatile u_int *count, u_int n)
+{
+ u_int old;
+ /* Add n references at once; n must stay below REFCOUNT_SATURATION_VALUE / 2. */
+ KASSERT(n < REFCOUNT_SATURATION_VALUE / 2,
+ ("refcount_acquiren: n=%u too large", n));
+ old = atomic_fetchadd_int((volatile int *)count, n);
+ if (__predict_false(REFCOUNT_SATURATED(old))) /* value before the add was already saturated */
+ _refcount_update_saturated(count);
+}
+
+static __inline __result_use_check bool
+refcount_acquire_checked(volatile u_int *count)
+{
+ u_int lcount;
+ /* Take one reference; return false, without modifying *count, if count + 1 would be saturated. */
+ for (lcount = *count;;) {
+ if (__predict_false(REFCOUNT_SATURATED(lcount + 1)))
+ return (false);
+ if (__predict_true(atomic_fcmpset_int((volatile int *)count,
+ (int *)&lcount, lcount + 1) == 1)) /* NOTE(review): presumably refreshes lcount on failure - confirm */
+ return (true);
+ }
+}
+
+static __inline bool
+refcount_releasen(volatile u_int *count, u_int n)
{
u_int old;
+ KASSERT(n < REFCOUNT_SATURATION_VALUE / 2,
+ ("refcount_releasen: n=%u too large", n));
+
atomic_thread_fence_rel();
- old = atomic_fetchadd_int((volatile int *)count, -1);
- KASSERT(old > 0, ("refcount %p is zero", count));
- if (old > 1)
- return (0);
-
- /*
- * Last reference. Signal the user to call the destructor.
- *
- * Ensure that the destructor sees all updates. The fence_rel
- * at the start of the function synchronized with this fence.
- */
- atomic_thread_fence_acq();
- return (1);
+ old = atomic_fetchadd_int((volatile int *)count, -n);
+ if (__predict_false(n >= REFCOUNT_COUNT(old) ||
+ REFCOUNT_SATURATED(old)))
+ return (refcount_release_last(count, n, old));
+ return (false);
+}
+
+static __inline bool
+refcount_release(volatile u_int *count)
+{
+ /* Drop a single reference; the result is whatever refcount_releasen(count, 1) returns. */
+ return (refcount_releasen(count, 1));
+}
+
+static __inline void
+refcount_wait(volatile u_int *count, const char *wmesg, int prio)
+{
+
+ while (*count != 0)
+ refcount_sleep(count, wmesg, prio);
}
/*
* This functions returns non-zero if the refcount was
* incremented. Else zero is returned.
*/
-static __inline __result_use_check int
+static __inline __result_use_check bool
refcount_acquire_if_not_zero(volatile u_int *count)
{
u_int old;
old = *count;
for (;;) {
- KASSERT(old < UINT_MAX, ("refcount %p overflowed", count));
- if (old == 0)
- return (0);
- if (atomic_fcmpset_int(count, &old, old + 1))
- return (1);
+ if (REFCOUNT_COUNT(old) == 0)
+ return (false);
+ if (__predict_false(REFCOUNT_SATURATED(old)))
+ return (true);
+ if (atomic_fcmpset_int((volatile int *)count,
+ (int *)&old, old + 1))
+ return (true);
}
}
-static __inline __result_use_check int
+static __inline __result_use_check bool
refcount_release_if_not_last(volatile u_int *count)
{
u_int old;
old = *count;
for (;;) {
- KASSERT(old > 0, ("refcount %p is zero", count));
- if (old == 1)
- return (0);
- if (atomic_fcmpset_int(count, &old, old - 1))
- return (1);
+ if (REFCOUNT_COUNT(old) == 1)
+ return (false);
+ if (__predict_false(REFCOUNT_SATURATED(old)))
+ return (true);
+ if (atomic_fcmpset_int((volatile int *)count,
+ (int *)&old, old - 1))
+ return (true);
}
}
diff --git a/freebsd/sys/sys/rmlock.h b/freebsd/sys/sys/rmlock.h
index 1dd2740c..ca098b2d 100644
--- a/freebsd/sys/sys/rmlock.h
+++ b/freebsd/sys/sys/rmlock.h
@@ -55,7 +55,6 @@ void rm_init_flags(struct rmlock *rm, const char *name, int opts);
void rm_destroy(struct rmlock *rm);
int rm_wowned(const struct rmlock *rm);
void rm_sysinit(void *arg);
-void rm_sysinit_flags(void *arg);
void _rm_wlock_debug(struct rmlock *rm, const char *file, int line);
void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line);
diff --git a/freebsd/sys/sys/rwlock.h b/freebsd/sys/sys/rwlock.h
index 531f10d2..c1e5a45c 100644
--- a/freebsd/sys/sys/rwlock.h
+++ b/freebsd/sys/sys/rwlock.h
@@ -2,7 +2,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -136,7 +135,6 @@
void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts);
void _rw_destroy(volatile uintptr_t *c);
void rw_sysinit(void *arg);
-void rw_sysinit_flags(void *arg);
int _rw_wowned(const volatile uintptr_t *c);
void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line);
int __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
diff --git a/freebsd/sys/sys/sbuf.h b/freebsd/sys/sys/sbuf.h
index 8e958cbe..10b59f36 100644
--- a/freebsd/sys/sys/sbuf.h
+++ b/freebsd/sys/sys/sbuf.h
@@ -52,11 +52,13 @@ struct sbuf {
#define SBUF_AUTOEXTEND 0x00000001 /* automatically extend buffer */
#define SBUF_INCLUDENUL 0x00000002 /* nulterm byte is counted in len */
#define SBUF_DRAINTOEOR 0x00000004 /* use section 0 as drain EOR marker */
+#define SBUF_NOWAIT 0x00000008 /* Extend with non-blocking malloc */
#define SBUF_USRFLAGMSK 0x0000ffff /* mask of flags the user may specify */
#define SBUF_DYNAMIC 0x00010000 /* s_buf must be freed */
#define SBUF_FINISHED 0x00020000 /* set by sbuf_finish() */
#define SBUF_DYNSTRUCT 0x00080000 /* sbuf must be freed */
#define SBUF_INSECTION 0x00100000 /* set by sbuf_start_section() */
+#define SBUF_DRAINATEOL 0x00200000 /* drained contents ended in \n */
int s_flags; /* flags */
ssize_t s_sect_len; /* current length of section */
ssize_t s_rec_off; /* current record start offset */
@@ -90,6 +92,7 @@ int sbuf_printf(struct sbuf *, const char *, ...)
__printflike(2, 3);
int sbuf_vprintf(struct sbuf *, const char *, __va_list)
__printflike(2, 0);
+int sbuf_nl_terminate(struct sbuf *);
int sbuf_putc(struct sbuf *, int);
void sbuf_set_drain(struct sbuf *, sbuf_drain_func *, void *);
int sbuf_trim(struct sbuf *);
@@ -103,6 +106,8 @@ void sbuf_start_section(struct sbuf *, ssize_t *);
ssize_t sbuf_end_section(struct sbuf *, ssize_t, size_t, int);
void sbuf_hexdump(struct sbuf *, const void *, int, const char *,
int);
+int sbuf_count_drain(void *arg, const char *data, int len);
+int sbuf_printf_drain(void *arg, const char *data, int len);
void sbuf_putbuf(struct sbuf *);
#ifdef _KERNEL
diff --git a/freebsd/sys/sys/seq.h b/freebsd/sys/sys/seq.h
deleted file mode 100644
index c5f00bcb..00000000
--- a/freebsd/sys/sys/seq.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*-
- * Copyright (c) 2014 Mateusz Guzik <mjg@FreeBSD.org>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SYS_SEQ_H_
-#define _SYS_SEQ_H_
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#endif
-#include <sys/types.h>
-
-/*
- * seq_t may be included in structs visible to userspace
- */
-typedef uint32_t seq_t;
-
-#ifdef _KERNEL
-
-/*
- * seq allows readers and writers to work with a consistent snapshot. Modifying
- * operations must be enclosed within a transaction delineated by
- * seq_write_beg/seq_write_end. The trick works by having the writer increment
- * the sequence number twice, at the beginning and end of the transaction.
- * The reader detects that the sequence number has not changed between its start
- * and end, and that the sequence number is even, to validate consistency.
- *
- * Some fencing (both hard fencing and compiler barriers) may be needed,
- * depending on the cpu. Modern AMD cpus provide strong enough guarantees to not
- * require any fencing by the reader or writer.
- *
- * Example usage:
- *
- * writers:
- * lock_exclusive(&obj->lock);
- * seq_write_begin(&obj->seq);
- * obj->var1 = ...;
- * obj->var2 = ...;
- * seq_write_end(&obj->seq);
- * unlock_exclusive(&obj->lock);
- *
- * readers:
- * int var1, var2;
- * seq_t seq;
- *
- * for (;;) {
- * seq = seq_read(&obj->seq);
- * var1 = obj->var1;
- * var2 = obj->var2;
- * if (seq_consistent(&obj->seq, seq))
- * break;
- * }
- * .....
- *
- * Writers may not block or sleep in any way.
- *
- * There are 2 minor caveats in this implementation:
- *
- * 1. There is no guarantee of progress. That is, a large number of writers can
- * interfere with the execution of the readers and cause the code to live-lock
- * in a loop trying to acquire a consistent snapshot.
- *
- * 2. If the reader loops long enough, the counter may overflow and eventually
- * wrap back to its initial value, fooling the reader into accepting the
- * snapshot. Given that this needs 4 billion transactional writes across a
- * single contended reader, it is unlikely to ever happen.
- */
-
-/* A hack to get MPASS macro */
-#include <sys/lock.h>
-
-#include <machine/cpu.h>
-
-static __inline bool
-seq_in_modify(seq_t seqp)
-{
-
- return (seqp & 1);
-}
-
-static __inline void
-seq_write_begin(seq_t *seqp)
-{
-
- critical_enter();
- MPASS(!seq_in_modify(*seqp));
- *seqp += 1;
- atomic_thread_fence_rel();
-}
-
-static __inline void
-seq_write_end(seq_t *seqp)
-{
-
- atomic_store_rel_32(seqp, *seqp + 1);
- MPASS(!seq_in_modify(*seqp));
- critical_exit();
-}
-
-static __inline seq_t
-seq_read(const seq_t *seqp)
-{
- seq_t ret;
-
- for (;;) {
- ret = atomic_load_acq_32(__DECONST(seq_t *, seqp));
- if (seq_in_modify(ret)) {
- cpu_spinwait();
- continue;
- }
- break;
- }
-
- return (ret);
-}
-
-static __inline seq_t
-seq_consistent_nomb(const seq_t *seqp, seq_t oldseq)
-{
-
- return (*seqp == oldseq);
-}
-
-static __inline seq_t
-seq_consistent(const seq_t *seqp, seq_t oldseq)
-{
-
- atomic_thread_fence_acq();
- return (seq_consistent_nomb(seqp, oldseq));
-}
-
-#endif /* _KERNEL */
-#endif /* _SYS_SEQ_H_ */
diff --git a/freebsd/sys/sys/seqc.h b/freebsd/sys/sys/seqc.h
new file mode 100644
index 00000000..00e4cc3c
--- /dev/null
+++ b/freebsd/sys/sys/seqc.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2014 Mateusz Guzik <mjg@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SEQC_H_
+#define _SYS_SEQC_H_
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#endif
+#include <sys/types.h>
+
+/*
+ * seqc_t may be included in structs visible to userspace
+ */
+typedef uint32_t seqc_t;
+
+#ifdef _KERNEL
+
+/* A hack to get MPASS macro */
+#include <sys/lock.h>
+
+#include <machine/cpu.h>
+
+/* Odd counter values mark a write transaction in progress. */
+static __inline bool
+seqc_in_modify(seqc_t seqc)
+{
+	return ((seqc & 1) != 0);
+}
+
+static __inline void
+seqc_write_begin(seqc_t *seqcp)
+{
+ /* Open a write transaction: assert the counter is even, then bump it to odd. */
+ critical_enter();
+ MPASS(!seqc_in_modify(*seqcp));
+ *seqcp += 1;
+ atomic_thread_fence_rel();
+}
+
+static __inline void
+seqc_write_end(seqc_t *seqcp)
+{
+ /* Close the write transaction: release-store counter + 1, then leave the critical section. */
+ atomic_store_rel_int(seqcp, *seqcp + 1);
+ MPASS(!seqc_in_modify(*seqcp));
+ critical_exit();
+}
+
+static __inline seqc_t
+seqc_read(const seqc_t *seqcp)
+{
+	seqc_t snap;
+
+	/*
+	 * Reload until the counter is even.  An odd value means a writer
+	 * is between seqc_write_begin() and seqc_write_end().
+	 */
+	for (;;) {
+		snap = atomic_load_acq_int(__DECONST(seqc_t *, seqcp));
+		if (__predict_true(!seqc_in_modify(snap)))
+			return (snap);
+		cpu_spinwait();
+	}
+}
+
+static __inline bool
+seqc_consistent_nomb(const seqc_t *seqcp, seqc_t oldseqc)
+{
+ /* Plain comparison of the current counter with the earlier snapshot; no fence is issued here. */
+ return (*seqcp == oldseqc);
+}
+
+static __inline bool
+seqc_consistent(const seqc_t *seqcp, seqc_t oldseqc)
+{
+ /* Issue an acquire fence, then compare the counter with the snapshot. */
+ atomic_thread_fence_acq();
+ return (seqc_consistent_nomb(seqcp, oldseqc));
+}
+
+#endif /* _KERNEL */
+#endif /* _SYS_SEQC_H_ */
diff --git a/freebsd/sys/sys/sglist.h b/freebsd/sys/sys/sglist.h
index 5674416c..f11c74a4 100644
--- a/freebsd/sys/sys/sglist.h
+++ b/freebsd/sys/sys/sglist.h
@@ -57,6 +57,7 @@ struct sglist {
struct bio;
struct mbuf;
+struct mbuf_ext_pgs;
struct uio;
static __inline void
@@ -87,6 +88,9 @@ sglist_hold(struct sglist *sg)
struct sglist *sglist_alloc(int nsegs, int mflags);
int sglist_append(struct sglist *sg, void *buf, size_t len);
int sglist_append_bio(struct sglist *sg, struct bio *bp);
+int sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs,
+ size_t off, size_t len);
+int sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m);
int sglist_append_mbuf(struct sglist *sg, struct mbuf *m0);
int sglist_append_phys(struct sglist *sg, vm_paddr_t paddr,
size_t len);
@@ -101,6 +105,9 @@ struct sglist *sglist_build(void *buf, size_t len, int mflags);
struct sglist *sglist_clone(struct sglist *sg, int mflags);
int sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid);
int sglist_count(void *buf, size_t len);
+int sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off,
+ size_t len);
+int sglist_count_mb_ext_pgs(struct mbuf *m);
int sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_t len);
void sglist_free(struct sglist *sg);
int sglist_join(struct sglist *first, struct sglist *second);
diff --git a/freebsd/sys/sys/sleepqueue.h b/freebsd/sys/sys/sleepqueue.h
index 07530e3b..8974869f 100644
--- a/freebsd/sys/sys/sleepqueue.h
+++ b/freebsd/sys/sys/sleepqueue.h
@@ -2,7 +2,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -85,6 +84,7 @@ struct thread;
#define SLEEPQ_SX 0x03 /* Used by an sx lock. */
#define SLEEPQ_LK 0x04 /* Used by a lockmgr. */
#define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */
+#define SLEEPQ_UNFAIR 0x200 /* Unfair wakeup order. */
void init_sleepqueues(void);
int sleepq_abort(struct thread *td, int intrval);
diff --git a/freebsd/sys/sys/slicer.h b/freebsd/sys/sys/slicer.h
index 1565ecce..675b5acc 100644
--- a/freebsd/sys/sys/slicer.h
+++ b/freebsd/sys/sys/slicer.h
@@ -58,7 +58,7 @@ typedef int (*flash_slicer_t)(device_t dev, const char *provider,
#define FLASH_SLICES_TYPE_SPI 2
#define FLASH_SLICES_TYPE_MMC 3
-/* Use NULL for deregistering a slicer */
+/* Use NULL and set force to true for deregistering a slicer */
void flash_register_slicer(flash_slicer_t slicer, u_int type, bool force);
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/smp.h b/freebsd/sys/sys/smp.h
index aa0c3119..22b7dcd5 100644
--- a/freebsd/sys/sys/smp.h
+++ b/freebsd/sys/sys/smp.h
@@ -168,8 +168,10 @@ extern cpuset_t logical_cpus_mask;
#ifndef __rtems__
extern u_int mp_maxid;
extern int mp_maxcpus;
+extern int mp_ncores;
extern int mp_ncpus;
extern volatile int smp_started;
+extern int smp_threads_per_core;
extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM]; /* CPUs in each NUMA domain. */
diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h
index 3b716283..020c6bfe 100644
--- a/freebsd/sys/sys/sockbuf.h
+++ b/freebsd/sys/sys/sockbuf.h
@@ -50,6 +50,7 @@
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
#define SB_STOP 0x1000 /* backpressure indicator */
#define SB_AIO_RUNNING 0x2000 /* AIO operation running */
+#define SB_TLS_IFNET 0x4000 /* has used / is using ifnet KTLS */
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
@@ -63,6 +64,7 @@
#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
+struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
@@ -74,6 +76,7 @@ struct selinfo;
*
* Locking key to struct sockbuf:
* (a) locked by SOCKBUF_LOCK().
+ * (b) locked by sblock().
*/
struct sockbuf {
struct mtx sb_mtx; /* sockbuf lock */
@@ -98,7 +101,9 @@ struct sockbuf {
u_int sb_ctl; /* (a) non-data chars in buffer */
int sb_lowat; /* (a) low water mark */
sbintime_t sb_timeo; /* (a) timeout for read/write */
- short sb_flags; /* (a) flags, see below */
+ uint64_t sb_tls_seqno; /* (a) TLS seqno */
+ struct ktls_session *sb_tls_info; /* (a + b) TLS state */
+ short sb_flags; /* (a) flags, see above */
int (*sb_upcall)(struct socket *, void *, int); /* (a) */
void *sb_upcallarg; /* (a) */
#ifndef __rtems__
diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h
index af85aa05..74fc1306 100644
--- a/freebsd/sys/sys/socketvar.h
+++ b/freebsd/sys/sys/socketvar.h
@@ -180,13 +180,13 @@ struct socket {
/*
* Socket state bits.
*
- * Historically, this bits were all kept in the so_state field. For
- * locking reasons, they are now in multiple fields, as they are
- * locked differently. so_state maintains basic socket state protected
- * by the socket lock. so_qstate holds information about the socket
- * accept queues. Each socket buffer also has a state field holding
- * information relevant to that socket buffer (can't send, rcv). Many
- * fields will be read without locks to improve performance and avoid
+ * Historically, these bits were all kept in the so_state field.
+ * They are now split into separate, lock-specific fields.
+ * so_state maintains basic socket state protected by the socket lock.
+ * so_qstate holds information about the socket accept queues.
+ * Each socket buffer also has a state field holding information
+ * relevant to that socket buffer (can't send, rcv).
+ * Many fields will be read without locks to improve performance and avoid
* lock order issues. However, this approach must be used with caution.
*/
#define SS_NOFDREF 0x0001 /* no file table ref any more */
diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h
index a8562b8d..52f92265 100644
--- a/freebsd/sys/sys/sysctl.h
+++ b/freebsd/sys/sys/sysctl.h
@@ -218,6 +218,7 @@ int sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS);
int sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS);
int sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS);
+int sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS);
int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS);
int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS);
@@ -391,6 +392,25 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
NULL); \
})
+/* Read-only oid for a constant '\0' terminated string (CTLFLAG_WR is rejected). */
+#define SYSCTL_CONST_STRING(parent, nbr, name, access, arg, descr) \
+ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \
+ __DECONST(char *, arg), 0, sysctl_handle_string, "A", descr); \
+ CTASSERT(!((access) & CTLFLAG_WR)); /* parenthesized: access may be 'A | B' */ \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING)
+
+#define SYSCTL_ADD_CONST_STRING(ctx, parent, nbr, name, access, arg, descr) \
+({ \
+ char *__arg = __DECONST(char *, arg); \
+ CTASSERT(!((access) & CTLFLAG_WR)); /* const string: must not be writable */ \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING); \
+ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \
+ __arg, 0, sysctl_handle_string, "A", __DESCR(descr), \
+ NULL); \
+})
+
/* Oid for a bool. If ptr is NULL, val is returned. */
#define SYSCTL_NULL_BOOL_PTR ((bool *)NULL)
#define SYSCTL_BOOL(parent, nbr, name, access, ptr, val, descr) \
@@ -875,6 +895,24 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
NULL); \
})
+/* OID expressing a struct timeval as seconds (CTLTYPE_INT, CTLFLAG_RD always set); static and dynamic forms */
+#define SYSCTL_TIMEVAL_SEC(parent, nbr, name, access, ptr, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \
+ (ptr), 0, sysctl_sec_to_timeval, "I", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT)
+#define SYSCTL_ADD_TIMEVAL_SEC(ctx, parent, nbr, name, access, ptr, descr) \
+({ \
+ struct timeval *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \
+ __ptr, 0, sysctl_sec_to_timeval, "I", __DESCR(descr), \
+ NULL); \
+})
+
/*
* A macro to generate a read-only sysctl to indicate the presence of optional
* kernel features.
@@ -888,7 +926,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
/*
* Top-level identifiers
*/
-#define CTL_UNSPEC 0 /* unused */
+#define CTL_SYSCTL 0 /* "magic" numbers */
#define CTL_KERN 1 /* "high kernel": proc, limits */
#define CTL_VM 2 /* virtual memory */
#define CTL_VFS 3 /* filesystem, mount type is next */
@@ -900,6 +938,17 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define CTL_P1003_1B 9 /* POSIX 1003.1B */
/*
+ * CTL_SYSCTL identifiers
+ */
+#define CTL_SYSCTL_DEBUG 0 /* printf all nodes */
+#define CTL_SYSCTL_NAME 1 /* string name of OID */
+#define CTL_SYSCTL_NEXT 2 /* next OID */
+#define CTL_SYSCTL_NAME2OID 3 /* int array of name */
+#define CTL_SYSCTL_OIDFMT 4 /* OID's kind and format */
+#define CTL_SYSCTL_OIDDESCR 5 /* OID's description */
+#define CTL_SYSCTL_OIDLABEL 6 /* aggregation label */
+
+/*
* CTL_KERN identifiers
*/
#define KERN_OSTYPE 1 /* string: system version */
@@ -1140,6 +1189,9 @@ int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
void sysctl_wlock(void);
void sysctl_wunlock(void);
int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len);
+int kern___sysctlbyname(struct thread *td, const char *name,
+ size_t namelen, void *old, size_t *oldlenp, void *new,
+ size_t newlen, size_t *retval, int flags, bool inkernel);
struct sbuf;
struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int,
diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h
index faa5a62f..8fb785b3 100644
--- a/freebsd/sys/sys/sysproto.h
+++ b/freebsd/sys/sys/sysproto.h
@@ -1,7 +1,7 @@
/*
* System call prototypes.
*
- * DO NOT EDIT-- this file is automatically generated.
+ * DO NOT EDIT-- this file is automatically @generated.
* $FreeBSD$
*/
@@ -597,7 +597,7 @@ struct sysctl_args {
char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)];
char old_l_[PADL_(void *)]; void * old; char old_r_[PADR_(void *)];
char oldlenp_l_[PADL_(size_t *)]; size_t * oldlenp; char oldlenp_r_[PADR_(size_t *)];
- char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)];
+ char new_l_[PADL_(const void *)]; const void * new; char new_r_[PADR_(const void *)];
char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)];
};
struct mlock_args {
@@ -1837,6 +1837,28 @@ struct fhreadlink_args {
char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
char bufsize_l_[PADL_(size_t)]; size_t bufsize; char bufsize_r_[PADR_(size_t)];
};
+struct funlinkat_args {
+ char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)];
+ char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+};
+struct copy_file_range_args {
+ char infd_l_[PADL_(int)]; int infd; char infd_r_[PADR_(int)];
+ char inoffp_l_[PADL_(off_t *)]; off_t * inoffp; char inoffp_r_[PADR_(off_t *)];
+ char outfd_l_[PADL_(int)]; int outfd; char outfd_r_[PADR_(int)];
+ char outoffp_l_[PADL_(off_t *)]; off_t * outoffp; char outoffp_r_[PADR_(off_t *)];
+ char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
+ char flags_l_[PADL_(unsigned int)]; unsigned int flags; char flags_r_[PADR_(unsigned int)];
+};
+struct __sysctlbyname_args {
+ char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
+ char namelen_l_[PADL_(size_t)]; size_t namelen; char namelen_r_[PADR_(size_t)];
+ char old_l_[PADL_(void *)]; void * old; char old_r_[PADR_(void *)];
+ char oldlenp_l_[PADL_(size_t *)]; size_t * oldlenp; char oldlenp_r_[PADR_(size_t *)];
+ char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)];
+ char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_sys_exit(struct thread *, struct sys_exit_args *);
int sys_fork(struct thread *, struct fork_args *);
@@ -2221,6 +2243,9 @@ int sys_getfhat(struct thread *, struct getfhat_args *);
int sys_fhlink(struct thread *, struct fhlink_args *);
int sys_fhlinkat(struct thread *, struct fhlinkat_args *);
int sys_fhreadlink(struct thread *, struct fhreadlink_args *);
+int sys_funlinkat(struct thread *, struct funlinkat_args *);
+int sys_copy_file_range(struct thread *, struct copy_file_range_args *);
+int sys___sysctlbyname(struct thread *, struct __sysctlbyname_args *);
#ifdef COMPAT_43
@@ -3127,6 +3152,9 @@ int freebsd11_mknodat(struct thread *, struct freebsd11_mknodat_args *);
#define SYS_AUE_fhlink AUE_NULL
#define SYS_AUE_fhlinkat AUE_NULL
#define SYS_AUE_fhreadlink AUE_NULL
+#define SYS_AUE_funlinkat AUE_UNLINKAT
+#define SYS_AUE_copy_file_range AUE_NULL
+#define SYS_AUE___sysctlbyname AUE_SYSCTL
#endif /* __rtems__ */
#undef PAD_
diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h
index 1aa57670..8bb44703 100644
--- a/freebsd/sys/sys/systm.h
+++ b/freebsd/sys/sys/systm.h
@@ -105,7 +105,8 @@ extern int vm_guest; /* Running as virtual machine guest? */
* Keep in sync with vm_guest_sysctl_names[].
*/
enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
- VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_LAST };
+ VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX,
+ VM_GUEST_PARALLELS, VM_LAST };
/*
* These functions need to be declared before the KASSERT macro is invoked in
@@ -270,6 +271,13 @@ void init_param2(long physpages);
void init_static_kenv(char *, size_t);
void tablefull(const char *);
+/*
+ * Allocate per-thread "current" state in the linuxkpi
+ */
+extern int (*lkpi_alloc_current)(struct thread *, int);
+int linux_alloc_current_noop(struct thread *, int);
+
+
#if defined(KLD_MODULE) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET)
#define critical_enter() critical_enter_KBI()
#define critical_exit() critical_exit_KBI()
@@ -612,6 +620,7 @@ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr,
_sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
void wakeup(void * chan);
void wakeup_one(void * chan);
+void wakeup_any(void * chan);
/*
* Common `struct cdev *' stuff are declared here to avoid #include poisoning
@@ -687,6 +696,7 @@ void counted_warning(unsigned *counter, const char *msg);
/*
* APIs to manage deprecation and obsolescence.
*/
+#ifndef __rtems__
struct device;
void _gone_in(int major, const char *msg);
void _gone_in_dev(struct device *dev, int major, const char *msg);
@@ -699,9 +709,10 @@ void _gone_in_dev(struct device *dev, int major, const char *msg);
#endif
#define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg)
#define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg)
-#define gone_by_fcp101_dev(dev) \
- gone_in_dev((dev), 13, \
- "see https://github.com/freebsd/fcp/blob/master/fcp-0101.md")
+#else /* __rtems__ */
+#define gone_in(major, msg) do { } while (0)
+#define gone_in_dev(dev, major, msg) do { } while (0)
+#endif /* __rtems__ */
__NULLABILITY_PRAGMA_POP
diff --git a/freebsd/sys/sys/tree.h b/freebsd/sys/sys/tree.h
index 539afb42..345f7dec 100644
--- a/freebsd/sys/sys/tree.h
+++ b/freebsd/sys/sys/tree.h
@@ -125,7 +125,7 @@ struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
\
/* Finds the node with the same key as elm */ \
-static __inline struct type * \
+static __unused __inline struct type * \
name##_SPLAY_FIND(struct name *head, struct type *elm) \
{ \
if (SPLAY_EMPTY(head)) \
@@ -136,7 +136,7 @@ name##_SPLAY_FIND(struct name *head, struct type *elm) \
return (NULL); \
} \
\
-static __inline struct type * \
+static __unused __inline struct type * \
name##_SPLAY_NEXT(struct name *head, struct type *elm) \
{ \
name##_SPLAY(head, elm); \
@@ -150,7 +150,7 @@ name##_SPLAY_NEXT(struct name *head, struct type *elm) \
return (elm); \
} \
\
-static __inline struct type * \
+static __unused __inline struct type * \
name##_SPLAY_MIN_MAX(struct name *head, int val) \
{ \
name##_SPLAY_MINMAX(head, val); \
diff --git a/freebsd/sys/sys/ucred.h b/freebsd/sys/sys/ucred.h
index 37a93357..cf785c63 100644
--- a/freebsd/sys/sys/ucred.h
+++ b/freebsd/sys/sys/ucred.h
@@ -92,7 +92,10 @@ struct xucred {
uid_t cr_uid; /* effective user id */
short cr_ngroups; /* number of groups */
gid_t cr_groups[XU_NGROUPS]; /* groups */
- void *_cr_unused1; /* compatibility with old ucred */
+ union {
+ void *_cr_unused1; /* compatibility with old ucred */
+ pid_t cr_pid;
+ };
#endif /* __rtems__ */
};
#define XUCRED_VERSION 0
@@ -121,12 +124,14 @@ void crfree(struct ucred *cr);
struct ucred *crget(void);
struct ucred *crhold(struct ucred *cr);
void cru2x(struct ucred *cr, struct xucred *xcr);
+void cru2xt(struct thread *td, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int n, gid_t *groups);
int groupmember(gid_t gid, struct ucred *cred);
#else /* __rtems__ */
#define crfree(cr) do { } while (0)
#define crhold(cr) NULL
#define cru2x(cr, xcr) do { } while (0)
+#define cru2xt(td, xcr) do { } while (0)
#define groupmember(gid, cred) 1
#endif /* __rtems__ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/user.h b/freebsd/sys/sys/user.h
index 80716460..b2338f01 100644
--- a/freebsd/sys/sys/user.h
+++ b/freebsd/sys/sys/user.h
@@ -477,6 +477,7 @@ struct kinfo_file {
#define KVME_FLAG_SUPER 0x00000008
#define KVME_FLAG_GROWS_UP 0x00000010
#define KVME_FLAG_GROWS_DOWN 0x00000020
+#define KVME_FLAG_USER_WIRED 0x00000040
#if defined(__amd64__)
#define KINFO_OVMENTRY_SIZE 1168
@@ -613,6 +614,7 @@ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int vntype_to_kinfo(int vtype);
+void pack_kinfo(struct kinfo_file *kif);
#endif /* !_KERNEL */
#endif
diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h
index 579d1675..3714a069 100644
--- a/freebsd/sys/sys/vmmeter.h
+++ b/freebsd/sys/sys/vmmeter.h
@@ -153,6 +153,8 @@ extern domainset_t vm_severe_domains;
#define VM_CNT_INC(var) VM_CNT_ADD(var, 1)
#define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var)
+extern u_long vm_user_wire_count;
+
static inline void
vm_wire_add(int cnt)
{
diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h
index 81f90bde..7c75adb4 100644
--- a/freebsd/sys/sys/vnode.h
+++ b/freebsd/sys/sys/vnode.h
@@ -59,6 +59,7 @@
enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
VMARKER };
+enum vgetstate { VGET_HOLDCNT, VGET_USECOUNT };
/*
* Each underlying filesystem allocates its own private area and hangs
* it from v_data. If non-null, this area is freed in getnewvnode().
@@ -170,7 +171,8 @@ struct vnode {
u_int v_iflag; /* i vnode flags (see below) */
u_int v_vflag; /* v vnode flags */
u_int v_mflag; /* l mnt-specific vnode flags */
- int v_writecount; /* v ref count of writers */
+ int v_writecount; /* I ref count of writers or
+ (negative) text users */
u_int v_hash;
enum vtype v_type; /* u vnode type */
};
@@ -233,6 +235,7 @@ struct xvnode {
* VI_DOOMED is doubly protected by the interlock and vnode lock. Both
* are required for writing but the status may be checked with either.
*/
+#define VI_TEXT_REF 0x0001 /* Text ref grabbed use ref */
#define VI_MOUNT 0x0020 /* Mount in progress */
#define VI_DOOMED 0x0080 /* This vnode is being recycled */
#define VI_FREE 0x0100 /* This vnode is on the freelist */
@@ -245,7 +248,6 @@ struct xvnode {
#define VV_NOSYNC 0x0004 /* unlinked, stop syncing */
#define VV_ETERNALDEV 0x0008 /* device that is never destroyed */
#define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */
-#define VV_TEXT 0x0020 /* vnode is a pure text prototype */
#define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */
#define VV_SYSTEM 0x0080 /* vnode being used by kernel */
#define VV_PROCDEP 0x0100 /* vnode is process dependent */
@@ -652,14 +654,18 @@ int vcount(struct vnode *vp);
#define vdropl(vp) _vdrop((vp), 1)
void _vdrop(struct vnode *, bool);
int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
-int vget(struct vnode *vp, int lockflag, struct thread *td);
+int vget(struct vnode *vp, int flags, struct thread *td);
+enum vgetstate vget_prep(struct vnode *vp);
+int vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
void vgone(struct vnode *vp);
#define vhold(vp) _vhold((vp), 0)
#define vholdl(vp) _vhold((vp), 1)
void _vhold(struct vnode *, bool);
+void vholdnz(struct vnode *);
void vinactive(struct vnode *, struct thread *);
int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
-int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
+int vtruncbuf(struct vnode *vp, off_t length, int blksize);
+void v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn,
int blksize);
void vunref(struct vnode *);
void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
@@ -669,8 +675,18 @@ int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
struct ucred *cred);
int vn_close(struct vnode *vp,
int flags, struct ucred *file_cred, struct thread *td);
+int vn_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags, struct ucred *incred, struct ucred *outcred,
+ struct thread *fsize_td);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
+int vn_fsync_buf(struct vnode *vp, int waitfor);
+int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags, struct ucred *incred, struct ucred *outcred,
+ struct thread *fsize_td);
+int vn_need_pageq_flush(struct vnode *vp);
int vn_isdisk(struct vnode *vp, int *errp);
int _vn_lock(struct vnode *vp, int flags, char *file, int line);
#define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
@@ -696,6 +712,8 @@ int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
int vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
int flags);
+int vn_truncate_locked(struct vnode *vp, off_t length, bool sync,
+ struct ucred *cred);
int vn_writechk(struct vnode *vp);
int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
const char *attrname, int *buflen, char *buf, struct thread *td);
@@ -721,8 +739,12 @@ int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
VI_MTX(vp))
#define vn_rangelock_rlock(vp, start, end) \
rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_tryrlock(vp, start, end) \
+ rangelock_tryrlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
#define vn_rangelock_wlock(vp, start, end) \
rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_trywlock(vp, start, end) \
+ rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
int vfs_cache_lookup(struct vop_lookup_args *ap);
void vfs_timestamp(struct timespec *);
@@ -736,6 +758,7 @@ int vop_stdfsync(struct vop_fsync_args *);
int vop_stdgetwritemount(struct vop_getwritemount_args *);
int vop_stdgetpages(struct vop_getpages_args *);
int vop_stdinactive(struct vop_inactive_args *);
+int vop_stdneed_inactive(struct vop_need_inactive_args *);
int vop_stdislocked(struct vop_islocked_args *);
int vop_stdkqfilter(struct vop_kqfilter_args *);
int vop_stdlock(struct vop_lock1_args *);
@@ -749,6 +772,7 @@ int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
int vop_stdallocate(struct vop_allocate_args *ap);
+int vop_stdset_text(struct vop_set_text_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
@@ -793,14 +817,18 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
void vop_strategy_pre(void *a);
void vop_lock_pre(void *a);
void vop_lock_post(void *a, int rc);
-void vop_unlock_post(void *a, int rc);
void vop_unlock_pre(void *a);
+void vop_unlock_post(void *a, int rc);
+void vop_need_inactive_pre(void *a);
+void vop_need_inactive_post(void *a, int rc);
#else
#define vop_strategy_pre(x) do { } while (0)
#define vop_lock_pre(x) do { } while (0)
#define vop_lock_post(x, y) do { } while (0)
-#define vop_unlock_post(x, y) do { } while (0)
#define vop_unlock_pre(x) do { } while (0)
+#define vop_unlock_post(x, y) do { } while (0)
+#define vop_need_inactive_pre(x) do { } while (0)
+#define vop_need_inactive_post(x, y) do { } while (0)
#endif
void vop_rename_fail(struct vop_rename_args *ap);
@@ -828,6 +856,36 @@ void vop_rename_fail(struct vop_rename_args *ap);
#define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
+#ifdef INVARIANTS
+#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) \
+do { \
+ int error_; \
+ /* Call the VOP and assert that it returned 0. */ \
+ error_ = VOP_ADD_WRITECOUNT((vp), (cnt)); \
+ VNASSERT(error_ == 0, (vp), ("VOP_ADD_WRITECOUNT returned %d", \
+ error_)); \
+} while (0)
+#define VOP_SET_TEXT_CHECKED(vp) \
+do { \
+ int error_; \
+ /* Call the VOP and assert that it returned 0. */ \
+ error_ = VOP_SET_TEXT((vp)); \
+ VNASSERT(error_ == 0, (vp), ("VOP_SET_TEXT returned %d", \
+ error_)); \
+} while (0)
+#define VOP_UNSET_TEXT_CHECKED(vp) \
+do { \
+ int error_; \
+ /* Call the VOP and assert that it returned 0. */ \
+ error_ = VOP_UNSET_TEXT((vp)); \
+ VNASSERT(error_ == 0, (vp), ("VOP_UNSET_TEXT returned %d", \
+ error_)); \
+} while (0)
+#else /* !INVARIANTS: plain calls, return value not checked */
+#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) VOP_ADD_WRITECOUNT((vp), (cnt))
+#define VOP_SET_TEXT_CHECKED(vp) VOP_SET_TEXT((vp))
+#define VOP_UNSET_TEXT_CHECKED(vp) VOP_UNSET_TEXT((vp))
+#endif /* INVARIANTS */
void vput(struct vnode *vp);
void vrele(struct vnode *vp);
@@ -873,6 +931,7 @@ int vfs_kqfilter(struct vop_kqfilter_args *);
void vfs_mark_atime(struct vnode *vp, struct ucred *cred);
struct dirent;
int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
+int vfs_emptydir(struct vnode *vp);
int vfs_unixify_accmode(accmode_t *accmode);
diff --git a/freebsd/sys/sys/watchdog.h b/freebsd/sys/sys/watchdog.h
index 191456a4..3728d742 100644
--- a/freebsd/sys/sys/watchdog.h
+++ b/freebsd/sys/sys/watchdog.h
@@ -104,7 +104,7 @@
#ifdef _KERNEL
-#include <sys/eventhandler.h>
+#include <sys/_eventhandler.h>
typedef void (*watchdog_fn)(void *, u_int, int *);
diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h
index f05f686c..d7c41c85 100644
--- a/freebsd/sys/vm/uma.h
+++ b/freebsd/sys/vm/uma.h
@@ -50,8 +50,6 @@ struct uma_zone;
/* Opaque type used as a handle to the zone */
typedef struct uma_zone * uma_zone_t;
-void zone_drain(uma_zone_t);
-
/*
* Item constructor
*
@@ -218,17 +216,6 @@ uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_zone_t master);
/*
- * Add a second master to a secondary zone. This provides multiple data
- * backends for objects with the same size. Both masters must have
- * compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are
- * the only supported.
- *
- * Returns:
- * Error on failure, 0 on success.
- */
-int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
-
-/*
* Create cache-only zones.
*
* This allows uma's per-cpu cache facilities to handle arbitrary
@@ -286,20 +273,23 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
* NUMA aware Zone. Implements a best
* effort first-touch policy.
*/
+#define UMA_ZONE_MINBUCKET 0x20000 /* Use smallest buckets. */
#endif /* __rtems__ */
-#define UMA_ZONE_NOBUCKETCACHE 0x20000 /*
- * Don't cache full buckets. Limit
- * UMA to per-cpu state.
- */
/*
* These flags are shared between the keg and zone. In zones wishing to add
* new kegs these flags must be compatible. Some are determined based on
* physical parameters of the request and may not be provided by the consumer.
*/
+#ifndef __rtems__
+#define UMA_ZONE_INHERIT \
+ (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
+ UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | UMA_ZONE_NUMA)
+#else /* __rtems__ */
#define UMA_ZONE_INHERIT \
(UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
+#endif /* __rtems__ */
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
@@ -310,6 +300,8 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
#define UMA_ALIGN_CACHE (0 - 1) /* Cache line size align */
#define UMA_ALIGNOF(type) (_Alignof(type) - 1) /* Alignment fit for 'type' */
+#define UMA_ANYDOMAIN -1 /* Special value for domain search. */
+
/*
* Destroys an empty uma zone. If the zone is not empty uma complains loudly.
*
@@ -452,17 +444,18 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
/*
- * Reclaims unused memory for all zones
+ * Reclaims unused memory
*
* Arguments:
- * None
+ * req Reclamation request type.
* Returns:
* None
- *
- * This should only be called by the page out daemon.
*/
-
-void uma_reclaim(void);
+#define UMA_RECLAIM_DRAIN 1 /* release bucket cache */
+#define UMA_RECLAIM_DRAIN_CPU 2 /* release bucket and per-CPU caches */
+#define UMA_RECLAIM_TRIM 3 /* trim bucket cache to WSS */
+void uma_reclaim(int req);
+void uma_zone_reclaim(uma_zone_t, int req);
/*
* Sets the alignment mask to be used for all zones requesting cache
@@ -514,6 +507,18 @@ int uma_zone_reserve_kva(uma_zone_t zone, int nitems);
int uma_zone_set_max(uma_zone_t zone, int nitems);
/*
+ * Sets a high limit on the number of items allowed in zone's bucket cache
+ *
+ * Arguments:
+ * zone The zone to limit
+ * nitems The requested upper limit on the number of items allowed
+ *
+ * Returns:
+ * int The effective value of nitems set
+ */
+int uma_zone_set_maxcache(uma_zone_t zone, int nitems);
+
+/*
* Obtains the effective limit on the number of items in a zone
*
* Arguments:
@@ -657,8 +662,8 @@ int uma_zone_exhausted_nolock(uma_zone_t zone);
/*
* Common UMA_ZONE_PCPU zones.
*/
+extern uma_zone_t pcpu_zone_int;
extern uma_zone_t pcpu_zone_64;
-extern uma_zone_t pcpu_zone_ptr;
/*
* Exported statistics structures to be used by user space monitoring tools.
@@ -698,7 +703,8 @@ struct uma_type_header {
uint64_t uth_frees; /* Zone: number of frees. */
uint64_t uth_fails; /* Zone: number of alloc failures. */
uint64_t uth_sleeps; /* Zone: number of alloc sleeps. */
- uint64_t _uth_reserved1[2]; /* Reserved. */
+ uint64_t uth_xdomain; /* Zone: number of cross-domain frees. */
+ uint64_t _uth_reserved1[1]; /* Reserved. */
};
struct uma_percpu_stat {
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index e4161510..4f2127cd 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -174,11 +174,18 @@ static char *bootmem;
static int boot_pages;
#endif /* __rtems__ */
-static struct sx uma_drain_lock;
+static struct sx uma_reclaim_lock;
-/* kmem soft limit. */
+/*
+ * kmem soft limit, initialized by uma_set_limit(). Ensure that early
+ * allocations don't trigger a wakeup of the reclaim thread.
+ */
static unsigned long uma_kmem_limit = LONG_MAX;
-static volatile unsigned long uma_kmem_total;
+SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
+ "UMA kernel memory soft limit");
+static unsigned long uma_kmem_total;
+SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
+ "UMA kernel memory usage");
#ifndef __rtems__
/* Is the VM done starting up? */
@@ -237,6 +244,7 @@ struct uma_bucket_zone {
#ifndef __rtems__
#define BUCKET_MAX BUCKET_SIZE(256)
+#define BUCKET_MIN BUCKET_SIZE(4)
#else /* __rtems__ */
#define BUCKET_MAX BUCKET_SIZE(128)
#endif /* __rtems__ */
@@ -259,9 +267,12 @@ struct uma_bucket_zone bucket_zones[] = {
/*
* Flags and enumerations to be passed to internal functions.
*/
-enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
-
-#define UMA_ANYDOMAIN -1 /* Special value for domain search. */
+enum zfreeskip { /* skip selector accepted by zone_free_item() */
+ SKIP_NONE = 0,
+ SKIP_CNT = 0x00000001, /* NOTE(review): low-word value; DTOR/FINI sit in the high word -- confirm usage */
+ SKIP_DTOR = 0x00010000,
+ SKIP_FINI = 0x00020000,
+};
/* Prototypes.. */
@@ -286,10 +297,10 @@ static void page_free(void *, vm_size_t, uint8_t);
#ifndef __rtems__
static void pcpu_page_free(void *, vm_size_t, uint8_t);
#endif /* __rtems__ */
-static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
+static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
-static void bucket_cache_drain(uma_zone_t zone);
+static void bucket_cache_reclaim(uma_zone_t zone, bool);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
@@ -299,25 +310,23 @@ static void keg_small_init(uma_keg_t keg);
static void keg_large_init(uma_keg_t keg);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
-static int hash_alloc(struct uma_hash *);
+static int hash_alloc(struct uma_hash *, u_int);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int, int);
+static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
-static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int, int);
static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
-#ifndef __rtems__
-static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
-#endif /* __rtems__ */
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
-static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
+static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
uma_fini fini, int align, uint32_t flags);
static int zone_import(uma_zone_t, void **, int, int, int);
@@ -520,37 +529,53 @@ bucket_zone_drain(void)
struct uma_bucket_zone *ubz;
for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
- zone_drain(ubz->ubz_zone);
+ uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
}
+/*
+ * Attempt to satisfy an allocation by retrieving a full bucket from one of the
+ * zone's caches.
+ */
static uma_bucket_t
-zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
+zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
{
uma_bucket_t bucket;
ZONE_LOCK_ASSERT(zone);
- if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
- LIST_REMOVE(bucket, ub_link);
+ TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
- if (ws && zdom->uzd_imin > zdom->uzd_nitems)
+ if (zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
+ zone->uz_bkt_count -= bucket->ub_cnt;
}
return (bucket);
}
+/*
+ * Insert a full bucket into the specified cache. The "ws" parameter indicates
+ * whether the bucket's contents should be counted as part of the zone's working
+ * set.
+ */
static void
zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
const bool ws)
{
ZONE_LOCK_ASSERT(zone);
+ KASSERT(zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow",
+ __func__, zone));
- LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ if (ws)
+ TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ else
+ TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
+ zone->uz_bkt_count += bucket->ub_cnt;
}
static void
@@ -573,15 +598,6 @@ zone_maxaction(uma_zone_t zone)
taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}
-static void
-zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
-{
- uma_klink_t klink;
-
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
- kegfn(klink->kl_keg);
-}
-
/*
* Routine called by timeout which is used to fire off some time interval
* based calculations. (stats, hash size, etc.)
@@ -616,7 +632,7 @@ zone_domain_update_wss(uma_zone_domain_t zdom)
MPASS(zdom->uzd_imax >= zdom->uzd_imin);
wss = zdom->uzd_imax - zdom->uzd_imin;
zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
- zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
+ zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
}
/*
@@ -626,8 +642,10 @@ zone_domain_update_wss(uma_zone_domain_t zdom)
* Returns nothing.
*/
static void
-keg_timeout(uma_keg_t keg)
+zone_timeout(uma_zone_t zone)
{
+ uma_keg_t keg = zone->uz_keg;
+ u_int slabs;
KEG_LOCK(keg);
/*
@@ -638,7 +656,8 @@ keg_timeout(uma_keg_t keg)
* may be a little aggressive. Should I allow for two collisions max?
*/
if (keg->uk_flags & UMA_ZONE_HASH &&
- keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
+ (slabs = keg->uk_pages / keg->uk_ppera) >
+ keg->uk_hash.uh_hashsize) {
struct uma_hash newhash;
struct uma_hash oldhash;
int ret;
@@ -649,9 +668,8 @@ keg_timeout(uma_keg_t keg)
* I have to do everything in stages and check for
* races.
*/
- newhash = keg->uk_hash;
KEG_UNLOCK(keg);
- ret = hash_alloc(&newhash);
+ ret = hash_alloc(&newhash, 1 << fls(slabs));
KEG_LOCK(keg);
if (ret) {
if (hash_expand(&keg->uk_hash, &newhash)) {
@@ -666,17 +684,9 @@ keg_timeout(uma_keg_t keg)
}
}
KEG_UNLOCK(keg);
-}
-
-static void
-zone_timeout(uma_zone_t zone)
-{
- int i;
-
- zone_foreach_keg(zone, &keg_timeout);
ZONE_LOCK(zone);
- for (i = 0; i < vm_ndomains; i++)
+ for (int i = 0; i < vm_ndomains; i++)
zone_domain_update_wss(&zone->uz_domain[i]);
ZONE_UNLOCK(zone);
}
@@ -692,16 +702,13 @@ zone_timeout(uma_zone_t zone)
* 1 on success and 0 on failure.
*/
static int
-hash_alloc(struct uma_hash *hash)
+hash_alloc(struct uma_hash *hash, u_int size)
{
- int oldsize;
- int alloc;
+ size_t alloc;
- oldsize = hash->uh_hashsize;
-
- /* We're just going to go to a power of two greater */
- if (oldsize) {
- hash->uh_hashsize = oldsize * 2;
+ KASSERT(powerof2(size), ("hash size must be power of 2"));
+ if (size > UMA_HASH_SIZE_INIT) {
+ hash->uh_hashsize = size;
alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
M_UMAHASH, M_NOWAIT);
@@ -738,8 +745,8 @@ static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
uma_slab_t slab;
- int hval;
- int i;
+ u_int hval;
+ u_int idx;
if (!newhash->uh_slab_hash)
return (0);
@@ -752,10 +759,10 @@ hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
* full rehash.
*/
- for (i = 0; i < oldhash->uh_hashsize; i++)
- while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
- slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
- SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
+ for (idx = 0; idx < oldhash->uh_hashsize; idx++)
+ while (!SLIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
+ slab = SLIST_FIRST(&oldhash->uh_slab_hash[idx]);
+ SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[idx], us_hlink);
hval = UMA_HASH(newhash, slab->us_data);
SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
slab, us_hlink);
@@ -808,6 +815,13 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
for (i = 0; i < bucket->ub_cnt; i++)
zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
+ if (zone->uz_max_items > 0) {
+ ZONE_LOCK(zone);
+ zone->uz_items -= bucket->ub_cnt;
+ if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
+ }
bucket->ub_cnt = 0;
}
@@ -838,22 +852,27 @@ cache_drain(uma_zone_t zone)
* XXX: It would be good to be able to assert that the zone is being
* torn down to prevent improper use of cache_drain().
*
- * XXX: We lock the zone before passing into bucket_cache_drain() as
+ * XXX: We lock the zone before passing into bucket_cache_reclaim() as
* it is used elsewhere. Should the tear-down path be made special
* there in some form?
*/
CPU_FOREACH(cpu) {
cache = &zone->uz_cpu[cpu];
bucket_drain(zone, cache->uc_allocbucket);
- bucket_drain(zone, cache->uc_freebucket);
if (cache->uc_allocbucket != NULL)
bucket_free(zone, cache->uc_allocbucket, NULL);
+ cache->uc_allocbucket = NULL;
+ bucket_drain(zone, cache->uc_freebucket);
if (cache->uc_freebucket != NULL)
bucket_free(zone, cache->uc_freebucket, NULL);
- cache->uc_allocbucket = cache->uc_freebucket = NULL;
+ cache->uc_freebucket = NULL;
+ bucket_drain(zone, cache->uc_crossbucket);
+ if (cache->uc_crossbucket != NULL)
+ bucket_free(zone, cache->uc_crossbucket, NULL);
+ cache->uc_crossbucket = NULL;
}
ZONE_LOCK(zone);
- bucket_cache_drain(zone);
+ bucket_cache_reclaim(zone, true);
ZONE_UNLOCK(zone);
}
@@ -874,13 +893,13 @@ static void
cache_drain_safe_cpu(uma_zone_t zone)
{
uma_cache_t cache;
- uma_bucket_t b1, b2;
+ uma_bucket_t b1, b2, b3;
int domain;
if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
return;
- b1 = b2 = NULL;
+ b1 = b2 = b3 = NULL;
ZONE_LOCK(zone);
critical_enter();
#ifndef __rtems__
@@ -906,12 +925,18 @@ cache_drain_safe_cpu(uma_zone_t zone)
b2 = cache->uc_freebucket;
cache->uc_freebucket = NULL;
}
+ b3 = cache->uc_crossbucket;
+ cache->uc_crossbucket = NULL;
critical_exit();
ZONE_UNLOCK(zone);
if (b1)
bucket_free(zone, b1, NULL);
if (b2)
bucket_free(zone, b2, NULL);
+ if (b3) {
+ bucket_drain(zone, b3);
+ bucket_free(zone, b3, NULL);
+ }
}
/*
@@ -922,7 +947,7 @@ cache_drain_safe_cpu(uma_zone_t zone)
* Zone lock must not be held when calling this function.
*/
static void
-cache_drain_safe(uma_zone_t zone)
+pcpu_cache_drain_safe(uma_zone_t zone)
{
int cpu;
@@ -951,22 +976,46 @@ cache_drain_safe(uma_zone_t zone)
#endif /* __rtems__ */
/*
- * Drain the cached buckets from a zone. Expects a locked zone on entry.
+ * Reclaim cached buckets from a zone. All buckets are reclaimed if the caller
+ * requested a drain, otherwise the per-domain caches are trimmed to their
+ * estimated working set size.
*/
static void
-bucket_cache_drain(uma_zone_t zone)
+bucket_cache_reclaim(uma_zone_t zone, bool drain)
{
uma_zone_domain_t zdom;
uma_bucket_t bucket;
+ long target, tofree;
int i;
- /*
- * Drain the bucket queues and free the buckets.
- */
for (i = 0; i < vm_ndomains; i++) {
zdom = &zone->uz_domain[i];
- while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
- NULL) {
+
+ /*
+ * If we were asked to drain the zone, we are done only once
+ * this bucket cache is empty. Otherwise, we reclaim items in
+ * excess of the zone's estimated working set size. If the
+ * difference nitems - imin is larger than the WSS estimate,
+ * then the estimate will grow at the end of this interval and
+ * we ignore the historical average.
+ */
+ target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
+ zdom->uzd_imin);
+ while (zdom->uzd_nitems > target) {
+ bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
+ if (bucket == NULL)
+ break;
+ tofree = bucket->ub_cnt;
+ TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
+ zdom->uzd_nitems -= tofree;
+
+ /*
+ * Shift the bounds of the current WSS interval to avoid
+ * perturbing the estimate.
+ */
+ zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
+ zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
+
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
@@ -975,8 +1024,8 @@ bucket_cache_drain(uma_zone_t zone)
}
/*
- * Shrink further bucket sizes. Price of single zone lock collision
- * is probably lower then price of global cache drain.
+ * Shrink the zone bucket size to ensure that the per-CPU caches
+ * don't grow too large.
*/
if (zone->uz_count > zone->uz_count_min)
zone->uz_count--;
@@ -1076,7 +1125,7 @@ finished:
}
static void
-zone_drain_wait(uma_zone_t zone, int waitok)
+zone_reclaim(uma_zone_t zone, int waitok, bool drain)
{
/*
@@ -1086,32 +1135,40 @@ zone_drain_wait(uma_zone_t zone, int waitok)
* when it wakes up.
*/
ZONE_LOCK(zone);
- while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
+ while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
if (waitok == M_NOWAIT)
goto out;
msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
}
- zone->uz_flags |= UMA_ZFLAG_DRAINING;
- bucket_cache_drain(zone);
+ zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
+ bucket_cache_reclaim(zone, drain);
ZONE_UNLOCK(zone);
+
/*
* The RECLAIMING flag protects us from being freed while
* we're running. Normally the uma_rwlock would protect us but we
* must be able to release and acquire the right lock for each keg.
*/
- zone_foreach_keg(zone, &keg_drain);
+ keg_drain(zone->uz_keg);
ZONE_LOCK(zone);
- zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
+ zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
wakeup(zone);
out:
ZONE_UNLOCK(zone);
}
-void
+static void
zone_drain(uma_zone_t zone)
{
- zone_drain_wait(zone, M_NOWAIT);
+ zone_reclaim(zone, M_NOWAIT, true);
+}
+
+static void
+zone_trim(uma_zone_t zone)
+{
+
+ zone_reclaim(zone, M_NOWAIT, false);
}
/*
@@ -1120,25 +1177,28 @@ zone_drain(uma_zone_t zone)
* otherwise the keg will be left unlocked.
*
* Arguments:
- * wait Shall we wait?
+ * flags Wait flags for the item initialization routine
+ * aflags Wait flags for the slab allocation
*
* Returns:
* The slab that was allocated or NULL if there is no memory and the
* caller specified M_NOWAIT.
*/
static uma_slab_t
-keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
+keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
+ int aflags)
{
uma_alloc allocf;
uma_slab_t slab;
unsigned long size;
uint8_t *mem;
- uint8_t flags;
+ uint8_t sflags;
int i;
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_alloc_slab: domain %d out of range", domain));
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
@@ -1146,7 +1206,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
slab = NULL;
mem = NULL;
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
- slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
+ slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
if (slab == NULL)
goto out;
}
@@ -1159,16 +1219,16 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
*/
if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
- wait |= M_ZERO;
+ aflags |= M_ZERO;
else
- wait &= ~M_ZERO;
+ aflags &= ~M_ZERO;
if (keg->uk_flags & UMA_ZONE_NODUMP)
- wait |= M_NODUMP;
+ aflags |= M_NODUMP;
/* zone is passed for legacy reasons. */
size = keg->uk_ppera * PAGE_SIZE;
- mem = allocf(zone, size, domain, &flags, wait);
+ mem = allocf(zone, size, domain, &sflags, aflags);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
@@ -1188,7 +1248,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
slab->us_keg = keg;
slab->us_data = mem;
slab->us_freecount = keg->uk_ipers;
- slab->us_flags = flags;
+ slab->us_flags = sflags;
slab->us_domain = domain;
BIT_FILL(SLAB_SETSIZE, &slab->us_free);
#ifdef INVARIANTS
@@ -1198,7 +1258,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
if (keg->uk_init != NULL) {
for (i = 0; i < keg->uk_ipers; i++)
if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
- keg->uk_size, wait) != 0)
+ keg->uk_size, flags) != 0)
break;
if (i != keg->uk_ipers) {
keg_free_slab(keg, slab, i);
@@ -1235,8 +1295,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
void *mem;
int pages;
- keg = zone_first_keg(zone);
-
+ keg = zone->uz_keg;
/*
* If we are in BOOT_BUCKETS or higher, then switch to the real
* allocator. Zones with page sized slabs switch at BOOT_PAGEALLOC.
@@ -1351,9 +1410,9 @@ pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
zkva += PAGE_SIZE;
}
return ((void*)addr);
- fail:
+fail:
TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
- vm_page_unwire(p, PQ_NONE);
+ vm_page_unwire_noq(p);
vm_page_free(p);
}
return (NULL);
@@ -1381,7 +1440,7 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
uma_keg_t keg;
TAILQ_INIT(&alloctail);
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
npages = howmany(bytes, PAGE_SIZE);
while (npages > 0) {
@@ -1403,7 +1462,7 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
* exit.
*/
TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
- vm_page_unwire(p, PQ_NONE);
+ vm_page_unwire_noq(p);
vm_page_free(p);
}
return (NULL);
@@ -1473,7 +1532,7 @@ pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
paddr = pmap_kextract(curva);
m = PHYS_TO_VM_PAGE(paddr);
- vm_page_unwire(m, PQ_NONE);
+ vm_page_unwire_noq(m);
vm_page_free(m);
}
pmap_qremove(sva, size >> PAGE_SHIFT);
@@ -1613,8 +1672,6 @@ keg_large_init(uma_keg_t keg)
{
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
- KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
- ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
@@ -1799,7 +1856,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
}
if (keg->uk_flags & UMA_ZONE_HASH)
- hash_alloc(&keg->uk_hash);
+ hash_alloc(&keg->uk_hash, 0);
CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
keg, zone->uz_name, zone,
@@ -1814,6 +1871,15 @@ keg_ctor(void *mem, int size, void *udata, int flags)
return (0);
}
+static void
+zone_alloc_counters(uma_zone_t zone)
+{
+
+ zone->uz_allocs = counter_u64_alloc(M_WAITOK);
+ zone->uz_frees = counter_u64_alloc(M_WAITOK);
+ zone->uz_fails = counter_u64_alloc(M_WAITOK);
+}
+
/*
* Zone header ctor. This initializes all fields, locks, etc.
*
@@ -1827,30 +1893,42 @@ zone_ctor(void *mem, int size, void *udata, int flags)
uma_zone_t zone = mem;
uma_zone_t z;
uma_keg_t keg;
+ int i;
bzero(zone, size);
zone->uz_name = arg->name;
zone->uz_ctor = arg->ctor;
zone->uz_dtor = arg->dtor;
- zone->uz_slab = zone_fetch_slab;
zone->uz_init = NULL;
zone->uz_fini = NULL;
- zone->uz_allocs = 0;
- zone->uz_frees = 0;
- zone->uz_fails = 0;
zone->uz_sleeps = 0;
+ zone->uz_xdomain = 0;
zone->uz_count = 0;
zone->uz_count_min = 0;
+ zone->uz_count_max = BUCKET_MAX;
zone->uz_flags = 0;
zone->uz_warning = NULL;
#ifndef __rtems__
/* The domain structures follow the cpu structures. */
zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
#endif /* __rtems__ */
+ zone->uz_bkt_max = ULONG_MAX;
timevalclear(&zone->uz_ratecheck);
- keg = arg->keg;
- ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
+#ifndef __rtems__
+ if (__predict_true(booted == BOOT_RUNNING))
+#else /* __rtems__ */
+ if (__predict_true(pcpu_zone_64 != NULL))
+#endif /* __rtems__ */
+ zone_alloc_counters(zone);
+ else {
+ zone->uz_allocs = EARLY_COUNTER;
+ zone->uz_frees = EARLY_COUNTER;
+ zone->uz_fails = EARLY_COUNTER;
+ }
+
+ for (i = 0; i < vm_ndomains; i++)
+ TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
/*
* This is a pure cache zone, no kegs.
@@ -1864,6 +1942,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
zone->uz_release = arg->release;
zone->uz_arg = arg->arg;
zone->uz_lockptr = &zone->uz_lock;
+ ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
rw_wlock(&uma_rwlock);
LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
rw_wunlock(&uma_rwlock);
@@ -1876,6 +1955,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
zone->uz_import = (uma_import)zone_import;
zone->uz_release = (uma_release)zone_release;
zone->uz_arg = zone;
+ keg = arg->keg;
if (arg->flags & UMA_ZONE_SECONDARY) {
KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
@@ -1914,12 +1994,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
return (error);
}
- /*
- * Link in the first keg.
- */
- zone->uz_klink.kl_keg = keg;
- LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
- zone->uz_lockptr = &keg->uk_lock;
+ zone->uz_keg = keg;
zone->uz_size = keg->uk_size;
zone->uz_flags |= (keg->uk_flags &
(UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
@@ -1938,9 +2013,14 @@ out:
KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
(UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
("Invalid zone flag combination"));
- if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
+ if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0) {
zone->uz_count = BUCKET_MAX;
- else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
+#ifndef __rtems__
+ } else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0) {
+ zone->uz_count = BUCKET_MIN;
+ zone->uz_count_max = BUCKET_MIN;
+#endif /* __rtems__ */
+ } else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
zone->uz_count = 0;
else
zone->uz_count = bucket_select(zone->uz_size);
@@ -1985,12 +2065,10 @@ keg_dtor(void *arg, int size, void *udata)
static void
zone_dtor(void *arg, int size, void *udata)
{
- uma_klink_t klink;
uma_zone_t zone;
uma_keg_t keg;
zone = (uma_zone_t)arg;
- keg = zone_first_keg(zone);
if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
cache_drain(zone);
@@ -2004,27 +2082,22 @@ zone_dtor(void *arg, int size, void *udata)
* released and then refilled before we
* remove it... we don't care for now
*/
- zone_drain_wait(zone, M_WAITOK);
+ zone_reclaim(zone, M_WAITOK, true);
/*
- * Unlink all of our kegs.
+ * We only destroy kegs from non-secondary/non-cache zones.
*/
- while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
- klink->kl_keg = NULL;
- LIST_REMOVE(klink, kl_link);
- if (klink == &zone->uz_klink)
- continue;
- free(klink, M_TEMP);
- }
- /*
- * We only destroy kegs from non secondary zones.
- */
- if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
+ if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
+ keg = zone->uz_keg;
rw_wlock(&uma_rwlock);
LIST_REMOVE(keg, uk_link);
rw_wunlock(&uma_rwlock);
zone_free_item(kegs, keg, NULL, SKIP_NONE);
}
- ZONE_LOCK_FINI(zone);
+ counter_u64_free(zone->uz_allocs);
+ counter_u64_free(zone->uz_frees);
+ counter_u64_free(zone->uz_fails);
+ if (zone->uz_lockptr == &zone->uz_lock)
+ ZONE_LOCK_FINI(zone);
}
/*
@@ -2043,12 +2116,23 @@ zone_foreach(void (*zfunc)(uma_zone_t))
uma_keg_t keg;
uma_zone_t zone;
- rw_rlock(&uma_rwlock);
+ /*
+ * Before BOOT_RUNNING we are guaranteed to be single
+ * threaded, so locking isn't needed. Startup functions
+ * are allowed to use M_WAITOK.
+ */
+#ifndef __rtems__
+ if (__predict_true(booted == BOOT_RUNNING))
+#endif /* __rtems__ */
+ rw_rlock(&uma_rwlock);
LIST_FOREACH(keg, &uma_kegs, uk_link) {
LIST_FOREACH(zone, &keg->uk_zones, uz_link)
zfunc(zone);
}
- rw_runlock(&uma_rwlock);
+#ifndef __rtems__
+ if (__predict_true(booted == BOOT_RUNNING))
+#endif /* __rtems__ */
+ rw_runlock(&uma_rwlock);
}
#ifndef __rtems__
@@ -2235,7 +2319,7 @@ uma_startup2(void)
printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
#endif
booted = BOOT_BUCKETS;
- sx_init(&uma_drain_lock, "umadrain");
+ sx_init(&uma_reclaim_lock, "umareclaim");
bucket_enable();
}
#endif /* __rtems__ */
@@ -2255,6 +2339,7 @@ uma_startup3(void)
uma_skip_cnt = counter_u64_alloc(M_WAITOK);
#endif /* __rtems__ */
#endif
+ zone_foreach(zone_alloc_counters);
callout_init(&uma_callout, 1);
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifndef __rtems__
@@ -2302,6 +2387,11 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
align, name));
+ /* Sets all zones to a first-touch domain policy. */
+#ifdef UMA_FIRSTTOUCH
+ flags |= UMA_ZONE_NUMA;
+#endif
+
/* This stuff is essential for the zone ctor */
memset(&args, 0, sizeof(args));
args.name = name;
@@ -2333,7 +2423,7 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
locked = false;
} else {
#endif /* __rtems__ */
- sx_slock(&uma_drain_lock);
+ sx_slock(&uma_reclaim_lock);
#ifndef __rtems__
locked = true;
}
@@ -2342,7 +2432,7 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
#ifndef __rtems__
if (locked)
#endif /* __rtems__ */
- sx_sunlock(&uma_drain_lock);
+ sx_sunlock(&uma_reclaim_lock);
return (res);
}
@@ -2358,7 +2448,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
bool locked;
#endif /* __rtems__ */
- keg = zone_first_keg(master);
+ keg = master->uz_keg;
memset(&args, 0, sizeof(args));
args.name = name;
args.size = keg->uk_size;
@@ -2375,7 +2465,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
locked = false;
} else {
#endif /* __rtems__ */
- sx_slock(&uma_drain_lock);
+ sx_slock(&uma_reclaim_lock);
#ifndef __rtems__
locked = true;
}
@@ -2385,7 +2475,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
#ifndef __rtems__
if (locked)
#endif /* __rtems__ */
- sx_sunlock(&uma_drain_lock);
+ sx_sunlock(&uma_reclaim_lock);
return (res);
}
@@ -2408,100 +2498,19 @@ uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
args.release = zrelease;
args.arg = arg;
args.align = 0;
- args.flags = flags;
+ args.flags = flags | UMA_ZFLAG_CACHE;
return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
-#ifndef __rtems__
-static void
-zone_lock_pair(uma_zone_t a, uma_zone_t b)
-{
- if (a < b) {
- ZONE_LOCK(a);
- mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
- } else {
- ZONE_LOCK(b);
- mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
- }
-}
-
-static void
-zone_unlock_pair(uma_zone_t a, uma_zone_t b)
-{
-
- ZONE_UNLOCK(a);
- ZONE_UNLOCK(b);
-}
-
-int
-uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
-{
- uma_klink_t klink;
- uma_klink_t kl;
- int error;
-
- error = 0;
- klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
-
- zone_lock_pair(zone, master);
- /*
- * zone must use vtoslab() to resolve objects and must already be
- * a secondary.
- */
- if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
- != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
- error = EINVAL;
- goto out;
- }
- /*
- * The new master must also use vtoslab().
- */
- if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * The underlying object must be the same size. rsize
- * may be different.
- */
- if (master->uz_size != zone->uz_size) {
- error = E2BIG;
- goto out;
- }
- /*
- * Put it at the end of the list.
- */
- klink->kl_keg = zone_first_keg(master);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
- if (LIST_NEXT(kl, kl_link) == NULL) {
- LIST_INSERT_AFTER(kl, klink, kl_link);
- break;
- }
- }
- klink = NULL;
- zone->uz_flags |= UMA_ZFLAG_MULTI;
- zone->uz_slab = zone_fetch_slab_multi;
-
-out:
- zone_unlock_pair(zone, master);
- if (klink != NULL)
- free(klink, M_TEMP);
-
- return (error);
-}
-#endif /* __rtems__ */
-
-
/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
- sx_slock(&uma_drain_lock);
+ sx_slock(&uma_reclaim_lock);
zone_free_item(zones, zone, NULL, SKIP_NONE);
- sx_sunlock(&uma_drain_lock);
+ sx_sunlock(&uma_reclaim_lock);
}
void
@@ -2555,7 +2564,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
uma_bucket_t bucket;
uma_cache_t cache;
void *item;
- int cpu, domain, lockfail;
+ int cpu, domain, lockfail, maxbucket;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -2634,8 +2643,8 @@ zalloc_start:
zone->uz_dtor != trash_dtor) &&
#endif
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
- atomic_add_long(&zone->uz_fails, 1);
- zone_free_item(zone, item, udata, SKIP_DTOR);
+ counter_u64_add(zone->uz_fails, 1);
+ zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
return (NULL);
}
#ifdef INVARIANTS
@@ -2670,18 +2679,17 @@ zalloc_start:
if (bucket != NULL)
bucket_free(zone, bucket, udata);
+ /* Short-circuit for zones without buckets and low memory. */
+ if (zone->uz_count == 0 || bucketdisable) {
+ ZONE_LOCK(zone);
#ifndef __rtems__
- if (zone->uz_flags & UMA_ZONE_NUMA) {
- domain = PCPU_GET(domain);
- if (VM_DOMAIN_EMPTY(domain))
- domain = UMA_ANYDOMAIN;
- } else
+ if (zone->uz_flags & UMA_ZONE_NUMA)
+ domain = PCPU_GET(domain);
+ else
#endif /* __rtems__ */
- domain = UMA_ANYDOMAIN;
-
- /* Short-circuit for zones without buckets and low memory. */
- if (zone->uz_count == 0 || bucketdisable)
+ domain = UMA_ANYDOMAIN;
goto zalloc_item;
+ }
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
@@ -2711,11 +2719,19 @@ zalloc_start:
/*
* Check the zone's cache of buckets.
*/
- if (domain == UMA_ANYDOMAIN)
- zdom = &zone->uz_domain[0];
- else
+#ifndef __rtems__
+ if (zone->uz_flags & UMA_ZONE_NUMA) {
+ domain = PCPU_GET(domain);
zdom = &zone->uz_domain[domain];
- if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
+ } else {
+#endif /* __rtems__ */
+ domain = UMA_ANYDOMAIN;
+ zdom = &zone->uz_domain[0];
+#ifndef __rtems__
+ }
+#endif /* __rtems__ */
+
+ if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
cache->uc_allocbucket = bucket;
@@ -2729,8 +2745,17 @@ zalloc_start:
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
+
+ if (zone->uz_max_items > 0) {
+ if (zone->uz_items >= zone->uz_max_items)
+ goto zalloc_item;
+ maxbucket = MIN(zone->uz_count,
+ zone->uz_max_items - zone->uz_items);
+ zone->uz_items += maxbucket;
+ } else
+ maxbucket = zone->uz_count;
ZONE_UNLOCK(zone);
/*
@@ -2738,11 +2763,18 @@ zalloc_start:
* works we'll restart the allocation from the beginning and it
* will use the just filled bucket.
*/
- bucket = zone_alloc_bucket(zone, udata, domain, flags);
+ bucket = zone_alloc_bucket(zone, udata, domain, flags, maxbucket);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
+ ZONE_LOCK(zone);
if (bucket != NULL) {
- ZONE_LOCK(zone);
+ if (zone->uz_max_items > 0 && bucket->ub_cnt < maxbucket) {
+ MPASS(zone->uz_items >= maxbucket - bucket->ub_cnt);
+ zone->uz_items -= maxbucket - bucket->ub_cnt;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ }
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
@@ -2761,7 +2793,7 @@ zalloc_start:
#endif /* __rtems__ */
cache->uc_allocbucket = bucket;
zdom->uzd_imax += bucket->ub_cnt;
- } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
critical_exit();
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
@@ -2771,13 +2803,18 @@ zalloc_start:
zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
+ } else if (zone->uz_max_items > 0) {
+ zone->uz_items -= maxbucket;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
}
/*
* We may not be able to get a bucket so return an actual item.
*/
zalloc_item:
- item = zone_alloc_item(zone, udata, domain, flags);
+ item = zone_alloc_item_locked(zone, udata, domain, flags);
return (item);
}
@@ -2822,6 +2859,7 @@ keg_first_slab(uma_keg_t keg, int domain, bool rr)
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_first_slab: domain %d out of range", domain));
+ KEG_LOCK_ASSERT(keg);
slab = NULL;
start = domain;
@@ -2849,7 +2887,7 @@ keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
{
uint32_t reserve;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
if (keg->uk_free <= reserve)
@@ -2871,7 +2909,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
#ifndef __rtems__
restart:
#endif /* __rtems__ */
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
/*
* Use the keg's policy if upper layers haven't already specified a
@@ -2910,24 +2948,11 @@ restart:
if (flags & M_NOVM)
break;
- if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
- keg->uk_flags |= UMA_ZFLAG_FULL;
- /*
- * If this is not a multi-zone, set the FULL bit.
- * Otherwise slab_multi() takes care of it.
- */
- if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone_log_warning(zone);
- zone_maxaction(zone);
- }
- if (flags & M_NOWAIT)
- return (NULL);
- zone->uz_sleeps++;
- msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
- continue;
- }
- slab = keg_alloc_slab(keg, zone, domain, aflags);
+ KASSERT(zone->uz_max_items == 0 ||
+ zone->uz_items <= zone->uz_max_items,
+ ("%s: zone %p overflow", __func__, zone));
+
+ slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
/*
* If we got a slab here it's safe to mark it partially used
* and return. We assume that the caller is going to remove
@@ -2973,7 +2998,7 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
uma_slab_t slab;
if (keg == NULL) {
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
}
@@ -2988,89 +3013,6 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
return (NULL);
}
-#ifndef __rtems__
-/*
- * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
- * with the keg locked. On NULL no lock is held.
- *
- * The last pointer is used to seed the search. It is not required.
- */
-static uma_slab_t
-zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
-{
- uma_klink_t klink;
- uma_slab_t slab;
- uma_keg_t keg;
- int flags;
- int empty;
- int full;
-
- /*
- * Don't wait on the first pass. This will skip limit tests
- * as well. We don't want to block if we can find a provider
- * without blocking.
- */
- flags = (rflags & ~M_WAITOK) | M_NOWAIT;
- /*
- * Use the last slab allocated as a hint for where to start
- * the search.
- */
- if (last != NULL) {
- slab = keg_fetch_slab(last, zone, domain, flags);
- if (slab)
- return (slab);
- KEG_UNLOCK(last);
- }
- /*
- * Loop until we have a slab incase of transient failures
- * while M_WAITOK is specified. I'm not sure this is 100%
- * required but we've done it for so long now.
- */
- for (;;) {
- empty = 0;
- full = 0;
- /*
- * Search the available kegs for slabs. Be careful to hold the
- * correct lock while calling into the keg layer.
- */
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
- keg = klink->kl_keg;
- KEG_LOCK(keg);
- if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
- slab = keg_fetch_slab(keg, zone, domain, flags);
- if (slab)
- return (slab);
- }
- if (keg->uk_flags & UMA_ZFLAG_FULL)
- full++;
- else
- empty++;
- KEG_UNLOCK(keg);
- }
- if (rflags & (M_NOWAIT | M_NOVM))
- break;
- flags = rflags;
- /*
- * All kegs are full. XXX We can't atomically check all kegs
- * and sleep so just sleep for a short period and retry.
- */
- if (full && !empty) {
- ZONE_LOCK(zone);
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone->uz_sleeps++;
- zone_log_warning(zone);
- zone_maxaction(zone);
- msleep(zone, zone->uz_lockptr, PVM,
- "zonelimit", hz/100);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- ZONE_UNLOCK(zone);
- continue;
- }
- }
- return (NULL);
-}
-#endif /* __rtems__ */
-
static void *
slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
{
@@ -3079,7 +3021,7 @@ slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
uint8_t freei;
MPASS(keg == slab->us_keg);
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
@@ -3111,7 +3053,7 @@ zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
keg = NULL;
/* Try to keep the buckets totally full */
for (i = 0; i < max; ) {
- if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL)
+ if ((slab = zone_fetch_slab(zone, keg, domain, flags)) == NULL)
break;
keg = slab->us_keg;
#ifdef NUMA
@@ -3146,21 +3088,25 @@ zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
}
static uma_bucket_t
-zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
+zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max)
{
uma_bucket_t bucket;
- int max;
CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
+#ifndef __rtems__
+ /* Avoid allocs targeting empty domains. */
+ if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
+#endif /* __rtems__ */
+
/* Don't wait for buckets, preserve caller's NOVM setting. */
bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
if (bucket == NULL)
return (NULL);
- max = MIN(bucket->ub_entries, zone->uz_count);
bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
- max, domain, flags);
+ MIN(max, bucket->ub_entries), domain, flags);
/*
* Initialize the memory if necessary.
@@ -3189,7 +3135,7 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
if (bucket->ub_cnt == 0) {
bucket_free(zone, bucket, udata);
- atomic_add_long(&zone->uz_fails, 1);
+ counter_u64_add(zone->uz_fails, 1);
return (NULL);
}
@@ -3213,23 +3159,54 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
{
+
+ ZONE_LOCK(zone);
+ return (zone_alloc_item_locked(zone, udata, domain, flags));
+}
+
+/*
+ * Returns with zone unlocked.
+ */
+static void *
+zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
+{
void *item;
#ifdef INVARIANTS
bool skipdbg;
#endif
- item = NULL;
+ ZONE_LOCK_ASSERT(zone);
-#ifndef __rtems__
- if (domain != UMA_ANYDOMAIN) {
- /* avoid allocs targeting empty domains */
- if (VM_DOMAIN_EMPTY(domain))
- domain = UMA_ANYDOMAIN;
+ if (zone->uz_max_items > 0) {
+ if (zone->uz_items >= zone->uz_max_items) {
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ if (flags & M_NOWAIT) {
+ ZONE_UNLOCK(zone);
+ return (NULL);
+ }
+ zone->uz_sleeps++;
+ zone->uz_sleepers++;
+ while (zone->uz_items >= zone->uz_max_items)
+ mtx_sleep(zone, zone->uz_lockptr, PVM,
+ "zonelimit", 0);
+ zone->uz_sleepers--;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
+ }
+ zone->uz_items++;
}
+ ZONE_UNLOCK(zone);
+
+#ifndef __rtems__
+ /* Avoid allocs targeting empty domains. */
+ if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
+ domain = UMA_ANYDOMAIN;
#endif /* __rtems__ */
+
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
- atomic_add_long(&zone->uz_allocs, 1);
#ifdef INVARIANTS
skipdbg = uma_dbg_zskip(zone, item);
@@ -3242,7 +3219,7 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
*/
if (zone->uz_init != NULL) {
if (zone->uz_init(item, zone->uz_size, flags) != 0) {
- zone_free_item(zone, item, udata, SKIP_FINI);
+ zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
goto fail;
}
}
@@ -3252,7 +3229,7 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
zone->uz_dtor != trash_dtor) &&
#endif
zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
- zone_free_item(zone, item, udata, SKIP_DTOR);
+ zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
goto fail;
}
#ifdef INVARIANTS
@@ -3262,15 +3239,21 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
if (flags & M_ZERO)
uma_zero_item(item, zone);
+ counter_u64_add(zone->uz_allocs, 1);
CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
zone->uz_name, zone);
return (item);
fail:
+ if (zone->uz_max_items > 0) {
+ ZONE_LOCK(zone);
+ zone->uz_items--;
+ ZONE_UNLOCK(zone);
+ }
+ counter_u64_add(zone->uz_fails, 1);
CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
zone->uz_name, zone);
- atomic_add_long(&zone->uz_fails, 1);
return (NULL);
}
@@ -3282,10 +3265,14 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
uma_bucket_t bucket;
uma_zone_domain_t zdom;
#ifndef __rtems__
- int cpu, domain, lockfail;
+ int cpu, domain;
#else /* __rtems__ */
- int cpu, lockfail;
+ int cpu;
#endif /* __rtems__ */
+#ifdef UMA_XDOMAIN
+ int itemdomain;
+#endif
+ bool lockfail;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -3333,9 +3320,14 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
- if (zone->uz_flags & UMA_ZFLAG_FULL)
+ if (zone->uz_sleepers > 0)
goto zfree_item;
+#ifdef UMA_XDOMAIN
+ if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+ itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+#endif
+
/*
* If possible, free to the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
@@ -3353,14 +3345,28 @@ zfree_restart:
cache = &zone->uz_cpu[cpu];
zfree_start:
+#ifndef __rtems__
+ domain = PCPU_GET(domain);
+#endif /* __rtems__ */
+#ifdef UMA_XDOMAIN
+ if ((zone->uz_flags & UMA_ZONE_NUMA) == 0)
+ itemdomain = domain;
+#endif
/*
* Try to free into the allocbucket first to give LIFO ordering
* for cache-hot datastructures. Spill over into the freebucket
* if necessary. Alloc will swap them if one runs dry.
*/
- bucket = cache->uc_allocbucket;
- if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
- bucket = cache->uc_freebucket;
+#ifdef UMA_XDOMAIN
+ if (domain != itemdomain) {
+ bucket = cache->uc_crossbucket;
+ } else
+#endif
+ {
+ bucket = cache->uc_allocbucket;
+ if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
+ bucket = cache->uc_freebucket;
+ }
if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
("uma_zfree: Freeing to non free bucket index."));
@@ -3383,34 +3389,80 @@ zfree_start:
if (zone->uz_count == 0 || bucketdisable)
goto zfree_item;
- lockfail = 0;
+ lockfail = false;
if (ZONE_TRYLOCK(zone) == 0) {
/* Record contention to size the buckets. */
ZONE_LOCK(zone);
- lockfail = 1;
+ lockfail = true;
}
critical_enter();
cpu = curcpu;
+#ifndef __rtems__
+ domain = PCPU_GET(domain);
+#endif /* __rtems__ */
cache = &zone->uz_cpu[cpu];
- bucket = cache->uc_freebucket;
+#ifdef UMA_XDOMAIN
+ if (domain != itemdomain)
+ bucket = cache->uc_crossbucket;
+ else
+#endif
+ bucket = cache->uc_freebucket;
if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
- cache->uc_freebucket = NULL;
+#ifdef UMA_XDOMAIN
+ if (domain != itemdomain)
+ cache->uc_crossbucket = NULL;
+ else
+#endif
+ cache->uc_freebucket = NULL;
/* We are no longer associated with this CPU. */
critical_exit();
+#ifdef UMA_XDOMAIN
+ if (domain != itemdomain) {
+ if (bucket != NULL) {
+ zone->uz_xdomain += bucket->ub_cnt;
+ if (vm_ndomains > 2 ||
+ zone->uz_bkt_count >= zone->uz_bkt_max) {
+ ZONE_UNLOCK(zone);
+ bucket_drain(zone, bucket);
+ bucket_free(zone, bucket, udata);
+ } else {
+ zdom = &zone->uz_domain[itemdomain];
+ zone_put_bucket(zone, zdom, bucket, true);
+ ZONE_UNLOCK(zone);
+ }
+ } else
+ ZONE_UNLOCK(zone);
+ bucket = bucket_alloc(zone, udata, M_NOWAIT);
+ if (bucket == NULL)
+ goto zfree_item;
+ critical_enter();
+ cpu = curcpu;
+ cache = &zone->uz_cpu[cpu];
+ if (cache->uc_crossbucket == NULL) {
+ cache->uc_crossbucket = bucket;
+ goto zfree_start;
+ }
+ critical_exit();
+ bucket_free(zone, bucket, udata);
+ goto zfree_restart;
+ }
+#endif
+
#ifndef __rtems__
if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
- domain = PCPU_GET(domain);
- if (VM_DOMAIN_EMPTY(domain))
- domain = UMA_ANYDOMAIN;
- } else
+ zdom = &zone->uz_domain[domain];
+ } else {
domain = 0;
-#endif /* __rtems__ */
+ zdom = &zone->uz_domain[0];
+ }
+#else /* __rtems__ */
zdom = &zone->uz_domain[0];
+#endif /* __rtems__ */
/* Can we throw this on the zone full list? */
if (bucket != NULL) {
@@ -3418,9 +3470,9 @@ zfree_start:
"uma_zfree: zone %s(%p) putting bucket %p on free list",
zone->uz_name, zone, bucket);
/* ub_cnt is pointing to the last free item */
- KASSERT(bucket->ub_cnt != 0,
- ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
- if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ KASSERT(bucket->ub_cnt == bucket->ub_entries,
+ ("uma_zfree: Attempting to insert not full bucket onto the full list.\n"));
+ if (zone->uz_bkt_count >= zone->uz_bkt_max) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, udata);
@@ -3433,7 +3485,7 @@ zfree_start:
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
ZONE_UNLOCK(zone);
@@ -3468,8 +3520,6 @@ zfree_start:
*/
zfree_item:
zone_free_item(zone, item, udata, SKIP_DTOR);
-
- return;
}
void
@@ -3494,12 +3544,15 @@ uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
}
static void
-slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
+slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
{
+ uma_keg_t keg;
uma_domain_t dom;
uint8_t freei;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ keg = zone->uz_keg;
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
+ KEG_LOCK_ASSERT(keg);
MPASS(keg == slab->us_keg);
dom = &keg->uk_domain[slab->us_domain];
@@ -3529,11 +3582,9 @@ zone_release(uma_zone_t zone, void **bucket, int cnt)
uma_slab_t slab;
uma_keg_t keg;
uint8_t *mem;
- int clearfull;
int i;
- clearfull = 0;
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
for (i = 0; i < cnt; i++) {
item = bucket[i];
@@ -3547,37 +3598,11 @@ zone_release(uma_zone_t zone, void **bucket, int cnt)
}
} else {
slab = vtoslab((vm_offset_t)item);
- if (slab->us_keg != keg) {
- KEG_UNLOCK(keg);
- keg = slab->us_keg;
- KEG_LOCK(keg);
- }
- }
- slab_free_item(keg, slab, item);
- if (keg->uk_flags & UMA_ZFLAG_FULL) {
- if (keg->uk_pages < keg->uk_maxpages) {
- keg->uk_flags &= ~UMA_ZFLAG_FULL;
- clearfull = 1;
- }
-
- /*
- * We can handle one more allocation. Since we're
- * clearing ZFLAG_FULL, wake up all procs blocked
- * on pages. This should be uncommon, so keeping this
- * simple for now (rather than adding count of blocked
- * threads etc).
- */
- wakeup(keg);
+ MPASS(slab->us_keg == keg);
}
+ slab_free_item(zone, slab, item);
}
KEG_UNLOCK(keg);
- if (clearfull) {
- ZONE_LOCK(zone);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- wakeup(zone);
- ZONE_UNLOCK(zone);
- }
-
}
/*
@@ -3614,34 +3639,60 @@ zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
if (skip < SKIP_FINI && zone->uz_fini)
zone->uz_fini(item, zone->uz_size);
- atomic_add_long(&zone->uz_frees, 1);
zone->uz_release(zone->uz_arg, &item, 1);
+
+ if (skip & SKIP_CNT)
+ return;
+
+ counter_u64_add(zone->uz_frees, 1);
+
+ if (zone->uz_max_items > 0) {
+ ZONE_LOCK(zone);
+ zone->uz_items--;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
+ }
}
/* See uma.h */
int
uma_zone_set_max(uma_zone_t zone, int nitems)
{
- uma_keg_t keg;
+ struct uma_bucket_zone *ubz;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
-#ifdef __rtems__
-#ifdef SMP
/*
- * Ensure we have enough items to fill the per-processor caches. This
- * is a heuristic approach and works not under all conditions.
+ * If limit is very low we may need to limit how
+ * much items are allowed in CPU caches.
*/
- nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
-#endif
-#endif /* __rtems__ */
- keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
- if (keg->uk_maxpages * keg->uk_ipers < nitems)
- keg->uk_maxpages += keg->uk_ppera;
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ ubz = &bucket_zones[0];
+ for (; ubz->ubz_entries != 0; ubz++)
+ if (ubz->ubz_entries * 2 * mp_ncpus > nitems)
+ break;
+ if (ubz == &bucket_zones[0])
+ nitems = ubz->ubz_entries * 2 * mp_ncpus;
+ else
+ ubz--;
+
+ ZONE_LOCK(zone);
+ zone->uz_count_max = zone->uz_count = ubz->ubz_entries;
+ if (zone->uz_count_min > zone->uz_count_max)
+ zone->uz_count_min = zone->uz_count_max;
+ zone->uz_max_items = nitems;
+ ZONE_UNLOCK(zone);
+
+ return (nitems);
+}
+
+/* See uma.h */
+int
+uma_zone_set_maxcache(uma_zone_t zone, int nitems)
+{
+
+ ZONE_LOCK(zone);
+ zone->uz_bkt_max = nitems;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3651,14 +3702,10 @@ int
uma_zone_get_max(uma_zone_t zone)
{
int nitems;
- uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ ZONE_LOCK(zone);
+ nitems = zone->uz_max_items;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3691,10 +3738,11 @@ uma_zone_get_cur(uma_zone_t zone)
u_int i;
ZONE_LOCK(zone);
- nitems = zone->uz_allocs - zone->uz_frees;
+ nitems = counter_u64_fetch(zone->uz_allocs) -
+ counter_u64_fetch(zone->uz_frees);
CPU_FOREACH(i) {
/*
- * See the comment in sysctl_vm_zone_stats() regarding the
+ * See the comment in uma_vm_zone_stats() regarding the
* safety of accessing the per-cpu caches. With the zone lock
* held, it is safe, but can potentially result in stale data.
*/
@@ -3712,8 +3760,7 @@ uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_init on non-empty keg"));
@@ -3727,8 +3774,7 @@ uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_fini on non-empty keg"));
@@ -3742,7 +3788,7 @@ uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zinit on non-empty keg"));
zone->uz_init = zinit;
ZONE_UNLOCK(zone);
@@ -3754,7 +3800,7 @@ uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zfini on non-empty keg"));
zone->uz_fini = zfini;
ZONE_UNLOCK(zone);
@@ -3767,7 +3813,7 @@ uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
KEG_LOCK(keg);
keg->uk_freef = freef;
@@ -3781,7 +3827,7 @@ uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_allocf = allocf;
KEG_UNLOCK(keg);
@@ -3793,14 +3839,10 @@ uma_zone_reserve(uma_zone_t zone, int items)
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_reserve = items;
KEG_UNLOCK(keg);
-
- return;
}
#ifndef __rtems__
@@ -3812,11 +3854,9 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
vm_offset_t kva;
u_int pages;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- pages = count / keg->uk_ipers;
+ KEG_GET(zone, keg);
+ pages = count / keg->uk_ipers;
if (pages * keg->uk_ipers < count)
pages++;
pages *= keg->uk_ppera;
@@ -3831,17 +3871,19 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
return (0);
} else
kva = 0;
- KEG_LOCK(keg);
+
+ ZONE_LOCK(zone);
+ MPASS(keg->uk_kva == 0);
keg->uk_kva = kva;
keg->uk_offset = 0;
- keg->uk_maxpages = pages;
+ zone->uz_max_items = pages * keg->uk_ipers;
#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
keg->uk_allocf = noobj_alloc;
#endif
keg->uk_flags |= UMA_ZONE_NOFREE;
- KEG_UNLOCK(keg);
+ ZONE_UNLOCK(zone);
return (1);
}
@@ -3854,46 +3896,65 @@ uma_prealloc(uma_zone_t zone, int items)
uma_domain_t dom;
uma_slab_t slab;
uma_keg_t keg;
- int domain, flags, slabs;
+ int aflags, domain, slabs;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
if (slabs * keg->uk_ipers < items)
slabs++;
- flags = M_WAITOK;
- vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &flags);
while (slabs-- > 0) {
- slab = keg_alloc_slab(keg, zone, domain, flags);
- if (slab == NULL)
- return;
- MPASS(slab->us_keg == keg);
- dom = &keg->uk_domain[slab->us_domain];
- LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
- if (vm_domainset_iter_policy(&di, &domain) != 0)
- break;
+ aflags = M_NOWAIT;
+ vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags);
+ for (;;) {
+ slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
+ aflags);
+ if (slab != NULL) {
+ MPASS(slab->us_keg == keg);
+ dom = &keg->uk_domain[slab->us_domain];
+ LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
+ us_link);
+ break;
+ }
+ KEG_LOCK(keg);
+ if (vm_domainset_iter_policy(&di, &domain) != 0) {
+ KEG_UNLOCK(keg);
+ vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
+ KEG_LOCK(keg);
+ }
+ }
}
KEG_UNLOCK(keg);
}
#endif /* __rtems__ */
/* See uma.h */
-static void
-uma_reclaim_locked(bool kmem_danger)
+void
+uma_reclaim(int req)
{
CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
- sx_assert(&uma_drain_lock, SA_XLOCKED);
+ sx_xlock(&uma_reclaim_lock);
bucket_enable();
- zone_foreach(zone_drain);
-#ifndef __rtems__
- if (vm_page_count_min() || kmem_danger) {
- cache_drain_safe(NULL);
+
+ switch (req) {
+ case UMA_RECLAIM_TRIM:
+ zone_foreach(zone_trim);
+ break;
+ case UMA_RECLAIM_DRAIN:
+ case UMA_RECLAIM_DRAIN_CPU:
zone_foreach(zone_drain);
- }
+#ifndef __rtems__
+ if (req == UMA_RECLAIM_DRAIN_CPU) {
+ pcpu_cache_drain_safe(NULL);
+ zone_foreach(zone_drain);
+ }
#endif /* __rtems__ */
+ break;
+ default:
+ panic("unhandled reclamation request %d", req);
+ }
/*
* Some slabs may have been freed but this zone will be visited early
@@ -3902,15 +3963,7 @@ uma_reclaim_locked(bool kmem_danger)
*/
zone_drain(slabzone);
bucket_zone_drain();
-}
-
-void
-uma_reclaim(void)
-{
-
- sx_xlock(&uma_drain_lock);
- uma_reclaim_locked(false);
- sx_xunlock(&uma_drain_lock);
+ sx_xunlock(&uma_reclaim_lock);
}
static volatile int uma_reclaim_needed;
@@ -3928,31 +3981,52 @@ uma_reclaim_worker(void *arg __unused)
{
for (;;) {
- sx_xlock(&uma_drain_lock);
+ sx_xlock(&uma_reclaim_lock);
while (atomic_load_int(&uma_reclaim_needed) == 0)
- sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
+ sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
hz);
+ sx_xunlock(&uma_reclaim_lock);
#ifndef __rtems__
- sx_xunlock(&uma_drain_lock);
EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
- sx_xlock(&uma_drain_lock);
#endif /* __rtems__ */
- uma_reclaim_locked(true);
+ uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
atomic_store_int(&uma_reclaim_needed, 0);
- sx_xunlock(&uma_drain_lock);
/* Don't fire more than once per-second. */
pause("umarclslp", hz);
}
}
/* See uma.h */
+void
+uma_zone_reclaim(uma_zone_t zone, int req)
+{
+
+ switch (req) {
+ case UMA_RECLAIM_TRIM:
+ zone_trim(zone);
+ break;
+ case UMA_RECLAIM_DRAIN:
+ zone_drain(zone);
+ break;
+#ifndef __rtems__
+ case UMA_RECLAIM_DRAIN_CPU:
+ pcpu_cache_drain_safe(zone);
+ zone_drain(zone);
+ break;
+#endif /* __rtems__ */
+ default:
+ panic("unhandled reclamation request %d", req);
+ }
+}
+
+/* See uma.h */
int
uma_zone_exhausted(uma_zone_t zone)
{
int full;
ZONE_LOCK(zone);
- full = (zone->uz_flags & UMA_ZFLAG_FULL);
+ full = zone->uz_sleepers > 0;
ZONE_UNLOCK(zone);
return (full);
}
@@ -3960,7 +4034,7 @@ uma_zone_exhausted(uma_zone_t zone)
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
- return (zone->uz_flags & UMA_ZFLAG_FULL);
+ return (zone->uz_sleepers > 0);
}
#ifndef __rtems__
@@ -4041,14 +4115,14 @@ unsigned long
uma_size(void)
{
- return (uma_kmem_total);
+ return (atomic_load_long(&uma_kmem_total));
}
long
uma_avail(void)
{
- return (uma_kmem_limit - uma_kmem_total);
+ return (uma_kmem_limit - uma_size());
}
void
@@ -4067,11 +4141,13 @@ slab_print(uma_slab_t slab)
static void
cache_print(uma_cache_t cache)
{
- printf("alloc: %p(%d), free: %p(%d)\n",
+ printf("alloc: %p(%d), free: %p(%d), cross: %p(%d)j\n",
cache->uc_allocbucket,
cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
cache->uc_freebucket,
- cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
+ cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0,
+ cache->uc_crossbucket,
+ cache->uc_crossbucket?cache->uc_crossbucket->ub_cnt:0);
}
static void
@@ -4082,11 +4158,11 @@ uma_print_keg(uma_keg_t keg)
int i;
printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
- "out %d free %d limit %d\n",
+ "out %d free %d\n",
keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
keg->uk_ipers, keg->uk_ppera,
(keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
- keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
+ keg->uk_free);
for (i = 0; i < vm_ndomains; i++) {
dom = &keg->uk_domain[i];
printf("Part slabs:\n");
@@ -4105,13 +4181,13 @@ void
uma_print_zone(uma_zone_t zone)
{
uma_cache_t cache;
- uma_klink_t kl;
int i;
- printf("zone: %s(%p) size %d flags %#x\n",
- zone->uz_name, zone, zone->uz_size, zone->uz_flags);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
- uma_print_keg(kl->kl_keg);
+ printf("zone: %s(%p) size %d maxitems %ju flags %#x\n",
+ zone->uz_name, zone, zone->uz_size, (uintmax_t)zone->uz_max_items,
+ zone->uz_flags);
+ if (zone->uz_lockptr != &zone->uz_lock)
+ uma_print_keg(zone->uz_keg);
CPU_FOREACH(i) {
cache = &zone->uz_cpu[i];
printf("CPU %d Cache:\n", i);
@@ -4134,13 +4210,13 @@ uma_print_zone(uma_zone_t zone)
*/
static void
uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
- uint64_t *freesp, uint64_t *sleepsp)
+ uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
{
uma_cache_t cache;
- uint64_t allocs, frees, sleeps;
+ uint64_t allocs, frees, sleeps, xdomain;
int cachefree, cpu;
- allocs = frees = sleeps = 0;
+ allocs = frees = sleeps = xdomain = 0;
cachefree = 0;
CPU_FOREACH(cpu) {
cache = &z->uz_cpu[cpu];
@@ -4148,12 +4224,17 @@ uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
cachefree += cache->uc_allocbucket->ub_cnt;
if (cache->uc_freebucket != NULL)
cachefree += cache->uc_freebucket->ub_cnt;
+ if (cache->uc_crossbucket != NULL) {
+ xdomain += cache->uc_crossbucket->ub_cnt;
+ cachefree += cache->uc_crossbucket->ub_cnt;
+ }
allocs += cache->uc_allocs;
frees += cache->uc_frees;
}
- allocs += z->uz_allocs;
- frees += z->uz_frees;
+ allocs += counter_u64_fetch(z->uz_allocs);
+ frees += counter_u64_fetch(z->uz_frees);
sleeps += z->uz_sleeps;
+ xdomain += z->uz_xdomain;
if (cachefreep != NULL)
*cachefreep = cachefree;
if (allocsp != NULL)
@@ -4162,6 +4243,8 @@ uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
*freesp = frees;
if (sleepsp != NULL)
*sleepsp = sleeps;
+ if (xdomainp != NULL)
+ *xdomainp = xdomain;
}
#endif /* DDB */
#endif /* __rtems__ */
@@ -4179,23 +4262,67 @@ sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(z, &kz->uk_zones, uz_link)
count++;
}
+ LIST_FOREACH(z, &uma_cachezones, uz_link)
+ count++;
+
rw_runlock(&uma_rwlock);
return (sysctl_handle_int(oidp, &count, 0, req));
}
+static void
+uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
+ struct uma_percpu_stat *ups, bool internal)
+{
+ uma_zone_domain_t zdom;
+ uma_cache_t cache;
+ int i;
+
+
+ for (i = 0; i < vm_ndomains; i++) {
+ zdom = &z->uz_domain[i];
+ uth->uth_zone_free += zdom->uzd_nitems;
+ }
+ uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
+ uth->uth_frees = counter_u64_fetch(z->uz_frees);
+ uth->uth_fails = counter_u64_fetch(z->uz_fails);
+ uth->uth_sleeps = z->uz_sleeps;
+ uth->uth_xdomain = z->uz_xdomain;
+ /*
+ * While it is not normally safe to access the cache
+ * bucket pointers while not on the CPU that owns the
+ * cache, we only allow the pointers to be exchanged
+ * without the zone lock held, not invalidated, so
+ * accept the possible race associated with bucket
+ * exchange during monitoring.
+ */
+ for (i = 0; i < mp_maxid + 1; i++) {
+ bzero(&ups[i], sizeof(*ups));
+ if (internal || CPU_ABSENT(i))
+ continue;
+ cache = &z->uz_cpu[i];
+ if (cache->uc_allocbucket != NULL)
+ ups[i].ups_cache_free +=
+ cache->uc_allocbucket->ub_cnt;
+ if (cache->uc_freebucket != NULL)
+ ups[i].ups_cache_free +=
+ cache->uc_freebucket->ub_cnt;
+ if (cache->uc_crossbucket != NULL)
+ ups[i].ups_cache_free +=
+ cache->uc_crossbucket->ub_cnt;
+ ups[i].ups_allocs = cache->uc_allocs;
+ ups[i].ups_frees = cache->uc_frees;
+ }
+}
+
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat *ups;
- uma_zone_domain_t zdom;
struct sbuf sbuf;
- uma_cache_t cache;
- uma_klink_t kl;
uma_keg_t kz;
uma_zone_t z;
- uma_keg_t k;
int count, error, i;
error = sysctl_wire_old_buffer(req, 0);
@@ -4212,6 +4339,9 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
count++;
}
+ LIST_FOREACH(z, &uma_cachezones, uz_link)
+ count++;
+
/*
* Insert stream header.
*/
@@ -4229,14 +4359,15 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
uth.uth_rsize = kz->uk_rsize;
- LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
- k = kl->kl_keg;
- uth.uth_maxpages += k->uk_maxpages;
- uth.uth_pages += k->uk_pages;
- uth.uth_keg_free += k->uk_free;
- uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
- * k->uk_ipers;
- }
+ if (z->uz_max_items > 0)
+ uth.uth_pages = (z->uz_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ else
+ uth.uth_pages = kz->uk_pages;
+ uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ uth.uth_limit = z->uz_max_items;
+ uth.uth_keg_free = z->uz_keg->uk_free;
/*
* A zone is secondary is it is not the first entry
@@ -4245,44 +4376,26 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z))
uth.uth_zone_flags = UTH_ZONE_SECONDARY;
-
- for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
- uth.uth_zone_free += zdom->uzd_nitems;
- }
- uth.uth_allocs = z->uz_allocs;
- uth.uth_frees = z->uz_frees;
- uth.uth_fails = z->uz_fails;
- uth.uth_sleeps = z->uz_sleeps;
- /*
- * While it is not normally safe to access the cache
- * bucket pointers while not on the CPU that owns the
- * cache, we only allow the pointers to be exchanged
- * without the zone lock held, not invalidated, so
- * accept the possible race associated with bucket
- * exchange during monitoring.
- */
- for (i = 0; i < mp_maxid + 1; i++) {
- bzero(&ups[i], sizeof(*ups));
- if (kz->uk_flags & UMA_ZFLAG_INTERNAL ||
- CPU_ABSENT(i))
- continue;
- cache = &z->uz_cpu[i];
- if (cache->uc_allocbucket != NULL)
- ups[i].ups_cache_free +=
- cache->uc_allocbucket->ub_cnt;
- if (cache->uc_freebucket != NULL)
- ups[i].ups_cache_free +=
- cache->uc_freebucket->ub_cnt;
- ups[i].ups_allocs = cache->uc_allocs;
- ups[i].ups_frees = cache->uc_frees;
- }
+ uma_vm_zone_stats(&uth, z, &sbuf, ups,
+ kz->uk_flags & UMA_ZFLAG_INTERNAL);
ZONE_UNLOCK(z);
(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
for (i = 0; i < mp_maxid + 1; i++)
(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
}
}
+ LIST_FOREACH(z, &uma_cachezones, uz_link) {
+ bzero(&uth, sizeof(uth));
+ ZONE_LOCK(z);
+ strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
+ uth.uth_size = z->uz_size;
+ uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
+ ZONE_UNLOCK(z);
+ (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
+ for (i = 0; i < mp_maxid + 1; i++)
+ (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
+ }
+
rw_runlock(&uma_rwlock);
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
@@ -4333,8 +4446,10 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
* zone is unlocked because the item's allocation state
* essentially holds a reference.
*/
+ if (zone->uz_lockptr == &zone->uz_lock)
+ return (NULL);
ZONE_LOCK(zone);
- keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
+ keg = zone->uz_keg;
if (keg->uk_flags & UMA_ZONE_HASH)
slab = hash_sfind(&keg->uk_hash, mem);
else
@@ -4348,12 +4463,11 @@ uma_dbg_getslab(uma_zone_t zone, void *item)
static bool
uma_dbg_zskip(uma_zone_t zone, void *mem)
{
- uma_keg_t keg;
- if ((keg = zone_first_keg(zone)) == NULL)
+ if (zone->uz_lockptr == &zone->uz_lock)
return (true);
- return (uma_dbg_kskip(keg, mem));
+ return (uma_dbg_kskip(zone->uz_keg, mem));
}
static bool
@@ -4453,32 +4567,33 @@ DB_SHOW_COMMAND(uma, db_show_uma)
{
uma_keg_t kz;
uma_zone_t z;
- uint64_t allocs, frees, sleeps;
+ uint64_t allocs, frees, sleeps, xdomain;
long cachefree;
int i;
- db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
- "Free", "Requests", "Sleeps", "Bucket");
+ db_printf("%18s %8s %8s %8s %12s %8s %8s %8s\n", "Zone", "Size", "Used",
+ "Free", "Requests", "Sleeps", "Bucket", "XFree");
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
- allocs = z->uz_allocs;
- frees = z->uz_frees;
+ allocs = counter_u64_fetch(z->uz_allocs);
+ frees = counter_u64_fetch(z->uz_frees);
sleeps = z->uz_sleeps;
cachefree = 0;
+ xdomain = 0; /* uma_zone_sumstat() is skipped on this path; keep the XFree column defined. */
} else
uma_zone_sumstat(z, &cachefree, &allocs,
- &frees, &sleeps);
+ &frees, &sleeps, &xdomain);
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
- db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n",
+ db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u %8ju\n",
z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
- (uintmax_t)allocs, sleeps, z->uz_count);
+ (uintmax_t)allocs, sleeps, z->uz_count, xdomain);
if (db_pager_quit)
return;
}
@@ -4495,7 +4609,7 @@ DB_SHOW_COMMAND(umacache, db_show_umacache)
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
"Requests", "Bucket");
LIST_FOREACH(z, &uma_cachezones, uz_link) {
- uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
+ uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
@@ -4516,7 +4630,7 @@ rtems_bsd_uma_startup(void *unused)
uma_kmem_limit = rtems_bsd_get_allocator_domain_size(
RTEMS_BSD_ALLOCATOR_DOMAIN_PAGE);
- sx_init_flags(&uma_drain_lock, "umadrain", SX_RECURSE);
+ sx_init_flags(&uma_reclaim_lock, "umareclaim", SX_RECURSE);
uma_startup(NULL, 0);
}
diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h
index 0429bac6..1d055ed6 100644
--- a/freebsd/sys/vm/uma_int.h
+++ b/freebsd/sys/vm/uma_int.h
@@ -30,6 +30,7 @@
*
*/
+#include <sys/counter.h>
#include <sys/_bitset.h>
#include <sys/_domainset.h>
#include <sys/_task.h>
@@ -178,8 +179,8 @@ SLIST_HEAD(slabhead, uma_slab);
struct uma_hash {
struct slabhead *uh_slab_hash; /* Hash table for slabs */
- int uh_hashsize; /* Current size of the hash table */
- int uh_hashmask; /* Mask used during hashing */
+ u_int uh_hashsize; /* Current size of the hash table */
+ u_int uh_hashmask; /* Mask used during hashing */
};
/*
@@ -196,7 +197,7 @@ struct uma_hash {
*/
struct uma_bucket {
- LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */
+ TAILQ_ENTRY(uma_bucket) ub_link; /* Link into the zone */
int16_t ub_cnt; /* Count of items in bucket. */
int16_t ub_entries; /* Max items. */
void *ub_bucket[]; /* actual allocation storage */
@@ -207,6 +208,7 @@ typedef struct uma_bucket * uma_bucket_t;
struct uma_cache {
uma_bucket_t uc_freebucket; /* Bucket we're freeing to */
uma_bucket_t uc_allocbucket; /* Bucket to allocate from */
+ uma_bucket_t uc_crossbucket; /* cross domain bucket */
uint64_t uc_allocs; /* Count of allocations */
uint64_t uc_frees; /* Count of frees */
} UMA_ALIGN;
@@ -231,7 +233,9 @@ typedef struct uma_domain * uma_domain_t;
*
*/
struct uma_keg {
- struct mtx uk_lock; /* Lock for the keg */
+ struct mtx uk_lock; /* Lock for the keg must be first.
+ * See shared uz_keg/uz_lockptr
+ * member of struct uma_zone. */
struct uma_hash uk_hash;
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
@@ -244,7 +248,6 @@ struct uma_keg {
uint32_t uk_reserve; /* Number of reserved items. */
uint32_t uk_size; /* Requested size of each item */
uint32_t uk_rsize; /* Real size of each item */
- uint32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
@@ -308,16 +311,11 @@ struct uma_slab {
#endif
typedef struct uma_slab * uma_slab_t;
-typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int, int);
-struct uma_klink {
- LIST_ENTRY(uma_klink) kl_link;
- uma_keg_t kl_keg;
-};
-typedef struct uma_klink *uma_klink_t;
+TAILQ_HEAD(uma_bucketlist, uma_bucket);
struct uma_zone_domain {
- LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
+ struct uma_bucketlist uzd_buckets; /* full buckets */
long uzd_nitems; /* total item count */
long uzd_imax; /* maximum item count this period */
long uzd_imin; /* minimum item count this period */
@@ -334,8 +332,10 @@ typedef struct uma_zone_domain * uma_zone_domain_t;
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
- struct mtx *uz_lockptr;
- const char *uz_name; /* Text name of the zone */
+ union {
+ uma_keg_t uz_keg; /* This zone's keg */
+ struct mtx *uz_lockptr; /* To keg or to self */
+ };
#ifndef __rtems__
struct uma_zone_domain *uz_domain; /* per-domain buckets */
#else /* __rtems__ */
@@ -345,19 +345,21 @@ struct uma_zone {
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
- uma_init uz_init; /* Initializer for each item */
- uma_fini uz_fini; /* Finalizer for each item. */
+ uint64_t uz_items; /* Total items count */
+ uint64_t uz_max_items; /* Maximum number of items to alloc */
+ uint32_t uz_sleepers; /* Number of sleepers on memory */
+ uint16_t uz_count; /* Amount of items in full bucket */
+ uint16_t uz_count_max; /* Maximum amount of items there */
/* Offset 64, used in bucket replenish. */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
- uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
- uint16_t uz_count; /* Amount of items in full bucket */
- uint16_t uz_count_min; /* Minimal amount of items there */
- /* 32bit pad on 64bit. */
- LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
+ uma_init uz_init; /* Initializer for each item */
+ uma_fini uz_fini; /* Finalizer for each item. */
+ void *uz_spare;
+ uint64_t uz_bkt_count; /* Items in bucket cache */
+ uint64_t uz_bkt_max; /* Maximum bucket cache size */
/* Offset 128 Rare. */
/*
@@ -366,19 +368,20 @@ struct uma_zone {
* members to reduce alignment overhead.
*/
struct mtx uz_lock; /* Lock for the zone */
- struct uma_klink uz_klink; /* klink for first keg. */
+ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
+ const char *uz_name; /* Text name of the zone */
/* The next two fields are used to print a rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
struct task uz_maxaction; /* Task to run when at limit */
+ uint16_t uz_count_min; /* Minimal amount of items in bucket */
- /* 16 bytes of pad. */
-
- /* Offset 256, atomic stats. */
- volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
- volatile u_long uz_fails; /* Total number of alloc failures */
- volatile u_long uz_frees; /* Total number of frees */
+ /* Offset 256, stats. */
+ counter_u64_t uz_allocs; /* Total number of allocations */
+ counter_u64_t uz_frees; /* Total number of frees */
+ counter_u64_t uz_fails; /* Total number of alloc failures */
uint64_t uz_sleeps; /* Total number of alloc sleeps */
+ uint64_t uz_xdomain; /* Total number of cross-domain frees */
/*
* This HAS to be the last item because we adjust the zone size
@@ -392,25 +395,15 @@ struct uma_zone {
/*
* These flags must not overlap with the UMA_ZONE flags specified in uma.h.
*/
-#define UMA_ZFLAG_MULTI 0x04000000 /* Multiple kegs in the zone. */
-#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */
+#define UMA_ZFLAG_CACHE 0x04000000 /* uma_zcache_create()d it */
+#define UMA_ZFLAG_RECLAIMING 0x08000000 /* Running zone_reclaim(). */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
-#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */
#define UMA_ZFLAG_INHERIT \
(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
-static inline uma_keg_t
-zone_first_keg(uma_zone_t zone)
-{
- uma_klink_t klink;
-
- klink = LIST_FIRST(&zone->uz_kegs);
- return (klink != NULL) ? klink->kl_keg : NULL;
-}
-
#undef UMA_ALIGN
#ifdef _KERNEL
@@ -435,6 +428,13 @@ void uma_large_free(uma_slab_t slab);
#define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock)
#define KEG_LOCK(k) mtx_lock(&(k)->uk_lock)
#define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock)
+#define KEG_LOCK_ASSERT(k) mtx_assert(&(k)->uk_lock, MA_OWNED)
+
+#define KEG_GET(zone, keg) do { \
+ (keg) = (zone)->uz_keg; \
+ KASSERT((void *)(keg) != (void *)&(zone)->uz_lock, \
+ ("%s: Invalid zone %p type", __func__, (zone))); \
+ } while (0)
#define ZONE_LOCK_INIT(z, lc) \
do { \
@@ -467,7 +467,7 @@ static __inline uma_slab_t
hash_sfind(struct uma_hash *hash, uint8_t *data)
{
uma_slab_t slab;
- int hval;
+ u_int hval;
hval = UMA_HASH(hash, data);
diff --git a/freebsd/sys/x86/include/machine/bus.h b/freebsd/sys/x86/include/machine/bus.h
index 297b5edc..2427ae51 100644
--- a/freebsd/sys/x86/include/machine/bus.h
+++ b/freebsd/sys/x86/include/machine/bus.h
@@ -114,7 +114,11 @@
#define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF
#define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF
+#if defined(__amd64__)
+#define BUS_SPACE_MAXSIZE 0xFFFFFFFFFFFFFFFFULL
+#else
#define BUS_SPACE_MAXSIZE 0xFFFFFFFF
+#endif
#define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF
#define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF
#if defined(__amd64__) || defined(PAE)
diff --git a/freebsd/sys/x86/include/machine/pci_cfgreg.h b/freebsd/sys/x86/include/machine/pci_cfgreg.h
index 8083eb0e..85d6485c 100644
--- a/freebsd/sys/x86/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/x86/include/machine/pci_cfgreg.h
@@ -48,6 +48,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+enum {
+ CFGMECH_NONE = 0,
+ CFGMECH_1,
+ CFGMECH_2,
+ CFGMECH_PCIE,
+};
+
+extern int cfgmech;
+
rman_res_t hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);