summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSebastian Huber <sebastian.huber@embedded-brains.de>2013-11-06 16:20:21 +0100
committerSebastian Huber <sebastian.huber@embedded-brains.de>2013-11-11 10:08:08 +0100
commit66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch)
tree48e22b475fa8854128e0861a33fed6f78c8094b5
parentDefine __GLOBL1() and __GLOBL() (diff)
downloadrtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2
Update to FreeBSD 9.2
-rw-r--r--Makefile81
m---------freebsd-org0
-rwxr-xr-xfreebsd-to-rtems.py95
-rw-r--r--freebsd/include/arpa/ftp.h8
-rw-r--r--freebsd/include/arpa/inet.h6
-rw-r--r--freebsd/include/arpa/nameser.h6
-rw-r--r--freebsd/include/arpa/nameser_compat.h6
-rw-r--r--freebsd/include/db.h6
-rw-r--r--freebsd/include/err.h6
-rw-r--r--freebsd/include/mpool.h6
-rw-r--r--freebsd/include/netdb.h6
-rw-r--r--freebsd/include/nlist.h6
-rw-r--r--freebsd/include/nsswitch.h7
-rw-r--r--freebsd/include/resolv.h6
-rw-r--r--freebsd/include/rpc/auth.h9
-rw-r--r--freebsd/include/rpc/auth_unix.h6
-rw-r--r--freebsd/include/rpc/xdr.h1
-rw-r--r--freebsd/include/sysexits.h8
-rw-r--r--freebsd/lib/libc/db/btree/bt_split.c7
-rw-r--r--freebsd/lib/libc/gen/feature_present.c64
-rw-r--r--freebsd/lib/libc/include/libc_private.h25
-rw-r--r--freebsd/lib/libc/include/port_before.h2
-rw-r--r--freebsd/lib/libc/include/reentrant.h7
-rw-r--r--freebsd/lib/libc/net/getaddrinfo.c4
-rw-r--r--freebsd/lib/libc/net/getnameinfo.c11
-rw-r--r--freebsd/lib/libc/net/nslexer.l4
-rw-r--r--freebsd/lib/libc/net/nsparser.y1
-rw-r--r--freebsd/lib/libc/resolv/res_send.c2
-rw-r--r--freebsd/lib/libc/stdio/local.h28
-rw-r--r--freebsd/lib/libipsec/ipsec_dump_policy.c3
-rw-r--r--freebsd/lib/libipsec/policy_token.l5
-rw-r--r--freebsd/lib/libkvm/kvm.h2
-rw-r--r--freebsd/lib/libmemstat/memstat.c22
-rw-r--r--freebsd/lib/libmemstat/memstat.h8
-rw-r--r--freebsd/lib/libmemstat/memstat_internal.h14
-rw-r--r--freebsd/lib/libmemstat/memstat_malloc.c36
-rw-r--r--freebsd/lib/libmemstat/memstat_uma.c46
-rw-r--r--freebsd/lib/libutil/expand_number.c65
-rw-r--r--freebsd/lib/libutil/libutil.h209
-rw-r--r--freebsd/sbin/dhclient/clparse.c6
-rw-r--r--freebsd/sbin/dhclient/packet.c2
-rw-r--r--freebsd/sbin/dhclient/parse.c12
-rw-r--r--freebsd/sbin/ifconfig/af_inet.c7
-rw-r--r--freebsd/sbin/ifconfig/af_inet6.c13
-rw-r--r--freebsd/sbin/ifconfig/af_nd6.c80
-rw-r--r--freebsd/sbin/ifconfig/ifconfig.c67
-rw-r--r--freebsd/sbin/ifconfig/ifconfig.h1
-rw-r--r--freebsd/sbin/ifconfig/ifgif.c24
-rw-r--r--freebsd/sbin/ifconfig/iflagg.c7
-rw-r--r--freebsd/sbin/ifconfig/ifmedia.c4
-rw-r--r--freebsd/sbin/ping/ping.c50
-rw-r--r--freebsd/sbin/ping6/ping6.c184
-rw-r--r--freebsd/sbin/route/keywords1
-rw-r--r--freebsd/sbin/route/route.c557
-rw-r--r--freebsd/sys/arm/arm/legacy.c20
-rw-r--r--freebsd/sys/arm/include/machine/cpufunc.h32
-rw-r--r--freebsd/sys/arm/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/arm/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/arm/pci/pci_bus.c147
-rw-r--r--freebsd/sys/avr/avr/legacy.c20
-rw-r--r--freebsd/sys/avr/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/avr/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/avr/pci/pci_bus.c147
-rw-r--r--freebsd/sys/bfin/bfin/legacy.c20
-rw-r--r--freebsd/sys/bfin/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/bfin/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/bfin/pci/pci_bus.c147
-rw-r--r--freebsd/sys/bsm/audit_kevents.h1
-rw-r--r--freebsd/sys/cam/ata/ata_all.h40
-rw-r--r--freebsd/sys/cam/cam.c50
-rw-r--r--freebsd/sys/cam/cam.h19
-rw-r--r--freebsd/sys/cam/cam_ccb.h143
-rw-r--r--freebsd/sys/cam/cam_periph.h48
-rw-r--r--freebsd/sys/cam/cam_sim.h9
-rw-r--r--freebsd/sys/cam/cam_xpt.h2
-rw-r--r--freebsd/sys/cam/cam_xpt_sim.h4
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.c2466
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.h1392
-rw-r--r--freebsd/sys/cam/scsi/scsi_da.h51
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_cbq.c14
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_cdnr.c4
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_hfsc.c8
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_priq.c8
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_red.c13
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_rio.c2
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_rmclass.c4
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_subr.c89
-rw-r--r--freebsd/sys/contrib/altq/altq/altq_var.h4
-rw-r--r--freebsd/sys/contrib/altq/altq/if_altq.h6
-rw-r--r--freebsd/sys/contrib/pf/net/if_pflog.c57
-rw-r--r--freebsd/sys/contrib/pf/net/if_pflog.h26
-rw-r--r--freebsd/sys/contrib/pf/net/if_pflow.h126
-rw-r--r--freebsd/sys/contrib/pf/net/if_pfsync.c4025
-rw-r--r--freebsd/sys/contrib/pf/net/if_pfsync.h493
-rw-r--r--freebsd/sys/contrib/pf/net/pf.c5732
-rw-r--r--freebsd/sys/contrib/pf/net/pf_if.c308
-rw-r--r--freebsd/sys/contrib/pf/net/pf_ioctl.c1390
-rw-r--r--freebsd/sys/contrib/pf/net/pf_lb.c795
-rw-r--r--freebsd/sys/contrib/pf/net/pf_mtag.h6
-rw-r--r--freebsd/sys/contrib/pf/net/pf_norm.c425
-rw-r--r--freebsd/sys/contrib/pf/net/pf_osfp.c142
-rw-r--r--freebsd/sys/contrib/pf/net/pf_ruleset.c94
-rw-r--r--freebsd/sys/contrib/pf/net/pf_table.c427
-rw-r--r--freebsd/sys/contrib/pf/net/pfvar.h814
-rw-r--r--freebsd/sys/contrib/pf/netinet/in4_cksum.c2
-rw-r--r--freebsd/sys/dev/bce/if_bce.c406
-rw-r--r--freebsd/sys/dev/bce/if_bcefw.h131
-rw-r--r--freebsd/sys/dev/bce/if_bcereg.h23
-rw-r--r--freebsd/sys/dev/bfe/if_bfe.c6
-rw-r--r--freebsd/sys/dev/bge/if_bge.c241
-rw-r--r--freebsd/sys/dev/bge/if_bgereg.h3
-rw-r--r--freebsd/sys/dev/dc/dcphy.c39
-rw-r--r--freebsd/sys/dev/dc/if_dc.c10
-rw-r--r--freebsd/sys/dev/dc/if_dcreg.h2
-rw-r--r--freebsd/sys/dev/dc/pnphy.c36
-rw-r--r--freebsd/sys/dev/e1000/e1000_api.c3
-rw-r--r--freebsd/sys/dev/e1000/if_em.c179
-rw-r--r--freebsd/sys/dev/e1000/if_em.h1
-rw-r--r--freebsd/sys/dev/e1000/if_igb.c195
-rw-r--r--freebsd/sys/dev/e1000/if_igb.h2
-rw-r--r--freebsd/sys/dev/e1000/if_lem.c93
-rw-r--r--freebsd/sys/dev/e1000/if_lem.h1
-rw-r--r--freebsd/sys/dev/fxp/if_fxp.c23
-rw-r--r--freebsd/sys/dev/mii/brgphy.c186
-rw-r--r--freebsd/sys/dev/mii/icsphy.c67
-rw-r--r--freebsd/sys/dev/mii/mii.c60
-rw-r--r--freebsd/sys/dev/mii/mii.h4
-rw-r--r--freebsd/sys/dev/mii/mii_physubr.c144
-rw-r--r--freebsd/sys/dev/mii/miivar.h40
-rw-r--r--freebsd/sys/dev/pci/pci.c542
-rw-r--r--freebsd/sys/dev/pci/pci_pci.c367
-rw-r--r--freebsd/sys/dev/pci/pci_private.h6
-rw-r--r--freebsd/sys/dev/pci/pci_user.c246
-rw-r--r--freebsd/sys/dev/pci/pcib_private.h29
-rw-r--r--freebsd/sys/dev/pci/pcireg.h39
-rw-r--r--freebsd/sys/dev/pci/pcivar.h7
-rw-r--r--freebsd/sys/dev/re/if_re.c71
-rw-r--r--freebsd/sys/dev/smc/if_smc.c4
-rw-r--r--freebsd/sys/dev/usb/controller/ehci.c18
-rw-r--r--freebsd/sys/dev/usb/controller/ehci.h2
-rw-r--r--freebsd/sys/dev/usb/controller/ohci.c2
-rw-r--r--freebsd/sys/dev/usb/controller/usb_controller.c27
-rw-r--r--freebsd/sys/dev/usb/controller/xhcireg.h221
-rw-r--r--freebsd/sys/dev/usb/quirk/usb_quirk.c15
-rw-r--r--freebsd/sys/dev/usb/quirk/usb_quirk.h1
-rw-r--r--freebsd/sys/dev/usb/storage/umass.c292
-rw-r--r--freebsd/sys/dev/usb/usb.h16
-rw-r--r--freebsd/sys/dev/usb/usb_dev.c8
-rw-r--r--freebsd/sys/dev/usb/usb_device.c75
-rw-r--r--freebsd/sys/dev/usb/usb_device.h3
-rw-r--r--freebsd/sys/dev/usb/usb_freebsd.h8
-rw-r--r--freebsd/sys/dev/usb/usb_generic.c17
-rw-r--r--freebsd/sys/dev/usb/usb_hid.c76
-rw-r--r--freebsd/sys/dev/usb/usb_hub.c2
-rw-r--r--freebsd/sys/dev/usb/usb_ioctl.h3
-rw-r--r--freebsd/sys/dev/usb/usb_msctest.c52
-rw-r--r--freebsd/sys/dev/usb/usb_process.c8
-rw-r--r--freebsd/sys/dev/usb/usb_process.h6
-rw-r--r--freebsd/sys/dev/usb/usb_request.c54
-rw-r--r--freebsd/sys/dev/usb/usb_request.h2
-rw-r--r--freebsd/sys/dev/usb/usb_transfer.c6
-rw-r--r--freebsd/sys/dev/usb/usb_util.c53
-rw-r--r--freebsd/sys/dev/usb/usb_util.h1
-rw-r--r--freebsd/sys/dev/usb/usbhid.h2
-rw-r--r--freebsd/sys/fs/devfs/devfs_int.h17
-rw-r--r--freebsd/sys/h8300/h8300/legacy.c20
-rw-r--r--freebsd/sys/h8300/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/h8300/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/h8300/pci/pci_bus.c147
-rw-r--r--freebsd/sys/i386/i386/legacy.c20
-rw-r--r--freebsd/sys/i386/include/machine/cpufunc.h119
-rw-r--r--freebsd/sys/i386/include/machine/in_cksum.h6
-rw-r--r--freebsd/sys/i386/include/machine/intr_machdep.h5
-rw-r--r--freebsd/sys/i386/include/machine/md_var.h1
-rw-r--r--freebsd/sys/i386/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/i386/include/machine/specialreg.h11
-rw-r--r--freebsd/sys/i386/pci/pci_bus.c147
-rw-r--r--freebsd/sys/kern/init_main.c96
-rw-r--r--freebsd/sys/kern/kern_event.c78
-rw-r--r--freebsd/sys/kern/kern_hhook.c155
-rw-r--r--freebsd/sys/kern/kern_intr.c173
-rw-r--r--freebsd/sys/kern/kern_khelp.c211
-rw-r--r--freebsd/sys/kern/kern_linker.c42
-rw-r--r--freebsd/sys/kern/kern_mbuf.c21
-rw-r--r--freebsd/sys/kern/kern_mib.c66
-rw-r--r--freebsd/sys/kern/kern_module.c12
-rw-r--r--freebsd/sys/kern/kern_sysctl.c98
-rw-r--r--freebsd/sys/kern/kern_time.c61
-rw-r--r--freebsd/sys/kern/kern_timeout.c126
-rw-r--r--freebsd/sys/kern/subr_bus.c304
-rw-r--r--freebsd/sys/kern/subr_hash.c130
-rw-r--r--freebsd/sys/kern/subr_kobj.c2
-rw-r--r--freebsd/sys/kern/subr_module.c77
-rw-r--r--freebsd/sys/kern/subr_rman.c31
-rw-r--r--freebsd/sys/kern/subr_sbuf.c314
-rw-r--r--freebsd/sys/kern/subr_taskqueue.c201
-rw-r--r--freebsd/sys/kern/subr_uio.c629
-rw-r--r--freebsd/sys/kern/sys_generic.c135
-rw-r--r--freebsd/sys/kern/sys_socket.c4
-rw-r--r--freebsd/sys/kern/uipc_domain.c17
-rw-r--r--freebsd/sys/kern/uipc_mbuf.c11
-rw-r--r--freebsd/sys/kern/uipc_mbuf2.c4
-rw-r--r--freebsd/sys/kern/uipc_sockbuf.c9
-rw-r--r--freebsd/sys/kern/uipc_socket.c309
-rw-r--r--freebsd/sys/kern/uipc_syscalls.c461
-rw-r--r--freebsd/sys/libkern/arc4random.c5
-rw-r--r--freebsd/sys/lm32/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/lm32/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/lm32/lm32/legacy.c20
-rw-r--r--freebsd/sys/lm32/pci/pci_bus.c147
-rw-r--r--freebsd/sys/m32c/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/m32c/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/m32c/m32c/legacy.c20
-rw-r--r--freebsd/sys/m32c/pci/pci_bus.c147
-rw-r--r--freebsd/sys/m32r/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/m32r/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/m32r/m32r/legacy.c20
-rw-r--r--freebsd/sys/m32r/pci/pci_bus.c147
-rw-r--r--freebsd/sys/m68k/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/m68k/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/m68k/m68k/legacy.c20
-rw-r--r--freebsd/sys/m68k/pci/pci_bus.c147
-rw-r--r--freebsd/sys/mips/include/machine/cpufunc.h140
-rw-r--r--freebsd/sys/mips/include/machine/cpuregs.h13
-rw-r--r--freebsd/sys/mips/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/mips/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/mips/mips/legacy.c20
-rw-r--r--freebsd/sys/mips/pci/pci_bus.c147
-rw-r--r--freebsd/sys/net/bpf.c309
-rw-r--r--freebsd/sys/net/bpf.h408
-rw-r--r--freebsd/sys/net/bpf_buffer.c9
-rw-r--r--freebsd/sys/net/bpf_filter.c2
-rw-r--r--freebsd/sys/net/bpf_jitter.c62
-rw-r--r--freebsd/sys/net/bpf_jitter.h5
-rw-r--r--freebsd/sys/net/bpfdesc.h2
-rw-r--r--freebsd/sys/net/flowtable.h1
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.c80
-rw-r--r--freebsd/sys/net/if.c117
-rw-r--r--freebsd/sys/net/if.h15
-rw-r--r--freebsd/sys/net/if_arcsubr.c2
-rw-r--r--freebsd/sys/net/if_arp.h1
-rw-r--r--freebsd/sys/net/if_atmsubr.c2
-rw-r--r--freebsd/sys/net/if_bridge.c104
-rw-r--r--freebsd/sys/net/if_dead.c7
-rw-r--r--freebsd/sys/net/if_enc.c36
-rw-r--r--freebsd/sys/net/if_epair.c2
-rw-r--r--freebsd/sys/net/if_ethersubr.c52
-rw-r--r--freebsd/sys/net/if_fwsubr.c2
-rw-r--r--freebsd/sys/net/if_gif.c2
-rw-r--r--freebsd/sys/net/if_gre.c15
-rw-r--r--freebsd/sys/net/if_gre.h7
-rw-r--r--freebsd/sys/net/if_iso88025subr.c2
-rw-r--r--freebsd/sys/net/if_lagg.c123
-rw-r--r--freebsd/sys/net/if_llatbl.c68
-rw-r--r--freebsd/sys/net/if_llatbl.h54
-rw-r--r--freebsd/sys/net/if_loop.c49
-rw-r--r--freebsd/sys/net/if_media.h11
-rw-r--r--freebsd/sys/net/if_mib.c4
-rw-r--r--freebsd/sys/net/if_spppsubr.c2
-rw-r--r--freebsd/sys/net/if_stf.c28
-rw-r--r--freebsd/sys/net/if_tap.c30
-rw-r--r--freebsd/sys/net/if_tun.c2
-rw-r--r--freebsd/sys/net/if_types.h1
-rw-r--r--freebsd/sys/net/if_var.h106
-rw-r--r--freebsd/sys/net/if_vlan.c307
-rw-r--r--freebsd/sys/net/if_vlan_var.h18
-rw-r--r--freebsd/sys/net/netisr.c293
-rw-r--r--freebsd/sys/net/netisr.h19
-rw-r--r--freebsd/sys/net/netisr_internal.h3
-rw-r--r--freebsd/sys/net/pfil.c24
-rw-r--r--freebsd/sys/net/pfil.h4
-rw-r--r--freebsd/sys/net/radix.c4
-rw-r--r--freebsd/sys/net/radix.h3
-rw-r--r--freebsd/sys/net/raw_cb.c3
-rw-r--r--freebsd/sys/net/route.c82
-rw-r--r--freebsd/sys/net/route.h20
-rw-r--r--freebsd/sys/net/rtsock.c35
-rw-r--r--freebsd/sys/net/vnet.h3
-rw-r--r--freebsd/sys/net80211/_ieee80211.h15
-rw-r--r--freebsd/sys/net80211/ieee80211.c101
-rw-r--r--freebsd/sys/net80211/ieee80211.h1
-rw-r--r--freebsd/sys/net80211/ieee80211_acl.c7
-rw-r--r--freebsd/sys/net80211/ieee80211_adhoc.c124
-rw-r--r--freebsd/sys/net80211/ieee80211_ageq.c2
-rw-r--r--freebsd/sys/net80211/ieee80211_crypto.h4
-rw-r--r--freebsd/sys/net80211/ieee80211_crypto_ccmp.c3
-rw-r--r--freebsd/sys/net80211/ieee80211_crypto_tkip.c3
-rw-r--r--freebsd/sys/net80211/ieee80211_dfs.c4
-rw-r--r--freebsd/sys/net80211/ieee80211_freebsd.c2
-rw-r--r--freebsd/sys/net80211/ieee80211_hostap.c36
-rw-r--r--freebsd/sys/net80211/ieee80211_ht.c417
-rw-r--r--freebsd/sys/net80211/ieee80211_ht.h5
-rw-r--r--freebsd/sys/net80211/ieee80211_hwmp.c2
-rw-r--r--freebsd/sys/net80211/ieee80211_input.c48
-rw-r--r--freebsd/sys/net80211/ieee80211_input.h98
-rw-r--r--freebsd/sys/net80211/ieee80211_ioctl.c182
-rw-r--r--freebsd/sys/net80211/ieee80211_ioctl.h4
-rw-r--r--freebsd/sys/net80211/ieee80211_mesh.c86
-rw-r--r--freebsd/sys/net80211/ieee80211_mesh.h5
-rw-r--r--freebsd/sys/net80211/ieee80211_node.c26
-rw-r--r--freebsd/sys/net80211/ieee80211_node.h7
-rw-r--r--freebsd/sys/net80211/ieee80211_output.c41
-rw-r--r--freebsd/sys/net80211/ieee80211_power.c4
-rw-r--r--freebsd/sys/net80211/ieee80211_proto.c46
-rw-r--r--freebsd/sys/net80211/ieee80211_proto.h27
-rw-r--r--freebsd/sys/net80211/ieee80211_scan.c5
-rw-r--r--freebsd/sys/net80211/ieee80211_scan_sta.c40
-rw-r--r--freebsd/sys/net80211/ieee80211_sta.c45
-rw-r--r--freebsd/sys/net80211/ieee80211_var.h25
-rw-r--r--freebsd/sys/net80211/ieee80211_wds.c61
-rw-r--r--freebsd/sys/netinet/accf_http.c2
-rw-r--r--freebsd/sys/netinet/icmp6.h3
-rw-r--r--freebsd/sys/netinet/if_ether.c196
-rw-r--r--freebsd/sys/netinet/igmp.c7
-rw-r--r--freebsd/sys/netinet/in.c201
-rw-r--r--freebsd/sys/netinet/in.h29
-rw-r--r--freebsd/sys/netinet/in_gif.c2
-rw-r--r--freebsd/sys/netinet/in_mcast.c60
-rw-r--r--freebsd/sys/netinet/in_pcb.c697
-rw-r--r--freebsd/sys/netinet/in_pcb.h195
-rw-r--r--freebsd/sys/netinet/in_proto.c34
-rw-r--r--freebsd/sys/netinet/in_var.h15
-rw-r--r--freebsd/sys/netinet/ip.h8
-rw-r--r--freebsd/sys/netinet/ip6.h10
-rw-r--r--freebsd/sys/netinet/ip_carp.c59
-rw-r--r--freebsd/sys/netinet/ip_divert.c206
-rw-r--r--freebsd/sys/netinet/ip_dummynet.h54
-rw-r--r--freebsd/sys/netinet/ip_fastfwd.c25
-rw-r--r--freebsd/sys/netinet/ip_fw.h16
-rw-r--r--freebsd/sys/netinet/ip_gre.c17
-rw-r--r--freebsd/sys/netinet/ip_gre.h7
-rw-r--r--freebsd/sys/netinet/ip_icmp.c31
-rw-r--r--freebsd/sys/netinet/ip_input.c52
-rw-r--r--freebsd/sys/netinet/ip_ipsec.c17
-rw-r--r--freebsd/sys/netinet/ip_ipsec.h3
-rw-r--r--freebsd/sys/netinet/ip_mroute.c13
-rw-r--r--freebsd/sys/netinet/ip_mroute.h1
-rw-r--r--freebsd/sys/netinet/ip_options.c2
-rw-r--r--freebsd/sys/netinet/ip_output.c146
-rw-r--r--freebsd/sys/netinet/ip_var.h2
-rw-r--r--freebsd/sys/netinet/libalias/alias.h22
-rw-r--r--freebsd/sys/netinet/libalias/alias_db.c13
-rw-r--r--freebsd/sys/netinet/libalias/alias_sctp.c6
-rw-r--r--freebsd/sys/netinet/libalias/alias_sctp.h9
-rw-r--r--freebsd/sys/netinet/raw_ip.c83
-rw-r--r--freebsd/sys/netinet/sctp_constants.h8
-rw-r--r--freebsd/sys/netinet/sctp_indata.c2
-rw-r--r--freebsd/sys/netinet/sctp_indata.h4
-rw-r--r--freebsd/sys/netinet/sctp_input.c80
-rw-r--r--freebsd/sys/netinet/sctp_output.c33
-rw-r--r--freebsd/sys/netinet/sctp_pcb.c155
-rw-r--r--freebsd/sys/netinet/sctp_structs.h2
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.c6
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.h7
-rw-r--r--freebsd/sys/netinet/sctp_uio.h56
-rw-r--r--freebsd/sys/netinet/sctp_usrreq.c58
-rw-r--r--freebsd/sys/netinet/sctp_var.h52
-rw-r--r--freebsd/sys/netinet/sctputil.c26
-rw-r--r--freebsd/sys/netinet/tcp.h57
-rw-r--r--freebsd/sys/netinet/tcp_hostcache.c2
-rw-r--r--freebsd/sys/netinet/tcp_input.c609
-rw-r--r--freebsd/sys/netinet/tcp_lro.c814
-rw-r--r--freebsd/sys/netinet/tcp_lro.h60
-rw-r--r--freebsd/sys/netinet/tcp_offload.c209
-rw-r--r--freebsd/sys/netinet/tcp_offload.h364
-rw-r--r--freebsd/sys/netinet/tcp_output.c253
-rw-r--r--freebsd/sys/netinet/tcp_reass.c36
-rw-r--r--freebsd/sys/netinet/tcp_sack.c2
-rw-r--r--freebsd/sys/netinet/tcp_subr.c488
-rw-r--r--freebsd/sys/netinet/tcp_syncache.c253
-rw-r--r--freebsd/sys/netinet/tcp_syncache.h21
-rw-r--r--freebsd/sys/netinet/tcp_timer.c114
-rw-r--r--freebsd/sys/netinet/tcp_timer.h17
-rw-r--r--freebsd/sys/netinet/tcp_timewait.c72
-rw-r--r--freebsd/sys/netinet/tcp_usrreq.c331
-rw-r--r--freebsd/sys/netinet/tcp_var.h74
-rw-r--r--freebsd/sys/netinet/toecore.h130
-rw-r--r--freebsd/sys/netinet/udp.h2
-rw-r--r--freebsd/sys/netinet/udp_usrreq.c347
-rw-r--r--freebsd/sys/netinet/udp_var.h2
-rw-r--r--freebsd/sys/netinet6/dest6.c2
-rw-r--r--freebsd/sys/netinet6/frag6.c14
-rw-r--r--freebsd/sys/netinet6/icmp6.c190
-rw-r--r--freebsd/sys/netinet6/in6.c410
-rw-r--r--freebsd/sys/netinet6/in6.h144
-rw-r--r--freebsd/sys/netinet6/in6_cksum.c122
-rw-r--r--freebsd/sys/netinet6/in6_gif.c8
-rw-r--r--freebsd/sys/netinet6/in6_gif.h10
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.c68
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.h13
-rw-r--r--freebsd/sys/netinet6/in6_mcast.c5
-rw-r--r--freebsd/sys/netinet6/in6_pcb.c349
-rw-r--r--freebsd/sys/netinet6/in6_pcb.h64
-rw-r--r--freebsd/sys/netinet6/in6_proto.c57
-rw-r--r--freebsd/sys/netinet6/in6_src.c66
-rw-r--r--freebsd/sys/netinet6/in6_var.h55
-rw-r--r--freebsd/sys/netinet6/ip6_forward.c103
-rw-r--r--freebsd/sys/netinet6/ip6_input.c301
-rw-r--r--freebsd/sys/netinet6/ip6_ipsec.c9
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.c90
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.h1
-rw-r--r--freebsd/sys/netinet6/ip6_output.c262
-rw-r--r--freebsd/sys/netinet6/ip6_var.h138
-rw-r--r--freebsd/sys/netinet6/ip6protosw.h18
-rw-r--r--freebsd/sys/netinet6/mld6.c19
-rw-r--r--freebsd/sys/netinet6/nd6.c216
-rw-r--r--freebsd/sys/netinet6/nd6.h113
-rw-r--r--freebsd/sys/netinet6/nd6_nbr.c66
-rw-r--r--freebsd/sys/netinet6/nd6_rtr.c56
-rw-r--r--freebsd/sys/netinet6/pim6_var.h2
-rw-r--r--freebsd/sys/netinet6/raw_ip6.c44
-rw-r--r--freebsd/sys/netinet6/raw_ip6.h2
-rw-r--r--freebsd/sys/netinet6/route6.c8
-rw-r--r--freebsd/sys/netinet6/scope6_var.h24
-rw-r--r--freebsd/sys/netinet6/sctp6_usrreq.c29
-rw-r--r--freebsd/sys/netinet6/sctp6_var.h28
-rw-r--r--freebsd/sys/netinet6/send.h45
-rw-r--r--freebsd/sys/netinet6/tcp6_var.h6
-rw-r--r--freebsd/sys/netinet6/udp6_usrreq.c155
-rw-r--r--freebsd/sys/netipsec/ah_var.h2
-rw-r--r--freebsd/sys/netipsec/esp_var.h2
-rw-r--r--freebsd/sys/netipsec/ipcomp_var.h2
-rw-r--r--freebsd/sys/netipsec/ipip_var.h2
-rw-r--r--freebsd/sys/netipsec/ipsec.c10
-rw-r--r--freebsd/sys/netipsec/ipsec.h3
-rw-r--r--freebsd/sys/netipsec/ipsec6.h1
-rw-r--r--freebsd/sys/netipsec/ipsec_input.c94
-rw-r--r--freebsd/sys/netipsec/ipsec_mbuf.c8
-rw-r--r--freebsd/sys/netipsec/ipsec_output.c55
-rw-r--r--freebsd/sys/netipsec/key.c65
-rw-r--r--freebsd/sys/netipsec/keydb.h2
-rw-r--r--freebsd/sys/netipsec/keysock.c48
-rw-r--r--freebsd/sys/netipsec/keysock.h2
-rw-r--r--freebsd/sys/netipsec/xform_ah.c83
-rw-r--r--freebsd/sys/netipsec/xform_esp.c95
-rw-r--r--freebsd/sys/netipsec/xform_ipcomp.c78
-rw-r--r--freebsd/sys/netipsec/xform_ipip.c108
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_heap.c554
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_heap.h191
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched.h191
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_fifo.c122
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_prio.c231
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_qfq.c866
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_rr.c309
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c375
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_glue.c848
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_io.c852
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_private.h403
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dummynet.c2309
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw2.c2825
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_log.c470
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_nat.c670
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_pfil.c466
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_private.h341
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c1449
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_table.c764
-rw-r--r--freebsd/sys/nios2/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/nios2/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/nios2/nios2/legacy.c20
-rw-r--r--freebsd/sys/nios2/pci/pci_bus.c147
-rw-r--r--freebsd/sys/opencrypto/cryptodev.c1178
-rw-r--r--freebsd/sys/powerpc/include/machine/cpufunc.h32
-rw-r--r--freebsd/sys/powerpc/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/powerpc/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/powerpc/include/machine/psl.h75
-rw-r--r--freebsd/sys/powerpc/include/machine/spr.h62
-rw-r--r--freebsd/sys/powerpc/pci/pci_bus.c147
-rw-r--r--freebsd/sys/powerpc/powerpc/in_cksum.c2
-rw-r--r--freebsd/sys/powerpc/powerpc/legacy.c20
-rw-r--r--freebsd/sys/security/audit/audit.h23
-rw-r--r--freebsd/sys/security/mac/mac_framework.h14
-rw-r--r--freebsd/sys/sh/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/sh/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/sh/pci/pci_bus.c147
-rw-r--r--freebsd/sys/sh/sh/legacy.c20
-rw-r--r--freebsd/sys/sparc/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/sparc/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/sparc/pci/pci_bus.c147
-rw-r--r--freebsd/sys/sparc/sparc/legacy.c20
-rw-r--r--freebsd/sys/sparc64/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/sparc64/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/sparc64/pci/pci_bus.c147
-rw-r--r--freebsd/sys/sparc64/sparc64/legacy.c20
-rw-r--r--freebsd/sys/sys/_callout.h61
-rw-r--r--freebsd/sys/sys/_cpuset.h52
-rw-r--r--freebsd/sys/sys/_lockmgr.h1
-rw-r--r--freebsd/sys/sys/_null.h4
-rw-r--r--freebsd/sys/sys/_rmlock.h10
-rw-r--r--freebsd/sys/sys/_semaphore.h23
-rw-r--r--freebsd/sys/sys/_sockaddr_storage.h54
-rw-r--r--freebsd/sys/sys/_task.h14
-rw-r--r--freebsd/sys/sys/acl.h15
-rw-r--r--freebsd/sys/sys/ata.h59
-rw-r--r--freebsd/sys/sys/buf.h37
-rw-r--r--freebsd/sys/sys/buf_ring.h60
-rw-r--r--freebsd/sys/sys/bus.h31
-rw-r--r--freebsd/sys/sys/bus_dma.h115
-rw-r--r--freebsd/sys/sys/callout.h23
-rw-r--r--freebsd/sys/sys/capability.h209
-rw-r--r--freebsd/sys/sys/conf.h49
-rw-r--r--freebsd/sys/sys/cpuset.h229
-rw-r--r--freebsd/sys/sys/endian.h41
-rw-r--r--freebsd/sys/sys/eventhandler.h13
-rw-r--r--freebsd/sys/sys/file.h145
-rw-r--r--freebsd/sys/sys/filedesc.h15
-rw-r--r--freebsd/sys/sys/fnv_hash.h3
-rw-r--r--freebsd/sys/sys/hhook.h4
-rw-r--r--freebsd/sys/sys/interrupt.h6
-rw-r--r--freebsd/sys/sys/jail.h31
-rw-r--r--freebsd/sys/sys/kernel.h12
-rw-r--r--freebsd/sys/sys/kthread.h4
-rw-r--r--freebsd/sys/sys/ktr.h18
-rw-r--r--freebsd/sys/sys/libkern.h9
-rw-r--r--freebsd/sys/sys/linker.h4
-rw-r--r--freebsd/sys/sys/linker_set.h2
-rw-r--r--freebsd/sys/sys/lockmgr.h6
-rw-r--r--freebsd/sys/sys/loginclass.h53
-rw-r--r--freebsd/sys/sys/malloc.h2
-rw-r--r--freebsd/sys/sys/mbuf.h80
-rw-r--r--freebsd/sys/sys/module_khelp.h22
-rw-r--r--freebsd/sys/sys/mount.h305
-rw-r--r--freebsd/sys/sys/mutex.h172
-rw-r--r--freebsd/sys/sys/nlist_aout.h4
-rw-r--r--freebsd/sys/sys/pcpu.h29
-rw-r--r--freebsd/sys/sys/priority.h36
-rw-r--r--freebsd/sys/sys/priv.h12
-rw-r--r--freebsd/sys/sys/proc.h91
-rw-r--r--freebsd/sys/sys/racct.h165
-rw-r--r--freebsd/sys/sys/resourcevar.h12
-rw-r--r--freebsd/sys/sys/rmlock.h22
-rw-r--r--freebsd/sys/sys/sbuf.h22
-rw-r--r--freebsd/sys/sys/sdt.h49
-rw-r--r--freebsd/sys/sys/smp.h40
-rw-r--r--freebsd/sys/sys/sockbuf.h2
-rw-r--r--freebsd/sys/sys/socket.h28
-rw-r--r--freebsd/sys/sys/socketvar.h18
-rw-r--r--freebsd/sys/sys/stddef.h3
-rw-r--r--freebsd/sys/sys/stdint.h56
-rw-r--r--freebsd/sys/sys/sx.h95
-rw-r--r--freebsd/sys/sys/sysctl.h214
-rw-r--r--freebsd/sys/sys/sysproto.h817
-rw-r--r--freebsd/sys/sys/systm.h32
-rw-r--r--freebsd/sys/sys/taskqueue.h22
-rw-r--r--freebsd/sys/sys/timetc.h11
-rw-r--r--freebsd/sys/sys/tty.h9
-rw-r--r--freebsd/sys/sys/ttycom.h21
-rw-r--r--freebsd/sys/sys/ttydevsw.h23
-rw-r--r--freebsd/sys/sys/ttydisc.h1
-rw-r--r--freebsd/sys/sys/ucred.h10
-rw-r--r--freebsd/sys/sys/user.h151
-rw-r--r--freebsd/sys/sys/vmmeter.h12
-rw-r--r--freebsd/sys/sys/vnode.h843
-rw-r--r--freebsd/sys/v850/include/machine/in_cksum.h4
-rw-r--r--freebsd/sys/v850/include/machine/pci_cfgreg.h8
-rw-r--r--freebsd/sys/v850/pci/pci_bus.c147
-rw-r--r--freebsd/sys/v850/v850/legacy.c20
-rw-r--r--freebsd/sys/vm/uma.h12
-rw-r--r--freebsd/sys/vm/uma_core.c135
-rw-r--r--freebsd/sys/vm/uma_int.h31
-rw-r--r--freebsd/sys/vm/vm.h14
-rw-r--r--freebsd/sys/vm/vm_extern.h13
-rw-r--r--freebsd/sys/x86/pci/pci_bus.c736
-rw-r--r--freebsd/usr.bin/netstat/atalk.c4
-rw-r--r--freebsd/usr.bin/netstat/if.c12
-rw-r--r--freebsd/usr.bin/netstat/inet.c53
-rw-r--r--freebsd/usr.bin/netstat/inet6.c44
-rw-r--r--freebsd/usr.bin/netstat/ipsec.c4
-rw-r--r--freebsd/usr.bin/netstat/main.c26
-rw-r--r--freebsd/usr.bin/netstat/mbuf.c22
-rw-r--r--freebsd/usr.bin/netstat/netstat.h7
-rw-r--r--freebsd/usr.bin/netstat/pfkey.c4
-rw-r--r--freebsd/usr.bin/netstat/route.c4
-rw-r--r--freebsd/usr.bin/netstat/sctp.c2
-rw-r--r--freebsd/usr.bin/netstat/unix.c30
-rw-r--r--rtemsbsd/include/cam/cam_xpt_internal.h (renamed from rtemsbsd/include/sys/cpuset.h)0
-rw-r--r--rtemsbsd/include/cam/cam_xpt_periph.h (renamed from rtemsbsd/include/sys/vnode.h)0
-rw-r--r--rtemsbsd/include/cam/scsi/smp_all.h1
-rw-r--r--rtemsbsd/include/machine/_align.h1
-rw-r--r--rtemsbsd/include/machine/rtems-bsd-syscall-api.h6
-rw-r--r--rtemsbsd/include/rtems/bsd/local/bus_if.h21
-rw-r--r--rtemsbsd/include/rtems/bsd/local/miidevs.h646
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_capsicum.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_ofed.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_pcbgroup.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/pcib_if.h13
-rw-r--r--rtemsbsd/include/rtems/bsd/local/usbdevs.h31
-rw-r--r--rtemsbsd/include/rtems/bsd/local/usbdevs_data.h150
-rw-r--r--rtemsbsd/include/rtems/bsd/sys/_types.h10
-rw-r--r--rtemsbsd/include/rtems/bsd/sys/param.h6
-rw-r--r--rtemsbsd/include/sys/_stdint.h1
-rw-r--r--rtemsbsd/include/sys/rangelock.h1
-rw-r--r--rtemsbsd/local/bus_if.c10
-rw-r--r--rtemsbsd/local/pcib_if.c8
-rw-r--r--rtemsbsd/rtems/rtems-bsd-sysctl.c4
-rw-r--r--rtemsbsd/rtems/rtems-bsd-sysctlbyname.c2
-rw-r--r--testsuite/selectpollkqueue01/test_main.c2
596 files changed, 50531 insertions, 19227 deletions
diff --git a/Makefile b/Makefile
index e2f14ca9..569dc7e2 100644
--- a/Makefile
+++ b/Makefile
@@ -129,19 +129,20 @@ LIB_C_FILES += freebsd/sys/kern/kern_mib.c
LIB_C_FILES += freebsd/sys/kern/kern_module.c
LIB_C_FILES += freebsd/sys/kern/kern_mtxpool.c
LIB_C_FILES += freebsd/sys/kern/kern_osd.c
-LIB_C_FILES += freebsd/sys/kern/kern_subr.c
LIB_C_FILES += freebsd/sys/kern/kern_sysctl.c
LIB_C_FILES += freebsd/sys/kern/kern_time.c
LIB_C_FILES += freebsd/sys/kern/kern_timeout.c
LIB_C_FILES += freebsd/sys/kern/subr_bufring.c
LIB_C_FILES += freebsd/sys/kern/subr_bus.c
LIB_C_FILES += freebsd/sys/kern/subr_eventhandler.c
+LIB_C_FILES += freebsd/sys/kern/subr_hash.c
LIB_C_FILES += freebsd/sys/kern/subr_hints.c
LIB_C_FILES += freebsd/sys/kern/subr_kobj.c
LIB_C_FILES += freebsd/sys/kern/subr_module.c
LIB_C_FILES += freebsd/sys/kern/subr_rman.c
LIB_C_FILES += freebsd/sys/kern/subr_sbuf.c
LIB_C_FILES += freebsd/sys/kern/subr_taskqueue.c
+LIB_C_FILES += freebsd/sys/kern/subr_uio.c
LIB_C_FILES += freebsd/sys/kern/subr_unit.c
LIB_C_FILES += freebsd/sys/kern/sys_generic.c
LIB_C_FILES += freebsd/sys/kern/uipc_accf.c
@@ -254,22 +255,22 @@ LIB_C_FILES += freebsd/sys/netinet/tcp_syncache.c
LIB_C_FILES += freebsd/sys/netinet/tcp_timer.c
LIB_C_FILES += freebsd/sys/netinet/tcp_timewait.c
LIB_C_FILES += freebsd/sys/netinet/tcp_usrreq.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_heap.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_sched_fifo.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_sched_prio.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_sched_qfq.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_sched_rr.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_dn_glue.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_dn_io.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_dummynet.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw2.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw_log.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw_nat.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
+LIB_C_FILES += freebsd/sys/netpfil/ipfw/ip_fw_table.c
LIB_C_FILES += freebsd/sys/netinet/udp_usrreq.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_sched_fifo.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_sched_rr.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw_log.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_sched_qfq.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_sched_prio.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_dn_glue.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw2.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_heap.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_dummynet.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw_sockopt.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/dn_sched_wf2q.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw_nat.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw_pfil.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_dn_io.c
-LIB_C_FILES += freebsd/sys/netinet/ipfw/ip_fw_table.c
LIB_C_FILES += freebsd/sys/netinet/libalias/alias_dummy.c
LIB_C_FILES += freebsd/sys/netinet/libalias/alias_pptp.c
LIB_C_FILES += freebsd/sys/netinet/libalias/alias_smedia.c
@@ -374,7 +375,6 @@ LIB_C_FILES += freebsd/sys/opencrypto/rmd160.c
LIB_C_FILES += freebsd/sys/opencrypto/xform.c
LIB_C_FILES += freebsd/sys/opencrypto/skipjack.c
LIB_C_FILES += freebsd/sys/opencrypto/cast.c
-LIB_C_FILES += freebsd/sys/opencrypto/cryptodev.c
LIB_C_FILES += freebsd/sys/crypto/sha1.c
LIB_C_FILES += freebsd/sys/crypto/sha2/sha2.c
LIB_C_FILES += freebsd/sys/crypto/rijndael/rijndael-alg-fst.c
@@ -397,17 +397,17 @@ LIB_C_FILES += freebsd/sys/contrib/altq/altq/altq_priq.c
LIB_C_FILES += freebsd/sys/contrib/altq/altq/altq_cbq.c
LIB_C_FILES += freebsd/sys/contrib/altq/altq/altq_hfsc.c
LIB_C_FILES += freebsd/sys/contrib/altq/altq/altq_red.c
-LIB_C_FILES += freebsd/sys/contrib/pf/netinet/in4_cksum.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/pf.c
LIB_C_FILES += freebsd/sys/contrib/pf/net/if_pflog.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_subr.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_ioctl.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_table.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/if_pfsync.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/pf.c
LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_if.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_osfp.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_ioctl.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_lb.c
LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_norm.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_osfp.c
LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_ruleset.c
-LIB_C_FILES += freebsd/sys/contrib/pf/net/if_pfsync.c
+LIB_C_FILES += freebsd/sys/contrib/pf/net/pf_table.c
+LIB_C_FILES += freebsd/sys/contrib/pf/netinet/in4_cksum.c
LIB_C_FILES += freebsd/sys/dev/mii/mii.c
LIB_C_FILES += freebsd/sys/dev/mii/mii_bitbang.c
LIB_C_FILES += freebsd/sys/dev/mii/mii_physubr.c
@@ -443,74 +443,74 @@ LIB_C_FILES += freebsd/sys/dev/pci/pci.c
LIB_C_FILES += freebsd/sys/dev/pci/pci_user.c
LIB_C_FILES += freebsd/sys/dev/pci/pci_pci.c
ifeq ($(RTEMS_CPU), arm)
-LIB_C_FILES += freebsd/sys/arm/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/arm/arm/legacy.c
+LIB_C_FILES += freebsd/sys/arm/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), avr)
-LIB_C_FILES += freebsd/sys/avr/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/avr/avr/legacy.c
+LIB_C_FILES += freebsd/sys/avr/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), bfin)
-LIB_C_FILES += freebsd/sys/bfin/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/bfin/bfin/legacy.c
+LIB_C_FILES += freebsd/sys/bfin/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), h8300)
-LIB_C_FILES += freebsd/sys/h8300/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/h8300/h8300/legacy.c
+LIB_C_FILES += freebsd/sys/h8300/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), i386)
-LIB_C_FILES += freebsd/sys/i386/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/i386/i386/legacy.c
+LIB_C_FILES += freebsd/sys/i386/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), lm32)
-LIB_C_FILES += freebsd/sys/lm32/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/lm32/lm32/legacy.c
+LIB_C_FILES += freebsd/sys/lm32/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), m32c)
-LIB_C_FILES += freebsd/sys/m32c/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/m32c/m32c/legacy.c
+LIB_C_FILES += freebsd/sys/m32c/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), m32r)
-LIB_C_FILES += freebsd/sys/m32r/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/m32r/m32r/legacy.c
+LIB_C_FILES += freebsd/sys/m32r/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), m68k)
-LIB_C_FILES += freebsd/sys/m68k/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/m68k/m68k/legacy.c
+LIB_C_FILES += freebsd/sys/m68k/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), mips)
-LIB_C_FILES += freebsd/sys/mips/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/mips/mips/legacy.c
+LIB_C_FILES += freebsd/sys/mips/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), nios2)
-LIB_C_FILES += freebsd/sys/nios2/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/nios2/nios2/legacy.c
+LIB_C_FILES += freebsd/sys/nios2/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), powerpc)
-LIB_C_FILES += freebsd/sys/powerpc/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/powerpc/powerpc/legacy.c
+LIB_C_FILES += freebsd/sys/powerpc/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), sh)
-LIB_C_FILES += freebsd/sys/sh/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/sh/sh/legacy.c
+LIB_C_FILES += freebsd/sys/sh/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), sparc)
-LIB_C_FILES += freebsd/sys/sparc/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/sparc/sparc/legacy.c
+LIB_C_FILES += freebsd/sys/sparc/pci/pci_bus.c
NEED_DUMMY_PIC_IRQ=no
endif
ifeq ($(RTEMS_CPU), sparc64)
-LIB_C_FILES += freebsd/sys/sparc64/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/sparc64/sparc64/legacy.c
+LIB_C_FILES += freebsd/sys/sparc64/pci/pci_bus.c
endif
ifeq ($(RTEMS_CPU), v850)
-LIB_C_FILES += freebsd/sys/v850/pci/pci_bus.c
LIB_C_FILES += freebsd/sys/v850/v850/legacy.c
+LIB_C_FILES += freebsd/sys/v850/pci/pci_bus.c
endif
LIB_C_FILES += freebsd/sys/dev/random/harvest.c
LIB_C_FILES += freebsd/sys/netinet/tcp_hostcache.c
@@ -641,6 +641,7 @@ LIB_C_FILES += freebsd/lib/libc/db/recno/rec_search.c
LIB_C_FILES += freebsd/lib/libc/db/recno/rec_seq.c
LIB_C_FILES += freebsd/lib/libc/db/recno/rec_utils.c
LIB_C_FILES += freebsd/lib/libc/gen/err.c
+LIB_C_FILES += freebsd/lib/libc/gen/feature_present.c
LIB_C_FILES += freebsd/lib/libc/gen/gethostname.c
LIB_C_FILES += freebsd/lib/libc/inet/inet_addr.c
LIB_C_FILES += freebsd/lib/libc/inet/inet_cidr_ntop.c
diff --git a/freebsd-org b/freebsd-org
-Subproject 9001b0780a86959b6c84bf1d794ce4542218865
+Subproject 74d8320d56de778817fa39bc074b177c9394548
diff --git a/freebsd-to-rtems.py b/freebsd-to-rtems.py
index 0dbeb86d..a36799d3 100755
--- a/freebsd-to-rtems.py
+++ b/freebsd-to-rtems.py
@@ -682,9 +682,13 @@ base.addHeaderFiles(
'sys/sys/_bus_dma.h',
'sys/sys/bus_dma.h',
'sys/sys/bus.h',
+ 'sys/sys/_callout.h',
'sys/sys/callout.h',
+ 'sys/sys/capability.h',
'sys/sys/condvar.h',
'sys/sys/conf.h',
+ 'sys/sys/_cpuset.h',
+ 'sys/sys/cpuset.h',
'sys/sys/cpu.h',
'sys/sys/ctype.h',
'sys/sys/domain.h',
@@ -712,6 +716,7 @@ base.addHeaderFiles(
'sys/sys/lockmgr.h',
'sys/sys/lock_profile.h',
'sys/sys/lockstat.h',
+ 'sys/sys/loginclass.h',
'sys/sys/mac.h',
'sys/sys/malloc.h',
'sys/sys/mbuf.h',
@@ -727,6 +732,7 @@ base.addHeaderFiles(
'sys/sys/priv.h',
'sys/sys/proc.h',
'sys/sys/protosw.h',
+ 'sys/sys/racct.h',
'sys/sys/random.h',
'sys/sys/reboot.h',
'sys/sys/refcount.h',
@@ -747,6 +753,7 @@ base.addHeaderFiles(
'sys/sys/sigio.h',
'sys/sys/_sigset.h',
'sys/sys/smp.h',
+ 'sys/sys/_sockaddr_storage.h',
'sys/sys/sockbuf.h',
'sys/sys/socket.h',
'sys/sys/socketvar.h',
@@ -775,6 +782,7 @@ base.addHeaderFiles(
'sys/sys/un.h',
'sys/sys/unpcb.h',
'sys/sys/vmmeter.h',
+ 'sys/sys/vnode.h',
'sys/vm/uma_dbg.h',
'sys/vm/uma.h',
'sys/vm/uma_int.h',
@@ -795,19 +803,20 @@ base.addSourceFiles(
'sys/kern/kern_module.c',
'sys/kern/kern_mtxpool.c',
'sys/kern/kern_osd.c',
- 'sys/kern/kern_subr.c',
'sys/kern/kern_sysctl.c',
'sys/kern/kern_time.c',
'sys/kern/kern_timeout.c',
'sys/kern/subr_bufring.c',
'sys/kern/subr_bus.c',
'sys/kern/subr_eventhandler.c',
+ 'sys/kern/subr_hash.c',
'sys/kern/subr_hints.c',
'sys/kern/subr_kobj.c',
'sys/kern/subr_module.c',
'sys/kern/subr_rman.c',
'sys/kern/subr_sbuf.c',
'sys/kern/subr_taskqueue.c',
+ 'sys/kern/subr_uio.c',
'sys/kern/subr_unit.c',
'sys/kern/sys_generic.c',
'sys/kern/uipc_accf.c',
@@ -915,6 +924,7 @@ devUsbController.addHeaderFiles(
'sys/dev/usb/controller/ehci.h',
'sys/dev/usb/controller/ehcireg.h',
'sys/dev/usb/controller/uhcireg.h',
+ 'sys/dev/usb/controller/xhcireg.h',
]
)
devUsbController.addSourceFiles(
@@ -1513,10 +1523,10 @@ netinet.addHeaderFiles(
'sys/netinet/ip_mroute.h',
'sys/netinet/ip_options.h',
'sys/netinet/ip_var.h',
- 'sys/netinet/ipfw/ip_dn_private.h',
- 'sys/netinet/ipfw/ip_fw_private.h',
- 'sys/netinet/ipfw/dn_sched.h',
- 'sys/netinet/ipfw/dn_heap.h',
+ 'sys/netpfil/ipfw/dn_heap.h',
+ 'sys/netpfil/ipfw/dn_sched.h',
+ 'sys/netpfil/ipfw/ip_dn_private.h',
+ 'sys/netpfil/ipfw/ip_fw_private.h',
'sys/netinet/pim.h',
'sys/netinet/pim_var.h',
'sys/netinet/sctp_asconf.h',
@@ -1553,7 +1563,7 @@ netinet.addHeaderFiles(
'sys/netinet/tcp_syncache.h',
'sys/netinet/tcp_timer.h',
'sys/netinet/tcp_var.h',
- 'sys/netinet/toedev.h',
+ 'sys/netinet/toecore.h',
'sys/netinet/udp.h',
'sys/netinet/udp_var.h',
'sys/netinet/libalias/alias_local.h',
@@ -1620,23 +1630,23 @@ netinet.addSourceFiles(
'sys/netinet/tcp_timer.c',
'sys/netinet/tcp_timewait.c',
'sys/netinet/tcp_usrreq.c',
+ 'sys/netpfil/ipfw/dn_heap.c',
+ 'sys/netpfil/ipfw/dn_sched_fifo.c',
+ 'sys/netpfil/ipfw/dn_sched_prio.c',
+ 'sys/netpfil/ipfw/dn_sched_qfq.c',
+ 'sys/netpfil/ipfw/dn_sched_rr.c',
+ 'sys/netpfil/ipfw/dn_sched_wf2q.c',
+ 'sys/netpfil/ipfw/ip_dn_glue.c',
+ 'sys/netpfil/ipfw/ip_dn_io.c',
+ 'sys/netpfil/ipfw/ip_dummynet.c',
+ 'sys/netpfil/ipfw/ip_fw2.c',
+ #'sys/netpfil/ipfw/ip_fw_dynamic.c',
+ 'sys/netpfil/ipfw/ip_fw_log.c',
+ 'sys/netpfil/ipfw/ip_fw_nat.c',
+ 'sys/netpfil/ipfw/ip_fw_pfil.c',
+ 'sys/netpfil/ipfw/ip_fw_sockopt.c',
+ 'sys/netpfil/ipfw/ip_fw_table.c',
'sys/netinet/udp_usrreq.c',
- 'sys/netinet/ipfw/dn_sched_fifo.c',
- 'sys/netinet/ipfw/dn_sched_rr.c',
- 'sys/netinet/ipfw/ip_fw_log.c',
- 'sys/netinet/ipfw/dn_sched_qfq.c',
- 'sys/netinet/ipfw/dn_sched_prio.c',
- #'netinet/ipfw/ip_fw_dynamic.c',
- 'sys/netinet/ipfw/ip_dn_glue.c',
- 'sys/netinet/ipfw/ip_fw2.c',
- 'sys/netinet/ipfw/dn_heap.c',
- 'sys/netinet/ipfw/ip_dummynet.c',
- 'sys/netinet/ipfw/ip_fw_sockopt.c',
- 'sys/netinet/ipfw/dn_sched_wf2q.c',
- 'sys/netinet/ipfw/ip_fw_nat.c',
- 'sys/netinet/ipfw/ip_fw_pfil.c',
- 'sys/netinet/ipfw/ip_dn_io.c',
- 'sys/netinet/ipfw/ip_fw_table.c',
'sys/netinet/libalias/alias_dummy.c',
'sys/netinet/libalias/alias_pptp.c',
'sys/netinet/libalias/alias_smedia.c',
@@ -1679,6 +1689,7 @@ netinet6.addHeaderFiles(
'sys/netinet6/raw_ip6.h',
'sys/netinet6/scope6_var.h',
'sys/netinet6/sctp6_var.h',
+ 'sys/netinet6/send.h',
'sys/netinet6/tcp6_var.h',
'sys/netinet6/udp6_var.h',
]
@@ -1853,7 +1864,6 @@ opencrypto.addSourceFiles(
'sys/opencrypto/xform.c',
'sys/opencrypto/skipjack.c',
'sys/opencrypto/cast.c',
- 'sys/opencrypto/cryptodev.c',
]
)
@@ -1938,25 +1948,26 @@ altq.addSourceFiles(
pf = Module('pf')
pf.addHeaderFiles(
[
- 'sys/contrib/pf/net/pf_mtag.h',
+ 'sys/contrib/pf/net/if_pflog.h',
+ 'sys/contrib/pf/net/if_pflow.h',
'sys/contrib/pf/net/if_pfsync.h',
'sys/contrib/pf/net/pfvar.h',
- 'sys/contrib/pf/net/if_pflog.h',
+ 'sys/contrib/pf/net/pf_mtag.h',
]
)
pf.addSourceFiles(
[
- 'sys/contrib/pf/netinet/in4_cksum.c',
- 'sys/contrib/pf/net/pf.c',
'sys/contrib/pf/net/if_pflog.c',
- 'sys/contrib/pf/net/pf_subr.c',
- 'sys/contrib/pf/net/pf_ioctl.c',
- 'sys/contrib/pf/net/pf_table.c',
+ 'sys/contrib/pf/net/if_pfsync.c',
+ 'sys/contrib/pf/net/pf.c',
'sys/contrib/pf/net/pf_if.c',
- 'sys/contrib/pf/net/pf_osfp.c',
+ 'sys/contrib/pf/net/pf_ioctl.c',
+ 'sys/contrib/pf/net/pf_lb.c',
'sys/contrib/pf/net/pf_norm.c',
+ 'sys/contrib/pf/net/pf_osfp.c',
'sys/contrib/pf/net/pf_ruleset.c',
- 'sys/contrib/pf/net/if_pfsync.c',
+ 'sys/contrib/pf/net/pf_table.c',
+ 'sys/contrib/pf/netinet/in4_cksum.c',
]
)
@@ -1974,12 +1985,12 @@ pci.addHeaderFiles(
'sys/dev/pci/pci_private.h',
'sys/dev/pci/pcireg.h',
'sys/dev/pci/pcivar.h',
+ 'sys/dev/pci/pcivar.h',
]
)
pci.addCPUDependentHeaderFiles(
[
'sys/i386/include/legacyvar.h',
- 'sys/i386/include/pci_cfgreg.h',
]
)
pci.addTargetSourceCPUDependentHeaderFiles(
@@ -1987,13 +1998,18 @@ pci.addTargetSourceCPUDependentHeaderFiles(
'i386',
[
'sys/i386/include/legacyvar.h',
- 'sys/i386/include/pci_cfgreg.h',
+ ]
+)
+pci.addTargetSourceCPUDependentHeaderFiles(
+ [ 'arm', 'avr', 'bfin', 'h8300', 'i386', 'lm32', 'm32c', 'm32r', 'm68k', 'mips', 'nios2', 'powerpc', 'sh', 'sparc', 'sparc64', 'v850' ],
+ 'x86',
+ [
+ 'sys/x86/include/pci_cfgreg.h',
]
)
pci.addCPUDependentSourceFiles(
'i386',
[
- 'sys/i386/pci/pci_bus.c',
'sys/i386/i386/legacy.c',
]
)
@@ -2001,10 +2017,16 @@ pci.addTargetSourceCPUDependentSourceFiles(
[ 'arm', 'avr', 'bfin', 'h8300', 'lm32', 'm32c', 'm32r', 'm68k', 'mips', 'nios2', 'powerpc', 'sh', 'sparc', 'sparc64', 'v850' ],
'i386',
[
- 'sys/i386/pci/pci_bus.c',
'sys/i386/i386/legacy.c',
]
)
+pci.addTargetSourceCPUDependentSourceFiles(
+ [ 'arm', 'avr', 'bfin', 'h8300', 'i386', 'lm32', 'm32c', 'm32r', 'm68k', 'mips', 'nios2', 'powerpc', 'sh', 'sparc', 'sparc64', 'v850' ],
+ 'x86',
+ [
+ 'sys/x86/pci/pci_bus.c',
+ ]
+)
userSpace = Module('userSpace')
userSpace.addUserSpaceHeaderFiles(
@@ -2169,6 +2191,7 @@ userSpace.addUserSpaceSourceFiles(
'lib/libc/db/recno/rec_seq.c',
'lib/libc/db/recno/rec_utils.c',
'lib/libc/gen/err.c',
+ 'lib/libc/gen/feature_present.c',
'lib/libc/gen/gethostname.c',
'lib/libc/inet/inet_addr.c',
'lib/libc/inet/inet_cidr_ntop.c',
diff --git a/freebsd/include/arpa/ftp.h b/freebsd/include/arpa/ftp.h
index 9a364885..081c037e 100644
--- a/freebsd/include/arpa/ftp.h
+++ b/freebsd/include/arpa/ftp.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -31,6 +27,8 @@
* SUCH DAMAGE.
*
* @(#)ftp.h 8.1 (Berkeley) 6/2/93
+ *
+ * $FreeBSD$
*/
#ifndef _ARPA_FTP_H_
diff --git a/freebsd/include/arpa/inet.h b/freebsd/include/arpa/inet.h
index aaddbb9b..079ba7a2 100644
--- a/freebsd/include/arpa/inet.h
+++ b/freebsd/include/arpa/inet.h
@@ -12,11 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/arpa/nameser.h b/freebsd/include/arpa/nameser.h
index cef012dd..b0250fd3 100644
--- a/freebsd/include/arpa/nameser.h
+++ b/freebsd/include/arpa/nameser.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/arpa/nameser_compat.h b/freebsd/include/arpa/nameser_compat.h
index 64d05f82..161fed35 100644
--- a/freebsd/include/arpa/nameser_compat.h
+++ b/freebsd/include/arpa/nameser_compat.h
@@ -9,11 +9,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/db.h b/freebsd/include/db.h
index 025a4228..a19ef546 100644
--- a/freebsd/include/db.h
+++ b/freebsd/include/db.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/err.h b/freebsd/include/err.h
index 65aff6f2..81ff4421 100644
--- a/freebsd/include/err.h
+++ b/freebsd/include/err.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/mpool.h b/freebsd/include/mpool.h
index c74764d7..3f22ab78 100644
--- a/freebsd/include/mpool.h
+++ b/freebsd/include/mpool.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/netdb.h b/freebsd/include/netdb.h
index 52ab3933..baf96f6f 100644
--- a/freebsd/include/netdb.h
+++ b/freebsd/include/netdb.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/nlist.h b/freebsd/include/nlist.h
index 3dccf9e7..a4965fd9 100644
--- a/freebsd/include/nlist.h
+++ b/freebsd/include/nlist.h
@@ -15,11 +15,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/nsswitch.h b/freebsd/include/nsswitch.h
index 85dfe296..93ce937c 100644
--- a/freebsd/include/nsswitch.h
+++ b/freebsd/include/nsswitch.h
@@ -16,13 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
diff --git a/freebsd/include/resolv.h b/freebsd/include/resolv.h
index b70ef300..7aa61f62 100644
--- a/freebsd/include/resolv.h
+++ b/freebsd/include/resolv.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/include/rpc/auth.h b/freebsd/include/rpc/auth.h
index d39ad120..b19addc8 100644
--- a/freebsd/include/rpc/auth.h
+++ b/freebsd/include/rpc/auth.h
@@ -243,14 +243,13 @@ __END_DECLS
* System style authentication
* AUTH *authunix_create(machname, uid, gid, len, aup_gids)
* char *machname;
- * int uid;
- * int gid;
+ * u_int uid;
+ * u_int gid;
* int len;
- * int *aup_gids;
+ * u_int *aup_gids;
*/
__BEGIN_DECLS
-extern AUTH *authunix_create(char *, int, int, int,
- int *);
+extern AUTH *authunix_create(char *, u_int, u_int, int, u_int *);
extern AUTH *authunix_create_default(void); /* takes no parameters */
extern AUTH *authnone_create(void); /* takes no parameters */
__END_DECLS
diff --git a/freebsd/include/rpc/auth_unix.h b/freebsd/include/rpc/auth_unix.h
index 48373b2c..b005bac8 100644
--- a/freebsd/include/rpc/auth_unix.h
+++ b/freebsd/include/rpc/auth_unix.h
@@ -60,10 +60,10 @@
struct authunix_parms {
u_long aup_time;
char *aup_machname;
- int aup_uid;
- int aup_gid;
+ u_int aup_uid;
+ u_int aup_gid;
u_int aup_len;
- int *aup_gids;
+ u_int *aup_gids;
};
#define authsys_parms authunix_parms
diff --git a/freebsd/include/rpc/xdr.h b/freebsd/include/rpc/xdr.h
index 76968afc..ada5c5bd 100644
--- a/freebsd/include/rpc/xdr.h
+++ b/freebsd/include/rpc/xdr.h
@@ -322,6 +322,7 @@ extern bool_t xdr_hyper(XDR *, quad_t *);
extern bool_t xdr_u_hyper(XDR *, u_quad_t *);
extern bool_t xdr_longlong_t(XDR *, quad_t *);
extern bool_t xdr_u_longlong_t(XDR *, u_quad_t *);
+extern unsigned long xdr_sizeof(xdrproc_t, void *);
__END_DECLS
/*
diff --git a/freebsd/include/sysexits.h b/freebsd/include/sysexits.h
index 464cb11b..e52bb5be 100644
--- a/freebsd/include/sysexits.h
+++ b/freebsd/include/sysexits.h
@@ -10,11 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -31,6 +27,8 @@
* SUCH DAMAGE.
*
* @(#)sysexits.h 8.1 (Berkeley) 6/2/93
+ *
+ * $FreeBSD$
*/
#ifndef _SYSEXITS_H_
diff --git a/freebsd/lib/libc/db/btree/bt_split.c b/freebsd/lib/libc/db/btree/bt_split.c
index 37bf356a..16efb701 100644
--- a/freebsd/lib/libc/db/btree/bt_split.c
+++ b/freebsd/lib/libc/db/btree/bt_split.c
@@ -39,6 +39,7 @@ static char sccsid[] = "@(#)bt_split.c 8.10 (Berkeley) 1/9/95";
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/types.h>
+#include <rtems/bsd/sys/param.h>
#include <limits.h>
#include <stdio.h>
@@ -484,7 +485,7 @@ bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
WR_RINTERNAL(dest,
l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
- h->linp[1] = h->upper -= NRINTERNAL;
+ __PAST_END(h->linp, 1) = h->upper -= NRINTERNAL;
dest = (char *)h + h->upper;
WR_RINTERNAL(dest,
r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
@@ -536,7 +537,7 @@ bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
case P_BLEAF:
bl = GETBLEAF(r, 0);
nbytes = NBINTERNAL(bl->ksize);
- h->linp[1] = h->upper -= nbytes;
+ __PAST_END(h->linp, 1) = h->upper -= nbytes;
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
memmove(dest, bl->bytes, bl->ksize);
@@ -552,7 +553,7 @@ bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
case P_BINTERNAL:
bi = GETBINTERNAL(r, 0);
nbytes = NBINTERNAL(bi->ksize);
- h->linp[1] = h->upper -= nbytes;
+ __PAST_END(h->linp, 1) = h->upper -= nbytes;
dest = (char *)h + h->upper;
memmove(dest, bi, nbytes);
((BINTERNAL *)dest)->pgno = r->pgno;
diff --git a/freebsd/lib/libc/gen/feature_present.c b/freebsd/lib/libc/gen/feature_present.c
new file mode 100644
index 00000000..5fbd8b29
--- /dev/null
+++ b/freebsd/lib/libc/gen/feature_present.c
@@ -0,0 +1,64 @@
+#include <machine/rtems-bsd-user-space.h>
+
+/*-
+ * Copyright (c) 2008 Yahoo!, Inc.
+ * All rights reserved.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/types.h>
+#include <sys/sysctl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Returns true if the named feature is present in the currently
+ * running kernel. A feature's presence is indicated by an integer
+ * sysctl node called kern.feature.<feature> that is non-zero.
+ */
+int
+feature_present(const char *feature)
+{
+ char *mib;
+ size_t len;
+ int i;
+
+ if (asprintf(&mib, "kern.features.%s", feature) < 0)
+ return (0);
+ len = sizeof(i);
+ if (sysctlbyname(mib, &i, &len, NULL, 0) < 0) {
+ free(mib);
+ return (0);
+ }
+ free(mib);
+ if (len != sizeof(i))
+ return (0);
+ return (i != 0);
+}
diff --git a/freebsd/lib/libc/include/libc_private.h b/freebsd/lib/libc/include/libc_private.h
index 4e1430ca..09dfacf0 100644
--- a/freebsd/lib/libc/include/libc_private.h
+++ b/freebsd/lib/libc/include/libc_private.h
@@ -34,6 +34,7 @@
#ifndef _LIBC_PRIVATE_H_
#define _LIBC_PRIVATE_H_
+#include <rtems/bsd/sys/_types.h>
#include <sys/_pthreadtypes.h>
/*
@@ -48,6 +49,15 @@ extern int __isthreaded;
#endif /* __rtems__ */
/*
+ * Elf_Auxinfo *__elf_aux_vector, the pointer to the ELF aux vector
+ * provided by kernel. Either set for us by rtld, or found at runtime
+ * on stack for static binaries.
+ *
+ * Type is void to avoid polluting whole libc with ELF types.
+ */
+extern void *__elf_aux_vector;
+
+/*
* libc should use libc_dlopen internally, which respects a global
* flag where loading of new shared objects can be restricted.
*/
@@ -155,6 +165,10 @@ typedef enum {
PJT_SETSPECIFIC,
PJT_SIGMASK,
PJT_TESTCANCEL,
+ PJT_CLEANUP_POP_IMP,
+ PJT_CLEANUP_PUSH_IMP,
+ PJT_CANCEL_ENTER,
+ PJT_CANCEL_LEAVE,
PJT_MAX
} pjt_index_t;
@@ -236,10 +250,21 @@ extern void * __sys_freebsd6_mmap(void *, __size_t, int, int, int, int, __off_t)
/* Without back-compat translation */
extern int __sys_fcntl(int, int, ...);
+struct timespec;
+struct timeval;
+struct timezone;
+int __sys_gettimeofday(struct timeval *, struct timezone *);
+int __sys_clock_gettime(__clockid_t, struct timespec *ts);
+
/* execve() with PATH processing to implement posix_spawnp() */
int _execvpe(const char *, char * const *, char * const *);
+int _elf_aux_info(int aux, void *buf, int buflen);
struct dl_phdr_info;
int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
+void __init_elf_aux_vector(void);
+
+void _pthread_cancel_enter(int);
+void _pthread_cancel_leave(int);
#endif /* _LIBC_PRIVATE_H_ */
diff --git a/freebsd/lib/libc/include/port_before.h b/freebsd/lib/libc/include/port_before.h
index beef71d0..304dd66d 100644
--- a/freebsd/lib/libc/include/port_before.h
+++ b/freebsd/lib/libc/include/port_before.h
@@ -17,6 +17,6 @@
var = _u.v; \
} while (0)
-#define UNUSED(x) (x) = (x)
+#define UNUSED(x) (void)(x)
#endif /* _PORT_BEFORE_H_ */
diff --git a/freebsd/lib/libc/include/reentrant.h b/freebsd/lib/libc/include/reentrant.h
index 8ab328bc..22a2325d 100644
--- a/freebsd/lib/libc/include/reentrant.h
+++ b/freebsd/lib/libc/include/reentrant.h
@@ -13,13 +13,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
diff --git a/freebsd/lib/libc/net/getaddrinfo.c b/freebsd/lib/libc/net/getaddrinfo.c
index 2c99688e..5c1965b6 100644
--- a/freebsd/lib/libc/net/getaddrinfo.c
+++ b/freebsd/lib/libc/net/getaddrinfo.c
@@ -466,7 +466,7 @@ getaddrinfo(const char *hostname, const char *servname,
}
error = get_portmatch(pai, servname);
if (error)
- ERR(error);
+ goto bad;
*pai = ai0;
}
@@ -695,6 +695,8 @@ get_addrselectpolicy(struct policyhead *head)
if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &l, NULL, 0) < 0)
return (0);
+ if (l == 0)
+ return (0);
if ((buf = malloc(l)) == NULL)
return (0);
if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf, &l, NULL, 0) < 0) {
diff --git a/freebsd/lib/libc/net/getnameinfo.c b/freebsd/lib/libc/net/getnameinfo.c
index bc862428..74ea9a84 100644
--- a/freebsd/lib/libc/net/getnameinfo.c
+++ b/freebsd/lib/libc/net/getnameinfo.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
+#include <net/firewire.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
@@ -387,6 +388,7 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
{
const struct sockaddr_dl *sdl =
(const struct sockaddr_dl *)(const void *)sa;
+ const struct fw_hwaddr *iha;
int n;
if (serv != NULL && servlen > 0)
@@ -402,6 +404,15 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
}
switch (sdl->sdl_type) {
+ case IFT_IEEE1394:
+ if (sdl->sdl_alen < sizeof(iha->sender_unique_ID_hi) +
+ sizeof(iha->sender_unique_ID_lo))
+ return EAI_FAMILY;
+ iha = (const struct fw_hwaddr *)(const void *)LLADDR(sdl);
+ return hexname((const u_int8_t *)&iha->sender_unique_ID_hi,
+ sizeof(iha->sender_unique_ID_hi) +
+ sizeof(iha->sender_unique_ID_lo),
+ host, hostlen);
/*
* The following have zero-length addresses.
* IFT_ATM (net/if_atmsubr.c)
diff --git a/freebsd/lib/libc/net/nslexer.l b/freebsd/lib/libc/net/nslexer.l
index 34c79d92..bc36ea2b 100644
--- a/freebsd/lib/libc/net/nslexer.l
+++ b/freebsd/lib/libc/net/nslexer.l
@@ -53,10 +53,10 @@ static char *rcsid =
#include "nsparser.h"
-#define YY_NO_UNPUT
-
%}
+%option noinput
+%option nounput
%option yylineno
BLANK [ \t]
diff --git a/freebsd/lib/libc/net/nsparser.y b/freebsd/lib/libc/net/nsparser.y
index 730458a3..2e57cd26 100644
--- a/freebsd/lib/libc/net/nsparser.y
+++ b/freebsd/lib/libc/net/nsparser.y
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#define _NS_PRIVATE
#include <nsswitch.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include "un-namespace.h"
diff --git a/freebsd/lib/libc/resolv/res_send.c b/freebsd/lib/libc/resolv/res_send.c
index 06fdf471..51bca770 100644
--- a/freebsd/lib/libc/resolv/res_send.c
+++ b/freebsd/lib/libc/resolv/res_send.c
@@ -1104,8 +1104,6 @@ Aerror(const res_state statp, FILE *file, const char *string, int error,
char hbuf[NI_MAXHOST];
char sbuf[NI_MAXSERV];
- alen = alen;
-
if ((statp->options & RES_DEBUG) != 0U) {
if (getnameinfo(address, alen, hbuf, sizeof(hbuf),
sbuf, sizeof(sbuf), niflags)) {
diff --git a/freebsd/lib/libc/stdio/local.h b/freebsd/lib/libc/stdio/local.h
index d1f4bdad..754476e9 100644
--- a/freebsd/lib/libc/stdio/local.h
+++ b/freebsd/lib/libc/stdio/local.h
@@ -5,6 +5,11 @@
* This code is derived from software contributed to Berkeley by
* Chris Torek.
*
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -37,6 +42,7 @@
#include <pthread.h>
#include <string.h>
#include <wchar.h>
+#include <locale.h>
/*
* Information local to this implementation of stdio,
@@ -50,8 +56,10 @@ extern int _ftello(FILE *, fpos_t *);
extern int _fseeko(FILE *, off_t, int, int);
extern int __fflush(FILE *fp);
extern void __fcloseall(void);
-extern wint_t __fgetwc(FILE *);
-extern wint_t __fputwc(wchar_t, FILE *);
+#ifndef __rtems__
+extern wint_t __fgetwc(FILE *, locale_t);
+extern wint_t __fputwc(wchar_t, FILE *, locale_t);
+#endif /* __rtems__ */
extern int __sflush(FILE *);
extern FILE *__sfp(void);
extern int __slbexpand(FILE *, size_t);
@@ -76,16 +84,22 @@ extern void _cleanup(void);
extern void __smakebuf(FILE *);
extern int __swhatbuf(FILE *, size_t *, int *);
extern int _fwalk(int (*)(FILE *));
-extern int __svfscanf(FILE *, const char *, __va_list);
+#ifndef __rtems__
+extern int __svfscanf(FILE *, locale_t, const char *, __va_list);
+#endif /* __rtems__ */
extern int __swsetup(FILE *);
extern int __sflags(const char *, int *);
extern int __ungetc(int, FILE *);
-extern wint_t __ungetwc(wint_t, FILE *);
-extern int __vfprintf(FILE *, const char *, __va_list);
+#ifndef __rtems__
+extern wint_t __ungetwc(wint_t, FILE *, locale_t);
+extern int __vfprintf(FILE *, locale_t, const char *, __va_list);
+#endif /* __rtems__ */
extern int __vfscanf(FILE *, const char *, __va_list);
-extern int __vfwprintf(FILE *, const wchar_t *, __va_list);
-extern int __vfwscanf(FILE * __restrict, const wchar_t * __restrict,
+#ifndef __rtems__
+extern int __vfwprintf(FILE *, locale_t, const wchar_t *, __va_list);
+extern int __vfwscanf(FILE * __restrict, locale_t, const wchar_t * __restrict,
__va_list);
+#endif /* __rtems__ */
extern size_t __fread(void * __restrict buf, size_t size, size_t count,
FILE * __restrict fp);
extern int __sdidinit;
diff --git a/freebsd/lib/libipsec/ipsec_dump_policy.c b/freebsd/lib/libipsec/ipsec_dump_policy.c
index 94275e89..33d4bb13 100644
--- a/freebsd/lib/libipsec/ipsec_dump_policy.c
+++ b/freebsd/lib/libipsec/ipsec_dump_policy.c
@@ -164,7 +164,8 @@ ipsec_dump_policy(policy, delimiter)
return NULL;
}
buf = newbuf;
- snprintf(buf, buflen, "%s%s%s", buf, delimiter, isrbuf);
+ snprintf(buf + strlen(buf), buflen - strlen(buf),
+ "%s%s", delimiter, isrbuf);
off += xisr->sadb_x_ipsecrequest_len;
}
diff --git a/freebsd/lib/libipsec/policy_token.l b/freebsd/lib/libipsec/policy_token.l
index cc70ea90..ec099d2b 100644
--- a/freebsd/lib/libipsec/policy_token.l
+++ b/freebsd/lib/libipsec/policy_token.l
@@ -58,6 +58,7 @@ int yylex(void);
%option noyywrap
%option nounput
+%option noinput
/* common section */
nl \n
@@ -142,8 +143,8 @@ void
__policy__strbuffer__init__(msg)
char *msg;
{
- if (yy_current_buffer)
- yy_delete_buffer(yy_current_buffer);
+ if (YY_CURRENT_BUFFER)
+ yy_delete_buffer(YY_CURRENT_BUFFER);
strbuffer = (YY_BUFFER_STATE)yy_scan_string(msg);
yy_switch_to_buffer(strbuffer);
diff --git a/freebsd/lib/libkvm/kvm.h b/freebsd/lib/libkvm/kvm.h
index 4c2b4b8a..a0c2a906 100644
--- a/freebsd/lib/libkvm/kvm.h
+++ b/freebsd/lib/libkvm/kvm.h
@@ -88,7 +88,7 @@ kvm_t *kvm_openfiles
(const char *, const char *, const char *, int, char *);
ssize_t kvm_read(kvm_t *, unsigned long, void *, size_t);
ssize_t kvm_uread
- (kvm_t *, struct kinfo_proc *, unsigned long, char *, size_t);
+ (kvm_t *, const struct kinfo_proc *, unsigned long, char *, size_t);
ssize_t kvm_write(kvm_t *, unsigned long, const void *, size_t);
__END_DECLS
diff --git a/freebsd/lib/libmemstat/memstat.c b/freebsd/lib/libmemstat/memstat.c
index 0705315d..536b47fb 100644
--- a/freebsd/lib/libmemstat/memstat.c
+++ b/freebsd/lib/libmemstat/memstat.c
@@ -51,8 +51,6 @@ memstat_strerror(int error)
return ("Version mismatch");
case MEMSTAT_ERROR_PERMISSION:
return ("Permission denied");
- case MEMSTAT_ERROR_TOOMANYCPUS:
- return ("Too many CPUs");
case MEMSTAT_ERROR_DATAERROR:
return ("Data format error");
case MEMSTAT_ERROR_KVM:
@@ -101,6 +99,8 @@ _memstat_mtl_empty(struct memory_type_list *list)
struct memory_type *mtp;
while ((mtp = LIST_FIRST(&list->mtl_list))) {
+ free(mtp->mt_percpu_alloc);
+ free(mtp->mt_percpu_cache);
LIST_REMOVE(mtp, mt_list);
free(mtp);
}
@@ -149,7 +149,7 @@ memstat_mtl_find(struct memory_type_list *list, int allocator,
*/
struct memory_type *
_memstat_mt_allocate(struct memory_type_list *list, int allocator,
- const char *name)
+ const char *name, int maxcpus)
{
struct memory_type *mtp;
@@ -160,6 +160,10 @@ _memstat_mt_allocate(struct memory_type_list *list, int allocator,
bzero(mtp, sizeof(*mtp));
mtp->mt_allocator = allocator;
+ mtp->mt_percpu_alloc = malloc(sizeof(struct mt_percpu_alloc_s) *
+ maxcpus);
+ mtp->mt_percpu_cache = malloc(sizeof(struct mt_percpu_cache_s) *
+ maxcpus);
strlcpy(mtp->mt_name, name, MEMTYPE_MAXNAME);
LIST_INSERT_HEAD(&list->mtl_list, mtp, mt_list);
return (mtp);
@@ -173,7 +177,7 @@ _memstat_mt_allocate(struct memory_type_list *list, int allocator,
* libmemstat(3) internal function.
*/
void
-_memstat_mt_reset_stats(struct memory_type *mtp)
+_memstat_mt_reset_stats(struct memory_type *mtp, int maxcpus)
{
int i;
@@ -190,11 +194,12 @@ _memstat_mt_reset_stats(struct memory_type *mtp)
mtp->mt_count = 0;
mtp->mt_free = 0;
mtp->mt_failures = 0;
+ mtp->mt_sleeps = 0;
mtp->mt_zonefree = 0;
mtp->mt_kegfree = 0;
- for (i = 0; i < MEMSTAT_MAXCPU; i++) {
+ for (i = 0; i < maxcpus; i++) {
mtp->mt_percpu_alloc[i].mtp_memalloced = 0;
mtp->mt_percpu_alloc[i].mtp_memfreed = 0;
mtp->mt_percpu_alloc[i].mtp_numallocs = 0;
@@ -306,6 +311,13 @@ memstat_get_failures(const struct memory_type *mtp)
return (mtp->mt_failures);
}
+uint64_t
+memstat_get_sleeps(const struct memory_type *mtp)
+{
+
+ return (mtp->mt_sleeps);
+}
+
void *
memstat_get_caller_pointer(const struct memory_type *mtp, int index)
{
diff --git a/freebsd/lib/libmemstat/memstat.h b/freebsd/lib/libmemstat/memstat.h
index aaa85702..cca75b32 100644
--- a/freebsd/lib/libmemstat/memstat.h
+++ b/freebsd/lib/libmemstat/memstat.h
@@ -30,12 +30,6 @@
#define _MEMSTAT_H_
/*
- * Number of CPU slots in library-internal data structures. This should be
- * at least the value of MAXCPU from param.h.
- */
-#define MEMSTAT_MAXCPU 32
-
-/*
* Amount of caller data to maintain for each caller data slot. Applications
* must not request more than this number of caller save data, or risk
* corrupting internal libmemstat(3) data structures. A compile time check
@@ -70,7 +64,6 @@
#define MEMSTAT_ERROR_NOMEMORY 1 /* Out of memory. */
#define MEMSTAT_ERROR_VERSION 2 /* Unsupported version. */
#define MEMSTAT_ERROR_PERMISSION 3 /* Permission denied. */
-#define MEMSTAT_ERROR_TOOMANYCPUS 4 /* Too many CPUs. */
#define MEMSTAT_ERROR_DATAERROR 5 /* Error in stat data. */
#define MEMSTAT_ERROR_KVM 6 /* See kvm_geterr() for err. */
#define MEMSTAT_ERROR_KVM_NOSYMBOL 7 /* Symbol not available. */
@@ -139,6 +132,7 @@ uint64_t memstat_get_bytes(const struct memory_type *mtp);
uint64_t memstat_get_count(const struct memory_type *mtp);
uint64_t memstat_get_free(const struct memory_type *mtp);
uint64_t memstat_get_failures(const struct memory_type *mtp);
+uint64_t memstat_get_sleeps(const struct memory_type *mtp);
void *memstat_get_caller_pointer(const struct memory_type *mtp,
int index);
void memstat_set_caller_pointer(struct memory_type *mtp,
diff --git a/freebsd/lib/libmemstat/memstat_internal.h b/freebsd/lib/libmemstat/memstat_internal.h
index 7123518e..2416e09b 100644
--- a/freebsd/lib/libmemstat/memstat_internal.h
+++ b/freebsd/lib/libmemstat/memstat_internal.h
@@ -65,6 +65,7 @@ struct memory_type {
uint64_t mt_count; /* Number of current allocations. */
uint64_t mt_free; /* Number of cached free items. */
uint64_t mt_failures; /* Number of allocation failures. */
+ uint64_t mt_sleeps; /* Number of allocation sleeps. */
/*
* Caller-owned memory.
@@ -91,7 +92,7 @@ struct memory_type {
* Per-CPU measurements fall into two categories: per-CPU allocation,
* and per-CPU cache state.
*/
- struct {
+ struct mt_percpu_alloc_s {
uint64_t mtp_memalloced;/* Per-CPU mt_memalloced. */
uint64_t mtp_memfreed; /* Per-CPU mt_memfreed. */
uint64_t mtp_numallocs; /* Per-CPU mt_numallocs. */
@@ -99,11 +100,11 @@ struct memory_type {
uint64_t mtp_sizemask; /* Per-CPU mt_sizemask. */
void *mtp_caller_pointer[MEMSTAT_MAXCALLER];
uint64_t mtp_caller_uint64[MEMSTAT_MAXCALLER];
- } mt_percpu_alloc[MEMSTAT_MAXCPU];
+ } *mt_percpu_alloc;
- struct {
+ struct mt_percpu_cache_s {
uint64_t mtp_free; /* Per-CPU cache free items. */
- } mt_percpu_cache[MEMSTAT_MAXCPU];
+ } *mt_percpu_cache;
LIST_ENTRY(memory_type) mt_list; /* List of types. */
};
@@ -118,7 +119,8 @@ struct memory_type_list {
void _memstat_mtl_empty(struct memory_type_list *list);
struct memory_type *_memstat_mt_allocate(struct memory_type_list *list,
- int allocator, const char *name);
-void _memstat_mt_reset_stats(struct memory_type *mtp);
+ int allocator, const char *name, int maxcpus);
+void _memstat_mt_reset_stats(struct memory_type *mtp,
+ int maxcpus);
#endif /* !_MEMSTAT_INTERNAL_H_ */
diff --git a/freebsd/lib/libmemstat/memstat_malloc.c b/freebsd/lib/libmemstat/memstat_malloc.c
index 1f83de33..58e91e8d 100644
--- a/freebsd/lib/libmemstat/memstat_malloc.c
+++ b/freebsd/lib/libmemstat/memstat_malloc.c
@@ -98,11 +98,6 @@ retry:
return (-1);
}
- if (maxcpus > MEMSTAT_MAXCPU) {
- list->mtl_error = MEMSTAT_ERROR_TOOMANYCPUS;
- return (-1);
- }
-
size = sizeof(count);
if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
@@ -162,12 +157,6 @@ retry:
return (-1);
}
- if (mtshp->mtsh_maxcpus > MEMSTAT_MAXCPU) {
- list->mtl_error = MEMSTAT_ERROR_TOOMANYCPUS;
- free(buffer);
- return (-1);
- }
-
/*
* For the remainder of this function, we are quite trusting about
* the layout of structures and sizes, since we've determined we have
@@ -186,7 +175,7 @@ retry:
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
- mthp->mth_name);
+ mthp->mth_name, maxcpus);
if (mtp == NULL) {
_memstat_mtl_empty(list);
free(buffer);
@@ -197,7 +186,7 @@ retry:
/*
* Reset the statistics on a current node.
*/
- _memstat_mt_reset_stats(mtp);
+ _memstat_mt_reset_stats(mtp, maxcpus);
for (j = 0; j < maxcpus; j++) {
mtsp = (struct malloc_type_stats *)p;
@@ -298,7 +287,7 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
void *kmemstatistics;
int hint_dontsearch, j, mp_maxcpus, ret;
char name[MEMTYPE_MAXNAME];
- struct malloc_type_stats mts[MEMSTAT_MAXCPU], *mtsp;
+ struct malloc_type_stats *mts, *mtsp;
struct malloc_type_internal *mtip;
struct malloc_type type, *typep;
kvm_t *kvm;
@@ -325,11 +314,6 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
return (-1);
}
- if (mp_maxcpus > MEMSTAT_MAXCPU) {
- list->mtl_error = MEMSTAT_ERROR_TOOMANYCPUS;
- return (-1);
- }
-
ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics,
sizeof(kmemstatistics), 0);
if (ret != 0) {
@@ -337,10 +321,17 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
return (-1);
}
+ mts = malloc(sizeof(struct malloc_type_stats) * mp_maxcpus);
+ if (mts == NULL) {
+ list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
+ return (-1);
+ }
+
for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) {
ret = kread(kvm, typep, &type, sizeof(type), 0);
if (ret != 0) {
_memstat_mtl_empty(list);
+ free(mts);
list->mtl_error = ret;
return (-1);
}
@@ -348,6 +339,7 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
MEMTYPE_MAXNAME);
if (ret != 0) {
_memstat_mtl_empty(list);
+ free(mts);
list->mtl_error = ret;
return (-1);
}
@@ -361,6 +353,7 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
sizeof(struct malloc_type_stats), 0);
if (ret != 0) {
_memstat_mtl_empty(list);
+ free(mts);
list->mtl_error = ret;
return (-1);
}
@@ -371,9 +364,10 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
- name);
+ name, mp_maxcpus);
if (mtp == NULL) {
_memstat_mtl_empty(list);
+ free(mts);
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
@@ -382,7 +376,7 @@ memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
* This logic is replicated from kern_malloc.c, and should
* be kept in sync.
*/
- _memstat_mt_reset_stats(mtp);
+ _memstat_mt_reset_stats(mtp, mp_maxcpus);
for (j = 0; j < mp_maxcpus; j++) {
mtsp = &mts[j];
mtp->mt_memalloced += mtsp->mts_memalloced;
diff --git a/freebsd/lib/libmemstat/memstat_uma.c b/freebsd/lib/libmemstat/memstat_uma.c
index 127a7662..86059b41 100644
--- a/freebsd/lib/libmemstat/memstat_uma.c
+++ b/freebsd/lib/libmemstat/memstat_uma.c
@@ -29,6 +29,7 @@
*/
#include <rtems/bsd/sys/param.h>
+#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
@@ -45,6 +46,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include "memstat.h"
#include "memstat_internal.h"
@@ -78,7 +80,7 @@ memstat_sysctl_uma(struct memory_type_list *list, int flags)
struct uma_type_header *uthp;
struct uma_percpu_stat *upsp;
struct memory_type *mtp;
- int count, hint_dontsearch, i, j, maxcpus;
+ int count, hint_dontsearch, i, j, maxcpus, maxid;
char *buffer, *p;
size_t size;
@@ -92,24 +94,19 @@ memstat_sysctl_uma(struct memory_type_list *list, int flags)
* from the header.
*/
retry:
- size = sizeof(maxcpus);
- if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) {
+ size = sizeof(maxid);
+ if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
- if (size != sizeof(maxcpus)) {
+ if (size != sizeof(maxid)) {
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
- if (maxcpus > MEMSTAT_MAXCPU) {
- list->mtl_error = MEMSTAT_ERROR_TOOMANYCPUS;
- return (-1);
- }
-
size = sizeof(count);
if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
@@ -124,7 +121,7 @@ retry:
}
size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) *
- maxcpus);
+ (maxid + 1));
buffer = malloc(size);
if (buffer == NULL) {
@@ -169,12 +166,6 @@ retry:
return (-1);
}
- if (ushp->ush_maxcpus > MEMSTAT_MAXCPU) {
- list->mtl_error = MEMSTAT_ERROR_TOOMANYCPUS;
- free(buffer);
- return (-1);
- }
-
/*
* For the remainder of this function, we are quite trusting about
* the layout of structures and sizes, since we've determined we have
@@ -193,7 +184,7 @@ retry:
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
- uthp->uth_name);
+ uthp->uth_name, maxid + 1);
if (mtp == NULL) {
_memstat_mtl_empty(list);
free(buffer);
@@ -204,11 +195,12 @@ retry:
/*
* Reset the statistics on a current node.
*/
- _memstat_mt_reset_stats(mtp);
+ _memstat_mt_reset_stats(mtp, maxid + 1);
mtp->mt_numallocs = uthp->uth_allocs;
mtp->mt_numfrees = uthp->uth_frees;
mtp->mt_failures = uthp->uth_fails;
+ mtp->mt_sleeps = uthp->uth_sleeps;
for (j = 0; j < maxcpus; j++) {
upsp = (struct uma_percpu_stat *)p;
@@ -314,7 +306,8 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
struct uma_keg *kzp, kz;
int hint_dontsearch, i, mp_maxid, ret;
char name[MEMTYPE_MAXNAME];
- __cpumask_t all_cpus;
+ cpuset_t all_cpus;
+ long cpusetsize;
kvm_t *kvm;
kvm = (kvm_t *)kvm_handle;
@@ -338,7 +331,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
list->mtl_error = ret;
return (-1);
}
- ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0);
+ cpusetsize = sysconf(_SC_CPUSET_SIZE);
+ if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
+ list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
+ return (-1);
+ }
+ CPU_ZERO(&all_cpus);
+ ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
@@ -390,7 +389,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
- name);
+ name, mp_maxid + 1);
if (mtp == NULL) {
free(ucp_array);
_memstat_mtl_empty(list);
@@ -400,14 +399,15 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
/*
* Reset the statistics on a current node.
*/
- _memstat_mt_reset_stats(mtp);
+ _memstat_mt_reset_stats(mtp, mp_maxid + 1);
mtp->mt_numallocs = uz.uz_allocs;
mtp->mt_numfrees = uz.uz_frees;
mtp->mt_failures = uz.uz_fails;
+ mtp->mt_sleeps = uz.uz_sleeps;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
- if ((all_cpus & (1 << i)) == 0)
+ if (!CPU_ISSET(i, &all_cpus))
continue;
ucp = &ucp_array[i];
mtp->mt_numallocs += ucp->uc_allocs;
diff --git a/freebsd/lib/libutil/expand_number.c b/freebsd/lib/libutil/expand_number.c
index 596a3976..6303c2b5 100644
--- a/freebsd/lib/libutil/expand_number.c
+++ b/freebsd/lib/libutil/expand_number.c
@@ -38,8 +38,8 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
/*
- * Convert an expression of the following forms to a int64_t.
- * 1) A positive decimal number.
+ * Convert an expression of the following forms to a uint64_t.
+ * 1) A positive decimal number.
* 2) A positive decimal number followed by a 'b' or 'B' (mult by 1).
* 3) A positive decimal number followed by a 'k' or 'K' (mult by 1 << 10).
* 4) A positive decimal number followed by a 'm' or 'M' (mult by 1 << 20).
@@ -49,14 +49,13 @@ __FBSDID("$FreeBSD$");
* 8) A positive decimal number followed by a 'e' or 'E' (mult by 1 << 60).
*/
int
-expand_number(const char *buf, int64_t *num)
+expand_number(const char *buf, uint64_t *num)
{
- static const char unit[] = "bkmgtpe";
- char *endptr, s;
- int64_t number;
- int i;
+ uint64_t number;
+ unsigned shift;
+ char *endptr;
- number = strtoimax(buf, &endptr, 0);
+ number = strtoumax(buf, &endptr, 0);
if (endptr == buf) {
/* No valid digits. */
@@ -64,39 +63,41 @@ expand_number(const char *buf, int64_t *num)
return (-1);
}
- if (*endptr == '\0') {
- /* No unit. */
- *num = number;
- return (0);
- }
-
- s = tolower(*endptr);
- switch (s) {
- case 'b':
- case 'k':
- case 'm':
- case 'g':
- case 't':
- case 'p':
+ switch (tolower((unsigned char)*endptr)) {
case 'e':
+ shift = 60;
+ break;
+ case 'p':
+ shift = 50;
break;
+ case 't':
+ shift = 40;
+ break;
+ case 'g':
+ shift = 30;
+ break;
+ case 'm':
+ shift = 20;
+ break;
+ case 'k':
+ shift = 10;
+ break;
+ case 'b':
+ case '\0': /* No unit. */
+ *num = number;
+ return (0);
default:
/* Unrecognized unit. */
errno = EINVAL;
return (-1);
}
- for (i = 0; unit[i] != '\0'; i++) {
- if (s == unit[i])
- break;
- if ((number < 0 && (number << 10) > number) ||
- (number >= 0 && (number << 10) < number)) {
- errno = ERANGE;
- return (-1);
- }
- number <<= 10;
+ if ((number << shift) >> shift != number) {
+ /* Overflow */
+ errno = ERANGE;
+ return (-1);
}
- *num = number;
+ *num = number << shift;
return (0);
}
diff --git a/freebsd/lib/libutil/libutil.h b/freebsd/lib/libutil/libutil.h
index 4b3c31c6..d5808d33 100644
--- a/freebsd/lib/libutil/libutil.h
+++ b/freebsd/lib/libutil/libutil.h
@@ -41,20 +41,16 @@
#include <sys/cdefs.h>
#include <rtems/bsd/sys/_types.h>
+#include <sys/_stdint.h>
#ifndef _GID_T_DECLARED
typedef __gid_t gid_t;
#define _GID_T_DECLARED
#endif
-#ifndef _INT64_T_DECLARED
-typedef __int64_t int64_t;
-#define _INT64_T_DECLARED
-#endif
-
-#ifndef _UINT64_T_DECLARED
-typedef __uint64_t uint64_t;
-#define _UINT64_T_DECLARED
+#ifndef _MODE_T_DECLARED
+typedef __mode_t mode_t;
+#define _MODE_T_DECLARED
#endif
#ifndef _PID_T_DECLARED
@@ -72,81 +68,84 @@ typedef __uid_t uid_t;
#define _UID_T_DECLARED
#endif
-#define PROPERTY_MAX_NAME 64
-#define PROPERTY_MAX_VALUE 512
+#define PROPERTY_MAX_NAME 64
+#define PROPERTY_MAX_VALUE 512
-/* for properties.c */
+/* For properties.c. */
typedef struct _property {
struct _property *next;
- char *name;
- char *value;
+ char *name;
+ char *value;
} *properties;
-#ifdef _SYS_PARAM_H_
-/* for pidfile.c */
-struct pidfh {
- int pf_fd;
- char pf_path[MAXPATHLEN + 1];
- __dev_t pf_dev;
- ino_t pf_ino;
-};
-#endif
-
-/* Avoid pulling in all the include files for no need */
+/* Avoid pulling in all the include files for no need. */
+struct in_addr;
+struct pidfh;
+struct sockaddr;
struct termios;
struct winsize;
-struct utmp;
-struct in_addr;
-struct kinfo_file;
-struct kinfo_vmentry;
__BEGIN_DECLS
+char *auth_getval(const char *_name);
void clean_environment(const char * const *_white,
const char * const *_more_white);
+int expand_number(const char *_buf, uint64_t *_num);
int extattr_namespace_to_string(int _attrnamespace, char **_string);
int extattr_string_to_namespace(const char *_string, int *_attrnamespace);
int flopen(const char *_path, int _flags, ...);
-void hexdump(const void *ptr, int length, const char *hdr, int flags);
-void login(struct utmp *_ut);
-int login_tty(int _fd);
-int logout(const char *_line);
-void logwtmp(const char *_line, const char *_name, const char *_host);
-void trimdomain(char *_fullhost, int _hostsize);
-int openpty(int *_amaster, int *_aslave, char *_name,
- struct termios *_termp, struct winsize *_winp);
int forkpty(int *_amaster, char *_name,
- struct termios *_termp, struct winsize *_winp);
+ struct termios *_termp, struct winsize *_winp);
+void hexdump(const void *_ptr, int _length, const char *_hdr, int _flags);
int humanize_number(char *_buf, size_t _len, int64_t _number,
const char *_suffix, int _scale, int _flags);
-int expand_number(const char *_buf, int64_t *_num);
-const char *uu_lockerr(int _uu_lockresult);
-int uu_lock(const char *_ttyname);
-int uu_unlock(const char *_ttyname);
-int uu_lock_txfr(const char *_ttyname, pid_t _pid);
-int _secure_path(const char *_path, uid_t _uid, gid_t _gid);
-properties properties_read(int fd);
-void properties_free(properties list);
-char *property_find(properties list, const char *name);
-char *auth_getval(const char *name);
-int realhostname(char *host, size_t hsize, const struct in_addr *ip);
-struct sockaddr;
-int realhostname_sa(char *host, size_t hsize, struct sockaddr *addr,
- int addrlen);
-
-int kld_isloaded(const char *name);
-int kld_load(const char *name);
struct kinfo_file *
kinfo_getfile(pid_t _pid, int *_cntp);
struct kinfo_vmentry *
kinfo_getvmmap(pid_t _pid, int *_cntp);
+struct kinfo_proc *
+ kinfo_getallproc(int *_cntp);
+struct kinfo_proc *
+ kinfo_getproc(pid_t _pid);
+int kld_isloaded(const char *_name);
+int kld_load(const char *_name);
+int login_tty(int _fd);
+int openpty(int *_amaster, int *_aslave, char *_name,
+ struct termios *_termp, struct winsize *_winp);
+int pidfile_close(struct pidfh *_pfh);
+int pidfile_fileno(const struct pidfh *_pfh);
+struct pidfh *
+ pidfile_open(const char *_path, mode_t _mode, pid_t *_pidptr);
+int pidfile_remove(struct pidfh *_pfh);
+int pidfile_write(struct pidfh *_pfh);
+void properties_free(properties _list);
+char *property_find(properties _list, const char *_name);
+properties
+ properties_read(int _fd);
+int realhostname(char *_host, size_t _hsize, const struct in_addr *_ip);
+int realhostname_sa(char *_host, size_t _hsize, struct sockaddr *_addr,
+ int _addrlen);
+int _secure_path(const char *_path, uid_t _uid, gid_t _gid);
+void trimdomain(char *_fullhost, int _hostsize);
+const char *
+ uu_lockerr(int _uu_lockresult);
+int uu_lock(const char *_ttyname);
+int uu_unlock(const char *_ttyname);
+int uu_lock_txfr(const char *_ttyname, pid_t _pid);
-#ifdef _STDIO_H_ /* avoid adding new includes */
-char *fparseln(FILE *, size_t *, size_t *, const char[3], int);
+/*
+ * Conditionally prototype the following functions if the include
+ * files upon which they depend have been included.
+ */
+#ifdef _STDIO_H_
+char *fparseln(FILE *_fp, size_t *_len, size_t *_lineno,
+ const char _delim[3], int _flags);
#endif
#ifdef _PWD_H_
-int pw_copy(int _ffd, int _tfd, const struct passwd *_pw, struct passwd *_old_pw);
-struct passwd *pw_dup(const struct passwd *_pw);
+int pw_copy(int _ffd, int _tfd, const struct passwd *_pw,
+ struct passwd *_old_pw);
+struct passwd
+ *pw_dup(const struct passwd *_pw);
int pw_edit(int _notsetuid);
int pw_equal(const struct passwd *_pw1, const struct passwd *_pw2);
void pw_fini(void);
@@ -155,8 +154,10 @@ char *pw_make(const struct passwd *_pw);
char *pw_make_v7(const struct passwd *_pw);
int pw_mkdb(const char *_user);
int pw_lock(void);
-struct passwd *pw_scan(const char *_line, int _flags);
-const char *pw_tempname(void);
+struct passwd *
+ pw_scan(const char *_line, int _flags);
+const char *
+ pw_tempname(void);
int pw_tmp(int _mfd);
#endif
@@ -173,31 +174,28 @@ int gr_tmp(int _mdf);
struct group *gr_scan(const char *line);
#endif
-#ifdef _SYS_PARAM_H_
-struct pidfh *pidfile_open(const char *path, mode_t mode, pid_t *pidptr);
-int pidfile_write(struct pidfh *pfh);
-int pidfile_close(struct pidfh *pfh);
-int pidfile_remove(struct pidfh *pfh);
+#ifdef _UFS_UFS_QUOTA_H_
+struct fstab;
+struct quotafile;
+int quota_check_path(const struct quotafile *_qf, const char *_path);
+void quota_close(struct quotafile *_qf);
+int quota_convert(struct quotafile *_qf, int _wordsize);
+const char *
+ quota_fsname(const struct quotafile *_qf);
+int quota_maxid(struct quotafile *_qf);
+int quota_off(struct quotafile *_qf);
+int quota_on(struct quotafile *_qf);
+struct quotafile *
+ quota_open(struct fstab *_fs, int _quotatype, int _openflags);
+const char *
+ quota_qfname(const struct quotafile *_qf);
+int quota_read(struct quotafile *_qf, struct dqblk *_dqb, int _id);
+int quota_write_limits(struct quotafile *_qf, struct dqblk *_dqb, int _id);
+int quota_write_usage(struct quotafile *_qf, struct dqblk *_dqb, int _id);
#endif
__END_DECLS
-#define UU_LOCK_INUSE (1)
-#define UU_LOCK_OK (0)
-#define UU_LOCK_OPEN_ERR (-1)
-#define UU_LOCK_READ_ERR (-2)
-#define UU_LOCK_CREAT_ERR (-3)
-#define UU_LOCK_WRITE_ERR (-4)
-#define UU_LOCK_LINK_ERR (-5)
-#define UU_LOCK_TRY_ERR (-6)
-#define UU_LOCK_OWNER_ERR (-7)
-
-/* return values from realhostname() */
-#define HOSTNAME_FOUND (0)
-#define HOSTNAME_INCORRECTNAME (1)
-#define HOSTNAME_INVALIDADDR (2)
-#define HOSTNAME_INVALIDNAME (3)
-
/* fparseln(3) */
#define FPARSELN_UNESCESC 0x01
#define FPARSELN_UNESCCONT 0x02
@@ -205,26 +203,43 @@ __END_DECLS
#define FPARSELN_UNESCREST 0x08
#define FPARSELN_UNESCALL 0x0f
-/* pw_scan() */
-#define PWSCAN_MASTER 0x01
-#define PWSCAN_WARN 0x02
-
-/* humanize_number(3) */
-#define HN_DECIMAL 0x01
-#define HN_NOSPACE 0x02
-#define HN_B 0x04
-#define HN_DIVISOR_1000 0x08
-#define HN_IEC_PREFIXES 0x10
-
-/* maxscale = 0x07 */
-#define HN_GETSCALE 0x10
-#define HN_AUTOSCALE 0x20
-
-/* hexdump(3) */
+/* Flags for hexdump(3). */
#define HD_COLUMN_MASK 0xff
#define HD_DELIM_MASK 0xff00
#define HD_OMIT_COUNT (1 << 16)
#define HD_OMIT_HEX (1 << 17)
#define HD_OMIT_CHARS (1 << 18)
+/* Values for humanize_number(3)'s flags parameter. */
+#define HN_DECIMAL 0x01
+#define HN_NOSPACE 0x02
+#define HN_B 0x04
+#define HN_DIVISOR_1000 0x08
+#define HN_IEC_PREFIXES 0x10
+
+/* Values for humanize_number(3)'s scale parameter. */
+#define HN_GETSCALE 0x10
+#define HN_AUTOSCALE 0x20
+
+/* Return values from realhostname(). */
+#define HOSTNAME_FOUND 0
+#define HOSTNAME_INCORRECTNAME 1
+#define HOSTNAME_INVALIDADDR 2
+#define HOSTNAME_INVALIDNAME 3
+
+/* Flags for pw_scan(). */
+#define PWSCAN_MASTER 0x01
+#define PWSCAN_WARN 0x02
+
+/* Return values from uu_lock(). */
+#define UU_LOCK_INUSE 1
+#define UU_LOCK_OK 0
+#define UU_LOCK_OPEN_ERR (-1)
+#define UU_LOCK_READ_ERR (-2)
+#define UU_LOCK_CREAT_ERR (-3)
+#define UU_LOCK_WRITE_ERR (-4)
+#define UU_LOCK_LINK_ERR (-5)
+#define UU_LOCK_TRY_ERR (-6)
+#define UU_LOCK_OWNER_ERR (-7)
+
#endif /* !_LIBUTIL_H_ */
diff --git a/freebsd/sbin/dhclient/clparse.c b/freebsd/sbin/dhclient/clparse.c
index 7fb278dd..b52bc473 100644
--- a/freebsd/sbin/dhclient/clparse.c
+++ b/freebsd/sbin/dhclient/clparse.c
@@ -875,6 +875,7 @@ parse_string_list(FILE *cfile, struct string_list **lp, int multiple)
{
int token;
char *val;
+ size_t valsize;
struct string_list *cur, *tmp;
/* Find the last medium in the media list. */
@@ -892,10 +893,11 @@ parse_string_list(FILE *cfile, struct string_list **lp, int multiple)
return;
}
- tmp = new_string_list(strlen(val) + 1);
+ valsize = strlen(val) + 1;
+ tmp = new_string_list(valsize);
if (tmp == NULL)
error("no memory for string list entry.");
- strlcpy(tmp->string, val, strlen(val) + 1);
+ memcpy(tmp->string, val, valsize);
tmp->next = NULL;
/* Store this medium at the end of the media list. */
diff --git a/freebsd/sbin/dhclient/packet.c b/freebsd/sbin/dhclient/packet.c
index f5366d3c..e4fa0e86 100644
--- a/freebsd/sbin/dhclient/packet.c
+++ b/freebsd/sbin/dhclient/packet.c
@@ -130,7 +130,7 @@ assemble_udp_ip_header(unsigned char *buf, int *bufix, u_int32_t from,
ip.ip_len = htons(sizeof(ip) + sizeof(udp) + len);
ip.ip_id = 0;
ip.ip_off = 0;
- ip.ip_ttl = 16;
+ ip.ip_ttl = 128;
ip.ip_p = IPPROTO_UDP;
ip.ip_sum = 0;
ip.ip_src.s_addr = from;
diff --git a/freebsd/sbin/dhclient/parse.c b/freebsd/sbin/dhclient/parse.c
index 5996ca36..19d407ea 100644
--- a/freebsd/sbin/dhclient/parse.c
+++ b/freebsd/sbin/dhclient/parse.c
@@ -118,6 +118,7 @@ char *
parse_string(FILE *cfile)
{
char *val, *s;
+ size_t valsize;
int token;
token = next_token(&val, cfile);
@@ -126,10 +127,11 @@ parse_string(FILE *cfile)
skip_to_semi(cfile);
return (NULL);
}
- s = malloc(strlen(val) + 1);
+ valsize = strlen(val) + 1;
+ s = malloc(valsize);
if (!s)
error("no memory for string %s.", val);
- strlcpy(s, val, strlen(val) + 1);
+ memcpy(s, val, valsize);
if (!parse_semi(cfile))
return (NULL);
@@ -244,6 +246,7 @@ parse_numeric_aggregate(FILE *cfile, unsigned char *buf, int *max,
unsigned char *bufp = buf, *s = NULL;
int token, count = 0;
char *val, *t;
+ size_t valsize;
pair c = NULL;
if (!bufp && *max) {
@@ -290,10 +293,11 @@ parse_numeric_aggregate(FILE *cfile, unsigned char *buf, int *max,
convert_num(s, val, base, size);
s += size / 8;
} else {
- t = malloc(strlen(val) + 1);
+ valsize = strlen(val) + 1;
+ t = malloc(valsize);
if (!t)
error("no temp space for number.");
- strlcpy(t, val, strlen(val) + 1);
+ memcpy(t, val, valsize);
c = cons(t, c);
}
} while (++count != *max);
diff --git a/freebsd/sbin/ifconfig/af_inet.c b/freebsd/sbin/ifconfig/af_inet.c
index 5d0a3d27..0e6ace11 100644
--- a/freebsd/sbin/ifconfig/af_inet.c
+++ b/freebsd/sbin/ifconfig/af_inet.c
@@ -206,9 +206,16 @@ void
#endif /* __rtems__ */
inet_ctor(void)
{
+
+#ifndef RESCUE
+ if (!feature_present("inet"))
+ return;
+#endif
+
#ifdef __rtems__
memset(&in_addreq, 0, sizeof(in_addreq));
memset(&in_ridreq, 0, sizeof(in_ridreq));
#endif /* __rtems__ */
+
af_register(&af_inet);
}
diff --git a/freebsd/sbin/ifconfig/af_inet6.c b/freebsd/sbin/ifconfig/af_inet6.c
index 894c0493..f74d3e8b 100644
--- a/freebsd/sbin/ifconfig/af_inet6.c
+++ b/freebsd/sbin/ifconfig/af_inet6.c
@@ -72,6 +72,7 @@ static int explicit_prefix = 0;
extern void setnd6flags(const char *, int, int, const struct afswtch *);
extern void setnd6defif(const char *, int, int, const struct afswtch *);
+extern void nd6_status(int);
static char addr_buf[MAXHOSTNAMELEN *2 + 1]; /*for getnameinfo()*/
@@ -510,6 +511,8 @@ static struct cmd inet6_cmds[] = {
DEF_CMD("-autoconf", -IN6_IFF_AUTOCONF, setip6flags),
DEF_CMD("accept_rtadv", ND6_IFF_ACCEPT_RTADV, setnd6flags),
DEF_CMD("-accept_rtadv",-ND6_IFF_ACCEPT_RTADV, setnd6flags),
+ DEF_CMD("no_radr", ND6_IFF_NO_RADR, setnd6flags),
+ DEF_CMD("-no_radr", -ND6_IFF_NO_RADR, setnd6flags),
DEF_CMD("defaultif", 1, setnd6defif),
DEF_CMD("-defaultif", -1, setnd6defif),
DEF_CMD("ifdisabled", ND6_IFF_IFDISABLED, setnd6flags),
@@ -518,6 +521,10 @@ static struct cmd inet6_cmds[] = {
DEF_CMD("-nud", -ND6_IFF_PERFORMNUD, setnd6flags),
DEF_CMD("prefer_source",ND6_IFF_PREFER_SOURCE, setnd6flags),
DEF_CMD("-prefer_source",-ND6_IFF_PREFER_SOURCE,setnd6flags),
+ DEF_CMD("auto_linklocal",ND6_IFF_AUTO_LINKLOCAL,setnd6flags),
+ DEF_CMD("-auto_linklocal",-ND6_IFF_AUTO_LINKLOCAL,setnd6flags),
+ DEF_CMD("no_prefer_iface",ND6_IFF_NO_PREFER_IFACE,setnd6flags),
+ DEF_CMD("-no_prefer_iface",-ND6_IFF_NO_PREFER_IFACE,setnd6flags),
DEF_CMD_ARG("pltime", setip6pltime),
DEF_CMD_ARG("vltime", setip6vltime),
DEF_CMD("eui64", 0, setip6eui64),
@@ -529,6 +536,7 @@ static struct afswtch af_inet6 = {
.af_status = in6_status,
.af_getaddr = in6_getaddr,
.af_getprefix = in6_getprefix,
+ .af_other_status = nd6_status,
.af_postproc = in6_postproc,
.af_status_tunnel = in6_status_tunnel,
.af_settunnel = in6_set_tunnel,
@@ -564,6 +572,11 @@ inet6_ctor(void)
#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
+#ifndef RESCUE
+ if (!feature_present("inet6"))
+ return;
+#endif
+
for (i = 0; i < N(inet6_cmds); i++)
cmd_register(&inet6_cmds[i]);
af_register(&af_inet6);
diff --git a/freebsd/sbin/ifconfig/af_nd6.c b/freebsd/sbin/ifconfig/af_nd6.c
index 2fc62941..a1e930b0 100644
--- a/freebsd/sbin/ifconfig/af_nd6.c
+++ b/freebsd/sbin/ifconfig/af_nd6.c
@@ -60,11 +60,12 @@ static const char rcsid[] =
#define MAX_SYSCTL_TRY 5
#define ND6BITS "\020\001PERFORMNUD\002ACCEPT_RTADV\003PREFER_SOURCE" \
"\004IFDISABLED\005DONT_SET_IFROUTE\006AUTO_LINKLOCAL" \
- "\020DEFAULTIF"
+ "\007NO_RADR\010NO_PREFER_IFACE\020DEFAULTIF"
static int isnd6defif(int);
void setnd6flags(const char *, int, int, const struct afswtch *);
void setnd6defif(const char *, int, int, const struct afswtch *);
+void nd6_status(int);
void
setnd6flags(const char *dummyaddr __unused,
@@ -138,74 +139,25 @@ isnd6defif(int s)
return (ndifreq.ifindex == ifindex);
}
-static void
+void
nd6_status(int s)
{
struct in6_ndireq nd;
- struct rt_msghdr *rtm;
- size_t needed;
- char *buf, *next;
- int mib[6], ntry;
int s6;
int error;
- int isinet6, isdefif;
-
- /* Check if the interface has at least one IPv6 address. */
- mib[0] = CTL_NET;
- mib[1] = PF_ROUTE;
- mib[2] = 0;
- mib[3] = AF_INET6;
- mib[4] = NET_RT_IFLIST;
- mib[5] = if_nametoindex(ifr.ifr_name);
-
- /* Try to prevent a race between two sysctls. */
- ntry = 0;
- do {
- error = sysctl(mib, 6, NULL, &needed, NULL, 0);
- if (error) {
- warn("sysctl(NET_RT_IFLIST)/estimate");
- return;
- }
- buf = malloc(needed);
- if (buf == NULL) {
- warn("malloc for sysctl(NET_RT_IFLIST) failed");
- return;
- }
- if ((error = sysctl(mib, 6, buf, &needed, NULL, 0)) < 0) {
- if (errno != ENOMEM || ++ntry >= MAX_SYSCTL_TRY) {
- warn("sysctl(NET_RT_IFLIST)/get");
- free(buf);
- return;
- }
- free(buf);
- buf = NULL;
- }
- } while (buf == NULL);
-
- isinet6 = 0;
- for (next = buf; next < buf + needed; next += rtm->rtm_msglen) {
- rtm = (struct rt_msghdr *)next;
-
- if (rtm->rtm_version != RTM_VERSION)
- continue;
- if (rtm->rtm_type == RTM_NEWADDR) {
- isinet6 = 1;
- break;
- }
- }
- free(buf);
- if (!isinet6)
- return;
+ int isdefif;
memset(&nd, 0, sizeof(nd));
strncpy(nd.ifname, ifr.ifr_name, sizeof(nd.ifname));
if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
- warn("socket(AF_INET6, SOCK_DGRAM)");
+ if (errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
+ warn("socket(AF_INET6, SOCK_DGRAM)");
return;
}
error = ioctl(s6, SIOCGIFINFO_IN6, &nd);
if (error) {
- warn("ioctl(SIOCGIFINFO_IN6)");
+ if (errno != EPFNOSUPPORT)
+ warn("ioctl(SIOCGIFINFO_IN6)");
close(s6);
return;
}
@@ -217,19 +169,3 @@ nd6_status(int s)
(unsigned int)(nd.ndi.flags | (isdefif << 15)), ND6BITS);
putchar('\n');
}
-
-static struct afswtch af_nd6 = {
- .af_name = "nd6",
- .af_af = AF_LOCAL,
- .af_other_status= nd6_status,
-};
-
-#ifndef __rtems__
-static __constructor void
-#else /* __rtems__ */
-void
-#endif /* __rtems__ */
-nd6_ctor(void)
-{
- af_register(&af_nd6);
-}
diff --git a/freebsd/sbin/ifconfig/ifconfig.c b/freebsd/sbin/ifconfig/ifconfig.c
index 0772b647..6c7b8a36 100644
--- a/freebsd/sbin/ifconfig/ifconfig.c
+++ b/freebsd/sbin/ifconfig/ifconfig.c
@@ -175,7 +175,6 @@ int rtems_bsd_command_ifconfig(int argc, char *argv[])
lagg_ctor();
link_ctor();
mac_ctor();
- nd6_ctor();
pfsync_ctor();
vlan_ctor();
@@ -198,7 +197,7 @@ main(int argc, char *argv[])
struct ifaddrs *ifap, *ifa;
struct ifreq paifr;
const struct sockaddr_dl *sdl;
- char options[1024], *cp;
+ char options[1024], *cp, *namecp = NULL;
const char *ifname;
struct option *p;
size_t iflen;
@@ -279,8 +278,10 @@ main(int argc, char *argv[])
ifindex = 0;
if (argc == 1) {
afp = af_getbyname(*argv);
- if (afp == NULL)
+ if (afp == NULL) {
+ warnx("Address family '%s' unknown.", *argv);
usage();
+ }
if (afp->af_name != NULL)
argc--, argv++;
/* leave with afp non-zero */
@@ -354,7 +355,7 @@ main(int argc, char *argv[])
sdl = (const struct sockaddr_dl *) ifa->ifa_addr;
else
sdl = NULL;
- if (cp != NULL && strcmp(cp, ifa->ifa_name) == 0)
+ if (cp != NULL && strcmp(cp, ifa->ifa_name) == 0 && !namesonly)
continue;
iflen = strlcpy(name, ifa->ifa_name, sizeof(name));
if (iflen >= sizeof(name)) {
@@ -370,16 +371,34 @@ main(int argc, char *argv[])
continue;
if (uponly && (ifa->ifa_flags & IFF_UP) == 0)
continue;
- ifindex++;
/*
* Are we just listing the interfaces?
*/
if (namesonly) {
+ if (namecp == cp)
+ continue;
+ if (afp != NULL) {
+ /* special case for "ether" address family */
+ if (!strcmp(afp->af_name, "ether")) {
+ if (sdl == NULL ||
+ (sdl->sdl_type != IFT_ETHER &&
+ sdl->sdl_type != IFT_L2VLAN &&
+ sdl->sdl_type != IFT_BRIDGE) ||
+ sdl->sdl_alen != ETHER_ADDR_LEN)
+ continue;
+ } else {
+ if (ifa->ifa_addr->sa_family != afp->af_af)
+ continue;
+ }
+ }
+ namecp = cp;
+ ifindex++;
if (ifindex > 1)
printf(" ");
fputs(name, stdout);
continue;
}
+ ifindex++;
if (argc > 0)
ifconfig(argc, argv, 0, afp);
@@ -525,7 +544,30 @@ ifconfig(int argc, char *const *argv, int iscreate, const struct afswtch *uafp)
int s;
strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
- afp = uafp != NULL ? uafp : af_getbyname("inet");
+ afp = NULL;
+ if (uafp != NULL)
+ afp = uafp;
+ /*
+ * This is the historical "accident" allowing users to configure IPv4
+ * addresses without the "inet" keyword which while a nice feature has
+ * proven to complicate other things. We cannot remove this but only
+ * make sure we will never have a similar implicit default for IPv6 or
+ * any other address family. We need a fallback though for
+ * ifconfig IF up/down etc. to work without INET support as people
+ * never used ifconfig IF link up/down, etc. either.
+ */
+#ifndef RESCUE
+#ifdef INET
+ if (afp == NULL && feature_present("inet"))
+ afp = af_getbyname("inet");
+#endif
+#endif
+ if (afp == NULL)
+ afp = af_getbyname("link");
+ if (afp == NULL) {
+ warnx("Please specify an address_family.");
+ usage();
+ }
top:
ifr.ifr_addr.sa_family =
afp->af_af == AF_LINK || afp->af_af == AF_UNSPEC ?
@@ -928,7 +970,8 @@ unsetifdescr(const char *val, int value, int s, const struct afswtch *afp)
#define IFCAPBITS \
"\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \
"\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \
-"\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP"
+"\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \
+"\26RXCSUM_IPV6\27TXCSUM_IPV6"
/*
* Print the status of the interface. If an address family was
@@ -1192,6 +1235,10 @@ static struct cmd basic_cmds[] = {
DEF_CMD("-monitor", -IFF_MONITOR, setifflags),
DEF_CMD("staticarp", IFF_STATICARP, setifflags),
DEF_CMD("-staticarp", -IFF_STATICARP, setifflags),
+ DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap),
+ DEF_CMD("-rxcsum6", -IFCAP_RXCSUM_IPV6, setifcap),
+ DEF_CMD("txcsum6", IFCAP_TXCSUM_IPV6, setifcap),
+ DEF_CMD("-txcsum6", -IFCAP_TXCSUM_IPV6, setifcap),
DEF_CMD("rxcsum", IFCAP_RXCSUM, setifcap),
DEF_CMD("-rxcsum", -IFCAP_RXCSUM, setifcap),
DEF_CMD("txcsum", IFCAP_TXCSUM, setifcap),
@@ -1200,8 +1247,14 @@ static struct cmd basic_cmds[] = {
DEF_CMD("-netcons", -IFCAP_NETCONS, setifcap),
DEF_CMD("polling", IFCAP_POLLING, setifcap),
DEF_CMD("-polling", -IFCAP_POLLING, setifcap),
+ DEF_CMD("tso6", IFCAP_TSO6, setifcap),
+ DEF_CMD("-tso6", -IFCAP_TSO6, setifcap),
+ DEF_CMD("tso4", IFCAP_TSO4, setifcap),
+ DEF_CMD("-tso4", -IFCAP_TSO4, setifcap),
DEF_CMD("tso", IFCAP_TSO, setifcap),
DEF_CMD("-tso", -IFCAP_TSO, setifcap),
+ DEF_CMD("toe", IFCAP_TOE, setifcap),
+ DEF_CMD("-toe", -IFCAP_TOE, setifcap),
DEF_CMD("lro", IFCAP_LRO, setifcap),
DEF_CMD("-lro", -IFCAP_LRO, setifcap),
DEF_CMD("wol", IFCAP_WOL, setifcap),
diff --git a/freebsd/sbin/ifconfig/ifconfig.h b/freebsd/sbin/ifconfig/ifconfig.h
index 46d8382e..074e810e 100644
--- a/freebsd/sbin/ifconfig/ifconfig.h
+++ b/freebsd/sbin/ifconfig/ifconfig.h
@@ -166,7 +166,6 @@ void inet_ctor(void);
void lagg_ctor(void);
void link_ctor(void);
void mac_ctor(void);
-void nd6_ctor(void);
void pfsync_ctor(void);
void vlan_ctor(void);
diff --git a/freebsd/sbin/ifconfig/ifgif.c b/freebsd/sbin/ifconfig/ifgif.c
index 6386751e..e55933a5 100644
--- a/freebsd/sbin/ifconfig/ifgif.c
+++ b/freebsd/sbin/ifconfig/ifgif.c
@@ -53,38 +53,22 @@ static const char rcsid[] =
#include "ifconfig.h"
-static void gif_status(int);
+#define GIFBITS "\020\1ACCEPT_REV_ETHIP_VER\5SEND_REV_ETHIP_VER"
-static const struct {
- const char *label;
- u_int mask;
-} gif_opts[] = {
- { "ACCEPT_REV_ETHIP_VER", GIF_ACCEPT_REVETHIP },
- { "SEND_REV_ETHIP_VER", GIF_SEND_REVETHIP },
-};
+static void gif_status(int);
static void
gif_status(int s)
{
int opts;
- int nopts = 0;
- size_t i;
ifr.ifr_data = (caddr_t)&opts;
if (ioctl(s, GIFGOPTS, &ifr) == -1)
return;
if (opts == 0)
return;
-
- printf("\toptions=%d<", opts);
- for (i=0; i < sizeof(gif_opts)/sizeof(gif_opts[0]); i++) {
- if (opts & gif_opts[i].mask) {
- if (nopts++)
- printf(",");
- printf("%s", gif_opts[i].label);
- }
- }
- printf(">\n");
+ printb("\toptions", opts, GIFBITS);
+ putchar('\n');
}
static void
diff --git a/freebsd/sbin/ifconfig/iflagg.c b/freebsd/sbin/ifconfig/iflagg.c
index 6e2a726e..56970e57 100644
--- a/freebsd/sbin/ifconfig/iflagg.c
+++ b/freebsd/sbin/ifconfig/iflagg.c
@@ -42,7 +42,8 @@ setlaggport(const char *val, int d, int s, const struct afswtch *afp)
strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname));
- if (ioctl(s, SIOCSLAGGPORT, &rp))
+ /* Don't choke if the port is already in this lagg. */
+ if (ioctl(s, SIOCSLAGGPORT, &rp) && errno != EEXIST)
err(1, "SIOCSLAGGPORT");
}
@@ -99,10 +100,8 @@ setlagghash(const char *val, int d, int s, const struct afswtch *afp)
rf.rf_flags |= LAGG_F_HASHL3;
else if (strcmp(tok, "l4") == 0)
rf.rf_flags |= LAGG_F_HASHL4;
- else {
- free(str);
+ else
errx(1, "Invalid lagghash option: %s", tok);
- }
}
free(str);
if (rf.rf_flags == 0)
diff --git a/freebsd/sbin/ifconfig/ifmedia.c b/freebsd/sbin/ifconfig/ifmedia.c
index b7296131..0ad008ee 100644
--- a/freebsd/sbin/ifconfig/ifmedia.c
+++ b/freebsd/sbin/ifconfig/ifmedia.c
@@ -47,10 +47,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
diff --git a/freebsd/sbin/ping/ping.c b/freebsd/sbin/ping/ping.c
index 6adc03aa..b45efa6b 100644
--- a/freebsd/sbin/ping/ping.c
+++ b/freebsd/sbin/ping/ping.c
@@ -219,7 +219,7 @@ static void pr_retip(struct ip *);
static void status(int);
static void stopit(int);
#endif /* __rtems__ */
-static void tvsub(struct timeval *, struct timeval *);
+static void tvsub(struct timeval *, const struct timeval *);
static void usage(void) __dead2;
#ifdef __rtems__
@@ -268,12 +268,10 @@ int rtems_bsd_command_ping(int argc, char *argv[])
}
#endif /* __rtems__ */
int
-main(argc, argv)
- int argc;
#ifndef __rtems__
- char *const *argv;
+main(int argc, char *const *argv)
#else /* __rtems__ */
- char **argv;
+main(int argc, char **argv)
#endif /* __rtems__ */
{
struct sockaddr_in from, sock_in;
@@ -998,8 +996,7 @@ main(argc, argv)
* to be called from a signal handler.
*/
void
-stopit(sig)
- int sig __unused;
+stopit(int sig __unused)
{
/*
@@ -1095,11 +1092,7 @@ pinger(void)
* program to be run without having intermingled output (or statistics!).
*/
static void
-pr_pack(buf, cc, from, tv)
- char *buf;
- int cc;
- struct sockaddr_in *from;
- struct timeval *tv;
+pr_pack(char *buf, int cc, struct sockaddr_in *from, struct timeval *tv)
{
struct in_addr ina;
u_char *cp, *dp;
@@ -1363,9 +1356,7 @@ pr_pack(buf, cc, from, tv)
* Checksum routine for Internet Protocol family headers (C Version)
*/
u_short
-in_cksum(addr, len)
- u_short *addr;
- int len;
+in_cksum(u_short *addr, int len)
{
int nleft, sum;
u_short *w;
@@ -1409,8 +1400,7 @@ in_cksum(addr, len)
* be >= in.
*/
static void
-tvsub(out, in)
- struct timeval *out, *in;
+tvsub(struct timeval *out, const struct timeval *in)
{
if ((out->tv_usec -= in->tv_usec) < 0) {
@@ -1427,8 +1417,7 @@ tvsub(out, in)
*/
static void
-status(sig)
- int sig __unused;
+status(int sig __unused)
{
siginfo_p = 1;
@@ -1436,7 +1425,7 @@ status(sig)
#endif /* __rtems__ */
static void
-check_status()
+check_status(void)
{
if (siginfo_p) {
@@ -1456,7 +1445,7 @@ check_status()
* Print out statistics, and give up.
*/
static void
-finish()
+finish(void)
{
(void)signal(SIGINT, SIG_IGN);
@@ -1515,8 +1504,7 @@ static char *ttab[] = {
* Print a descriptive string about an ICMP header.
*/
static void
-pr_icmph(icp)
- struct icmp *icp;
+pr_icmph(struct icmp *icp)
{
switch(icp->icmp_type) {
@@ -1663,8 +1651,7 @@ pr_icmph(icp)
* Print an IP header with options.
*/
static void
-pr_iph(ip)
- struct ip *ip;
+pr_iph(struct ip *ip)
{
u_char *cp;
int hlen;
@@ -1696,8 +1683,7 @@ pr_iph(ip)
* a hostname.
*/
static char *
-pr_addr(ina)
- struct in_addr ina;
+pr_addr(struct in_addr ina)
{
struct hostent *hp;
static char buf[16 + 3 + MAXHOSTNAMELEN];
@@ -1716,8 +1702,7 @@ pr_addr(ina)
* Dump some info on a returned (via ICMP) IP packet.
*/
static void
-pr_retip(ip)
- struct ip *ip;
+pr_retip(struct ip *ip)
{
u_char *cp;
int hlen;
@@ -1735,7 +1720,7 @@ pr_retip(ip)
}
static char *
-pr_ntime (n_time timestamp)
+pr_ntime(n_time timestamp)
{
static char buf[10];
int hour, min, sec;
@@ -1751,8 +1736,7 @@ pr_ntime (n_time timestamp)
}
static void
-fill(bp, patp)
- char *bp, *patp;
+fill(char *bp, char *patp)
{
char *cp;
int pat[16];
@@ -1788,7 +1772,7 @@ fill(bp, patp)
#define SECOPT ""
#endif
static void
-usage()
+usage(void)
{
(void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
diff --git a/freebsd/sbin/ping6/ping6.c b/freebsd/sbin/ping6/ping6.c
index b927f110..b2ad6336 100644
--- a/freebsd/sbin/ping6/ping6.c
+++ b/freebsd/sbin/ping6/ping6.c
@@ -48,10 +48,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -79,10 +75,11 @@ static const char copyright[] =
#if 0
static char sccsid[] = "@(#)ping.c 8.1 (Berkeley) 6/5/93";
#endif
-static const char rcsid[] =
- "$FreeBSD$";
#endif /* not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
/*
* Using the InterNet Control Message Protocol (ICMP) "ECHO" facility,
* measure round-trip-delays and packet loss across network paths.
@@ -235,6 +232,13 @@ static char *hostname;
static int ident; /* process id to identify our packets */
static u_int8_t nonce[8]; /* nonce field for node information */
static int hoplimit; /* hoplimit */
+static u_char *packet;
+#ifdef HAVE_POLL_H
+static struct pollfd fdmaskp[1];
+#else
+static fd_set *fdmaskp = NULL;
+static int fdmasks;
+#endif
/* counters */
static long nmissedmax; /* max value of ntransmitted - nreceived - 1 */
@@ -265,8 +269,8 @@ static volatile sig_atomic_t seenint;
static volatile sig_atomic_t seeninfo;
#endif
-static u_char *packet;
-static struct cmsghdr *cm;
+/* For control (ancillary) data received from recvmsg() */
+static struct cmsghdr cm[CONTROLLEN];
static int main(int, char *[]);
static void fill(char *, char *);
@@ -300,7 +304,7 @@ static void tvsub(struct timeval *, struct timeval *);
static int setpolicy(int, char *);
#endif
#endif
-static char *nigroup(char *);
+static char *nigroup(char *, int);
static void usage(void);
#ifdef __rtems__
@@ -336,9 +340,7 @@ int rtems_bsd_command_ping6(int argc, char **argv)
#ifdef SIGINFO
seeninfo = 0;
#endif
-
packet = NULL;
- cm = NULL;
exit_code = rtems_bsd_program_call_main("ping6", main, argc, argv);
@@ -352,15 +354,11 @@ int rtems_bsd_command_ping6(int argc, char **argv)
freeaddrinfo(res);
}
- free(cm);
-
return exit_code;
}
#endif /* __rtems__ */
int
-main(argc, argv)
- int argc;
- char *argv[];
+main(int argc, char *argv[])
{
struct itimerval itimer;
struct sockaddr_in6 from;
@@ -373,14 +371,9 @@ main(argc, argv)
struct timeval timeout, *tv;
#endif
struct addrinfo hints;
-#ifdef HAVE_POLL_H
- struct pollfd fdmaskp[1];
-#else
- fd_set *fdmaskp;
- int fdmasks;
-#endif
int cc, i;
int ch, hold, packlen, preload, optval, ret_ga;
+ int nig_oldmcprefix = -1;
u_char *datap;
char *e, *target, *ifname = NULL, *gateway = NULL;
int ip6optlen = 0;
@@ -572,6 +565,7 @@ main(argc, argv)
break;
case 'N':
options |= F_NIGROUP;
+ nig_oldmcprefix++;
break;
case 'o':
options |= F_ONCE;
@@ -687,7 +681,7 @@ main(argc, argv)
}
if (options & F_NIGROUP) {
- target = nigroup(argv[argc - 1]);
+ target = nigroup(argv[argc - 1], nig_oldmcprefix);
if (target == NULL) {
usage();
/*NOTREACHED*/
@@ -1164,11 +1158,6 @@ main(argc, argv)
seeninfo = 0;
#endif
- /* For control (ancillary) data received from recvmsg() */
- cm = (struct cmsghdr *)malloc(CONTROLLEN);
- if (cm == NULL)
- err(1, "malloc");
-
for (;;) {
struct msghdr m;
struct iovec iov[2];
@@ -1282,12 +1271,27 @@ main(argc, argv)
}
}
summary();
+
+ if (res != NULL) {
+ freeaddrinfo(res);
+ res = NULL;
+ }
+
+ if(packet != NULL) {
+ free(packet);
+ packet = NULL;
+ }
+
+#ifndef HAVE_POLL_H
+ if(fdmaskp != NULL)
+ free(fdmaskp);
+#endif
+
exit(nreceived == 0 ? 2 : 0);
}
void
-onsignal(sig)
- int sig;
+onsignal(int sig)
{
switch (sig) {
@@ -1310,7 +1314,7 @@ onsignal(sig)
* This routine transmits another ping6.
*/
void
-retransmit()
+retransmit(void)
{
struct itimerval itimer;
@@ -1346,7 +1350,7 @@ retransmit()
* byte-order, to compute the round-trip time.
*/
size_t
-pingerlen()
+pingerlen(void)
{
size_t l;
@@ -1365,7 +1369,7 @@ pingerlen()
}
int
-pinger()
+pinger(void)
{
struct icmp6_hdr *icp;
struct iovec iov[2];
@@ -1480,8 +1484,7 @@ pinger()
}
int
-myechoreply(icp)
- const struct icmp6_hdr *icp;
+myechoreply(const struct icmp6_hdr *icp)
{
if (ntohs(icp->icmp6_id) == ident)
return 1;
@@ -1490,8 +1493,7 @@ myechoreply(icp)
}
int
-mynireply(nip)
- const struct icmp6_nodeinfo *nip;
+mynireply(const struct icmp6_nodeinfo *nip)
{
if (memcmp(nip->icmp6_ni_nonce + sizeof(u_int16_t),
nonce + sizeof(u_int16_t),
@@ -1502,12 +1504,9 @@ mynireply(nip)
}
char *
-dnsdecode(sp, ep, base, buf, bufsiz)
- const u_char **sp;
- const u_char *ep;
- const u_char *base; /*base for compressed name*/
- char *buf;
- size_t bufsiz;
+dnsdecode(const u_char **sp, const u_char *ep, const u_char *base, char *buf,
+ size_t bufsiz)
+ /*base for compressed name*/
{
int i;
const u_char *cp;
@@ -1572,10 +1571,7 @@ dnsdecode(sp, ep, base, buf, bufsiz)
* program to be run without having intermingled output (or statistics!).
*/
void
-pr_pack(buf, cc, mhdr)
- u_char *buf;
- int cc;
- struct msghdr *mhdr;
+pr_pack(u_char *buf, int cc, struct msghdr *mhdr)
{
#define safeputc(c) printf((isprint((c)) ? "%c" : "\\%03o"), c)
struct icmp6_hdr *icp;
@@ -1856,8 +1852,7 @@ pr_pack(buf, cc, mhdr)
}
void
-pr_exthdrs(mhdr)
- struct msghdr *mhdr;
+pr_exthdrs(struct msghdr *mhdr)
{
ssize_t bufsize;
void *bufp;
@@ -2032,10 +2027,7 @@ pr_rthdr(void *extbuf, size_t bufsize __unused)
#endif /* USE_RFC2292BIS */
int
-pr_bitrange(v, soff, ii)
- u_int32_t v;
- int soff;
- int ii;
+pr_bitrange(u_int32_t v, int soff, int ii)
{
int off;
int i;
@@ -2081,9 +2073,8 @@ pr_bitrange(v, soff, ii)
}
void
-pr_suptypes(ni, nilen)
- struct icmp6_nodeinfo *ni; /* ni->qtype must be SUPTYPES */
- size_t nilen;
+pr_suptypes(struct icmp6_nodeinfo *ni, size_t nilen)
+ /* ni->qtype must be SUPTYPES */
{
size_t clen;
u_int32_t v;
@@ -2148,9 +2139,8 @@ pr_suptypes(ni, nilen)
}
void
-pr_nodeaddr(ni, nilen)
- struct icmp6_nodeinfo *ni; /* ni->qtype must be NODEADDR */
- int nilen;
+pr_nodeaddr(struct icmp6_nodeinfo *ni, int nilen)
+ /* ni->qtype must be NODEADDR */
{
u_char *cp = (u_char *)(ni + 1);
char ntop_buf[INET6_ADDRSTRLEN];
@@ -2215,8 +2205,7 @@ pr_nodeaddr(ni, nilen)
}
int
-get_hoplim(mhdr)
- struct msghdr *mhdr;
+get_hoplim(struct msghdr *mhdr)
{
struct cmsghdr *cm;
@@ -2235,8 +2224,7 @@ get_hoplim(mhdr)
}
struct in6_pktinfo *
-get_rcvpktinfo(mhdr)
- struct msghdr *mhdr;
+get_rcvpktinfo(struct msghdr *mhdr)
{
struct cmsghdr *cm;
@@ -2255,8 +2243,7 @@ get_rcvpktinfo(mhdr)
}
int
-get_pathmtu(mhdr)
- struct msghdr *mhdr;
+get_pathmtu(struct msghdr *mhdr)
{
#ifdef IPV6_RECVPATHMTU
struct cmsghdr *cm;
@@ -2316,8 +2303,7 @@ get_pathmtu(mhdr)
* be >= in.
*/
void
-tvsub(out, in)
- struct timeval *out, *in;
+tvsub(struct timeval *out, struct timeval *in)
{
if ((out->tv_usec -= in->tv_usec) < 0) {
--out->tv_sec;
@@ -2332,11 +2318,21 @@ tvsub(out, in)
*/
/* ARGSUSED */
void
-onint(notused)
- int notused;
+onint(int notused __unused)
{
summary();
+ if (res != NULL)
+ freeaddrinfo(res);
+
+ if(packet != NULL)
+ free(packet);
+
+#ifndef HAVE_POLL_H
+ if(fdmaskp != NULL)
+ free(fdmaskp);
+#endif
+
(void)signal(SIGINT, SIG_DFL);
(void)kill(getpid(), SIGINT);
@@ -2349,7 +2345,7 @@ onint(notused)
* Print out statistics.
*/
void
-summary()
+summary(void)
{
(void)printf("\n--- %s ping6 statistics ---\n", hostname);
@@ -2397,9 +2393,7 @@ static const char *nircode[] = {
* Print a descriptive string about an ICMP header.
*/
void
-pr_icmph(icp, end)
- struct icmp6_hdr *icp;
- u_char *end;
+pr_icmph(struct icmp6_hdr *icp, u_char *end)
{
char ntop_buf[INET6_ADDRSTRLEN];
struct nd_redirect *red;
@@ -2629,8 +2623,7 @@ pr_icmph(icp, end)
* Print an IP6 header.
*/
void
-pr_iph(ip6)
- struct ip6_hdr *ip6;
+pr_iph(struct ip6_hdr *ip6)
{
u_int32_t flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
u_int8_t tc;
@@ -2658,9 +2651,7 @@ pr_iph(ip6)
* a hostname.
*/
const char *
-pr_addr(addr, addrlen)
- struct sockaddr *addr;
- int addrlen;
+pr_addr(struct sockaddr *addr, int addrlen)
{
static char buf[NI_MAXHOST];
int flag = 0;
@@ -2679,9 +2670,7 @@ pr_addr(addr, addrlen)
* Dump some info on a returned (via ICMPv6) IPv6 packet.
*/
void
-pr_retip(ip6, end)
- struct ip6_hdr *ip6;
- u_char *end;
+pr_retip(struct ip6_hdr *ip6, u_char *end)
{
u_char *cp = (u_char *)ip6, nh;
int hlen;
@@ -2761,8 +2750,7 @@ pr_retip(ip6, end)
}
void
-fill(bp, patp)
- char *bp, *patp;
+fill(char *bp, char *patp)
{
int ii, jj, kk;
int pat[16];
@@ -2795,9 +2783,7 @@ fill(bp, patp)
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
int
-setpolicy(so, policy)
- int so;
- char *policy;
+setpolicy(int so __unused, char *policy)
{
char *buf;
@@ -2818,8 +2804,7 @@ setpolicy(so, policy)
#endif
char *
-nigroup(name)
- char *name;
+nigroup(char *name, int nig_oldmcprefix)
{
char *p;
char *q;
@@ -2829,6 +2814,7 @@ nigroup(name)
size_t l;
char hbuf[NI_MAXHOST];
struct in6_addr in6;
+ int valid;
p = strchr(name, '.');
if (!p)
@@ -2844,7 +2830,7 @@ nigroup(name)
*q = tolower(*(unsigned char *)q);
}
- /* generate 8 bytes of pseudo-random value. */
+ /* generate 16 bytes of pseudo-random value. */
memset(&ctxt, 0, sizeof(ctxt));
MD5Init(&ctxt);
c = l & 0xff;
@@ -2852,9 +2838,23 @@ nigroup(name)
MD5Update(&ctxt, (unsigned char *)name, l);
MD5Final(digest, &ctxt);
- if (inet_pton(AF_INET6, "ff02::2:0000:0000", &in6) != 1)
+ if (nig_oldmcprefix) {
+ /* draft-ietf-ipngwg-icmp-name-lookup */
+ valid = inet_pton(AF_INET6, "ff02::2:0000:0000", &in6);
+ } else {
+ /* RFC 4620 */
+ valid = inet_pton(AF_INET6, "ff02::2:ff00:0000", &in6);
+ }
+ if (valid != 1)
return NULL; /*XXX*/
- bcopy(digest, &in6.s6_addr[12], 4);
+
+ if (nig_oldmcprefix) {
+ /* draft-ietf-ipngwg-icmp-name-lookup */
+ bcopy(digest, &in6.s6_addr[12], 4);
+ } else {
+ /* RFC 4620 */
+ bcopy(digest, &in6.s6_addr[13], 3);
+ }
if (inet_ntop(AF_INET6, &in6, hbuf, sizeof(hbuf)) == NULL)
return NULL;
@@ -2863,7 +2863,7 @@ nigroup(name)
}
void
-usage()
+usage(void)
{
(void)fprintf(stderr,
#if defined(IPSEC) && !defined(IPSEC_POLICY_IPSEC)
diff --git a/freebsd/sbin/route/keywords b/freebsd/sbin/route/keywords
index 8817f305..adfba7cf 100644
--- a/freebsd/sbin/route/keywords
+++ b/freebsd/sbin/route/keywords
@@ -10,6 +10,7 @@ del
delete
dst
expire
+fib
flush
gateway
genmask
diff --git a/freebsd/sbin/route/route.c b/freebsd/sbin/route/route.c
index 8bf976f9..cc5c7d05 100644
--- a/freebsd/sbin/route/route.c
+++ b/freebsd/sbin/route/route.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ioctl.h>
#include <sys/sysctl.h>
#include <rtems/bsd/sys/types.h>
+#include <sys/queue.h>
#include <net/if.h>
#include <net/route.h>
@@ -85,8 +86,15 @@ static const struct keytab {
{0, 0}
};
+struct fibl {
+ TAILQ_ENTRY(fibl) fl_next;
+
+ int fl_num;
+ int fl_error;
+ int fl_errno;
+};
+
struct rt_ctx {
- struct ortentry route;
union sockunion {
struct sockaddr sa;
struct sockaddr_in sin;
@@ -102,11 +110,13 @@ struct rt_ctx {
int pid, rtm_addrs;
int s;
int forcehost, forcenet, nflag, af, qflag, tflag;
- int iflag, verbose, aflen;
+ int verbose, aflen;
int locking, lockrest, debugonly;
struct rt_metrics rt_metrics;
u_long rtm_inits;
uid_t uid;
+ int defaultfib;
+ int numfibs;
char domain[MAXHOSTNAMELEN + 1];
int domain_initialized;
int rtm_seq;
@@ -116,6 +126,7 @@ struct rt_ctx {
struct rt_msghdr m_rtm;
char m_space[512];
} m_rtmsg;
+ TAILQ_HEAD(fibl_head_t, fibl) fibl_head;
};
#ifndef __rtems__
@@ -128,7 +139,8 @@ static int atalk_aton(const char *, struct at_addr *);
static char *atalk_ntoa(struct at_addr, char [20]);
static void bprintf(FILE *, int, const char *);
static void flushroutes(struct rt_ctx *, int argc, char *argv[]);
-static int getaddr(struct rt_ctx *, int, char *, struct hostent **);
+static int flushroutes_fib(struct rt_ctx *, int);
+static int getaddr(struct rt_ctx *, int, char *, struct hostent **, int);
static int keyword(const char *);
static void inet_makenetandmask(struct rt_ctx *, u_long, struct sockaddr_in *, u_long);
#ifdef INET6
@@ -136,21 +148,27 @@ static int inet6_makenetandmask(struct rt_ctx *, struct sockaddr_in6 *, const ch
#endif
static void interfaces(struct rt_ctx *);
static void mask_addr(struct rt_ctx *);
-static void monitor(struct rt_ctx *);
+static void monitor(struct rt_ctx *, int, char *[]);
static const char *netname(struct rt_ctx *, struct sockaddr *);
static void newroute(struct rt_ctx *, int, char **);
+static int newroute_fib(struct rt_ctx *, int, char *, int);
static void pmsg_addrs(struct rt_ctx *, char *, int, size_t);
static void pmsg_common(struct rt_ctx *, struct rt_msghdr *, size_t);
static int prefixlen(struct rt_ctx *, const char *);
-static void print_getmsg(struct rt_ctx *, struct rt_msghdr *, int);
+static void print_getmsg(struct rt_ctx *, struct rt_msghdr *, int, int);
static void print_rtmsg(struct rt_ctx *, struct rt_msghdr *, size_t);
static const char *routename(struct rt_ctx *, struct sockaddr *);
-static int rtmsg(struct rt_ctx *, int, int);
+static int rtmsg(struct rt_ctx *, int, int, int);
static void set_metric(struct rt_ctx *, char *, int);
+static int set_sofib(struct rt_ctx *, int);
+static int set_procfib(int);
static void sockaddr(char *, struct sockaddr *);
static void sodump(sup, const char *);
extern char *iso_ntoa(void);
+static int fiboptlist_csv(struct rt_ctx *, const char *, struct fibl_head_t *);
+static int fiboptlist_range(struct rt_ctx *, const char *, struct fibl_head_t *);
+
static void usage(const char *) __dead2;
void
@@ -189,16 +207,24 @@ int rtems_bsd_command_route(int argc, char *argv[])
c = calloc(1, sizeof(*c));
if (c != NULL) {
struct main_ctx mc;
+ struct fibl *fl;
+ struct fibl *tfl;
mc.argc = argc;
mc.argv = argv;
mc.c = c;
c->aflen = sizeof(struct sockaddr_in);
+ TAILQ_INIT(&c->fibl_head);
exit_code = rtems_bsd_program_call("route", call_main, &mc);
close(c->s);
+
+ TAILQ_FOREACH_SAFE(fl, &c->fibl_head, fl_next, tfl) {
+ free(fl);
+ }
+
free(c);
} else {
exit_code = EXIT_FAILURE;
@@ -217,6 +243,7 @@ main(int argc, char **argv)
struct rt_ctx *c;
#endif /* __rtems__ */
int ch;
+ size_t len;
#ifdef __rtems__
struct getopt_data getopt_data;
memset(&getopt_data, 0, sizeof(getopt_data));
@@ -267,6 +294,17 @@ main(int argc, char **argv)
c->s = socket(PF_ROUTE, SOCK_RAW, 0);
if (c->s < 0)
err(EX_OSERR, "socket");
+
+ len = sizeof(c->numfibs);
+ if (sysctlbyname("net.fibs", (void *)&c->numfibs, &len, NULL, 0) == -1)
+ c->numfibs = -1;
+
+ len = sizeof(c->defaultfib);
+ if (c->numfibs != -1 &&
+ sysctlbyname("net.my_fibnum", (void *)&c->defaultfib, &len, NULL,
+ 0) == -1)
+ c->defaultfib = -1;
+
if (*argv != NULL)
switch (keyword(*argv)) {
case K_GET:
@@ -282,7 +320,7 @@ main(int argc, char **argv)
/* NOTREACHED */
case K_MONITOR:
- monitor(c);
+ monitor(c, argc, argv);
/* NOTREACHED */
case K_FLUSH:
@@ -294,6 +332,136 @@ main(int argc, char **argv)
/* NOTREACHED */
}
+static int
+set_sofib(struct rt_ctx *c, int fib)
+{
+
+ if (fib < 0)
+ return (0);
+ return (setsockopt(c->s, SOL_SOCKET, SO_SETFIB, (void *)&fib,
+ sizeof(fib)));
+}
+
+static int
+set_procfib(int fib)
+{
+
+ if (fib < 0)
+ return (0);
+ return (setfib(fib));
+}
+
+static int
+fiboptlist_range(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
+{
+ struct fibl *fl;
+ char *str0, *str, *token, *endptr;
+ int fib[2], i, error;
+
+ str0 = str = strdup(arg);
+ error = 0;
+ i = 0;
+ while ((token = strsep(&str, "-")) != NULL) {
+ switch (i) {
+ case 0:
+ case 1:
+ errno = 0;
+ fib[i] = strtol(token, &endptr, 0);
+ if (errno == 0) {
+ if (*endptr != '\0' ||
+ fib[i] < 0 ||
+ (c->numfibs != -1 && fib[i] > c->numfibs - 1))
+ errno = EINVAL;
+ }
+ if (errno)
+ error = 1;
+ break;
+ default:
+ error = 1;
+ }
+ if (error)
+ goto fiboptlist_range_ret;
+ i++;
+ }
+ if (fib[0] >= fib[1]) {
+ error = 1;
+ goto fiboptlist_range_ret;
+ }
+ for (i = fib[0]; i <= fib[1]; i++) {
+ fl = calloc(1, sizeof(*fl));
+ if (fl == NULL) {
+ error = 1;
+ goto fiboptlist_range_ret;
+ }
+ fl->fl_num = i;
+ TAILQ_INSERT_TAIL(flh, fl, fl_next);
+ }
+fiboptlist_range_ret:
+ free(str0);
+ return (error);
+}
+
+#define ALLSTRLEN 64
+static int
+fiboptlist_csv(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
+{
+ struct fibl *fl;
+ char *str0, *str, *token, *endptr;
+ int fib, error;
+
+ if (strcmp("all", arg) == 0) {
+ str = calloc(1, ALLSTRLEN);
+ if (str == NULL) {
+ error = 1;
+ goto fiboptlist_csv_ret;
+ }
+ if (c->numfibs > 1)
+ snprintf(str, ALLSTRLEN - 1, "%d-%d", 0, c->numfibs - 1);
+ else
+ snprintf(str, ALLSTRLEN - 1, "%d", 0);
+ } else if (strcmp("default", arg) == 0) {
+ str0 = str = calloc(1, ALLSTRLEN);
+ if (str == NULL) {
+ error = 1;
+ goto fiboptlist_csv_ret;
+ }
+ snprintf(str, ALLSTRLEN - 1, "%d", c->defaultfib);
+ } else
+ str0 = str = strdup(arg);
+
+ error = 0;
+ while ((token = strsep(&str, ",")) != NULL) {
+ if (*token != '-' && strchr(token, '-') != NULL) {
+ error = fiboptlist_range(c, token, flh);
+ if (error)
+ goto fiboptlist_csv_ret;
+ } else {
+ errno = 0;
+ fib = strtol(token, &endptr, 0);
+ if (errno == 0) {
+ if (*endptr != '\0' ||
+ fib < 0 ||
+ (c->numfibs != -1 && fib > c->numfibs - 1))
+ errno = EINVAL;
+ }
+ if (errno) {
+ error = 1;
+ goto fiboptlist_csv_ret;
+ }
+ fl = calloc(1, sizeof(*fl));
+ if (fl == NULL) {
+ error = 1;
+ goto fiboptlist_csv_ret;
+ }
+ fl->fl_num = fib;
+ TAILQ_INSERT_TAIL(flh, fl, fl_next);
+ }
+ }
+fiboptlist_csv_ret:
+ free(str0);
+ return (error);
+}
+
/*
* Purge all entries in the routing tables not
* associated with network interfaces.
@@ -301,38 +469,71 @@ main(int argc, char **argv)
static void
flushroutes(struct rt_ctx *c, int argc, char *argv[])
{
- size_t needed;
- int mib[6], rlen, seqno, count = 0;
- char *buf, *next, *lim;
- struct rt_msghdr *rtm;
+ struct fibl *fl;
+ int error;
if (c->uid != 0 && !c->debugonly) {
errx(EX_NOPERM, "must be root to alter routing table");
}
shutdown(c->s, SHUT_RD); /* Don't want to read back our messages */
- if (argc > 1) {
+
+ TAILQ_INIT(&c->fibl_head);
+ while (argc > 1) {
+ argc--;
argv++;
- if (argc == 2 && **argv == '-')
- switch (keyword(*argv + 1)) {
- case K_INET:
- c->af = AF_INET;
- break;
+ if (**argv != '-')
+ usage(*argv);
+ switch (keyword(*argv + 1)) {
+ case K_INET:
+ c->af = AF_INET;
+ break;
#ifdef INET6
- case K_INET6:
- c->af = AF_INET6;
- break;
+ case K_INET6:
+ c->af = AF_INET6;
+ break;
#endif
- case K_ATALK:
- c->af = AF_APPLETALK;
- break;
- case K_LINK:
- c->af = AF_LINK;
- break;
- default:
- goto bad;
- } else
-bad: usage(*argv);
+ case K_ATALK:
+ c->af = AF_APPLETALK;
+ break;
+ case K_LINK:
+ c->af = AF_LINK;
+ break;
+ case K_FIB:
+ if (!--argc)
+ usage(*argv);
+ error = fiboptlist_csv(c, *++argv, &c->fibl_head);
+ if (error)
+ errx(EX_USAGE, "invalid fib number: %s", *argv);
+ break;
+ default:
+ usage(*argv);
+ }
+ }
+ if (TAILQ_EMPTY(&c->fibl_head)) {
+ error = fiboptlist_csv(c, "default", &c->fibl_head);
+ if (error)
+ errx(EX_OSERR, "fiboptlist_csv failed.");
+ }
+ TAILQ_FOREACH(fl, &c->fibl_head, fl_next)
+ flushroutes_fib(c, fl->fl_num);
+}
+
+static int
+flushroutes_fib(struct rt_ctx *c, int fib)
+{
+ struct rt_msghdr *rtm;
+ size_t needed;
+ char *buf, *next, *lim;
+ int mib[6], rlen, seqno, count = 0;
+ int error;
+
+ error = set_sofib(c, fib);
+ error += set_procfib(fib);
+ if (error) {
+ warn("fib number %d is ignored", fib);
+ return (error);
}
+
retry:
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@@ -390,14 +591,18 @@ retry:
print_rtmsg(c, rtm, rlen);
else {
struct sockaddr *sa = (struct sockaddr *)(rtm + 1);
- (void) printf("%-20.20s ", rtm->rtm_flags & RTF_HOST ?
+
+ printf("%-20.20s ", rtm->rtm_flags & RTF_HOST ?
routename(c, sa) : netname(c, sa));
sa = (struct sockaddr *)(SA_SIZE(sa) + (char *)sa);
- (void) printf("%-20.20s ", routename(c, sa));
- (void) printf("done\n");
+ printf("%-20.20s ", routename(c, sa));
+ if (fib >= 0)
+ printf("-fib %-3d ", fib);
+ printf("done\n");
}
}
free(buf);
+ return (error);
}
static const char *
@@ -547,8 +752,8 @@ netname(struct rt_ctx *c, struct sockaddr *sa)
* Guess at the subnet mask, assuming reasonable
* width subnet fields.
*/
- while (in.s_addr & ~mask)
- mask |= mask >> subnetshift;
+ while (in.s_addr &~ mask)
+ mask = (long)mask >> subnetshift;
net = in.s_addr & mask;
while ((mask & 1) == 0)
mask >>= 1, net >>= 1;
@@ -660,18 +865,32 @@ set_metric(struct rt_ctx *c, char *value, int key)
*valp = atoi(value);
}
+#define F_ISHOST 0x01
+#define F_FORCENET 0x02
+#define F_FORCEHOST 0x04
+#define F_PROXY 0x08
+#define F_INTERFACE 0x10
+
static void
newroute(struct rt_ctx *c, int argc, char **argv)
{
+ struct hostent *hp;
+ struct fibl *fl;
char *cmd;
- const char *dest = "", *gateway = "", *errmsg;
- int ishost = 0, proxy = 0, ret, attempts, oerrno, flags = RTF_STATIC;
- int key;
- struct hostent *hp = 0;
+ const char *dest, *gateway, *errmsg;
+ int key, error, flags, nrflags, fibnum;
if (c->uid != 0) {
errx(EX_NOPERM, "must be root to alter routing table");
}
+
+ dest = NULL;
+ gateway = NULL;
+ flags = RTF_STATIC;
+ nrflags = 0;
+ hp = NULL;
+ TAILQ_INIT(&c->fibl_head);
+
cmd = argv[0];
if (*cmd != 'g' && *cmd != 's')
shutdown(c->s, SHUT_RD); /* Don't want to read back our messages */
@@ -703,7 +922,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
break;
case K_IFACE:
case K_INTERFACE:
- c->iflag++;
+ nrflags |= F_INTERFACE;
break;
case K_NOSTATIC:
flags &= ~RTF_STATIC;
@@ -715,7 +934,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
c->lockrest = 1;
break;
case K_HOST:
- c->forcehost++;
+ nrflags |= F_FORCEHOST;
break;
case K_REJECT:
flags |= RTF_REJECT;
@@ -730,7 +949,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
flags |= RTF_PROTO2;
break;
case K_PROXY:
- proxy = 1;
+ nrflags |= F_PROXY;
break;
case K_XRESOLVE:
flags |= RTF_XRESOLVE;
@@ -744,49 +963,59 @@ newroute(struct rt_ctx *c, int argc, char **argv)
case K_NOSTICK:
flags &= ~RTF_STICKY;
break;
+ case K_FIB:
+ if (!--argc)
+ usage(NULL);
+ error = fiboptlist_csv(c, *++argv, &c->fibl_head);
+ if (error)
+ errx(EX_USAGE,
+ "invalid fib number: %s", *argv);
+ break;
case K_IFA:
if (!--argc)
usage(NULL);
- (void) getaddr(c, RTA_IFA, *++argv, 0);
+ getaddr(c, RTA_IFA, *++argv, 0, nrflags);
break;
case K_IFP:
if (!--argc)
usage(NULL);
- (void) getaddr(c, RTA_IFP, *++argv, 0);
+ getaddr(c, RTA_IFP, *++argv, 0, nrflags);
break;
case K_GENMASK:
if (!--argc)
usage(NULL);
- (void) getaddr(c, RTA_GENMASK, *++argv, 0);
+ getaddr(c, RTA_GENMASK, *++argv, 0, nrflags);
break;
case K_GATEWAY:
if (!--argc)
usage(NULL);
- (void) getaddr(c, RTA_GATEWAY, *++argv, 0);
+ getaddr(c, RTA_GATEWAY, *++argv, 0, nrflags);
+ gateway = *argv;
break;
case K_DST:
if (!--argc)
usage(NULL);
- ishost = getaddr(c, RTA_DST, *++argv, &hp);
+ if (getaddr(c, RTA_DST, *++argv, &hp, nrflags))
+ nrflags |= F_ISHOST;
dest = *argv;
break;
case K_NETMASK:
if (!--argc)
usage(NULL);
- (void) getaddr(c, RTA_NETMASK, *++argv, 0);
+ getaddr(c, RTA_NETMASK, *++argv, 0, nrflags);
/* FALLTHROUGH */
case K_NET:
- c->forcenet++;
+ nrflags |= F_FORCENET;
break;
case K_PREFIXLEN:
if (!--argc)
usage(NULL);
if (prefixlen(c, *++argv) == -1) {
- c->forcenet = 0;
- ishost = 1;
+ nrflags &= ~F_FORCENET;
+ nrflags |= F_ISHOST;
} else {
- c->forcenet = 1;
- ishost = 0;
+ nrflags |= F_FORCENET;
+ nrflags &= ~F_ISHOST;
}
break;
case K_MTU:
@@ -808,18 +1037,20 @@ newroute(struct rt_ctx *c, int argc, char **argv)
} else {
if ((c->rtm_addrs & RTA_DST) == 0) {
dest = *argv;
- ishost = getaddr(c, RTA_DST, *argv, &hp);
+ if (getaddr(c, RTA_DST, *argv, &hp, nrflags))
+ nrflags |= F_ISHOST;
} else if ((c->rtm_addrs & RTA_GATEWAY) == 0) {
gateway = *argv;
- (void) getaddr(c, RTA_GATEWAY, *argv, &hp);
+ getaddr(c, RTA_GATEWAY, *argv, &hp, nrflags);
} else {
- (void) getaddr(c, RTA_NETMASK, *argv, 0);
- c->forcenet = 1;
+ getaddr(c, RTA_NETMASK, *argv, 0, nrflags);
+ nrflags |= F_FORCENET;
}
}
}
- if (c->forcehost) {
- ishost = 1;
+
+ if (nrflags & F_FORCEHOST) {
+ nrflags |= F_ISHOST;
#ifdef INET6
if (c->af == AF_INET6) {
c->rtm_addrs &= ~RTA_NETMASK;
@@ -827,71 +1058,125 @@ newroute(struct rt_ctx *c, int argc, char **argv)
}
#endif
}
- if (c->forcenet)
- ishost = 0;
+ if (nrflags & F_FORCENET)
+ nrflags &= ~F_ISHOST;
flags |= RTF_UP;
- if (ishost)
+ if (nrflags & F_ISHOST)
flags |= RTF_HOST;
- if (c->iflag == 0)
+ if ((nrflags & F_INTERFACE) == 0)
flags |= RTF_GATEWAY;
- if (proxy) {
+ if (nrflags & F_PROXY) {
c->so_dst.sinarp.sin_other = SIN_PROXY;
flags |= RTF_ANNOUNCE;
}
- for (attempts = 1; ; attempts++) {
- errno = 0;
- if ((ret = rtmsg(c, *cmd, flags)) == 0)
- break;
- if (errno != ENETUNREACH && errno != ESRCH)
- break;
- if (c->af == AF_INET && *gateway != '\0' &&
- hp != NULL && hp->h_addr_list[1] != NULL) {
- hp->h_addr_list++;
- memmove(&c->so_gate.sin.sin_addr, hp->h_addr_list[0],
- MIN((size_t)hp->h_length,
- sizeof(c->so_gate.sin.sin_addr)));
- } else
- break;
+ if (dest == NULL)
+ dest = "";
+ if (gateway == NULL)
+ gateway = "";
+
+ if (TAILQ_EMPTY(&c->fibl_head)) {
+ error = fiboptlist_csv(c, "default", &c->fibl_head);
+ if (error)
+ errx(EX_OSERR, "fiboptlist_csv failed.");
+ }
+ error = 0;
+ TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ fl->fl_error = newroute_fib(c, fl->fl_num, cmd, flags);
+ if (fl->fl_error)
+ fl->fl_errno = errno;
+ error += fl->fl_error;
}
if (*cmd == 'g' || *cmd == 's')
- exit(ret != 0);
+ exit(error);
+
+ error = 0;
if (!c->qflag) {
- oerrno = errno;
- (void) printf("%s %s %s", cmd, ishost? "host" : "net", dest);
- if (*gateway) {
- (void) printf(": gateway %s", gateway);
- if (attempts > 1 && ret == 0 && c->af == AF_INET)
- (void) printf(" (%s)",
- inet_ntoa(((struct sockaddr_in *)&c->route.rt_gateway)->sin_addr));
+ fibnum = 0;
+ TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ if (fl->fl_error == 0)
+ fibnum++;
}
- if (ret == 0) {
- (void) printf("\n");
- } else {
- switch (oerrno) {
- case ESRCH:
- errmsg = "not in table";
- break;
- case EBUSY:
- errmsg = "entry in use";
- break;
- case ENOBUFS:
- errmsg = "not enough memory";
- break;
- case EADDRINUSE:
- /* handle recursion avoidance in rt_setgate() */
- errmsg = "gateway uses the same route";
- break;
- case EEXIST:
- errmsg = "route already in table";
- break;
- default:
- errmsg = strerror(oerrno);
- break;
+ if (fibnum > 0) {
+ int firstfib = 1;
+
+ printf("%s %s %s", cmd,
+ (nrflags & F_ISHOST) ? "host" : "net", dest);
+ if (*gateway)
+ printf(": gateway %s", gateway);
+
+ if (c->numfibs > 1) {
+ TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ if (fl->fl_error == 0
+ && fl->fl_num >= 0) {
+ if (firstfib) {
+ printf(" fib ");
+ firstfib = 0;
+ }
+ printf("%d", fl->fl_num);
+ if (fibnum-- > 1)
+ printf(",");
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ fibnum = 0;
+ TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ if (fl->fl_error != 0) {
+ printf("%s %s %s", cmd, (nrflags & F_ISHOST)
+ ? "host" : "net", dest);
+ if (*gateway)
+ printf(": gateway %s", gateway);
+
+ if (fl->fl_num >= 0)
+ printf(" fib %d", fl->fl_num);
+
+ switch (fl->fl_errno) {
+ case ESRCH:
+ errmsg = "not in table";
+ break;
+ case EBUSY:
+ errmsg = "entry in use";
+ break;
+ case ENOBUFS:
+ errmsg = "not enough memory";
+ break;
+ case EADDRINUSE:
+ /*
+ * handle recursion avoidance
+ * in rt_setgate()
+ */
+ errmsg = "gateway uses the same route";
+ break;
+ case EEXIST:
+ errmsg = "route already in table";
+ break;
+ default:
+ errmsg = strerror(fl->fl_errno);
+ break;
+ }
+ printf(": %s\n", errmsg);
+ error = 1;
}
- (void) printf(": %s\n", errmsg);
}
}
- exit(ret != 0);
+ exit(error);
+}
+
+static int
+newroute_fib(struct rt_ctx *c, int fib, char *cmd, int flags)
+{
+ int error;
+
+ error = set_sofib(c, fib);
+ if (error) {
+ warn("fib number %d is ignored", fib);
+ return (error);
+ }
+
+ error = rtmsg(c, *cmd, flags, fib);
+ return (error);
}
static void
@@ -977,7 +1262,7 @@ inet6_makenetandmask(struct rt_ctx *c, struct sockaddr_in6 *sin6, const char *pl
* returning 1 if a host address, 0 if a network address.
*/
static int
-getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp)
+getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp, int nrflags)
{
sup su;
struct hostent *hp;
@@ -998,7 +1283,7 @@ getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp)
break;
case RTA_GATEWAY:
su = &c->so_gate;
- if (c->iflag) {
+ if (nrflags & F_INTERFACE) {
struct ifaddrs *ifap, *ifa;
struct sockaddr_dl *sdl = NULL;
@@ -1058,7 +1343,7 @@ getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp)
#if 0
bzero(su, sizeof(*su)); /* for readability */
#endif
- getaddr(c, RTA_NETMASK, str, 0);
+ getaddr(c, RTA_NETMASK, str, 0, nrflags);
break;
#if 0
case RTA_NETMASK:
@@ -1253,10 +1538,39 @@ retry2:
}
static void
-monitor(struct rt_ctx *c)
+monitor(struct rt_ctx *c, int argc, char *argv[])
{
- int n;
- char msg[2048];
+ int n, fib, error;
+ char msg[2048], *endptr;
+
+ fib = c->defaultfib;
+ while (argc > 1) {
+ argc--;
+ argv++;
+ if (**argv != '-')
+ usage(*argv);
+ switch (keyword(*argv + 1)) {
+ case K_FIB:
+ if (!--argc)
+ usage(*argv);
+ errno = 0;
+ fib = strtol(*++argv, &endptr, 0);
+ if (errno == 0) {
+ if (*endptr != '\0' ||
+ fib < 0 ||
+ (c->numfibs != -1 && fib > c->numfibs - 1))
+ errno = EINVAL;
+ }
+ if (errno)
+ errx(EX_USAGE, "invalid fib number: %s", *argv);
+ break;
+ default:
+ usage(*argv);
+ }
+ }
+ error = set_sofib(c, fib);
+ if (error)
+ errx(EX_USAGE, "invalid fib number: %d", fib);
c->verbose = 1;
if (c->debugonly) {
@@ -1273,7 +1587,7 @@ monitor(struct rt_ctx *c)
}
static int
-rtmsg(struct rt_ctx *c, int cmd, int flags)
+rtmsg(struct rt_ctx *c, int cmd, int flags, int fib)
{
int rlen;
char *cp = c->m_rtmsg.m_space;
@@ -1335,7 +1649,7 @@ rtmsg(struct rt_ctx *c, int cmd, int flags)
if (l < 0)
warn("read from routing socket");
else
- print_getmsg(c, &rtm, l);
+ print_getmsg(c, &rtm, l, fib);
}
#undef rtm
return (0);
@@ -1495,6 +1809,7 @@ print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
break;
}
printf("\n");
+ fflush(stdout);
break;
default:
@@ -1512,7 +1827,7 @@ badlen:
}
static void
-print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen)
+print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen, int fib)
{
struct sockaddr *dst = NULL, *gate = NULL, *mask = NULL;
struct sockaddr_dl *ifp = NULL;
@@ -1572,6 +1887,8 @@ print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen)
}
if (gate && rtm->rtm_flags & RTF_GATEWAY)
(void)printf(" gateway: %s\n", routename(c, gate));
+ if (fib >= 0)
+ (void)printf(" fib: %u\n", (unsigned int)fib);
if (ifp)
(void)printf(" interface: %.*s\n",
ifp->sdl_nlen, ifp->sdl_data);
diff --git a/freebsd/sys/arm/arm/legacy.c b/freebsd/sys/arm/arm/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/arm/arm/legacy.c
+++ b/freebsd/sys/arm/arm/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/arm/include/machine/cpufunc.h b/freebsd/sys/arm/include/machine/cpufunc.h
index a4c1bc83..55e7ffac 100644
--- a/freebsd/sys/arm/include/machine/cpufunc.h
+++ b/freebsd/sys/arm/include/machine/cpufunc.h
@@ -284,6 +284,28 @@ void arm8_setup (char *string);
u_int arm8_clock_config (u_int, u_int);
#endif
+
+#if defined(CPU_FA526) || defined(CPU_FA626TE)
+void fa526_setup (char *arg);
+void fa526_setttb (u_int ttb);
+void fa526_context_switch (void);
+void fa526_cpu_sleep (int);
+void fa526_tlb_flushI_SE (u_int);
+void fa526_tlb_flushID_SE (u_int);
+void fa526_flush_prefetchbuf (void);
+void fa526_flush_brnchtgt_E (u_int);
+
+void fa526_icache_sync_all (void);
+void fa526_icache_sync_range(vm_offset_t start, vm_size_t end);
+void fa526_dcache_wbinv_all (void);
+void fa526_dcache_wbinv_range(vm_offset_t start, vm_size_t end);
+void fa526_dcache_inv_range (vm_offset_t start, vm_size_t end);
+void fa526_dcache_wb_range (vm_offset_t start, vm_size_t end);
+void fa526_idcache_wbinv_all(void);
+void fa526_idcache_wbinv_range(vm_offset_t start, vm_size_t end);
+#endif
+
+
#ifdef CPU_SA110
void sa110_setup (char *string);
void sa110_context_switch (void);
@@ -379,6 +401,7 @@ extern unsigned arm10_dcache_index_max;
extern unsigned arm10_dcache_index_inc;
u_int sheeva_control_ext (u_int, u_int);
+void sheeva_cpu_sleep (int);
void sheeva_setttb (u_int);
void sheeva_dcache_wbinv_range (vm_offset_t, vm_size_t);
void sheeva_dcache_inv_range (vm_offset_t, vm_size_t);
@@ -443,10 +466,11 @@ extern unsigned armv5_dcache_index_max;
extern unsigned armv5_dcache_index_inc;
#endif
-#if defined(CPU_ARM9) || defined(CPU_ARM9E) || defined(CPU_ARM10) || \
- defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) || \
- defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
- defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
+#if defined(CPU_ARM9) || defined(CPU_ARM9E) || defined(CPU_ARM10) || \
+ defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) || \
+ defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
+ defined(CPU_FA526) || defined(CPU_FA626TE) || \
+ defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342)
void armv4_tlb_flushID (void);
diff --git a/freebsd/sys/arm/include/machine/in_cksum.h b/freebsd/sys/arm/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/arm/include/machine/in_cksum.h
+++ b/freebsd/sys/arm/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/arm/include/machine/pci_cfgreg.h b/freebsd/sys/arm/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/arm/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/arm/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/arm/pci/pci_bus.c b/freebsd/sys/arm/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/arm/pci/pci_bus.c
+++ b/freebsd/sys/arm/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/avr/avr/legacy.c b/freebsd/sys/avr/avr/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/avr/avr/legacy.c
+++ b/freebsd/sys/avr/avr/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/avr/include/machine/in_cksum.h b/freebsd/sys/avr/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/avr/include/machine/in_cksum.h
+++ b/freebsd/sys/avr/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/avr/include/machine/pci_cfgreg.h b/freebsd/sys/avr/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/avr/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/avr/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/avr/pci/pci_bus.c b/freebsd/sys/avr/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/avr/pci/pci_bus.c
+++ b/freebsd/sys/avr/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/bfin/bfin/legacy.c b/freebsd/sys/bfin/bfin/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/bfin/bfin/legacy.c
+++ b/freebsd/sys/bfin/bfin/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/bfin/include/machine/in_cksum.h b/freebsd/sys/bfin/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/bfin/include/machine/in_cksum.h
+++ b/freebsd/sys/bfin/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/bfin/include/machine/pci_cfgreg.h b/freebsd/sys/bfin/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/bfin/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/bfin/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/bfin/pci/pci_bus.c b/freebsd/sys/bfin/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/bfin/pci/pci_bus.c
+++ b/freebsd/sys/bfin/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/bsm/audit_kevents.h b/freebsd/sys/bsm/audit_kevents.h
index f4f77a55..3eb2e3ab 100644
--- a/freebsd/sys/bsm/audit_kevents.h
+++ b/freebsd/sys/bsm/audit_kevents.h
@@ -602,6 +602,7 @@
#define AUE_PDKILL 43198 /* FreeBSD. */
#define AUE_PDGETPID 43199 /* FreeBSD. */
#define AUE_PDWAIT 43200 /* FreeBSD. */
+#define AUE_WAIT6 43201 /* FreeBSD. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/freebsd/sys/cam/ata/ata_all.h b/freebsd/sys/cam/ata/ata_all.h
index 526fc194..25732b60 100644
--- a/freebsd/sys/cam/ata/ata_all.h
+++ b/freebsd/sys/cam/ata/ata_all.h
@@ -35,7 +35,9 @@ struct ccb_ataio;
struct cam_periph;
union ccb;
-#define SID_DMA 0x10 /* Abuse inq_flags bit to track enabled DMA. */
+#define SID_DMA48 0x01 /* Abuse inq_flags bit to track enabled DMA48. */
+#define SID_AEN 0x04 /* Abuse inq_flags bit to track enabled AEN. */
+#define SID_DMA 0x10 /* Abuse inq_flags bit to track enabled DMA. */
struct ata_cmd {
u_int8_t flags; /* ATA command flags */
@@ -83,6 +85,20 @@ struct ata_res {
u_int8_t sector_count_exp;
};
+struct sep_identify_data {
+ uint8_t length; /* Enclosure descriptor length */
+ uint8_t subenc_id; /* Sub-enclosure identifier */
+ uint8_t logical_id[8]; /* Enclosure logical identifier (WWN) */
+ uint8_t vendor_id[8]; /* Vendor identification string */
+ uint8_t product_id[16]; /* Product identification string */
+ uint8_t product_rev[4]; /* Product revision string */
+ uint8_t channel_id; /* Channel identifier */
+ uint8_t firmware_rev[4];/* Firmware revision */
+ uint8_t interface_id[6];/* Interface spec ("S-E-S "/"SAF-TE")*/
+ uint8_t interface_rev[4];/* Interface spec revision */
+ uint8_t vend_spec[11]; /* Vendor specific information */
+};
+
int ata_version(int ver);
char * ata_op_string(struct ata_cmd *cmd);
@@ -126,4 +142,26 @@ int ata_speed2revision(u_int speed);
int ata_identify_match(caddr_t identbuffer, caddr_t table_entry);
int ata_static_identify_match(caddr_t identbuffer, caddr_t table_entry);
+void semb_print_ident(struct sep_identify_data *ident_data);
+
+void semb_receive_diagnostic_results(struct ccb_ataio *ataio,
+ u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb*),
+ uint8_t tag_action, int pcv, uint8_t page_code,
+ uint8_t *data_ptr, uint16_t allocation_length, uint32_t timeout);
+
+void semb_send_diagnostic(struct ccb_ataio *ataio,
+ u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint8_t *data_ptr, uint16_t param_list_length,
+ uint32_t timeout);
+
+void semb_read_buffer(struct ccb_ataio *ataio,
+ u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb*),
+ uint8_t tag_action, uint8_t page_code,
+ uint8_t *data_ptr, uint16_t allocation_length, uint32_t timeout);
+
+void semb_write_buffer(struct ccb_ataio *ataio,
+ u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint8_t *data_ptr, uint16_t param_list_length,
+ uint32_t timeout);
+
#endif
diff --git a/freebsd/sys/cam/cam.c b/freebsd/sys/cam/cam.c
index 6d22945a..18628cd0 100644
--- a/freebsd/sys/cam/cam.c
+++ b/freebsd/sys/cam/cam.c
@@ -39,18 +39,23 @@ __FBSDID("$FreeBSD$");
#else /* _KERNEL */
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
#include <camlib.h>
#endif /* _KERNEL */
#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/smp_all.h>
#include <sys/sbuf.h>
#ifdef _KERNEL
#include <sys/libkern.h>
#include <cam/cam_queue.h>
#include <cam/cam_xpt.h>
+
+FEATURE(scbus, "SCSI devices support");
+
#endif
static int camstatusentrycomp(const void *key, const void *member);
@@ -85,6 +90,8 @@ const struct cam_status_entry cam_status_table[] = {
{ CAM_REQ_TOO_BIG, "The request was too large for this host" },
{ CAM_REQUEUE_REQ, "Unconditionally Re-queue Request", },
{ CAM_ATA_STATUS_ERROR, "ATA Status Error" },
+ { CAM_SCSI_IT_NEXUS_LOST,"Initiator/Target Nexus Lost" },
+ { CAM_SMP_STATUS_ERROR, "SMP Status Error" },
{ CAM_IDE, "Initiator Detected Error Message Received" },
{ CAM_RESRC_UNAVAIL, "Resource Unavailable" },
{ CAM_UNACKED_EVENT, "Unacknowledged Event by Host" },
@@ -105,6 +112,15 @@ const int num_cam_status_entries =
#ifdef _KERNEL
SYSCTL_NODE(_kern, OID_AUTO, cam, CTLFLAG_RD, 0, "CAM Subsystem");
+
+#ifndef CAM_DEFAULT_SORT_IO_QUEUES
+#define CAM_DEFAULT_SORT_IO_QUEUES 1
+#endif
+
+int cam_sort_io_queues = CAM_DEFAULT_SORT_IO_QUEUES;
+TUNABLE_INT("kern.cam.sort_io_queues", &cam_sort_io_queues);
+SYSCTL_INT(_kern_cam, OID_AUTO, sort_io_queues, CTLFLAG_RWTUN,
+ &cam_sort_io_queues, 0, "Sort IO queues to try and optimise disk access patterns");
#endif
void
@@ -266,6 +282,21 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
break;
}
break;
+ case XPT_SMP_IO:
+ switch (proto_flags & CAM_EPF_LEVEL_MASK) {
+ case CAM_EPF_NONE:
+ break;
+ case CAM_EPF_ALL:
+ proto_flags |= CAM_ESMF_PRINT_FULL_CMD;
+ /* FALLTHROUGH */
+ case CAM_EPF_NORMAL:
+ case CAM_EPF_MINIMAL:
+ proto_flags |= CAM_ESMF_PRINT_STATUS;
+ /* FALLTHROUGH */
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -292,6 +323,12 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
#endif /* _KERNEL/!_KERNEL */
sbuf_printf(&sb, "\n");
break;
+ case XPT_SMP_IO:
+ smp_command_sbuf(&ccb->smpio, &sb, path_str, 79 -
+ strlen(path_str), (proto_flags &
+ CAM_ESMF_PRINT_FULL_CMD) ? 79 : 0);
+ sbuf_printf(&sb, "\n");
+ break;
default:
break;
}
@@ -358,6 +395,19 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
#endif /* _KERNEL/!_KERNEL */
}
break;
+ case XPT_SMP_IO:
+ if ((ccb->ccb_h.status & CAM_STATUS_MASK) !=
+ CAM_SMP_STATUS_ERROR)
+ break;
+
+ if (proto_flags & CAM_ESF_PRINT_STATUS) {
+ sbuf_cat(&sb, path_str);
+ sbuf_printf(&sb, "SMP status: %s (%#x)\n",
+ smp_error_desc(ccb->smpio.smp_response[2]),
+ ccb->smpio.smp_response[2]);
+ }
+ /* There is no SMP equivalent to SCSI sense. */
+ break;
default:
break;
}
diff --git a/freebsd/sys/cam/cam.h b/freebsd/sys/cam/cam.h
index 8ea1d04c..af57f1a6 100644
--- a/freebsd/sys/cam/cam.h
+++ b/freebsd/sys/cam/cam.h
@@ -113,6 +113,15 @@ typedef enum {
CAM_RETRY_SELTO = 0x02 /* Retry Selection Timeouts */
} cam_flags;
+enum {
+ SF_RETRY_UA = 0x01, /* Retry UNIT ATTENTION conditions. */
+ SF_NO_PRINT = 0x02, /* Never print error status. */
+ SF_QUIET_IR = 0x04, /* Be quiet about Illegal Request responses */
+ SF_PRINT_ALWAYS = 0x08, /* Always print error status. */
+ SF_NO_RECOVERY = 0x10, /* Don't do active error recovery. */
+ SF_NO_RETRY = 0x20 /* Don't do any retries. */
+};
+
/* CAM Status field values */
typedef enum {
CAM_REQ_INPROG, /* CCB request is in progress */
@@ -152,6 +161,7 @@ typedef enum {
*/
CAM_ATA_STATUS_ERROR, /* ATA error, look at error code in CCB */
CAM_SCSI_IT_NEXUS_LOST, /* Initiator/Target Nexus lost. */
+ CAM_SMP_STATUS_ERROR, /* SMP error, look at error code in CCB */
CAM_IDE = 0x33, /* Initiator Detected Error */
CAM_RESRC_UNAVAIL, /* Resource Unavailable */
CAM_UNACKED_EVENT, /* Unacknowledged Event by Host */
@@ -203,6 +213,12 @@ typedef enum {
} cam_error_scsi_flags;
typedef enum {
+ CAM_ESMF_PRINT_NONE = 0x00,
+ CAM_ESMF_PRINT_STATUS = 0x10,
+ CAM_ESMF_PRINT_FULL_CMD = 0x20,
+} cam_error_smp_flags;
+
+typedef enum {
CAM_EAF_PRINT_NONE = 0x00,
CAM_EAF_PRINT_STATUS = 0x10,
CAM_EAF_PRINT_RESULT = 0x20
@@ -216,6 +232,9 @@ struct cam_status_entry
extern const struct cam_status_entry cam_status_table[];
extern const int num_cam_status_entries;
+#ifdef _KERNEL
+extern int cam_sort_io_queues;
+#endif
union ccb;
#ifdef SYSCTL_DECL /* from sysctl.h */
diff --git a/freebsd/sys/cam/cam_ccb.h b/freebsd/sys/cam/cam_ccb.h
index ced8cca1..893c6469 100644
--- a/freebsd/sys/cam/cam_ccb.h
+++ b/freebsd/sys/cam/cam_ccb.h
@@ -67,13 +67,19 @@ typedef enum {
* Perform transport negotiation
* with this command.
*/
- CAM_SCATTER_VALID = 0x00000010,/* Scatter/gather list is valid */
+ CAM_DATA_ISPHYS = 0x00200000,/* Data type with physical addrs */
CAM_DIS_AUTOSENSE = 0x00000020,/* Disable autosense feature */
- CAM_DIR_RESV = 0x00000000,/* Data direction (00:reserved) */
+ CAM_DIR_BOTH = 0x00000000,/* Data direction (00:IN/OUT) */
CAM_DIR_IN = 0x00000040,/* Data direction (01:DATA IN) */
CAM_DIR_OUT = 0x00000080,/* Data direction (10:DATA OUT) */
CAM_DIR_NONE = 0x000000C0,/* Data direction (11:no data) */
CAM_DIR_MASK = 0x000000C0,/* Data direction Mask */
+ CAM_DATA_VADDR = 0x00000000,/* Data type (000:Virtual) */
+ CAM_DATA_PADDR = 0x00200000,/* Data type (001:Physical) */
+ CAM_DATA_SG = 0x00000010,/* Data type (010:sglist) */
+ CAM_DATA_SG_PADDR = 0x00200010,/* Data type (011:sglist phys) */
+ CAM_DATA_BIO = 0x00040000,/* Data type (100:bio) */
+ CAM_DATA_MASK = 0x00240010,/* Data type mask */
CAM_SOFT_RST_OP = 0x00000100,/* Use Soft reset alternative */
CAM_ENG_SYNC = 0x00000200,/* Flush resid bytes on complete */
CAM_DEV_QFRZDIS = 0x00000400,/* Disable DEV Q freezing */
@@ -84,13 +90,16 @@ typedef enum {
CAM_TAG_ACTION_VALID = 0x00008000,/* Use the tag action in this ccb*/
CAM_PASS_ERR_RECOVER = 0x00010000,/* Pass driver does err. recovery*/
CAM_DIS_DISCONNECT = 0x00020000,/* Disable disconnect */
- CAM_SG_LIST_PHYS = 0x00040000,/* SG list has physical addrs. */
CAM_MSG_BUF_PHYS = 0x00080000,/* Message buffer ptr is physical*/
CAM_SNS_BUF_PHYS = 0x00100000,/* Autosense data ptr is physical*/
- CAM_DATA_PHYS = 0x00200000,/* SG/Buffer data ptrs are phys. */
CAM_CDB_PHYS = 0x00400000,/* CDB poiner is physical */
CAM_ENG_SGLIST = 0x00800000,/* SG list is for the HBA engine */
+/* Compatibility for FreeBSD 9.x*/
+ CAM_SCATTER_VALID = 0x00000010,/* These exist for src compat for*/
+ CAM_SG_LIST_PHYS = 0x00200010,/* old drivers. Hardly anything */
+ CAM_DATA_PHYS = 0x00200000,/* uses them. */
+
/* Phase cognizant mode flags */
CAM_DIS_AUTOSRP = 0x01000000,/* Disable autosave/restore ptrs */
CAM_DIS_AUTODISC = 0x02000000,/* Disable auto disconnect */
@@ -99,7 +108,7 @@ typedef enum {
CAM_MSGB_VALID = 0x10000000,/* Message buffer valid */
CAM_STATUS_VALID = 0x20000000,/* Status buffer valid */
CAM_DATAB_VALID = 0x40000000,/* Data buffer valid */
-
+
/* Host target Mode flags */
CAM_SEND_SENSE = 0x08000000,/* Send sense data with status */
CAM_TERM_IO = 0x10000000,/* Terminate I/O Message sup. */
@@ -147,6 +156,8 @@ typedef enum {
/* Device statistics (error counts, etc.) */
XPT_FREEZE_QUEUE = 0x0d,
/* Freeze device queue */
+ XPT_DEV_ADVINFO = 0x0e,
+ /* Get/Set Device advanced information */
/* SCSI Control Functions: 0x10->0x1F */
XPT_ABORT = 0x10,
/* Abort the specified CCB */
@@ -188,6 +199,9 @@ typedef enum {
* Set SIM specific knob values.
*/
+ XPT_SMP_IO = 0x1b | XPT_FC_DEV_QUEUED,
+ /* Serial Management Protocol */
+
XPT_SCAN_TGT = 0x1E | XPT_FC_QUEUED | XPT_FC_USER_CCB
| XPT_FC_XPT_ONLY,
/* Scan Target */
@@ -240,6 +254,7 @@ typedef enum {
PROTO_ATA, /* AT Attachment */
PROTO_ATAPI, /* AT Attachment Packetized Interface */
PROTO_SATAPM, /* SATA Port Multiplier */
+ PROTO_SEMB, /* SATA Enclosure Management Bridge */
} cam_proto;
typedef enum {
@@ -399,15 +414,24 @@ typedef enum {
DEV_MATCH_TARGET = 0x002,
DEV_MATCH_LUN = 0x004,
DEV_MATCH_INQUIRY = 0x008,
+ DEV_MATCH_DEVID = 0x010,
DEV_MATCH_ANY = 0x00f
} dev_pattern_flags;
+struct device_id_match_pattern {
+ uint8_t id_len;
+ uint8_t id[256];
+};
+
struct device_match_pattern {
- path_id_t path_id;
- target_id_t target_id;
- lun_id_t target_lun;
- struct scsi_static_inquiry_pattern inq_pat;
- dev_pattern_flags flags;
+ path_id_t path_id;
+ target_id_t target_id;
+ lun_id_t target_lun;
+ dev_pattern_flags flags;
+ union {
+ struct scsi_static_inquiry_pattern inq_pat;
+ struct device_id_match_pattern devid_pat;
+ } data;
};
typedef enum {
@@ -538,7 +562,7 @@ struct ccb_dev_match {
/*
* Definitions for the path inquiry CCB fields.
*/
-#define CAM_VERSION 0x15 /* Hex value for current version */
+#define CAM_VERSION 0x17 /* Hex value for current version */
typedef enum {
PI_MDP_ABLE = 0x80, /* Supports MDP message */
@@ -566,7 +590,8 @@ typedef enum {
PIM_NOINITIATOR = 0x20, /* Initiator role not supported. */
PIM_NOBUSRESET = 0x10, /* User has disabled initial BUS RESET */
PIM_NO_6_BYTE = 0x08, /* Do not send 6-byte commands */
- PIM_SEQSCAN = 0x04 /* Do bus scans sequentially, not in parallel */
+ PIM_SEQSCAN = 0x04, /* Do bus scans sequentially, not in parallel */
+ PIM_UNMAPPED = 0x02,
} pi_miscflag;
/* Path Inquiry CCB */
@@ -617,6 +642,10 @@ struct ccb_pathinq {
char ccb_pathinq_settings_opaque[PATHINQ_SETTINGS_SIZE];
} xport_specific;
u_int maxio; /* Max supported I/O size, in bytes. */
+ u_int16_t hba_vendor; /* HBA vendor ID */
+ u_int16_t hba_device; /* HBA device ID */
+ u_int16_t hba_subvendor; /* HBA subvendor ID */
+ u_int16_t hba_subdevice; /* HBA subdevice ID */
};
/* Path Statistics CCB */
@@ -625,6 +654,32 @@ struct ccb_pathstats {
struct timeval last_reset; /* Time of last bus reset/loop init */
};
+typedef enum {
+ SMP_FLAG_NONE = 0x00,
+ SMP_FLAG_REQ_SG = 0x01,
+ SMP_FLAG_RSP_SG = 0x02
+} ccb_smp_pass_flags;
+
+/*
+ * Serial Management Protocol CCB
+ * XXX Currently the semantics for this CCB are that it is executed either
+ * by the addressed device, or that device's parent (i.e. an expander for
+ * any device on an expander) if the addressed device doesn't support SMP.
+ * Later, once we have the ability to probe SMP-only devices and put them
+ * in CAM's topology, the CCB will only be executed by the addressed device
+ * if possible.
+ */
+struct ccb_smpio {
+ struct ccb_hdr ccb_h;
+ uint8_t *smp_request;
+ int smp_request_len;
+ uint16_t smp_request_sglist_cnt;
+ uint8_t *smp_response;
+ int smp_response_len;
+ uint16_t smp_response_sglist_cnt;
+ ccb_smp_pass_flags flags;
+};
+
typedef union {
u_int8_t *sense_ptr; /*
* Pointer to storage
@@ -729,6 +784,8 @@ struct ccb_relsim {
* Definitions for the asynchronous callback CCB fields.
*/
typedef enum {
+ AC_UNIT_ATTENTION = 0x4000,/* Device reported UNIT ATTENTION */
+ AC_ADVINFO_CHANGED = 0x2000,/* Advance info might have changes */
AC_CONTRACT = 0x1000,/* A contractual callback */
AC_GETDEV_CHANGED = 0x800,/* Getdev info might have changed */
AC_INQ_CHANGED = 0x400,/* Inquiry info might have changed */
@@ -868,9 +925,14 @@ struct ccb_trans_settings_pata {
#define CTS_ATA_VALID_MODE 0x01
#define CTS_ATA_VALID_BYTECOUNT 0x02
#define CTS_ATA_VALID_ATAPI 0x20
+#define CTS_ATA_VALID_CAPS 0x40
int mode; /* Mode */
u_int bytecount; /* Length of PIO transaction */
u_int atapi; /* Length of ATAPI CDB */
+ u_int caps; /* Device and host SATA caps. */
+#define CTS_ATA_CAPS_H 0x0000ffff
+#define CTS_ATA_CAPS_H_DMA48 0x00000001 /* 48-bit DMA */
+#define CTS_ATA_CAPS_D 0xffff0000
};
struct ccb_trans_settings_sata {
@@ -1091,6 +1153,28 @@ struct ccb_eng_exec { /* This structure must match SCSIIO size */
#define XPT_CCB_INVALID -1 /* for signaling a bad CCB to free */
/*
+ * CCB for working with advanced device information. This operates in a fashion
+ * similar to XPT_GDEV_TYPE. Specify the target in ccb_h, the buffer
+ * type requested, and provide a buffer size/buffer to write to. If the
+ * buffer is too small, provsiz will be larger than bufsiz.
+ */
+struct ccb_dev_advinfo {
+ struct ccb_hdr ccb_h;
+ uint32_t flags;
+#define CDAI_FLAG_STORE 0x1 /* If set, action becomes store */
+ uint32_t buftype; /* IN: Type of data being requested */
+ /* NB: buftype is interpreted on a per-transport basis */
+#define CDAI_TYPE_SCSI_DEVID 1
+#define CDAI_TYPE_SERIAL_NUM 2
+#define CDAI_TYPE_PHYS_PATH 3
+#define CDAI_TYPE_RCAPLONG 4
+ off_t bufsiz; /* IN: Size of external buffer */
+#define CAM_SCSI_DEVID_MAXLEN 65536 /* length in buffer is an uint16_t */
+ off_t provsiz; /* OUT: Size required/used */
+ uint8_t *buf; /* IN/OUT: Buffer for requested data */
+};
+
+/*
* Union of all CCB types for kernel space allocation. This union should
* never be used for manipulating CCBs - its only use is for the allocation
* and deallocation of raw CCB space and is the return type of xpt_ccb_alloc
@@ -1124,9 +1208,11 @@ union ccb {
struct ccb_notify_acknowledge cna2;
struct ccb_eng_inq cei;
struct ccb_eng_exec cee;
+ struct ccb_smpio smpio;
struct ccb_rescan crcn;
struct ccb_debug cdbg;
struct ccb_ataio ataio;
+ struct ccb_dev_advinfo cdai;
};
__BEGIN_DECLS
@@ -1153,6 +1239,13 @@ cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries,
u_int32_t timeout);
static __inline void
+cam_fill_smpio(struct ccb_smpio *smpio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags,
+ uint8_t *smp_request, int smp_request_len,
+ uint8_t *smp_response, int smp_response_len,
+ uint32_t timeout);
+
+static __inline void
cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int32_t flags, u_int8_t tag_action,
@@ -1209,6 +1302,32 @@ cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries,
ataio->tag_action = tag_action;
}
+static __inline void
+cam_fill_smpio(struct ccb_smpio *smpio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags,
+ uint8_t *smp_request, int smp_request_len,
+ uint8_t *smp_response, int smp_response_len,
+ uint32_t timeout)
+{
+#ifdef _KERNEL
+ KASSERT((flags & CAM_DIR_MASK) == CAM_DIR_BOTH,
+ ("direction != CAM_DIR_BOTH"));
+ KASSERT((smp_request != NULL) && (smp_response != NULL),
+ ("need valid request and response buffers"));
+ KASSERT((smp_request_len != 0) && (smp_response_len != 0),
+ ("need non-zero request and response lengths"));
+#endif /*_KERNEL*/
+ smpio->ccb_h.func_code = XPT_SMP_IO;
+ smpio->ccb_h.flags = flags;
+ smpio->ccb_h.retry_count = retries;
+ smpio->ccb_h.cbfcnp = cbfcnp;
+ smpio->ccb_h.timeout = timeout;
+ smpio->smp_request = smp_request;
+ smpio->smp_request_len = smp_request_len;
+ smpio->smp_response = smp_response;
+ smpio->smp_response_len = smp_response_len;
+}
+
void cam_calc_geometry(struct ccb_calc_geometry *ccg, int extended);
__END_DECLS
diff --git a/freebsd/sys/cam/cam_periph.h b/freebsd/sys/cam/cam_periph.h
index c68fc9ba..102dc3c3 100644
--- a/freebsd/sys/cam/cam_periph.h
+++ b/freebsd/sys/cam/cam_periph.h
@@ -36,6 +36,8 @@
#ifdef _KERNEL
+#include <cam/cam_xpt.h>
+
struct devstat;
extern struct cam_periph *xpt_periph;
@@ -118,7 +120,6 @@ struct cam_periph {
#define CAM_PERIPH_INVALID 0x08
#define CAM_PERIPH_NEW_DEV_FOUND 0x10
#define CAM_PERIPH_RECOVERY_INPROG 0x20
-#define CAM_PERIPH_SENSE_INPROG 0x40
#define CAM_PERIPH_FREE 0x80
u_int32_t immediate_priority;
u_int32_t refcount;
@@ -143,6 +144,7 @@ cam_status cam_periph_alloc(periph_ctor_t *periph_ctor,
char *name, cam_periph_type type, struct cam_path *,
ac_callback_t *, ac_code, void *arg);
struct cam_periph *cam_periph_find(struct cam_path *path, char *name);
+int cam_periph_list(struct cam_path *, struct sbuf *);
cam_status cam_periph_acquire(struct cam_periph *periph);
void cam_periph_release(struct cam_periph *periph);
void cam_periph_release_locked(struct cam_periph *periph);
@@ -202,5 +204,49 @@ cam_periph_owned(struct cam_periph *periph)
return (mtx_owned(periph->sim->mtx));
}
+static __inline int
+cam_periph_sleep(struct cam_periph *periph, void *chan, int priority,
+ const char *wmesg, int timo)
+{
+ return (msleep(chan, periph->sim->mtx, priority, wmesg, timo));
+}
+
+static inline struct cam_periph *
+cam_periph_acquire_first(struct periph_driver *driver)
+{
+ struct cam_periph *periph;
+
+ xpt_lock_buses();
+ periph = TAILQ_FIRST(&driver->units);
+ while (periph != NULL && (periph->flags & CAM_PERIPH_INVALID) != 0)
+ periph = TAILQ_NEXT(periph, unit_links);
+ if (periph != NULL)
+ periph->refcount++;
+ xpt_unlock_buses();
+ return (periph);
+}
+
+static inline struct cam_periph *
+cam_periph_acquire_next(struct cam_periph *pperiph)
+{
+ struct cam_periph *periph = pperiph;
+
+ mtx_assert(pperiph->sim->mtx, MA_NOTOWNED);
+ xpt_lock_buses();
+ do {
+ periph = TAILQ_NEXT(periph, unit_links);
+ } while (periph != NULL && (periph->flags & CAM_PERIPH_INVALID) != 0);
+ if (periph != NULL)
+ periph->refcount++;
+ xpt_unlock_buses();
+ cam_periph_release(pperiph);
+ return (periph);
+}
+
+#define CAM_PERIPH_FOREACH(periph, driver) \
+ for ((periph) = cam_periph_acquire_first(driver); \
+ (periph) != NULL; \
+ (periph) = cam_periph_acquire_next(periph))
+
#endif /* _KERNEL */
#endif /* _CAM_CAM_PERIPH_H */
diff --git a/freebsd/sys/cam/cam_sim.h b/freebsd/sys/cam/cam_sim.h
index ce6b38fc..ba0ac18f 100644
--- a/freebsd/sys/cam/cam_sim.h
+++ b/freebsd/sys/cam/cam_sim.h
@@ -32,6 +32,11 @@
#define _CAM_CAM_SIM_H 1
#ifdef _KERNEL
+#ifdef __rtems__
+#include <rtems/bsd/sys/param.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#endif /* __rtems__ */
/*
* The sim driver creates a sim for each controller. The sim device
@@ -160,8 +165,8 @@ struct cam_sim {
};
-#define CAM_SIM_LOCK(sim) mtx_lock((sim)->mtx);
-#define CAM_SIM_UNLOCK(sim) mtx_unlock((sim)->mtx);
+#define CAM_SIM_LOCK(sim) mtx_lock((sim)->mtx)
+#define CAM_SIM_UNLOCK(sim) mtx_unlock((sim)->mtx)
static __inline u_int32_t
cam_sim_path(struct cam_sim *sim)
diff --git a/freebsd/sys/cam/cam_xpt.h b/freebsd/sys/cam/cam_xpt.h
index c716a6ec..492fa3a4 100644
--- a/freebsd/sys/cam/cam_xpt.h
+++ b/freebsd/sys/cam/cam_xpt.h
@@ -81,6 +81,8 @@ cam_status xpt_create_path_unlocked(struct cam_path **new_path_ptr,
struct cam_periph *perph,
path_id_t path_id,
target_id_t target_id, lun_id_t lun_id);
+int xpt_getattr(char *buf, size_t len, const char *attr,
+ struct cam_path *path);
void xpt_free_path(struct cam_path *path);
void xpt_path_counts(struct cam_path *path, uint32_t *bus_ref,
uint32_t *periph_ref, uint32_t *target_ref,
diff --git a/freebsd/sys/cam/cam_xpt_sim.h b/freebsd/sys/cam/cam_xpt_sim.h
index 67b895f2..d32eea71 100644
--- a/freebsd/sys/cam/cam_xpt_sim.h
+++ b/freebsd/sys/cam/cam_xpt_sim.h
@@ -42,7 +42,11 @@ int32_t xpt_bus_register(struct cam_sim *sim, device_t parent,
int32_t xpt_bus_deregister(path_id_t path_id);
u_int32_t xpt_freeze_simq(struct cam_sim *sim, u_int count);
void xpt_release_simq(struct cam_sim *sim, int run_queue);
+#ifndef __rtems__
u_int32_t xpt_freeze_devq(struct cam_path *path, u_int count);
+#else /* __rtems__ */
+#define xpt_freeze_devq(path, count) do { } while (0)
+#endif /* __rtems__ */
u_int32_t xpt_freeze_devq_rl(struct cam_path *path, cam_rl rl,
u_int count);
void xpt_release_devq(struct cam_path *path,
diff --git a/freebsd/sys/cam/scsi/scsi_all.c b/freebsd/sys/cam/scsi/scsi_all.c
index 151ebb10..5b504010 100644
--- a/freebsd/sys/cam/scsi/scsi_all.c
+++ b/freebsd/sys/cam/scsi/scsi_all.c
@@ -33,6 +33,8 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/types.h>
+#include <sys/stdint.h>
#ifdef _KERNEL
#ifndef __rtems__
@@ -44,6 +46,9 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/libkern.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/sysctl.h>
#else
#include <errno.h>
@@ -57,9 +62,17 @@ __FBSDID("$FreeBSD$");
#include <cam/cam_queue.h>
#include <cam/cam_xpt.h>
#include <cam/scsi/scsi_all.h>
+#include <sys/ata.h>
#include <sys/sbuf.h>
-#ifndef _KERNEL
+
+#ifdef _KERNEL
+#include <cam/cam_periph.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_xpt_periph.h>
+#include <cam/cam_xpt_internal.h>
+#else
#include <camlib.h>
+#include <stddef.h>
#ifndef FALSE
#define FALSE 0
@@ -368,6 +381,8 @@ static struct op_table_entry scsi_op_codes[] = {
{ 0x40, D | T | L | P | W | R | O | M | S | C, "CHANGE DEFINITION" },
/* 41 O WRITE SAME(10) */
{ 0x41, D, "WRITE SAME(10)" },
+ /* 42 O UNMAP */
+ { 0x42, D, "UNMAP" },
/* 42 O READ SUB-CHANNEL */
{ 0x42, R, "READ SUB-CHANNEL" },
/* 43 O READ TOC/PMA/ATIP */
@@ -615,14 +630,24 @@ scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data)
struct op_table_entry *table[2];
int num_tables;
- pd_type = SID_TYPE(inq_data);
+ /*
+ * If we've got inquiry data, use it to determine what type of
+ * device we're dealing with here. Otherwise, assume direct
+ * access.
+ */
+ if (inq_data == NULL) {
+ pd_type = T_DIRECT;
+ match = NULL;
+ } else {
+ pd_type = SID_TYPE(inq_data);
- match = cam_quirkmatch((caddr_t)inq_data,
- (caddr_t)scsi_op_quirk_table,
- sizeof(scsi_op_quirk_table)/
- sizeof(*scsi_op_quirk_table),
- sizeof(*scsi_op_quirk_table),
- scsi_inquiry_match);
+ match = cam_quirkmatch((caddr_t)inq_data,
+ (caddr_t)scsi_op_quirk_table,
+ sizeof(scsi_op_quirk_table)/
+ sizeof(*scsi_op_quirk_table),
+ sizeof(*scsi_op_quirk_table),
+ scsi_inquiry_match);
+ }
if (match != NULL) {
table[0] = ((struct scsi_op_quirk_entry *)match)->op_table;
@@ -690,10 +715,7 @@ const struct sense_key_table_entry sense_key_table[] =
{
{ SSD_KEY_NO_SENSE, SS_NOP, "NO SENSE" },
{ SSD_KEY_RECOVERED_ERROR, SS_NOP|SSQ_PRINT_SENSE, "RECOVERED ERROR" },
- {
- SSD_KEY_NOT_READY, SS_TUR|SSQ_MANY|SSQ_DECREMENT_COUNT|EBUSY,
- "NOT READY"
- },
+ { SSD_KEY_NOT_READY, SS_RDEF, "NOT READY" },
{ SSD_KEY_MEDIUM_ERROR, SS_RDEF, "MEDIUM ERROR" },
{ SSD_KEY_HARDWARE_ERROR, SS_RDEF, "HARDWARE FAILURE" },
{ SSD_KEY_ILLEGAL_REQUEST, SS_FATAL|EINVAL, "ILLEGAL REQUEST" },
@@ -706,7 +728,7 @@ const struct sense_key_table_entry sense_key_table[] =
{ SSD_KEY_EQUAL, SS_NOP, "EQUAL" },
{ SSD_KEY_VOLUME_OVERFLOW, SS_FATAL|EIO, "VOLUME OVERFLOW" },
{ SSD_KEY_MISCOMPARE, SS_NOP, "MISCOMPARE" },
- { SSD_KEY_RESERVED, SS_FATAL|EIO, "RESERVED" }
+ { SSD_KEY_COMPLETED, SS_NOP, "COMPLETED" }
};
const int sense_key_table_size =
@@ -722,6 +744,172 @@ static struct asc_table_entry sony_mo_entries[] = {
"Logical unit not ready, cause not reportable") }
};
+static struct asc_table_entry hgst_entries[] = {
+ { SST(0x04, 0xF0, SS_RDEF,
+ "Vendor Unique - Logical Unit Not Ready") },
+ { SST(0x0A, 0x01, SS_RDEF,
+ "Unrecovered Super Certification Log Write Error") },
+ { SST(0x0A, 0x02, SS_RDEF,
+ "Unrecovered Super Certification Log Read Error") },
+ { SST(0x15, 0x03, SS_RDEF,
+ "Unrecovered Sector Error") },
+ { SST(0x3E, 0x04, SS_RDEF,
+ "Unrecovered Self-Test Hard-Cache Test Fail") },
+ { SST(0x3E, 0x05, SS_RDEF,
+ "Unrecovered Self-Test OTF-Cache Fail") },
+ { SST(0x40, 0x00, SS_RDEF,
+ "Unrecovered SAT No Buffer Overflow Error") },
+ { SST(0x40, 0x01, SS_RDEF,
+ "Unrecovered SAT Buffer Overflow Error") },
+ { SST(0x40, 0x02, SS_RDEF,
+ "Unrecovered SAT No Buffer Overflow With ECS Fault") },
+ { SST(0x40, 0x03, SS_RDEF,
+ "Unrecovered SAT Buffer Overflow With ECS Fault") },
+ { SST(0x40, 0x81, SS_RDEF,
+ "DRAM Failure") },
+ { SST(0x44, 0x0B, SS_RDEF,
+ "Vendor Unique - Internal Target Failure") },
+ { SST(0x44, 0xF2, SS_RDEF,
+ "Vendor Unique - Internal Target Failure") },
+ { SST(0x44, 0xF6, SS_RDEF,
+ "Vendor Unique - Internal Target Failure") },
+ { SST(0x44, 0xF9, SS_RDEF,
+ "Vendor Unique - Internal Target Failure") },
+ { SST(0x44, 0xFA, SS_RDEF,
+ "Vendor Unique - Internal Target Failure") },
+ { SST(0x5D, 0x22, SS_RDEF,
+ "Extreme Over-Temperature Warning") },
+ { SST(0x5D, 0x50, SS_RDEF,
+ "Load/Unload cycle Count Warning") },
+ { SST(0x81, 0x00, SS_RDEF,
+ "Vendor Unique - Internal Logic Error") },
+ { SST(0x85, 0x00, SS_RDEF,
+ "Vendor Unique - Internal Key Seed Error") },
+};
+
+static struct asc_table_entry seagate_entries[] = {
+ { SST(0x04, 0xF0, SS_RDEF,
+ "Logical Unit Not Ready, super certify in Progress") },
+ { SST(0x08, 0x86, SS_RDEF,
+ "Write Fault Data Corruption") },
+ { SST(0x09, 0x0D, SS_RDEF,
+ "Tracking Failure") },
+ { SST(0x09, 0x0E, SS_RDEF,
+ "ETF Failure") },
+ { SST(0x0B, 0x5D, SS_RDEF,
+ "Pre-SMART Warning") },
+ { SST(0x0B, 0x85, SS_RDEF,
+ "5V Voltage Warning") },
+ { SST(0x0B, 0x8C, SS_RDEF,
+ "12V Voltage Warning") },
+ { SST(0x0C, 0xFF, SS_RDEF,
+ "Write Error - Too many error recovery revs") },
+ { SST(0x11, 0xFF, SS_RDEF,
+ "Unrecovered Read Error - Too many error recovery revs") },
+ { SST(0x19, 0x0E, SS_RDEF,
+ "Fewer than 1/2 defect list copies") },
+ { SST(0x20, 0xF3, SS_RDEF,
+ "Illegal CDB linked to skip mask cmd") },
+ { SST(0x24, 0xF0, SS_RDEF,
+ "Illegal byte in CDB, LBA not matching") },
+ { SST(0x24, 0xF1, SS_RDEF,
+ "Illegal byte in CDB, LEN not matching") },
+ { SST(0x24, 0xF2, SS_RDEF,
+ "Mask not matching transfer length") },
+ { SST(0x24, 0xF3, SS_RDEF,
+ "Drive formatted without plist") },
+ { SST(0x26, 0x95, SS_RDEF,
+ "Invalid Field Parameter - CAP File") },
+ { SST(0x26, 0x96, SS_RDEF,
+ "Invalid Field Parameter - RAP File") },
+ { SST(0x26, 0x97, SS_RDEF,
+ "Invalid Field Parameter - TMS Firmware Tag") },
+ { SST(0x26, 0x98, SS_RDEF,
+ "Invalid Field Parameter - Check Sum") },
+ { SST(0x26, 0x99, SS_RDEF,
+ "Invalid Field Parameter - Firmware Tag") },
+ { SST(0x29, 0x08, SS_RDEF,
+ "Write Log Dump data") },
+ { SST(0x29, 0x09, SS_RDEF,
+ "Write Log Dump data") },
+ { SST(0x29, 0x0A, SS_RDEF,
+ "Reserved disk space") },
+ { SST(0x29, 0x0B, SS_RDEF,
+ "SDBP") },
+ { SST(0x29, 0x0C, SS_RDEF,
+ "SDBP") },
+ { SST(0x31, 0x91, SS_RDEF,
+ "Format Corrupted World Wide Name (WWN) is Invalid") },
+ { SST(0x32, 0x03, SS_RDEF,
+ "Defect List - Length exceeds Command Allocated Length") },
+ { SST(0x33, 0x00, SS_RDEF,
+ "Flash not ready for access") },
+ { SST(0x3F, 0x70, SS_RDEF,
+ "Invalid RAP block") },
+ { SST(0x3F, 0x71, SS_RDEF,
+ "RAP/ETF mismatch") },
+ { SST(0x3F, 0x90, SS_RDEF,
+ "Invalid CAP block") },
+ { SST(0x3F, 0x91, SS_RDEF,
+ "World Wide Name (WWN) Mismatch") },
+ { SST(0x40, 0x01, SS_RDEF,
+ "DRAM Parity Error") },
+ { SST(0x40, 0x02, SS_RDEF,
+ "DRAM Parity Error") },
+ { SST(0x42, 0x0A, SS_RDEF,
+ "Loopback Test") },
+ { SST(0x42, 0x0B, SS_RDEF,
+ "Loopback Test") },
+ { SST(0x44, 0xF2, SS_RDEF,
+ "Compare error during data integrity check") },
+ { SST(0x44, 0xF6, SS_RDEF,
+ "Unrecoverable error during data integrity check") },
+ { SST(0x47, 0x80, SS_RDEF,
+ "Fibre Channel Sequence Error") },
+ { SST(0x4E, 0x01, SS_RDEF,
+ "Information Unit Too Short") },
+ { SST(0x80, 0x00, SS_RDEF,
+ "General Firmware Error / Command Timeout") },
+ { SST(0x80, 0x01, SS_RDEF,
+ "Command Timeout") },
+ { SST(0x80, 0x02, SS_RDEF,
+ "Command Timeout") },
+ { SST(0x80, 0x80, SS_RDEF,
+ "FC FIFO Error During Read Transfer") },
+ { SST(0x80, 0x81, SS_RDEF,
+ "FC FIFO Error During Write Transfer") },
+ { SST(0x80, 0x82, SS_RDEF,
+ "DISC FIFO Error During Read Transfer") },
+ { SST(0x80, 0x83, SS_RDEF,
+ "DISC FIFO Error During Write Transfer") },
+ { SST(0x80, 0x84, SS_RDEF,
+ "LBA Seeded LRC Error on Read") },
+ { SST(0x80, 0x85, SS_RDEF,
+ "LBA Seeded LRC Error on Write") },
+ { SST(0x80, 0x86, SS_RDEF,
+ "IOEDC Error on Read") },
+ { SST(0x80, 0x87, SS_RDEF,
+ "IOEDC Error on Write") },
+ { SST(0x80, 0x88, SS_RDEF,
+ "Host Parity Check Failed") },
+ { SST(0x80, 0x89, SS_RDEF,
+ "IOEDC error on read detected by formatter") },
+ { SST(0x80, 0x8A, SS_RDEF,
+ "Host Parity Errors / Host FIFO Initialization Failed") },
+ { SST(0x80, 0x8B, SS_RDEF,
+ "Host Parity Errors") },
+ { SST(0x80, 0x8C, SS_RDEF,
+ "Host Parity Errors") },
+ { SST(0x80, 0x8D, SS_RDEF,
+ "Host Parity Errors") },
+ { SST(0x81, 0x00, SS_RDEF,
+ "LA Check Failed") },
+ { SST(0x82, 0x00, SS_RDEF,
+ "Internal client detected insufficient buffer") },
+ { SST(0x84, 0x00, SS_RDEF,
+ "Scheduled Diagnostic And Repair") },
+};
+
static struct scsi_sense_quirk_entry sense_quirk_table[] = {
{
/*
@@ -744,6 +932,26 @@ static struct scsi_sense_quirk_entry sense_quirk_table[] = {
sizeof(sony_mo_entries)/sizeof(struct asc_table_entry),
/*sense key entries*/NULL,
sony_mo_entries
+ },
+ {
+ /*
+ * HGST vendor-specific error codes
+ */
+ {T_DIRECT, SIP_MEDIA_FIXED, "HGST", "*", "*"},
+ /*num_sense_keys*/0,
+ sizeof(hgst_entries)/sizeof(struct asc_table_entry),
+ /*sense key entries*/NULL,
+ hgst_entries
+ },
+ {
+ /*
+ * SEAGATE vendor-specific error codes
+ */
+ {T_DIRECT, SIP_MEDIA_FIXED, "SEAGATE", "*", "*"},
+ /*num_sense_keys*/0,
+ sizeof(seagate_entries)/sizeof(struct asc_table_entry),
+ /*sense key entries*/NULL,
+ seagate_entries
}
};
@@ -868,7 +1076,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x03, 0x02, SS_RDEF,
"Excessive write errors") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x00, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EIO,
+ { SST(0x04, 0x00, SS_RDEF,
"Logical unit not ready, cause not reportable") },
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x01, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EBUSY,
@@ -1117,25 +1325,25 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x10, 0x05, SS_RDEF, /* XXX TBD */
"Logical block protection method error") },
/* DT WRO BK */
- { SST(0x11, 0x00, SS_RDEF,
+ { SST(0x11, 0x00, SS_FATAL|EIO,
"Unrecovered read error") },
/* DT WRO BK */
- { SST(0x11, 0x01, SS_RDEF,
+ { SST(0x11, 0x01, SS_FATAL|EIO,
"Read retries exhausted") },
/* DT WRO BK */
- { SST(0x11, 0x02, SS_RDEF,
+ { SST(0x11, 0x02, SS_FATAL|EIO,
"Error too long to correct") },
/* DT W O BK */
- { SST(0x11, 0x03, SS_RDEF,
+ { SST(0x11, 0x03, SS_FATAL|EIO,
"Multiple read errors") },
/* D W O BK */
- { SST(0x11, 0x04, SS_RDEF,
+ { SST(0x11, 0x04, SS_FATAL|EIO,
"Unrecovered read error - auto reallocate failed") },
/* WRO B */
- { SST(0x11, 0x05, SS_RDEF,
+ { SST(0x11, 0x05, SS_FATAL|EIO,
"L-EC uncorrectable error") },
/* WRO B */
- { SST(0x11, 0x06, SS_RDEF,
+ { SST(0x11, 0x06, SS_FATAL|EIO,
"CIRC unrecovered error") },
/* W O B */
{ SST(0x11, 0x07, SS_RDEF,
@@ -1150,10 +1358,10 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x11, 0x0A, SS_RDEF,
"Miscorrected error") },
/* D W O BK */
- { SST(0x11, 0x0B, SS_RDEF,
+ { SST(0x11, 0x0B, SS_FATAL|EIO,
"Unrecovered read error - recommend reassignment") },
/* D W O BK */
- { SST(0x11, 0x0C, SS_RDEF,
+ { SST(0x11, 0x0C, SS_FATAL|EIO,
"Unrecovered read error - recommend rewrite the data") },
/* DT WRO B */
{ SST(0x11, 0x0D, SS_RDEF,
@@ -2968,7 +3176,10 @@ scsi_sense_desc(int sense_key, int asc, int ascq,
&sense_entry,
&asc_entry);
- *sense_key_desc = sense_entry->desc;
+ if (sense_entry != NULL)
+ *sense_key_desc = sense_entry->desc;
+ else
+ *sense_key_desc = "Invalid Sense Key";
if (asc_entry != NULL)
*asc_desc = asc_entry->desc;
@@ -2994,10 +3205,11 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
int error_code, sense_key, asc, ascq;
scsi_sense_action action;
- scsi_extract_sense(&csio->sense_data, &error_code,
- &sense_key, &asc, &ascq);
-
- if (error_code == SSD_DEFERRED_ERROR) {
+ if (!scsi_extract_sense_ccb((union ccb *)csio,
+ &error_code, &sense_key, &asc, &ascq)) {
+ action = SS_RETRY | SSQ_DECREMENT_COUNT | SSQ_PRINT_SENSE | EIO;
+ } else if ((error_code == SSD_DEFERRED_ERROR)
+ || (error_code == SSD_DESC_DEFERRED_ERROR)) {
/*
* XXX dufault@FreeBSD.org
* This error doesn't relate to the command associated
@@ -3035,8 +3247,10 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
if (asc_entry != NULL
&& (asc != 0 || ascq != 0))
action = asc_entry->action;
- else
+ else if (sense_entry != NULL)
action = sense_entry->action;
+ else
+ action = SS_RETRY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE;
if (sense_key == SSD_KEY_RECOVERED_ERROR) {
/*
@@ -3058,10 +3272,15 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
}
}
}
-#ifdef _KERNEL
- if (bootverbose)
- sense_flags |= SF_PRINT_ALWAYS;
-#endif
+ if ((action & SS_MASK) >= SS_START &&
+ (sense_flags & SF_NO_RECOVERY)) {
+ action &= ~SS_MASK;
+ action |= SS_FAIL;
+ } else if ((action & SS_MASK) == SS_RETRY &&
+ (sense_flags & SF_NO_RETRY)) {
+ action &= ~SS_MASK;
+ action |= SS_FAIL;
+ }
if ((sense_flags & SF_PRINT_ALWAYS) != 0)
action |= SSQ_PRINT_SENSE;
else if ((sense_flags & SF_NO_PRINT) != 0)
@@ -3120,7 +3339,7 @@ scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len)
*cdb_string = '\0';
for (i = 0; i < cdb_len; i++)
snprintf(cdb_string + strlen(cdb_string),
- len - strlen(cdb_string), "%x ", cdb_ptr[i]);
+ len - strlen(cdb_string), "%02hhx ", cdb_ptr[i]);
return(cdb_string);
}
@@ -3222,6 +3441,1348 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio,
return(0);
}
+/*
+ * Iterate over sense descriptors. Each descriptor is passed into iter_func().
+ * If iter_func() returns 0, list traversal continues. If iter_func()
+ * returns non-zero, list traversal is stopped.
+ */
+void
+scsi_desc_iterate(struct scsi_sense_data_desc *sense, u_int sense_len,
+ int (*iter_func)(struct scsi_sense_data_desc *sense,
+ u_int, struct scsi_sense_desc_header *,
+ void *), void *arg)
+{
+ int cur_pos;
+ int desc_len;
+
+ /*
+ * First make sure the extra length field is present.
+ */
+ if (SSD_DESC_IS_PRESENT(sense, sense_len, extra_len) == 0)
+ return;
+
+ /*
+ * The length of data actually returned may be different than the
+ * extra_len recorded in the sturcture.
+ */
+ desc_len = sense_len -offsetof(struct scsi_sense_data_desc, sense_desc);
+
+ /*
+ * Limit this further by the extra length reported, and the maximum
+ * allowed extra length.
+ */
+ desc_len = MIN(desc_len, MIN(sense->extra_len, SSD_EXTRA_MAX));
+
+ /*
+ * Subtract the size of the header from the descriptor length.
+ * This is to ensure that we have at least the header left, so we
+ * don't have to check that inside the loop. This can wind up
+ * being a negative value.
+ */
+ desc_len -= sizeof(struct scsi_sense_desc_header);
+
+ for (cur_pos = 0; cur_pos < desc_len;) {
+ struct scsi_sense_desc_header *header;
+
+ header = (struct scsi_sense_desc_header *)
+ &sense->sense_desc[cur_pos];
+
+ /*
+ * Check to make sure we have the entire descriptor. We
+ * don't call iter_func() unless we do.
+ *
+ * Note that although cur_pos is at the beginning of the
+ * descriptor, desc_len already has the header length
+ * subtracted. So the comparison of the length in the
+ * header (which does not include the header itself) to
+ * desc_len - cur_pos is correct.
+ */
+ if (header->length > (desc_len - cur_pos))
+ break;
+
+ if (iter_func(sense, sense_len, header, arg) != 0)
+ break;
+
+ cur_pos += sizeof(*header) + header->length;
+ }
+}
+
+struct scsi_find_desc_info {
+ uint8_t desc_type;
+ struct scsi_sense_desc_header *header;
+};
+
+static int
+scsi_find_desc_func(struct scsi_sense_data_desc *sense, u_int sense_len,
+ struct scsi_sense_desc_header *header, void *arg)
+{
+ struct scsi_find_desc_info *desc_info;
+
+ desc_info = (struct scsi_find_desc_info *)arg;
+
+ if (header->desc_type == desc_info->desc_type) {
+ desc_info->header = header;
+
+ /* We found the descriptor, tell the iterator to stop. */
+ return (1);
+ } else
+ return (0);
+}
+
+/*
+ * Given a descriptor type, return a pointer to it if it is in the sense
+ * data and not truncated. Avoiding truncating sense data will simplify
+ * things significantly for the caller.
+ */
+uint8_t *
+scsi_find_desc(struct scsi_sense_data_desc *sense, u_int sense_len,
+ uint8_t desc_type)
+{
+ struct scsi_find_desc_info desc_info;
+
+ desc_info.desc_type = desc_type;
+ desc_info.header = NULL;
+
+ scsi_desc_iterate(sense, sense_len, scsi_find_desc_func, &desc_info);
+
+ return ((uint8_t *)desc_info.header);
+}
+#endif /* __rtems__ */
+
+/*
+ * Fill in SCSI sense data with the specified parameters. This routine can
+ * fill in either fixed or descriptor type sense data.
+ */
+void
+scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
+ scsi_sense_data_type sense_format, int current_error,
+ int sense_key, int asc, int ascq, va_list ap)
+{
+ int descriptor_sense;
+ scsi_sense_elem_type elem_type;
+
+ /*
+ * Determine whether to return fixed or descriptor format sense
+ * data. If the user specifies SSD_TYPE_NONE for some reason,
+ * they'll just get fixed sense data.
+ */
+ if (sense_format == SSD_TYPE_DESC)
+ descriptor_sense = 1;
+ else
+ descriptor_sense = 0;
+
+ /*
+ * Zero the sense data, so that we don't pass back any garbage data
+ * to the user.
+ */
+ memset(sense_data, 0, sizeof(*sense_data));
+
+ if (descriptor_sense != 0) {
+ struct scsi_sense_data_desc *sense;
+
+ sense = (struct scsi_sense_data_desc *)sense_data;
+ /*
+ * The descriptor sense format eliminates the use of the
+ * valid bit.
+ */
+ if (current_error != 0)
+ sense->error_code = SSD_DESC_CURRENT_ERROR;
+ else
+ sense->error_code = SSD_DESC_DEFERRED_ERROR;
+ sense->sense_key = sense_key;
+ sense->add_sense_code = asc;
+ sense->add_sense_code_qual = ascq;
+ /*
+ * Start off with no extra length, since the above data
+ * fits in the standard descriptor sense information.
+ */
+ sense->extra_len = 0;
+ while ((elem_type = (scsi_sense_elem_type)va_arg(ap,
+ scsi_sense_elem_type)) != SSD_ELEM_NONE) {
+ int sense_len, len_to_copy;
+ uint8_t *data;
+
+ if (elem_type >= SSD_ELEM_MAX) {
+ printf("%s: invalid sense type %d\n", __func__,
+ elem_type);
+ break;
+ }
+
+ sense_len = (int)va_arg(ap, int);
+ len_to_copy = MIN(sense_len, SSD_EXTRA_MAX -
+ sense->extra_len);
+ data = (uint8_t *)va_arg(ap, uint8_t *);
+
+ /*
+ * We've already consumed the arguments for this one.
+ */
+ if (elem_type == SSD_ELEM_SKIP)
+ continue;
+
+ switch (elem_type) {
+ case SSD_ELEM_DESC: {
+
+ /*
+ * This is a straight descriptor. All we
+ * need to do is copy the data in.
+ */
+ bcopy(data, &sense->sense_desc[
+ sense->extra_len], len_to_copy);
+ sense->extra_len += len_to_copy;
+ break;
+ }
+ case SSD_ELEM_SKS: {
+ struct scsi_sense_sks sks;
+
+ bzero(&sks, sizeof(sks));
+
+ /*
+ * This is already-formatted sense key
+ * specific data. We just need to fill out
+ * the header and copy everything in.
+ */
+ bcopy(data, &sks.sense_key_spec,
+ MIN(len_to_copy,
+ sizeof(sks.sense_key_spec)));
+
+ sks.desc_type = SSD_DESC_SKS;
+ sks.length = sizeof(sks) -
+ offsetof(struct scsi_sense_sks, reserved1);
+ bcopy(&sks,&sense->sense_desc[sense->extra_len],
+ sizeof(sks));
+ sense->extra_len += sizeof(sks);
+ break;
+ }
+ case SSD_ELEM_INFO:
+ case SSD_ELEM_COMMAND: {
+ struct scsi_sense_command cmd;
+ struct scsi_sense_info info;
+ uint8_t *data_dest;
+ uint8_t *descriptor;
+ int descriptor_size, i, copy_len;
+
+ bzero(&cmd, sizeof(cmd));
+ bzero(&info, sizeof(info));
+
+ /*
+ * Command or information data. The
+ * operate in pretty much the same way.
+ */
+ if (elem_type == SSD_ELEM_COMMAND) {
+ len_to_copy = MIN(len_to_copy,
+ sizeof(cmd.command_info));
+ descriptor = (uint8_t *)&cmd;
+ descriptor_size = sizeof(cmd);
+ data_dest =(uint8_t *)&cmd.command_info;
+ cmd.desc_type = SSD_DESC_COMMAND;
+ cmd.length = sizeof(cmd) -
+ offsetof(struct scsi_sense_command,
+ reserved);
+ } else {
+ len_to_copy = MIN(len_to_copy,
+ sizeof(info.info));
+ descriptor = (uint8_t *)&info;
+ descriptor_size = sizeof(cmd);
+ data_dest = (uint8_t *)&info.info;
+ info.desc_type = SSD_DESC_INFO;
+ info.byte2 = SSD_INFO_VALID;
+ info.length = sizeof(info) -
+ offsetof(struct scsi_sense_info,
+ byte2);
+ }
+
+ /*
+ * Copy this in reverse because the spec
+ * (SPC-4) says that when 4 byte quantities
+ * are stored in this 8 byte field, the
+ * first four bytes shall be 0.
+ *
+ * So we fill the bytes in from the end, and
+ * if we have less than 8 bytes to copy,
+ * the initial, most significant bytes will
+ * be 0.
+ */
+ for (i = sense_len - 1; i >= 0 &&
+ len_to_copy > 0; i--, len_to_copy--)
+ data_dest[len_to_copy - 1] = data[i];
+
+ /*
+ * This calculation looks much like the
+ * initial len_to_copy calculation, but
+ * we have to do it again here, because
+ * we're looking at a larger amount that
+ * may or may not fit. It's not only the
+ * data the user passed in, but also the
+ * rest of the descriptor.
+ */
+ copy_len = MIN(descriptor_size,
+ SSD_EXTRA_MAX - sense->extra_len);
+ bcopy(descriptor, &sense->sense_desc[
+ sense->extra_len], copy_len);
+ sense->extra_len += copy_len;
+ break;
+ }
+ case SSD_ELEM_FRU: {
+ struct scsi_sense_fru fru;
+ int copy_len;
+
+ bzero(&fru, sizeof(fru));
+
+ fru.desc_type = SSD_DESC_FRU;
+ fru.length = sizeof(fru) -
+ offsetof(struct scsi_sense_fru, reserved);
+ fru.fru = *data;
+
+ copy_len = MIN(sizeof(fru), SSD_EXTRA_MAX -
+ sense->extra_len);
+ bcopy(&fru, &sense->sense_desc[
+ sense->extra_len], copy_len);
+ sense->extra_len += copy_len;
+ break;
+ }
+ case SSD_ELEM_STREAM: {
+ struct scsi_sense_stream stream_sense;
+ int copy_len;
+
+ bzero(&stream_sense, sizeof(stream_sense));
+ stream_sense.desc_type = SSD_DESC_STREAM;
+ stream_sense.length = sizeof(stream_sense) -
+ offsetof(struct scsi_sense_stream, reserved);
+ stream_sense.byte3 = *data;
+
+ copy_len = MIN(sizeof(stream_sense),
+ SSD_EXTRA_MAX - sense->extra_len);
+ bcopy(&stream_sense, &sense->sense_desc[
+ sense->extra_len], copy_len);
+ sense->extra_len += copy_len;
+ break;
+ }
+ default:
+ /*
+ * We shouldn't get here, but if we do, do
+ * nothing. We've already consumed the
+ * arguments above.
+ */
+ break;
+ }
+ }
+ } else {
+ struct scsi_sense_data_fixed *sense;
+
+ sense = (struct scsi_sense_data_fixed *)sense_data;
+
+ if (current_error != 0)
+ sense->error_code = SSD_CURRENT_ERROR;
+ else
+ sense->error_code = SSD_DEFERRED_ERROR;
+
+ sense->flags = sense_key;
+ sense->add_sense_code = asc;
+ sense->add_sense_code_qual = ascq;
+ /*
+ * We've set the ASC and ASCQ, so we have 6 more bytes of
+ * valid data. If we wind up setting any of the other
+ * fields, we'll bump this to 10 extra bytes.
+ */
+ sense->extra_len = 6;
+
+ while ((elem_type = (scsi_sense_elem_type)va_arg(ap,
+ scsi_sense_elem_type)) != SSD_ELEM_NONE) {
+ int sense_len, len_to_copy;
+ uint8_t *data;
+
+ if (elem_type >= SSD_ELEM_MAX) {
+ printf("%s: invalid sense type %d\n", __func__,
+ elem_type);
+ break;
+ }
+ /*
+ * If we get in here, just bump the extra length to
+ * 10 bytes. That will encompass anything we're
+ * going to set here.
+ */
+ sense->extra_len = 10;
+ sense_len = (int)va_arg(ap, int);
+ len_to_copy = MIN(sense_len, SSD_EXTRA_MAX -
+ sense->extra_len);
+ data = (uint8_t *)va_arg(ap, uint8_t *);
+
+ switch (elem_type) {
+ case SSD_ELEM_SKS:
+ /*
+ * The user passed in pre-formatted sense
+ * key specific data.
+ */
+ bcopy(data, &sense->sense_key_spec[0],
+ MIN(sizeof(sense->sense_key_spec),
+ sense_len));
+ break;
+ case SSD_ELEM_INFO:
+ case SSD_ELEM_COMMAND: {
+ uint8_t *data_dest;
+ int i;
+
+ if (elem_type == SSD_ELEM_COMMAND)
+ data_dest = &sense->cmd_spec_info[0];
+ else {
+ data_dest = &sense->info[0];
+ /*
+ * We're setting the info field, so
+ * set the valid bit.
+ */
+ sense->error_code |= SSD_ERRCODE_VALID;
+ }
+
+ /*
+ * Copy this in reverse so that if we have
+ * less than 4 bytes to fill, the least
+ * significant bytes will be at the end.
+ * If we have more than 4 bytes, only the
+ * least significant bytes will be included.
+ */
+ for (i = sense_len - 1; i >= 0 &&
+ len_to_copy > 0; i--, len_to_copy--)
+ data_dest[len_to_copy - 1] = data[i];
+
+ break;
+ }
+ case SSD_ELEM_FRU:
+ sense->fru = *data;
+ break;
+ case SSD_ELEM_STREAM:
+ sense->flags |= *data;
+ break;
+ case SSD_ELEM_DESC:
+ default:
+
+ /*
+ * If the user passes in descriptor sense,
+ * we can't handle that in fixed format.
+ * So just skip it, and any unknown argument
+ * types.
+ */
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Convenience wrapper around scsi_set_sense_data_va().  Builds sense data
+ * in the requested format (fixed or descriptor) from the error type, sense
+ * key, ASC and ASCQ.  Additional sense elements are passed as
+ * (type, length, data) varargs terminated by SSD_ELEM_NONE.
+ */
+void
+scsi_set_sense_data(struct scsi_sense_data *sense_data,
+		    scsi_sense_data_type sense_format, int current_error,
+		    int sense_key, int asc, int ascq, ...)
+{
+	va_list ap;
+
+	va_start(ap, ascq);
+	scsi_set_sense_data_va(sense_data, sense_format, current_error,
+			       sense_key, asc, ascq, ap);
+	va_end(ap);
+}
+
+#ifndef __rtems__
+/*
+ * Get sense information for three similar sense data types:
+ * SSD_DESC_INFO, SSD_DESC_COMMAND and SSD_DESC_FRU.  Works for both
+ * fixed and descriptor format sense data.
+ *
+ * On success the value is stored in *info; if signed_info is non-NULL a
+ * sign-extended copy is stored there as well.  Returns 0 on success and
+ * 1 if the requested field is absent or not valid.
+ */
+int
+scsi_get_sense_info(struct scsi_sense_data *sense_data, u_int sense_len,
+		    uint8_t info_type, uint64_t *info, int64_t *signed_info)
+{
+	scsi_sense_data_type sense_type;
+
+	if (sense_len == 0)
+		goto bailout;
+
+	sense_type = scsi_sense_type(sense_data);
+
+	switch (sense_type) {
+	case SSD_TYPE_DESC: {
+		struct scsi_sense_data_desc *sense;
+		uint8_t *desc;
+
+		sense = (struct scsi_sense_data_desc *)sense_data;
+
+		/* Descriptor sense: locate the matching descriptor. */
+		desc = scsi_find_desc(sense, sense_len, info_type);
+		if (desc == NULL)
+			goto bailout;
+
+		switch (info_type) {
+		case SSD_DESC_INFO: {
+			struct scsi_sense_info *info_desc;
+
+			info_desc = (struct scsi_sense_info *)desc;
+			*info = scsi_8btou64(info_desc->info);
+			if (signed_info != NULL)
+				*signed_info = *info;
+			break;
+		}
+		case SSD_DESC_COMMAND: {
+			struct scsi_sense_command *cmd_desc;
+
+			cmd_desc = (struct scsi_sense_command *)desc;
+
+			*info = scsi_8btou64(cmd_desc->command_info);
+			if (signed_info != NULL)
+				*signed_info = *info;
+			break;
+		}
+		case SSD_DESC_FRU: {
+			struct scsi_sense_fru *fru_desc;
+
+			fru_desc = (struct scsi_sense_fru *)desc;
+
+			*info = fru_desc->fru;
+			if (signed_info != NULL)
+				*signed_info = (int8_t)fru_desc->fru;
+			break;
+		}
+		default:
+			goto bailout;
+			break;
+		}
+		break;
+	}
+	case SSD_TYPE_FIXED: {
+		struct scsi_sense_data_fixed *sense;
+
+		sense = (struct scsi_sense_data_fixed *)sense_data;
+
+		switch (info_type) {
+		case SSD_DESC_INFO: {
+			uint32_t info_val;
+
+			/* Fixed sense info is gated by the valid bit. */
+			if ((sense->error_code & SSD_ERRCODE_VALID) == 0)
+				goto bailout;
+
+			if (SSD_FIXED_IS_PRESENT(sense, sense_len, info) == 0)
+				goto bailout;
+
+			info_val = scsi_4btoul(sense->info);
+
+			*info = info_val;
+			if (signed_info != NULL)
+				*signed_info = (int32_t)info_val;
+			break;
+		}
+		case SSD_DESC_COMMAND: {
+			uint32_t cmd_val;
+
+			if ((SSD_FIXED_IS_PRESENT(sense, sense_len,
+			     cmd_spec_info) == 0)
+			 || (SSD_FIXED_IS_FILLED(sense, cmd_spec_info) == 0))
+				goto bailout;
+
+			cmd_val = scsi_4btoul(sense->cmd_spec_info);
+			if (cmd_val == 0)
+				goto bailout;
+
+			*info = cmd_val;
+			if (signed_info != NULL)
+				*signed_info = (int32_t)cmd_val;
+			break;
+		}
+		case SSD_DESC_FRU:
+			if ((SSD_FIXED_IS_PRESENT(sense, sense_len, fru) == 0)
+			 || (SSD_FIXED_IS_FILLED(sense, fru) == 0))
+				goto bailout;
+
+			/* A zero FRU code carries no information. */
+			if (sense->fru == 0)
+				goto bailout;
+
+			*info = sense->fru;
+			if (signed_info != NULL)
+				*signed_info = (int8_t)sense->fru;
+			break;
+		default:
+			goto bailout;
+			break;
+		}
+		break;
+	}
+	default:
+		goto bailout;
+		break;
+	}
+
+	return (0);
+bailout:
+	return (1);
+}
+
+/*
+ * Copy the three sense-key-specific bytes out of fixed or descriptor
+ * format sense data into sks[].  Returns 0 on success and 1 if the SKS
+ * field is absent or not marked valid.
+ */
+int
+scsi_get_sks(struct scsi_sense_data *sense_data, u_int sense_len, uint8_t *sks)
+{
+	scsi_sense_data_type sense_type;
+
+	if (sense_len == 0)
+		goto bailout;
+
+	sense_type = scsi_sense_type(sense_data);
+
+	switch (sense_type) {
+	case SSD_TYPE_DESC: {
+		struct scsi_sense_data_desc *sense;
+		struct scsi_sense_sks *desc;
+
+		sense = (struct scsi_sense_data_desc *)sense_data;
+
+		desc = (struct scsi_sense_sks *)scsi_find_desc(sense, sense_len,
+		    SSD_DESC_SKS);
+		if (desc == NULL)
+			goto bailout;
+
+		/*
+		 * No need to check the SKS valid bit for descriptor sense.
+		 * If the descriptor is present, it is valid.
+		 */
+		bcopy(desc->sense_key_spec, sks, sizeof(desc->sense_key_spec));
+		break;
+	}
+	case SSD_TYPE_FIXED: {
+		struct scsi_sense_data_fixed *sense;
+
+		sense = (struct scsi_sense_data_fixed *)sense_data;
+
+		if ((SSD_FIXED_IS_PRESENT(sense, sense_len, sense_key_spec)== 0)
+		 || (SSD_FIXED_IS_FILLED(sense, sense_key_spec) == 0))
+			goto bailout;
+
+		/* Fixed sense requires the SKS valid bit to be set. */
+		if ((sense->sense_key_spec[0] & SSD_SCS_VALID) == 0)
+			goto bailout;
+
+		bcopy(sense->sense_key_spec, sks,sizeof(sense->sense_key_spec));
+		break;
+	}
+	default:
+		goto bailout;
+		break;
+	}
+	return (0);
+bailout:
+	return (1);
+}
+
+/*
+ * Provide a common interface for fixed and descriptor sense to detect
+ * whether we have block-specific sense information.  It is clear by the
+ * presence of the block descriptor in descriptor mode, but we have to
+ * infer from the inquiry data and ILI bit in fixed mode.
+ *
+ * Returns 0 and stores the bits in *block_bits on success, 1 otherwise.
+ * inq_data may be NULL; if given, only direct-access and RBC devices
+ * are considered.
+ */
+int
+scsi_get_block_info(struct scsi_sense_data *sense_data, u_int sense_len,
+		    struct scsi_inquiry_data *inq_data, uint8_t *block_bits)
+{
+	scsi_sense_data_type sense_type;
+
+	if (inq_data != NULL) {
+		switch (SID_TYPE(inq_data)) {
+		case T_DIRECT:
+		case T_RBC:
+			break;
+		default:
+			goto bailout;
+			break;
+		}
+	}
+
+	sense_type = scsi_sense_type(sense_data);
+
+	switch (sense_type) {
+	case SSD_TYPE_DESC: {
+		struct scsi_sense_data_desc *sense;
+		struct scsi_sense_block *block;
+
+		sense = (struct scsi_sense_data_desc *)sense_data;
+
+		block = (struct scsi_sense_block *)scsi_find_desc(sense,
+		    sense_len, SSD_DESC_BLOCK);
+		if (block == NULL)
+			goto bailout;
+
+		*block_bits = block->byte3;
+		break;
+	}
+	case SSD_TYPE_FIXED: {
+		struct scsi_sense_data_fixed *sense;
+
+		sense = (struct scsi_sense_data_fixed *)sense_data;
+
+		if (SSD_FIXED_IS_PRESENT(sense, sense_len, flags) == 0)
+			goto bailout;
+
+		/* Fixed sense only conveys the ILI bit for block devices. */
+		if ((sense->flags & SSD_ILI) == 0)
+			goto bailout;
+
+		*block_bits = sense->flags & SSD_ILI;
+		break;
+	}
+	default:
+		goto bailout;
+		break;
+	}
+	return (0);
+bailout:
+	return (1);
+}
+
+/*
+ * Stream (tape) counterpart of scsi_get_block_info(): extract the
+ * ILI/EOM/Filemark bits from descriptor or fixed format sense data.
+ * Returns 0 and stores the bits in *stream_bits on success, 1 otherwise.
+ * inq_data may be NULL; if given, only sequential-access devices are
+ * considered.
+ */
+int
+scsi_get_stream_info(struct scsi_sense_data *sense_data, u_int sense_len,
+		     struct scsi_inquiry_data *inq_data, uint8_t *stream_bits)
+{
+	scsi_sense_data_type sense_type;
+
+	if (inq_data != NULL) {
+		switch (SID_TYPE(inq_data)) {
+		case T_SEQUENTIAL:
+			break;
+		default:
+			goto bailout;
+			break;
+		}
+	}
+
+	sense_type = scsi_sense_type(sense_data);
+
+	switch (sense_type) {
+	case SSD_TYPE_DESC: {
+		struct scsi_sense_data_desc *sense;
+		struct scsi_sense_stream *stream;
+
+		sense = (struct scsi_sense_data_desc *)sense_data;
+
+		stream = (struct scsi_sense_stream *)scsi_find_desc(sense,
+		    sense_len, SSD_DESC_STREAM);
+		if (stream == NULL)
+			goto bailout;
+
+		*stream_bits = stream->byte3;
+		break;
+	}
+	case SSD_TYPE_FIXED: {
+		struct scsi_sense_data_fixed *sense;
+
+		sense = (struct scsi_sense_data_fixed *)sense_data;
+
+		if (SSD_FIXED_IS_PRESENT(sense, sense_len, flags) == 0)
+			goto bailout;
+
+		if ((sense->flags & (SSD_ILI|SSD_EOM|SSD_FILEMARK)) == 0)
+			goto bailout;
+
+		*stream_bits = sense->flags & (SSD_ILI|SSD_EOM|SSD_FILEMARK);
+		break;
+	}
+	default:
+		goto bailout;
+		break;
+	}
+	return (0);
+bailout:
+	return (1);
+}
+
+/*
+ * Print the sense information field.  cdb, cdb_len and inq_data are
+ * currently unused; they are part of the interface so the output can be
+ * refined per command/device type later.
+ */
+void
+scsi_info_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len,
+	       struct scsi_inquiry_data *inq_data, uint64_t info)
+{
+	/*
+	 * Cast to uintmax_t: the %j length modifier requires a
+	 * uintmax_t argument, and uint64_t is not guaranteed to be the
+	 * same type (cf. the cast in scsi_stream_sbuf()).
+	 */
+	sbuf_printf(sb, "Info: %#jx", (uintmax_t)info);
+}
+
+/*
+ * Print the command-specific information field.  cdb, cdb_len and
+ * inq_data are currently unused; they are part of the interface so the
+ * output can be refined per command/device type later.
+ */
+void
+scsi_command_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len,
+		  struct scsi_inquiry_data *inq_data, uint64_t csi)
+{
+	/* Cast to uintmax_t to match the %jx conversion specifier. */
+	sbuf_printf(sb, "Command Specific Info: %#jx", (uintmax_t)csi);
+}
+
+
+/*
+ * Print a progress indication.  Per SPC, progress is expressed in
+ * units of 1/SSD_SKS_PROGRESS_DENOM of the whole operation, so scale
+ * it to a percentage for display.
+ */
+void
+scsi_progress_sbuf(struct sbuf *sb, uint16_t progress)
+{
+	sbuf_printf(sb, "Progress: %d%% (%d/%d) complete",
+		    (progress * 100) / SSD_SKS_PROGRESS_DENOM,
+		    progress, SSD_SKS_PROGRESS_DENOM);
+}
+
+/*
+ * Decode and print the sense-key-specific (SKS) field.  The meaning of
+ * the three SKS bytes depends on the sense key, so decode per key and
+ * fall back to a raw hex dump for keys we don't know.
+ *
+ * Returns 1 for failure (i.e. SKS isn't valid) and 0 for success.
+ */
+int
+scsi_sks_sbuf(struct sbuf *sb, int sense_key, uint8_t *sks)
+{
+	if ((sks[0] & SSD_SKS_VALID) == 0)
+		return (1);
+
+	switch (sense_key) {
+	case SSD_KEY_ILLEGAL_REQUEST: {
+		struct scsi_sense_sks_field *field;
+		int bad_command;
+		char tmpstr[40];
+
+		/*Field Pointer*/
+		field = (struct scsi_sense_sks_field *)sks;
+
+		/* C/D bit: set means the error is in the CDB, not the data. */
+		if (field->byte0 & SSD_SKS_FIELD_CMD)
+			bad_command = 1;
+		else
+			bad_command = 0;
+
+		tmpstr[0] = '\0';
+
+		/* Bit pointer is valid */
+		if (field->byte0 & SSD_SKS_BPV)
+			snprintf(tmpstr, sizeof(tmpstr), "bit %d ",
+				 field->byte0 & SSD_SKS_BIT_VALUE);
+
+		sbuf_printf(sb, "%s byte %d %sis invalid",
+			    bad_command ? "Command" : "Data",
+			    scsi_2btoul(field->field), tmpstr);
+		break;
+	}
+	case SSD_KEY_UNIT_ATTENTION: {
+		struct scsi_sense_sks_overflow *overflow;
+
+		overflow = (struct scsi_sense_sks_overflow *)sks;
+
+		/*UA Condition Queue Overflow*/
+		sbuf_printf(sb, "Unit Attention Condition Queue %s",
+			    (overflow->byte0 & SSD_SKS_OVERFLOW_SET) ?
+			    "Overflowed" : "Did Not Overflow??");
+		break;
+	}
+	case SSD_KEY_RECOVERED_ERROR:
+	case SSD_KEY_HARDWARE_ERROR:
+	case SSD_KEY_MEDIUM_ERROR: {
+		struct scsi_sense_sks_retry *retry;
+
+		/*Actual Retry Count*/
+		retry = (struct scsi_sense_sks_retry *)sks;
+
+		sbuf_printf(sb, "Actual Retry Count: %d",
+			    scsi_2btoul(retry->actual_retry_count));
+		break;
+	}
+	case SSD_KEY_NO_SENSE:
+	case SSD_KEY_NOT_READY: {
+		struct scsi_sense_sks_progress *progress;
+		int progress_val;
+
+		/*Progress Indication*/
+		progress = (struct scsi_sense_sks_progress *)sks;
+		progress_val = scsi_2btoul(progress->progress);
+
+		scsi_progress_sbuf(sb, progress_val);
+		break;
+	}
+	case SSD_KEY_COPY_ABORTED: {
+		struct scsi_sense_sks_segment *segment;
+		char tmpstr[40];
+
+		/*Segment Pointer*/
+		segment = (struct scsi_sense_sks_segment *)sks;
+
+		tmpstr[0] = '\0';
+
+		if (segment->byte0 & SSD_SKS_SEGMENT_BPV)
+			snprintf(tmpstr, sizeof(tmpstr), "bit %d ",
+				 segment->byte0 & SSD_SKS_SEGMENT_BITPTR);
+
+		sbuf_printf(sb, "%s byte %d %sis invalid", (segment->byte0 &
+			    SSD_SKS_SEGMENT_SD) ? "Segment" : "Data",
+			    scsi_2btoul(segment->field), tmpstr);
+		break;
+	}
+	default:
+		/* Unknown sense key: print the raw SKS bytes. */
+		sbuf_printf(sb, "Sense Key Specific: %#x,%#x", sks[0],
+			    scsi_2btoul(&sks[1]));
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Print the Field Replaceable Unit (FRU) code.
+ */
+void
+scsi_fru_sbuf(struct sbuf *sb, uint64_t fru)
+{
+	sbuf_printf(sb, "Field Replaceable Unit: %d", (int)fru);
+}
+
+/*
+ * Print stream (tape) status bits -- Filemark, EOM, ILI -- followed by
+ * the information field.
+ */
+void
+scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info)
+{
+	int need_comma;
+
+	need_comma = 0;
+	/*
+	 * XXX KDM this needs more descriptive decoding.
+	 */
+	if (stream_bits & SSD_DESC_STREAM_FM) {
+		sbuf_printf(sb, "Filemark");
+		need_comma = 1;
+	}
+
+	if (stream_bits & SSD_DESC_STREAM_EOM) {
+		sbuf_printf(sb, "%sEOM", (need_comma) ? "," : "");
+		need_comma = 1;
+	}
+
+	if (stream_bits & SSD_DESC_STREAM_ILI)
+		sbuf_printf(sb, "%sILI", (need_comma) ? "," : "");
+
+	sbuf_printf(sb, ": Info: %#jx", (uintmax_t) info);
+}
+
+/*
+ * Print block device status: only the ILI (incorrect length indicator)
+ * bit is defined, in which case the info field holds the residue.
+ */
+void
+scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits, uint64_t info)
+{
+	if (block_bits & SSD_DESC_BLOCK_ILI)
+		sbuf_printf(sb, "ILI: residue %#jx", (uintmax_t) info);
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_INFO: decode the information
+ * descriptor and hand the value to scsi_info_sbuf().
+ */
+void
+scsi_sense_info_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		     u_int sense_len, uint8_t *cdb, int cdb_len,
+		     struct scsi_inquiry_data *inq_data,
+		     struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_info *info;
+
+	info = (struct scsi_sense_info *)header;
+
+	scsi_info_sbuf(sb, cdb, cdb_len, inq_data, scsi_8btou64(info->info));
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_COMMAND: decode the
+ * command-specific information descriptor and hand the value to
+ * scsi_command_sbuf().
+ */
+void
+scsi_sense_command_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+			u_int sense_len, uint8_t *cdb, int cdb_len,
+			struct scsi_inquiry_data *inq_data,
+			struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_command *command;
+
+	command = (struct scsi_sense_command *)header;
+
+	scsi_command_sbuf(sb, cdb, cdb_len, inq_data,
+			  scsi_8btou64(command->command_info));
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_SKS.  The SKS bytes are
+ * interpreted relative to the sense key, so re-extract it from the
+ * overall sense data before decoding.
+ */
+void
+scsi_sense_sks_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		    u_int sense_len, uint8_t *cdb, int cdb_len,
+		    struct scsi_inquiry_data *inq_data,
+		    struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_sks *sks;
+	int error_code, sense_key, asc, ascq;
+
+	sks = (struct scsi_sense_sks *)header;
+
+	scsi_extract_sense_len(sense, sense_len, &error_code, &sense_key,
+			       &asc, &ascq, /*show_errors*/ 1);
+
+	scsi_sks_sbuf(sb, sense_key, sks->sense_key_spec);
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_FRU: decode the FRU
+ * descriptor and hand the code to scsi_fru_sbuf().
+ */
+void
+scsi_sense_fru_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		    u_int sense_len, uint8_t *cdb, int cdb_len,
+		    struct scsi_inquiry_data *inq_data,
+		    struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_fru *fru;
+
+	fru = (struct scsi_sense_fru *)header;
+
+	scsi_fru_sbuf(sb, (uint64_t)fru->fru);
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_STREAM.  Also fetches the
+ * information field (best effort -- info stays 0 if it is absent) since
+ * scsi_stream_sbuf() prints the two together.
+ */
+void
+scsi_sense_stream_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		       u_int sense_len, uint8_t *cdb, int cdb_len,
+		       struct scsi_inquiry_data *inq_data,
+		       struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_stream *stream;
+	uint64_t info;
+
+	stream = (struct scsi_sense_stream *)header;
+	info = 0;
+
+	scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO, &info, NULL);
+
+	scsi_stream_sbuf(sb, stream->byte3, info);
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_BLOCK.  Also fetches the
+ * information field (best effort -- info stays 0 if it is absent) since
+ * scsi_block_sbuf() prints it as the residue.
+ */
+void
+scsi_sense_block_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		      u_int sense_len, uint8_t *cdb, int cdb_len,
+		      struct scsi_inquiry_data *inq_data,
+		      struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_block *block;
+	uint64_t info;
+
+	block = (struct scsi_sense_block *)header;
+	info = 0;
+
+	scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO, &info, NULL);
+
+	scsi_block_sbuf(sb, block->byte3, info);
+}
+
+/*
+ * Descriptor-printer callback for SSD_DESC_PROGRESS.  The progress
+ * descriptor carries its own sense key/ASC/ASCQ describing the
+ * operation being reported on, which may differ from the overall
+ * sense data.
+ */
+void
+scsi_sense_progress_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+			 u_int sense_len, uint8_t *cdb, int cdb_len,
+			 struct scsi_inquiry_data *inq_data,
+			 struct scsi_sense_desc_header *header)
+{
+	struct scsi_sense_progress *progress;
+	const char *sense_key_desc;
+	const char *asc_desc;
+	int progress_val;
+
+	progress = (struct scsi_sense_progress *)header;
+
+	/*
+	 * Get descriptions for the sense key, ASC, and ASCQ in the
+	 * progress descriptor.  These could be different than the values
+	 * in the overall sense data.
+	 */
+	scsi_sense_desc(progress->sense_key, progress->add_sense_code,
+			progress->add_sense_code_qual, inq_data,
+			&sense_key_desc, &asc_desc);
+
+	progress_val = scsi_2btoul(progress->progress);
+
+	/*
+	 * The progress indicator is for the operation described by the
+	 * sense key, ASC, and ASCQ in the descriptor.
+	 */
+	sbuf_cat(sb, sense_key_desc);
+	sbuf_printf(sb, " asc:%x,%x (%s): ", progress->add_sense_code,
+		    progress->add_sense_code_qual, asc_desc);
+	scsi_progress_sbuf(sb, progress_val);
+}
+
+/*
+ * Generic sense descriptor printing routine.  This is used when we have
+ * not yet implemented a specific printing routine for this descriptor:
+ * it prints the descriptor type followed by a hex dump of the
+ * descriptor body (header->length bytes following the header).
+ */
+void
+scsi_sense_generic_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+			u_int sense_len, uint8_t *cdb, int cdb_len,
+			struct scsi_inquiry_data *inq_data,
+			struct scsi_sense_desc_header *header)
+{
+	int i;
+	uint8_t *buf_ptr;
+
+	sbuf_printf(sb, "Descriptor %#x:", header->desc_type);
+
+	/* Descriptor payload starts immediately after the header. */
+	buf_ptr = (uint8_t *)&header[1];
+
+	for (i = 0; i < header->length; i++, buf_ptr++)
+		sbuf_printf(sb, " %02x", *buf_ptr);
+}
+
+/*
+ * Table mapping sense descriptor types to their printing routines.
+ *
+ * Keep this list in numeric order.  This speeds the array traversal:
+ * scsi_sense_desc_sbuf() stops scanning as soon as it passes the
+ * requested descriptor type.
+ */
+struct scsi_sense_desc_printer {
+	uint8_t desc_type;
+	/*
+	 * The function arguments here are the superset of what is needed
+	 * to print out various different descriptors.  Command and
+	 * information descriptors need inquiry data and command type.
+	 * Sense key specific descriptors need the sense key.
+	 *
+	 * The sense, cdb, and inquiry data arguments may be NULL, but the
+	 * information printed may not be fully decoded as a result.
+	 */
+	void (*print_func)(struct sbuf *sb, struct scsi_sense_data *sense,
+			   u_int sense_len, uint8_t *cdb, int cdb_len,
+			   struct scsi_inquiry_data *inq_data,
+			   struct scsi_sense_desc_header *header);
+} scsi_sense_printers[] = {
+	{SSD_DESC_INFO, scsi_sense_info_sbuf},
+	{SSD_DESC_COMMAND, scsi_sense_command_sbuf},
+	{SSD_DESC_SKS, scsi_sense_sks_sbuf},
+	{SSD_DESC_FRU, scsi_sense_fru_sbuf},
+	{SSD_DESC_STREAM, scsi_sense_stream_sbuf},
+	{SSD_DESC_BLOCK, scsi_sense_block_sbuf},
+	{SSD_DESC_PROGRESS, scsi_sense_progress_sbuf}
+};
+
+/*
+ * Print a single sense descriptor: dispatch to the type-specific
+ * printer from scsi_sense_printers[], falling back to the generic hex
+ * dump routine for descriptor types we don't know about.
+ */
+void
+scsi_sense_desc_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+		     u_int sense_len, uint8_t *cdb, int cdb_len,
+		     struct scsi_inquiry_data *inq_data,
+		     struct scsi_sense_desc_header *header)
+{
+	int i;
+
+	for (i = 0; i < (sizeof(scsi_sense_printers) /
+	     sizeof(scsi_sense_printers[0])); i++) {
+		struct scsi_sense_desc_printer *printer;
+
+		printer = &scsi_sense_printers[i];
+
+		/*
+		 * The list is sorted, so quit if we've passed our
+		 * descriptor number.
+		 */
+		if (printer->desc_type > header->desc_type)
+			break;
+
+		if (printer->desc_type != header->desc_type)
+			continue;
+
+		printer->print_func(sb, sense, sense_len, cdb, cdb_len,
+				    inq_data, header);
+
+		return;
+	}
+
+	/*
+	 * No specific printing routine, so use the generic routine.
+	 */
+	scsi_sense_generic_sbuf(sb, sense, sense_len, cdb, cdb_len,
+				inq_data, header);
+}
+
+/*
+ * Classify sense data as fixed or descriptor format based on the error
+ * code byte.  Returns SSD_TYPE_NONE for unrecognized error codes.
+ */
+scsi_sense_data_type
+scsi_sense_type(struct scsi_sense_data *sense_data)
+{
+	switch (sense_data->error_code & SSD_ERRCODE) {
+	case SSD_DESC_CURRENT_ERROR:
+	case SSD_DESC_DEFERRED_ERROR:
+		return (SSD_TYPE_DESC);
+		break;
+	case SSD_CURRENT_ERROR:
+	case SSD_DEFERRED_ERROR:
+		return (SSD_TYPE_FIXED);
+		break;
+	default:
+		break;
+	}
+
+	return (SSD_TYPE_NONE);
+}
+
+/*
+ * Context passed to scsi_print_desc_func() via scsi_desc_iterate():
+ * bundles the output buffer, line prefix, and the CDB/inquiry data
+ * needed for descriptor decoding.
+ */
+struct scsi_print_sense_info {
+	struct sbuf *sb;	/* output buffer */
+	char *path_str;		/* prefix for each output line */
+	uint8_t *cdb;		/* CDB of the failed command, may be NULL */
+	int cdb_len;
+	struct scsi_inquiry_data *inq_data;	/* may be NULL */
+};
+
+/*
+ * scsi_desc_iterate() callback used by scsi_sense_only_sbuf() to print
+ * any descriptors that its main path did not already handle.  Always
+ * returns 0 so that iteration continues through all descriptors.
+ */
+static int
+scsi_print_desc_func(struct scsi_sense_data_desc *sense, u_int sense_len,
+		     struct scsi_sense_desc_header *header, void *arg)
+{
+	struct scsi_print_sense_info *print_info;
+
+	print_info = (struct scsi_print_sense_info *)arg;
+
+	switch (header->desc_type) {
+	case SSD_DESC_INFO:
+	case SSD_DESC_FRU:
+	case SSD_DESC_COMMAND:
+	case SSD_DESC_SKS:
+	case SSD_DESC_BLOCK:
+	case SSD_DESC_STREAM:
+		/*
+		 * We have already printed these descriptors, if they are
+		 * present.
+		 */
+		break;
+	default: {
+		sbuf_printf(print_info->sb, "%s", print_info->path_str);
+		scsi_sense_desc_sbuf(print_info->sb,
+				     (struct scsi_sense_data *)sense, sense_len,
+				     print_info->cdb, print_info->cdb_len,
+				     print_info->inq_data, header);
+		sbuf_printf(print_info->sb, "\n");
+		break;
+	}
+	}
+
+	/*
+	 * Tell the iterator that we want to see more descriptors if they
+	 * are present.
+	 */
+	return (0);
+}
+
+/*
+ * Print sense data in human-readable form into sb, prefixing each line
+ * with path_str.  Handles both fixed and descriptor format, the "no
+ * sense data" case (error_code == -1), and unknown error codes.  Per
+ * the printer table contract, inq_data and cdb may be NULL; the output
+ * is simply less fully decoded.
+ */
+void
+scsi_sense_only_sbuf(struct scsi_sense_data *sense, u_int sense_len,
+		     struct sbuf *sb, char *path_str,
+		     struct scsi_inquiry_data *inq_data, uint8_t *cdb,
+		     int cdb_len)
+{
+	int error_code, sense_key, asc, ascq;
+
+	sbuf_cat(sb, path_str);
+
+	scsi_extract_sense_len(sense, sense_len, &error_code, &sense_key,
+			       &asc, &ascq, /*show_errors*/ 1);
+
+	sbuf_printf(sb, "SCSI sense: ");
+	switch (error_code) {
+	case SSD_DEFERRED_ERROR:
+	case SSD_DESC_DEFERRED_ERROR:
+		sbuf_printf(sb, "Deferred error: ");
+
+		/* FALLTHROUGH */
+	case SSD_CURRENT_ERROR:
+	case SSD_DESC_CURRENT_ERROR:
+	{
+		struct scsi_sense_data_desc *desc_sense;
+		struct scsi_print_sense_info print_info;
+		const char *sense_key_desc;
+		const char *asc_desc;
+		uint8_t sks[3];
+		uint64_t val;
+		int info_valid;
+
+		/*
+		 * Get descriptions for the sense key, ASC, and ASCQ.  If
+		 * these aren't present in the sense data (i.e. the sense
+		 * data isn't long enough), the -1 values that
+		 * scsi_extract_sense_len() returns will yield default
+		 * or error descriptions.
+		 */
+		scsi_sense_desc(sense_key, asc, ascq, inq_data,
+				&sense_key_desc, &asc_desc);
+
+		/*
+		 * We first print the sense key and ASC/ASCQ.
+		 */
+		sbuf_cat(sb, sense_key_desc);
+		sbuf_printf(sb, " asc:%x,%x (%s)\n", asc, ascq, asc_desc);
+
+		/*
+		 * Get the info field if it is valid.
+		 */
+		if (scsi_get_sense_info(sense, sense_len, SSD_DESC_INFO,
+					&val, NULL) == 0)
+			info_valid = 1;
+		else
+			info_valid = 0;
+
+		if (info_valid != 0) {
+			uint8_t bits;
+
+			/*
+			 * Determine whether we have any block or stream
+			 * device-specific information.
+			 */
+			if (scsi_get_block_info(sense, sense_len, inq_data,
+						&bits) == 0) {
+				sbuf_cat(sb, path_str);
+				scsi_block_sbuf(sb, bits, val);
+				sbuf_printf(sb, "\n");
+			} else if (scsi_get_stream_info(sense, sense_len,
+							inq_data, &bits) == 0) {
+				sbuf_cat(sb, path_str);
+				scsi_stream_sbuf(sb, bits, val);
+				sbuf_printf(sb, "\n");
+			} else if (val != 0) {
+				/*
+				 * The information field can be valid but 0.
+				 * If the block or stream bits aren't set,
+				 * and this is 0, it isn't terribly useful
+				 * to print it out.
+				 */
+				sbuf_cat(sb, path_str);
+				scsi_info_sbuf(sb, cdb, cdb_len, inq_data, val);
+				sbuf_printf(sb, "\n");
+			}
+		}
+
+		/*
+		 * Print the FRU.
+		 */
+		if (scsi_get_sense_info(sense, sense_len, SSD_DESC_FRU,
+					&val, NULL) == 0) {
+			sbuf_cat(sb, path_str);
+			scsi_fru_sbuf(sb, val);
+			sbuf_printf(sb, "\n");
+		}
+
+		/*
+		 * Print any command-specific information.
+		 */
+		if (scsi_get_sense_info(sense, sense_len, SSD_DESC_COMMAND,
+					&val, NULL) == 0) {
+			sbuf_cat(sb, path_str);
+			scsi_command_sbuf(sb, cdb, cdb_len, inq_data, val);
+			sbuf_printf(sb, "\n");
+		}
+
+		/*
+		 * Print out any sense-key-specific information.
+		 */
+		if (scsi_get_sks(sense, sense_len, sks) == 0) {
+			sbuf_cat(sb, path_str);
+			scsi_sks_sbuf(sb, sense_key, sks);
+			sbuf_printf(sb, "\n");
+		}
+
+		/*
+		 * If this is fixed sense, we're done.  If we have
+		 * descriptor sense, we might have more information
+		 * available.
+		 */
+		if (scsi_sense_type(sense) != SSD_TYPE_DESC)
+			break;
+
+		desc_sense = (struct scsi_sense_data_desc *)sense;
+
+		print_info.sb = sb;
+		print_info.path_str = path_str;
+		print_info.cdb = cdb;
+		print_info.cdb_len = cdb_len;
+		print_info.inq_data = inq_data;
+
+		/*
+		 * Print any sense descriptors that we have not already printed.
+		 */
+		scsi_desc_iterate(desc_sense, sense_len, scsi_print_desc_func,
+				  &print_info);
+		break;
+
+	}
+	case -1:
+		/*
+		 * scsi_extract_sense_len() sets values to -1 if the
+		 * show_errors flag is set and they aren't present in the
+		 * sense data.  This means that sense_len is 0.
+		 */
+		sbuf_printf(sb, "No sense data present\n");
+		break;
+	default: {
+		sbuf_printf(sb, "Error code 0x%x", error_code);
+		if (sense->error_code & SSD_ERRCODE_VALID) {
+			struct scsi_sense_data_fixed *fixed_sense;
+
+			fixed_sense = (struct scsi_sense_data_fixed *)sense;
+
+			if (SSD_FIXED_IS_PRESENT(fixed_sense, sense_len, info)){
+				uint32_t info;
+
+				info = scsi_4btoul(fixed_sense->info);
+
+				sbuf_printf(sb, " at block no. %d (decimal)",
+					    info);
+			}
+		}
+		sbuf_printf(sb, "\n");
+		break;
+	}
+	}
+}
/*
* scsi_sense_sbuf() returns 0 for success and -1 for failure.
@@ -3241,11 +4802,8 @@ scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio,
#ifdef _KERNEL
struct ccb_getdev *cgd;
#endif /* _KERNEL */
- u_int32_t info;
- int error_code;
- int sense_key;
- int asc, ascq;
char path_str[64];
+ uint8_t *cdb;
#ifndef _KERNEL
if (device == NULL)
@@ -3343,129 +4901,14 @@ scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio,
sense = &csio->sense_data;
}
+ if (csio->ccb_h.flags & CAM_CDB_POINTER)
+ cdb = csio->cdb_io.cdb_ptr;
+ else
+ cdb = csio->cdb_io.cdb_bytes;
- sbuf_cat(sb, path_str);
-
- error_code = sense->error_code & SSD_ERRCODE;
- sense_key = sense->flags & SSD_KEY;
-
- sbuf_printf(sb, "SCSI sense: ");
- switch (error_code) {
- case SSD_DEFERRED_ERROR:
- sbuf_printf(sb, "Deferred error: ");
-
- /* FALLTHROUGH */
- case SSD_CURRENT_ERROR:
- {
- const char *sense_key_desc;
- const char *asc_desc;
-
- asc = (sense->extra_len >= 5) ? sense->add_sense_code : 0;
- ascq = (sense->extra_len >= 6) ? sense->add_sense_code_qual : 0;
- scsi_sense_desc(sense_key, asc, ascq, inq_data,
- &sense_key_desc, &asc_desc);
- sbuf_cat(sb, sense_key_desc);
-
- info = scsi_4btoul(sense->info);
-
- if (sense->error_code & SSD_ERRCODE_VALID) {
-
- switch (sense_key) {
- case SSD_KEY_NOT_READY:
- case SSD_KEY_ILLEGAL_REQUEST:
- case SSD_KEY_UNIT_ATTENTION:
- case SSD_KEY_DATA_PROTECT:
- break;
- case SSD_KEY_BLANK_CHECK:
- sbuf_printf(sb, " req sz: %d (decimal)", info);
- break;
- default:
- if (info) {
- if (sense->flags & SSD_ILI) {
- sbuf_printf(sb, " ILI (length "
- "mismatch): %d", info);
-
- } else {
- sbuf_printf(sb, " info:%x",
- info);
- }
- }
- }
- } else if (info) {
- sbuf_printf(sb, " info?:%x", info);
- }
-
- if (sense->extra_len >= 4) {
- if (bcmp(sense->cmd_spec_info, "\0\0\0\0", 4)) {
- sbuf_printf(sb, " csi:%x,%x,%x,%x",
- sense->cmd_spec_info[0],
- sense->cmd_spec_info[1],
- sense->cmd_spec_info[2],
- sense->cmd_spec_info[3]);
- }
- }
-
- sbuf_printf(sb, " asc:%x,%x (%s)", asc, ascq, asc_desc);
-
- if (sense->extra_len >= 7 && sense->fru) {
- sbuf_printf(sb, " field replaceable unit: %x",
- sense->fru);
- }
-
- if ((sense->extra_len >= 10)
- && (sense->sense_key_spec[0] & SSD_SCS_VALID) != 0) {
- switch(sense_key) {
- case SSD_KEY_ILLEGAL_REQUEST: {
- int bad_command;
- char tmpstr2[40];
-
- if (sense->sense_key_spec[0] & 0x40)
- bad_command = 1;
- else
- bad_command = 0;
-
- tmpstr2[0] = '\0';
-
- /* Bit pointer is valid */
- if (sense->sense_key_spec[0] & 0x08)
- snprintf(tmpstr2, sizeof(tmpstr2),
- "bit %d ",
- sense->sense_key_spec[0] & 0x7);
- sbuf_printf(sb, ": %s byte %d %sis invalid",
- bad_command ? "Command" : "Data",
- scsi_2btoul(
- &sense->sense_key_spec[1]),
- tmpstr2);
- break;
- }
- case SSD_KEY_RECOVERED_ERROR:
- case SSD_KEY_HARDWARE_ERROR:
- case SSD_KEY_MEDIUM_ERROR:
- sbuf_printf(sb, " actual retry count: %d",
- scsi_2btoul(
- &sense->sense_key_spec[1]));
- break;
- default:
- sbuf_printf(sb, " sks:%#x,%#x",
- sense->sense_key_spec[0],
- scsi_2btoul(
- &sense->sense_key_spec[1]));
- break;
- }
- }
- break;
-
- }
- default:
- sbuf_printf(sb, "Error code 0x%x", sense->error_code);
- if (sense->error_code & SSD_ERRCODE_VALID) {
- sbuf_printf(sb, " at block no. %d (decimal)",
- info = scsi_4btoul(sense->info));
- }
- }
-
- sbuf_printf(sb, "\n");
-
+ scsi_sense_only_sbuf(sense, csio->sense_len - csio->sense_resid, sb,
+ path_str, inq_data, cdb, csio->cdb_len);
+
#ifdef _KERNEL
xpt_free_ccb((union ccb*)cgd);
#endif /* _KERNEL/!_KERNEL */
@@ -3535,6 +4978,167 @@ scsi_sense_print(struct cam_device *device, struct ccb_scsiio *csio,
}
#endif /* _KERNEL/!_KERNEL */
+
+/*
+ * Extract basic sense information.  This is backward-compatible with the
+ * previous implementation.  For new implementations,
+ * scsi_extract_sense_len() is recommended.
+ *
+ * Note: this assumes a full-size sense buffer and passes show_errors=0,
+ * so missing fields come back as 0 rather than -1.
+ */
+void
+scsi_extract_sense(struct scsi_sense_data *sense_data, int *error_code,
+		   int *sense_key, int *asc, int *ascq)
+{
+	scsi_extract_sense_len(sense_data, sizeof(*sense_data), error_code,
+			       sense_key, asc, ascq, /*show_errors*/ 0);
+}
+
+/*
+ * Extract basic sense information from SCSI I/O CCB structure.
+ *
+ * Returns 1 if valid sense data was present and extracted, 0 if the CCB
+ * carries no usable sense (wrong function code, no check condition, no
+ * autosense, physical sense pointer, or empty sense data).
+ */
+int
+scsi_extract_sense_ccb(union ccb *ccb,
+    int *error_code, int *sense_key, int *asc, int *ascq)
+{
+	struct scsi_sense_data *sense_data;
+
+	/* Make sure there are some sense data we can access. */
+	if (ccb->ccb_h.func_code != XPT_SCSI_IO ||
+	    (ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_SCSI_STATUS_ERROR ||
+	    (ccb->csio.scsi_status != SCSI_STATUS_CHECK_COND) ||
+	    (ccb->ccb_h.status & CAM_AUTOSNS_VALID) == 0 ||
+	    (ccb->ccb_h.flags & CAM_SENSE_PHYS))
+		return (0);
+
+	/*
+	 * With CAM_SENSE_PTR the sense_data field holds a pointer to the
+	 * sense buffer rather than the buffer itself.
+	 */
+	if (ccb->ccb_h.flags & CAM_SENSE_PTR)
+		bcopy(&ccb->csio.sense_data, &sense_data,
+		    sizeof(struct scsi_sense_data *));
+	else
+		sense_data = &ccb->csio.sense_data;
+	scsi_extract_sense_len(sense_data,
+	    ccb->csio.sense_len - ccb->csio.sense_resid,
+	    error_code, sense_key, asc, ascq, 1);
+	if (*error_code == -1)
+		return (0);
+	return (1);
+}
+#endif /* __rtems__ */
+
+/*
+ * Extract basic sense information (error code, sense key, ASC, ASCQ)
+ * from fixed or descriptor format sense data, honoring the actual
+ * sense_len so that truncated sense data is handled safely.  If
+ * show_errors is set, missing fields are reported as -1 instead of 0.
+ */
+void
+scsi_extract_sense_len(struct scsi_sense_data *sense_data, u_int sense_len,
+		       int *error_code, int *sense_key, int *asc, int *ascq,
+		       int show_errors)
+{
+	/*
+	 * If we have no length, we have no sense.
+	 */
+	if (sense_len == 0) {
+		if (show_errors == 0) {
+			*error_code = 0;
+			*sense_key = 0;
+			*asc = 0;
+			*ascq = 0;
+		} else {
+			*error_code = -1;
+			*sense_key = -1;
+			*asc = -1;
+			*ascq = -1;
+		}
+		return;
+	}
+
+	*error_code = sense_data->error_code & SSD_ERRCODE;
+
+	switch (*error_code) {
+	case SSD_DESC_CURRENT_ERROR:
+	case SSD_DESC_DEFERRED_ERROR: {
+		struct scsi_sense_data_desc *sense;
+
+		sense = (struct scsi_sense_data_desc *)sense_data;
+
+		if (SSD_DESC_IS_PRESENT(sense, sense_len, sense_key))
+			*sense_key = sense->sense_key & SSD_KEY;
+		else
+			*sense_key = (show_errors) ? -1 : 0;
+
+		if (SSD_DESC_IS_PRESENT(sense, sense_len, add_sense_code))
+			*asc = sense->add_sense_code;
+		else
+			*asc = (show_errors) ? -1 : 0;
+
+		if (SSD_DESC_IS_PRESENT(sense, sense_len, add_sense_code_qual))
+			*ascq = sense->add_sense_code_qual;
+		else
+			*ascq = (show_errors) ? -1 : 0;
+		break;
+	}
+	case SSD_CURRENT_ERROR:
+	case SSD_DEFERRED_ERROR:
+	default: {
+		struct scsi_sense_data_fixed *sense;
+
+		sense = (struct scsi_sense_data_fixed *)sense_data;
+
+		if (SSD_FIXED_IS_PRESENT(sense, sense_len, flags))
+			*sense_key = sense->flags & SSD_KEY;
+		else
+			*sense_key = (show_errors) ? -1 : 0;
+
+		/* Fixed sense: also require extra_len to cover the field. */
+		if ((SSD_FIXED_IS_PRESENT(sense, sense_len, add_sense_code))
+		 && (SSD_FIXED_IS_FILLED(sense, add_sense_code)))
+			*asc = sense->add_sense_code;
+		else
+			*asc = (show_errors) ? -1 : 0;
+
+		if ((SSD_FIXED_IS_PRESENT(sense, sense_len,add_sense_code_qual))
+		 && (SSD_FIXED_IS_FILLED(sense, add_sense_code_qual)))
+			*ascq = sense->add_sense_code_qual;
+		else
+			*ascq = (show_errors) ? -1 : 0;
+		break;
+	}
+	}
+}
+
+/*
+ * Convenience wrapper: return just the sense key from sense data of
+ * either format (-1 if absent and show_errors is set, 0 otherwise).
+ */
+int
+scsi_get_sense_key(struct scsi_sense_data *sense_data, u_int sense_len,
+		   int show_errors)
+{
+	int error_code, sense_key, asc, ascq;
+
+	scsi_extract_sense_len(sense_data, sense_len, &error_code,
+			       &sense_key, &asc, &ascq, show_errors);
+
+	return (sense_key);
+}
+
+#ifndef __rtems__
+/*
+ * Convenience wrapper: return just the additional sense code (ASC)
+ * from sense data of either format.
+ */
+int
+scsi_get_asc(struct scsi_sense_data *sense_data, u_int sense_len,
+	     int show_errors)
+{
+	int error_code, sense_key, asc, ascq;
+
+	scsi_extract_sense_len(sense_data, sense_len, &error_code,
+			       &sense_key, &asc, &ascq, show_errors);
+
+	return (asc);
+}
+
+/*
+ * Convenience wrapper: return just the additional sense code qualifier
+ * (ASCQ) from sense data of either format.
+ */
+int
+scsi_get_ascq(struct scsi_sense_data *sense_data, u_int sense_len,
+	      int show_errors)
+{
+	int error_code, sense_key, asc, ascq;
+
+	scsi_extract_sense_len(sense_data, sense_len, &error_code,
+			       &sense_key, &asc, &ascq, show_errors);
+
+	return (ascq);
+}
#endif /* __rtems__ */
/*
@@ -3737,6 +5341,117 @@ scsi_calc_syncparam(u_int period)
}
#endif /* __rtems__ */
+int
+scsi_devid_is_naa_ieee_reg(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+ struct scsi_vpd_id_naa_basic *naa;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ naa = (struct scsi_vpd_id_naa_basic *)descr->identifier;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_NAA)
+ return 0;
+ if (descr->length < sizeof(struct scsi_vpd_id_naa_ieee_reg))
+ return 0;
+ if ((naa->naa >> SVPD_ID_NAA_NAA_SHIFT) != SVPD_ID_NAA_IEEE_REG)
+ return 0;
+ return 1;
+}
+
+int
+scsi_devid_is_sas_target(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if (!scsi_devid_is_naa_ieee_reg(bufp))
+ return 0;
+ if ((descr->id_type & SVPD_ID_PIV) == 0) /* proto field reserved */
+ return 0;
+ if ((descr->proto_codeset >> SVPD_ID_PROTO_SHIFT) != SCSI_PROTO_SAS)
+ return 0;
+ return 1;
+}
+
+int
+scsi_devid_is_lun_eui64(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if ((descr->id_type & SVPD_ID_ASSOC_MASK) != SVPD_ID_ASSOC_LUN)
+ return 0;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_EUI64)
+ return 0;
+ return 1;
+}
+
+int
+scsi_devid_is_lun_naa(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if ((descr->id_type & SVPD_ID_ASSOC_MASK) != SVPD_ID_ASSOC_LUN)
+ return 0;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_NAA)
+ return 0;
+ return 1;
+}
+
+int
+scsi_devid_is_lun_t10(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if ((descr->id_type & SVPD_ID_ASSOC_MASK) != SVPD_ID_ASSOC_LUN)
+ return 0;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_T10)
+ return 0;
+ return 1;
+}
+
+int
+scsi_devid_is_lun_name(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if ((descr->id_type & SVPD_ID_ASSOC_MASK) != SVPD_ID_ASSOC_LUN)
+ return 0;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_SCSI_NAME)
+ return 0;
+ return 1;
+}
+
+struct scsi_vpd_id_descriptor *
+scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t page_len,
+ scsi_devid_checkfn_t ck_fn)
+{
+ struct scsi_vpd_id_descriptor *desc;
+ uint8_t *page_end;
+ uint8_t *desc_buf_end;
+
+ page_end = (uint8_t *)id + page_len;
+ if (page_end < id->desc_list)
+ return (NULL);
+
+ desc_buf_end = MIN(id->desc_list + scsi_2btoul(id->length), page_end);
+
+ for (desc = (struct scsi_vpd_id_descriptor *)id->desc_list;
+ desc->identifier <= desc_buf_end
+ && desc->identifier + desc->length <= desc_buf_end;
+ desc = (struct scsi_vpd_id_descriptor *)(desc->identifier
+ + desc->length)) {
+
+ if (ck_fn == NULL || ck_fn((uint8_t *)desc) != 0)
+ return (desc);
+ }
+
+ return (NULL);
+}
+
void
scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -3814,14 +5529,7 @@ scsi_inquiry(struct ccb_scsiio *csio, u_int32_t retries,
scsi_cmd->byte2 |= SI_EVPD;
scsi_cmd->page_code = page_code;
}
- /*
- * A 'transfer units' count of 256 is coded as
- * zero for all commands with a single byte count
- * field.
- */
- if (inq_len == 256)
- inq_len = 0;
- scsi_cmd->length = inq_len;
+ scsi_ulto2b(inq_len, scsi_cmd->length);
}
#ifndef __rtems__
@@ -4236,7 +5944,11 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len,
u_int32_t timeout)
{
+ int read;
u_int8_t cdb_len;
+
+ read = (readop & SCSI_RW_DIRMASK) == SCSI_RW_READ;
+
/*
* Use the smallest possible command to perform the operation
* as some legacy hardware does not support the 10 byte commands.
@@ -4253,7 +5965,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
struct scsi_rw_6 *scsi_cmd;
scsi_cmd = (struct scsi_rw_6 *)&csio->cdb_io.cdb_bytes;
- scsi_cmd->opcode = readop ? READ_6 : WRITE_6;
+ scsi_cmd->opcode = read ? READ_6 : WRITE_6;
scsi_ulto3b(lba, scsi_cmd->addr);
scsi_cmd->length = block_count & 0xff;
scsi_cmd->control = 0;
@@ -4272,7 +5984,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
struct scsi_rw_10 *scsi_cmd;
scsi_cmd = (struct scsi_rw_10 *)&csio->cdb_io.cdb_bytes;
- scsi_cmd->opcode = readop ? READ_10 : WRITE_10;
+ scsi_cmd->opcode = read ? READ_10 : WRITE_10;
scsi_cmd->byte2 = byte2;
scsi_ulto4b(lba, scsi_cmd->addr);
scsi_cmd->reserved = 0;
@@ -4295,7 +6007,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
struct scsi_rw_12 *scsi_cmd;
scsi_cmd = (struct scsi_rw_12 *)&csio->cdb_io.cdb_bytes;
- scsi_cmd->opcode = readop ? READ_12 : WRITE_12;
+ scsi_cmd->opcode = read ? READ_12 : WRITE_12;
scsi_cmd->byte2 = byte2;
scsi_ulto4b(lba, scsi_cmd->addr);
scsi_cmd->reserved = 0;
@@ -4317,7 +6029,7 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
struct scsi_rw_16 *scsi_cmd;
scsi_cmd = (struct scsi_rw_16 *)&csio->cdb_io.cdb_bytes;
- scsi_cmd->opcode = readop ? READ_16 : WRITE_16;
+ scsi_cmd->opcode = read ? READ_16 : WRITE_16;
scsi_cmd->byte2 = byte2;
scsi_u64to8b(lba, scsi_cmd->addr);
scsi_cmd->reserved = 0;
@@ -4328,7 +6040,77 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
cam_fill_csio(csio,
retries,
cbfcnp,
- /*flags*/readop ? CAM_DIR_IN : CAM_DIR_OUT,
+ (read ? CAM_DIR_IN : CAM_DIR_OUT) |
+ ((readop & SCSI_RW_BIO) != 0 ? CAM_DATA_BIO : 0),
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ cdb_len,
+ timeout);
+}
+
+void
+scsi_write_same(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t byte2,
+ int minimum_cmd_size, u_int64_t lba, u_int32_t block_count,
+ u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout)
+{
+ u_int8_t cdb_len;
+ if ((minimum_cmd_size < 16) &&
+ ((block_count & 0xffff) == block_count) &&
+ ((lba & 0xffffffff) == lba)) {
+ /*
+ * Need a 10 byte cdb.
+ */
+ struct scsi_write_same_10 *scsi_cmd;
+
+ scsi_cmd = (struct scsi_write_same_10 *)&csio->cdb_io.cdb_bytes;
+ scsi_cmd->opcode = WRITE_SAME_10;
+ scsi_cmd->byte2 = byte2;
+ scsi_ulto4b(lba, scsi_cmd->addr);
+ scsi_cmd->group = 0;
+ scsi_ulto2b(block_count, scsi_cmd->length);
+ scsi_cmd->control = 0;
+ cdb_len = sizeof(*scsi_cmd);
+
+ CAM_DEBUG(csio->ccb_h.path, CAM_DEBUG_SUBTRACE,
+ ("10byte: %x%x%x%x:%x%x: %d\n", scsi_cmd->addr[0],
+ scsi_cmd->addr[1], scsi_cmd->addr[2],
+ scsi_cmd->addr[3], scsi_cmd->length[0],
+ scsi_cmd->length[1], dxfer_len));
+ } else {
+ /*
+ * 16 byte CDB. We'll only get here if the LBA is larger
+ * than 2^32, or if the user asks for a 16 byte command.
+ */
+ struct scsi_write_same_16 *scsi_cmd;
+
+ scsi_cmd = (struct scsi_write_same_16 *)&csio->cdb_io.cdb_bytes;
+ scsi_cmd->opcode = WRITE_SAME_16;
+ scsi_cmd->byte2 = byte2;
+ scsi_u64to8b(lba, scsi_cmd->addr);
+ scsi_ulto4b(block_count, scsi_cmd->length);
+ scsi_cmd->group = 0;
+ scsi_cmd->control = 0;
+ cdb_len = sizeof(*scsi_cmd);
+
+ CAM_DEBUG(csio->ccb_h.path, CAM_DEBUG_SUBTRACE,
+ ("16byte: %x%x%x%x%x%x%x%x:%x%x%x%x: %d\n",
+ scsi_cmd->addr[0], scsi_cmd->addr[1],
+ scsi_cmd->addr[2], scsi_cmd->addr[3],
+ scsi_cmd->addr[4], scsi_cmd->addr[5],
+ scsi_cmd->addr[6], scsi_cmd->addr[7],
+ scsi_cmd->length[0], scsi_cmd->length[1],
+ scsi_cmd->length[2], scsi_cmd->length[3],
+ dxfer_len));
+ }
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
tag_action,
data_ptr,
dxfer_len,
@@ -4338,6 +6120,261 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
}
#ifndef __rtems__
+void
+scsi_ata_identify(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t *data_ptr,
+ u_int16_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout)
+{
+ scsi_ata_pass_16(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ /*protocol*/AP_PROTO_PIO_IN,
+ /*ata_flags*/AP_FLAG_TDIR_FROM_DEV|
+ AP_FLAG_BYT_BLOK_BYTES|AP_FLAG_TLEN_SECT_CNT,
+ /*features*/0,
+ /*sector_count*/dxfer_len,
+ /*lba*/0,
+ /*command*/ATA_ATA_IDENTIFY,
+ /*control*/0,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ timeout);
+}
+
+void
+scsi_ata_trim(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int16_t block_count,
+ u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout)
+{
+ scsi_ata_pass_16(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
+ tag_action,
+ /*protocol*/AP_EXTEND|AP_PROTO_DMA,
+ /*ata_flags*/AP_FLAG_TLEN_SECT_CNT|AP_FLAG_BYT_BLOK_BLOCKS,
+ /*features*/ATA_DSM_TRIM,
+ /*sector_count*/block_count,
+ /*lba*/0,
+ /*command*/ATA_DATA_SET_MANAGEMENT,
+ /*control*/0,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ timeout);
+}
+
+void
+scsi_ata_pass_16(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int32_t flags, u_int8_t tag_action,
+ u_int8_t protocol, u_int8_t ata_flags, u_int16_t features,
+ u_int16_t sector_count, uint64_t lba, u_int8_t command,
+ u_int8_t control, u_int8_t *data_ptr, u_int16_t dxfer_len,
+ u_int8_t sense_len, u_int32_t timeout)
+{
+ struct ata_pass_16 *ata_cmd;
+
+ ata_cmd = (struct ata_pass_16 *)&csio->cdb_io.cdb_bytes;
+ ata_cmd->opcode = ATA_PASS_16;
+ ata_cmd->protocol = protocol;
+ ata_cmd->flags = ata_flags;
+ ata_cmd->features_ext = features >> 8;
+ ata_cmd->features = features;
+ ata_cmd->sector_count_ext = sector_count >> 8;
+ ata_cmd->sector_count = sector_count;
+ ata_cmd->lba_low = lba;
+ ata_cmd->lba_mid = lba >> 8;
+ ata_cmd->lba_high = lba >> 16;
+ ata_cmd->device = ATA_DEV_LBA;
+ if (protocol & AP_EXTEND) {
+ ata_cmd->lba_low_ext = lba >> 24;
+ ata_cmd->lba_mid_ext = lba >> 32;
+ ata_cmd->lba_high_ext = lba >> 40;
+ } else
+ ata_cmd->device |= (lba >> 24) & 0x0f;
+ ata_cmd->command = command;
+ ata_cmd->control = control;
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ flags,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*ata_cmd),
+ timeout);
+}
+
+void
+scsi_unmap(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t byte2,
+ u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout)
+{
+ struct scsi_unmap *scsi_cmd;
+
+ scsi_cmd = (struct scsi_unmap *)&csio->cdb_io.cdb_bytes;
+ scsi_cmd->opcode = UNMAP;
+ scsi_cmd->byte2 = byte2;
+ scsi_ulto4b(0, scsi_cmd->reserved);
+ scsi_cmd->group = 0;
+ scsi_ulto2b(dxfer_len, scsi_cmd->length);
+ scsi_cmd->control = 0;
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_receive_diagnostic_results(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb*),
+ uint8_t tag_action, int pcv, uint8_t page_code,
+ uint8_t *data_ptr, uint16_t allocation_length,
+ uint8_t sense_len, uint32_t timeout)
+{
+ struct scsi_receive_diag *scsi_cmd;
+
+ scsi_cmd = (struct scsi_receive_diag *)&csio->cdb_io.cdb_bytes;
+ memset(scsi_cmd, 0, sizeof(*scsi_cmd));
+ scsi_cmd->opcode = RECEIVE_DIAGNOSTIC;
+ if (pcv) {
+ scsi_cmd->byte2 |= SRD_PCV;
+ scsi_cmd->page_code = page_code;
+ }
+ scsi_ulto2b(allocation_length, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ data_ptr,
+ allocation_length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_send_diagnostic(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int unit_offline, int device_offline,
+ int self_test, int page_format, int self_test_code,
+ uint8_t *data_ptr, uint16_t param_list_length,
+ uint8_t sense_len, uint32_t timeout)
+{
+ struct scsi_send_diag *scsi_cmd;
+
+ scsi_cmd = (struct scsi_send_diag *)&csio->cdb_io.cdb_bytes;
+ memset(scsi_cmd, 0, sizeof(*scsi_cmd));
+ scsi_cmd->opcode = SEND_DIAGNOSTIC;
+
+ /*
+ * The default self-test mode control and specific test
+ * control are mutually exclusive.
+ */
+ if (self_test)
+ self_test_code = SSD_SELF_TEST_CODE_NONE;
+
+ scsi_cmd->byte2 = ((self_test_code << SSD_SELF_TEST_CODE_SHIFT)
+ & SSD_SELF_TEST_CODE_MASK)
+ | (unit_offline ? SSD_UNITOFFL : 0)
+ | (device_offline ? SSD_DEVOFFL : 0)
+ | (self_test ? SSD_SELFTEST : 0)
+ | (page_format ? SSD_PF : 0);
+ scsi_ulto2b(param_list_length, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/param_list_length ? CAM_DIR_OUT : CAM_DIR_NONE,
+ tag_action,
+ data_ptr,
+ param_list_length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_read_buffer(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb*),
+ uint8_t tag_action, int mode,
+ uint8_t buffer_id, u_int32_t offset,
+ uint8_t *data_ptr, uint32_t allocation_length,
+ uint8_t sense_len, uint32_t timeout)
+{
+ struct scsi_read_buffer *scsi_cmd;
+
+ scsi_cmd = (struct scsi_read_buffer *)&csio->cdb_io.cdb_bytes;
+ memset(scsi_cmd, 0, sizeof(*scsi_cmd));
+ scsi_cmd->opcode = READ_BUFFER;
+ scsi_cmd->byte2 = mode;
+ scsi_cmd->buffer_id = buffer_id;
+ scsi_ulto3b(offset, scsi_cmd->offset);
+ scsi_ulto3b(allocation_length, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ data_ptr,
+ allocation_length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_write_buffer(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int mode,
+ uint8_t buffer_id, u_int32_t offset,
+ uint8_t *data_ptr, uint32_t param_list_length,
+ uint8_t sense_len, uint32_t timeout)
+{
+ struct scsi_write_buffer *scsi_cmd;
+
+ scsi_cmd = (struct scsi_write_buffer *)&csio->cdb_io.cdb_bytes;
+ memset(scsi_cmd, 0, sizeof(*scsi_cmd));
+ scsi_cmd->opcode = WRITE_BUFFER;
+ scsi_cmd->byte2 = mode;
+ scsi_cmd->buffer_id = buffer_id;
+ scsi_ulto3b(offset, scsi_cmd->offset);
+ scsi_ulto3b(param_list_length, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/param_list_length ? CAM_DIR_OUT : CAM_DIR_NONE,
+ tag_action,
+ data_ptr,
+ param_list_length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
void
scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -4370,7 +6407,6 @@ scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
sense_len,
sizeof(*scsi_cmd),
timeout);
-
}
@@ -4428,7 +6464,89 @@ scsi_static_inquiry_match(caddr_t inqbuffer, caddr_t table_entry)
return (-1);
}
+/**
+ * Compare two buffers of vpd device descriptors for a match.
+ *
+ * \param lhs Pointer to first buffer of descriptors to compare.
+ * \param lhs_len The length of the first buffer.
+ * \param rhs Pointer to second buffer of descriptors to compare.
+ * \param rhs_len The length of the second buffer.
+ *
+ * \return 0 on a match, -1 otherwise.
+ *
+ * Treat rhs and lhs as arrays of vpd device id descriptors. Walk lhs matching
+ * agains each element in rhs until all data are exhausted or we have found
+ * a match.
+ */
+int
+scsi_devid_match(uint8_t *lhs, size_t lhs_len, uint8_t *rhs, size_t rhs_len)
+{
+ struct scsi_vpd_id_descriptor *lhs_id;
+ struct scsi_vpd_id_descriptor *lhs_last;
+ struct scsi_vpd_id_descriptor *rhs_last;
+ uint8_t *lhs_end;
+ uint8_t *rhs_end;
+
+ lhs_end = lhs + lhs_len;
+ rhs_end = rhs + rhs_len;
+
+ /*
+ * rhs_last and lhs_last are the last posible position of a valid
+ * descriptor assuming it had a zero length identifier. We use
+ * these variables to insure we can safely dereference the length
+ * field in our loop termination tests.
+ */
+ lhs_last = (struct scsi_vpd_id_descriptor *)
+ (lhs_end - __offsetof(struct scsi_vpd_id_descriptor, identifier));
+ rhs_last = (struct scsi_vpd_id_descriptor *)
+ (rhs_end - __offsetof(struct scsi_vpd_id_descriptor, identifier));
+
+ lhs_id = (struct scsi_vpd_id_descriptor *)lhs;
+ while (lhs_id <= lhs_last
+ && (lhs_id->identifier + lhs_id->length) <= lhs_end) {
+ struct scsi_vpd_id_descriptor *rhs_id;
+
+ rhs_id = (struct scsi_vpd_id_descriptor *)rhs;
+ while (rhs_id <= rhs_last
+ && (rhs_id->identifier + rhs_id->length) <= rhs_end) {
+
+ if (rhs_id->length == lhs_id->length
+ && memcmp(rhs_id->identifier, lhs_id->identifier,
+ rhs_id->length) == 0)
+ return (0);
+
+ rhs_id = (struct scsi_vpd_id_descriptor *)
+ (rhs_id->identifier + rhs_id->length);
+ }
+ lhs_id = (struct scsi_vpd_id_descriptor *)
+ (lhs_id->identifier + lhs_id->length);
+ }
+ return (-1);
+}
+
#ifdef _KERNEL
+int
+scsi_vpd_supported_page(struct cam_periph *periph, uint8_t page_id)
+{
+ struct cam_ed *device;
+ struct scsi_vpd_supported_pages *vpds;
+ int i, num_pages;
+
+ device = periph->path->device;
+ vpds = (struct scsi_vpd_supported_pages *)device->supported_vpds;
+
+ if (vpds != NULL) {
+ num_pages = device->supported_vpds_len -
+ SVPD_SUPPORTED_PAGES_HDR_LEN;
+ for (i = 0; i < num_pages; i++) {
+ if (vpds->page_list[i] == page_id)
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
static void
init_scsi_delay(void)
{
diff --git a/freebsd/sys/cam/scsi/scsi_all.h b/freebsd/sys/cam/scsi/scsi_all.h
index f6608056..4fe0b1dd 100644
--- a/freebsd/sys/cam/scsi/scsi_all.h
+++ b/freebsd/sys/cam/scsi/scsi_all.h
@@ -25,6 +25,7 @@
#define _SCSI_SCSI_ALL_H 1
#include <sys/cdefs.h>
+#include <machine/stdarg.h>
#ifdef _KERNEL
/*
@@ -73,9 +74,6 @@ typedef enum {
SS_TUR = 0x040000, /* Send a Test Unit Ready command to the
* device, then retry the original command.
*/
- SS_REQSENSE = 0x050000, /* Send a RequestSense command to the
- * device, then retry the original command.
- */
SS_MASK = 0xff0000
} scsi_sense_action;
@@ -115,6 +113,7 @@ struct scsi_request_sense
{
u_int8_t opcode;
u_int8_t byte2;
+#define SRS_DESC 0x01
u_int8_t unused[2];
u_int8_t length;
u_int8_t control;
@@ -128,17 +127,33 @@ struct scsi_test_unit_ready
u_int8_t control;
};
-struct scsi_send_diag
-{
- u_int8_t opcode;
- u_int8_t byte2;
-#define SSD_UOL 0x01
-#define SSD_DOL 0x02
-#define SSD_SELFTEST 0x04
-#define SSD_PF 0x10
- u_int8_t unused[1];
- u_int8_t paramlen[2];
- u_int8_t control;
+struct scsi_receive_diag {
+ uint8_t opcode;
+ uint8_t byte2;
+#define SRD_PCV 0x01
+ uint8_t page_code;
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_send_diag {
+ uint8_t opcode;
+ uint8_t byte2;
+#define SSD_UNITOFFL 0x01
+#define SSD_DEVOFFL 0x02
+#define SSD_SELFTEST 0x04
+#define SSD_PF 0x10
+#define SSD_SELF_TEST_CODE_MASK 0xE0
+#define SSD_SELF_TEST_CODE_SHIFT 5
+#define SSD_SELF_TEST_CODE_NONE 0x00
+#define SSD_SELF_TEST_CODE_BG_SHORT 0x01
+#define SSD_SELF_TEST_CODE_BG_EXTENDED 0x02
+#define SSD_SELF_TEST_CODE_BG_ABORT 0x04
+#define SSD_SELF_TEST_CODE_FG_SHORT 0x05
+#define SSD_SELF_TEST_CODE_FG_EXTENDED 0x06
+ uint8_t reserved;
+ uint8_t length[2];
+ uint8_t control;
};
struct scsi_sense
@@ -154,10 +169,10 @@ struct scsi_inquiry
{
u_int8_t opcode;
u_int8_t byte2;
-#define SI_EVPD 0x01
+#define SI_EVPD 0x01
+#define SI_CMDDT 0x02
u_int8_t page_code;
- u_int8_t reserved;
- u_int8_t length;
+ u_int8_t length[2];
u_int8_t control;
};
@@ -183,7 +198,9 @@ struct scsi_mode_sense_6
#define SMS_PAGE_CTRL_CHANGEABLE 0x40
#define SMS_PAGE_CTRL_DEFAULT 0x80
#define SMS_PAGE_CTRL_SAVED 0xC0
- u_int8_t unused;
+ u_int8_t subpage;
+#define SMS_SUBPAGE_PAGE_0 0x00
+#define SMS_SUBPAGE_ALL 0xff
u_int8_t length;
u_int8_t control;
};
@@ -192,8 +209,10 @@ struct scsi_mode_sense_10
{
u_int8_t opcode;
u_int8_t byte2; /* same bits as small version */
+#define SMS10_LLBAA 0x10
u_int8_t page; /* same bits as small version */
- u_int8_t unused[4];
+ u_int8_t subpage;
+ u_int8_t unused[3];
u_int8_t length[2];
u_int8_t control;
};
@@ -246,6 +265,120 @@ struct scsi_mode_block_descr
u_int8_t block_len[3];
};
+struct scsi_per_res_in
+{
+ u_int8_t opcode;
+ u_int8_t action;
+#define SPRI_RK 0x00
+#define SPRI_RR 0x01
+#define SPRI_RC 0x02
+#define SPRI_RS 0x03
+ u_int8_t reserved[5];
+ u_int8_t length[2];
+ u_int8_t control;
+};
+
+struct scsi_per_res_in_header
+{
+ u_int8_t generation[4];
+ u_int8_t length[4];
+};
+
+struct scsi_per_res_key
+{
+ u_int8_t key[8];
+};
+
+struct scsi_per_res_in_keys
+{
+ struct scsi_per_res_in_header header;
+ struct scsi_per_res_key keys[0];
+};
+
+struct scsi_per_res_cap
+{
+ uint8_t length[2];
+ uint8_t flags1;
+#define SPRI_CRH 0x10
+#define SPRI_SIP_C 0x08
+#define SPRI_ATP_C 0x04
+#define SPRI_PTPL_C 0x01
+ uint8_t flags2;
+#define SPRI_TMV 0x80
+#define SPRI_PTPL_A 0x01
+ uint8_t type_mask[2];
+#define SPRI_TM_WR_EX_AR 0x8000
+#define SPRI_TM_EX_AC_RO 0x4000
+#define SPRI_TM_WR_EX_RO 0x2000
+#define SPRI_TM_EX_AC 0x0800
+#define SPRI_TM_WR_EX 0x0200
+#define SPRI_TM_EX_AC_AR 0x0001
+ uint8_t reserved[2];
+};
+
+struct scsi_per_res_in_rsrv_data
+{
+ uint8_t reservation[8];
+ uint8_t obsolete1[4];
+ uint8_t reserved;
+ uint8_t scopetype;
+#define SPRT_WE 0x01
+#define SPRT_EA 0x03
+#define SPRT_WERO 0x05
+#define SPRT_EARO 0x06
+#define SPRT_WEAR 0x07
+#define SPRT_EAAR 0x08
+ uint8_t obsolete2[2];
+};
+
+struct scsi_per_res_in_rsrv
+{
+ struct scsi_per_res_in_header header;
+ struct scsi_per_res_in_rsrv_data data;
+};
+
+struct scsi_per_res_out
+{
+ u_int8_t opcode;
+ u_int8_t action;
+#define SPRO_REGISTER 0x00
+#define SPRO_RESERVE 0x01
+#define SPRO_RELEASE 0x02
+#define SPRO_CLEAR 0x03
+#define SPRO_PREEMPT 0x04
+#define SPRO_PRE_ABO 0x05
+#define SPRO_REG_IGNO 0x06
+#define SPRO_REG_MOVE 0x07
+#define SPRO_ACTION_MASK 0x1f
+ u_int8_t scope_type;
+#define SPR_SCOPE_MASK 0xf0
+#define SPR_LU_SCOPE 0x00
+#define SPR_TYPE_MASK 0x0f
+#define SPR_TYPE_WR_EX 0x01
+#define SPR_TYPE_EX_AC 0x03
+#define SPR_TYPE_WR_EX_RO 0x05
+#define SPR_TYPE_EX_AC_RO 0x06
+#define SPR_TYPE_WR_EX_AR 0x07
+#define SPR_TYPE_EX_AC_AR 0x08
+ u_int8_t reserved[2];
+ u_int8_t length[4];
+ u_int8_t control;
+};
+
+struct scsi_per_res_out_parms
+{
+ struct scsi_per_res_key res_key;
+ u_int8_t serv_act_res_key[8];
+ u_int8_t obsolete1[4];
+ u_int8_t flags;
+#define SPR_SPEC_I_PT 0x08
+#define SPR_ALL_TG_PT 0x04
+#define SPR_APTPL 0x01
+ u_int8_t reserved1;
+ u_int8_t obsolete2[2];
+};
+
+
struct scsi_log_sense
{
u_int8_t opcode;
@@ -320,7 +453,16 @@ struct scsi_control_page {
u_int8_t page_code;
u_int8_t page_length;
u_int8_t rlec;
-#define SCB_RLEC 0x01 /*Report Log Exception Cond*/
+#define SCP_RLEC 0x01 /*Report Log Exception Cond*/
+#define SCP_GLTSD 0x02 /*Global Logging target
+ save disable */
+#define SCP_DSENSE 0x04 /*Descriptor Sense */
+#define SCP_DPICZ 0x08 /*Disable Prot. Info Check
+ if Prot. Field is Zero */
+#define SCP_TMF_ONLY 0x10 /*TM Functions Only*/
+#define SCP_TST_MASK 0xE0 /*Task Set Type Mask*/
+#define SCP_TST_ONE 0x00 /*One Task Set*/
+#define SCP_TST_SEPARATE 0x20 /*Separate Task Sets*/
u_int8_t queue_flags;
#define SCP_QUEUE_ALG_MASK 0xF0
#define SCP_QUEUE_ALG_RESTRICTED 0x00
@@ -351,6 +493,90 @@ struct scsi_cache_page {
u_int8_t max_prefetch_ceil[2];
};
+/*
+ * XXX KDM
+ * Updated version of the cache page, as of SBC. Update this to SBC-3 and
+ * rationalize the two.
+ */
+struct scsi_caching_page {
+ uint8_t page_code;
+#define SMS_CACHING_PAGE 0x08
+ uint8_t page_length;
+ uint8_t flags1;
+#define SCP_IC 0x80
+#define SCP_ABPF 0x40
+#define SCP_CAP 0x20
+#define SCP_DISC 0x10
+#define SCP_SIZE 0x08
+#define SCP_WCE 0x04
+#define SCP_MF 0x02
+#define SCP_RCD 0x01
+ uint8_t ret_priority;
+ uint8_t disable_pf_transfer_len[2];
+ uint8_t min_prefetch[2];
+ uint8_t max_prefetch[2];
+ uint8_t max_pf_ceiling[2];
+ uint8_t flags2;
+#define SCP_FSW 0x80
+#define SCP_LBCSS 0x40
+#define SCP_DRA 0x20
+#define SCP_VS1 0x10
+#define SCP_VS2 0x08
+ uint8_t cache_segments;
+ uint8_t cache_seg_size[2];
+ uint8_t reserved;
+ uint8_t non_cache_seg_size[3];
+};
+
+/*
+ * XXX KDM move this off to a vendor shim.
+ */
+struct copan_power_subpage {
+ uint8_t page_code;
+#define PWR_PAGE_CODE 0x00
+ uint8_t subpage;
+#define PWR_SUBPAGE_CODE 0x02
+ uint8_t page_length[2];
+ uint8_t page_version;
+#define PWR_VERSION 0x01
+ uint8_t total_luns;
+ uint8_t max_active_luns;
+#define PWR_DFLT_MAX_LUNS 0x07
+ uint8_t reserved[25];
+};
+
+/*
+ * XXX KDM move this off to a vendor shim.
+ */
+struct copan_aps_subpage {
+ uint8_t page_code;
+#define APS_PAGE_CODE 0x00
+ uint8_t subpage;
+#define APS_SUBPAGE_CODE 0x03
+ uint8_t page_length[2];
+ uint8_t page_version;
+#define APS_VERSION 0x00
+ uint8_t lock_active;
+#define APS_LOCK_ACTIVE 0x01
+#define APS_LOCK_INACTIVE 0x00
+ uint8_t reserved[26];
+};
+
+/*
+ * XXX KDM move this off to a vendor shim.
+ */
+struct copan_debugconf_subpage {
+ uint8_t page_code;
+#define DBGCNF_PAGE_CODE 0x00
+ uint8_t subpage;
+#define DBGCNF_SUBPAGE_CODE 0xF0
+ uint8_t page_length[2];
+ uint8_t page_version;
+#define DBGCNF_VERSION 0x00
+ uint8_t ctl_time_io_secs[2];
+};
+
+
struct scsi_info_exceptions_page {
u_int8_t page_code;
#define SIEP_PAGE_SAVABLE 0x80 /* Page is savable */
@@ -389,20 +615,49 @@ struct scsi_reserve
{
u_int8_t opcode;
u_int8_t byte2;
- u_int8_t unused[2];
- u_int8_t length;
+#define SR_EXTENT 0x01
+#define SR_ID_MASK 0x0e
+#define SR_3RDPTY 0x10
+#define SR_LUN_MASK 0xe0
+ u_int8_t resv_id;
+ u_int8_t length[2];
u_int8_t control;
};
+struct scsi_reserve_10 {
+ uint8_t opcode;
+ uint8_t byte2;
+#define SR10_3RDPTY 0x10
+#define SR10_LONGID 0x02
+#define SR10_EXTENT 0x01
+ uint8_t resv_id;
+ uint8_t thirdparty_id;
+ uint8_t reserved[3];
+ uint8_t length[2];
+ uint8_t control;
+};
+
+
struct scsi_release
{
u_int8_t opcode;
u_int8_t byte2;
- u_int8_t unused[2];
+ u_int8_t resv_id;
+ u_int8_t unused[1];
u_int8_t length;
u_int8_t control;
};
+struct scsi_release_10 {
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t resv_id;
+ uint8_t thirdparty_id;
+ uint8_t reserved[3];
+ uint8_t length[2];
+ uint8_t control;
+};
+
struct scsi_prevent
{
u_int8_t opcode;
@@ -418,12 +673,60 @@ struct scsi_sync_cache
{
u_int8_t opcode;
u_int8_t byte2;
+#define SSC_IMMED 0x02
+#define SSC_RELADR 0x01
u_int8_t begin_lba[4];
u_int8_t reserved;
u_int8_t lb_count[2];
u_int8_t control;
};
+struct scsi_sync_cache_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t begin_lba[8];
+ uint8_t lb_count[4];
+ uint8_t reserved;
+ uint8_t control;
+};
+
+struct scsi_format {
+ uint8_t opcode;
+ uint8_t byte2;
+#define SF_LONGLIST 0x20
+#define SF_FMTDATA 0x10
+#define SF_CMPLIST 0x08
+#define SF_FORMAT_MASK 0x07
+#define SF_FORMAT_BLOCK 0x00
+#define SF_FORMAT_LONG_BLOCK 0x03
+#define SF_FORMAT_BFI 0x04
+#define SF_FORMAT_PHYS 0x05
+ uint8_t vendor;
+ uint8_t interleave[2];
+ uint8_t control;
+};
+
+struct scsi_format_header_short {
+ uint8_t reserved;
+#define SF_DATA_FOV 0x80
+#define SF_DATA_DPRY 0x40
+#define SF_DATA_DCRT 0x20
+#define SF_DATA_STPF 0x10
+#define SF_DATA_IP 0x08
+#define SF_DATA_DSP 0x04
+#define SF_DATA_IMMED 0x02
+#define SF_DATA_VS 0x01
+ uint8_t byte2;
+ uint8_t defect_list_len[2];
+};
+
+struct scsi_format_header_long {
+ uint8_t reserved;
+ uint8_t byte2;
+ uint8_t reserved2[2];
+ uint8_t defect_list_len[4];
+};
struct scsi_changedef
{
@@ -442,6 +745,7 @@ struct scsi_read_buffer
u_int8_t byte2;
#define RWB_MODE 0x07
#define RWB_MODE_HDR_DATA 0x00
+#define RWB_MODE_VENDOR 0x01
#define RWB_MODE_DATA 0x02
#define RWB_MODE_DOWNLOAD 0x04
#define RWB_MODE_DOWNLOAD_SAVE 0x05
@@ -512,6 +816,75 @@ struct scsi_rw_16
u_int8_t control;
};
+struct scsi_write_same_10
+{
+ uint8_t opcode;
+ uint8_t byte2;
+#define SWS_LBDATA 0x02
+#define SWS_PBDATA 0x04
+#define SWS_UNMAP 0x08
+#define SWS_ANCHOR 0x10
+ uint8_t addr[4];
+ uint8_t group;
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_write_same_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t addr[8];
+ uint8_t length[4];
+ uint8_t group;
+ uint8_t control;
+};
+
+struct scsi_unmap
+{
+ uint8_t opcode;
+ uint8_t byte2;
+#define SU_ANCHOR 0x01
+ uint8_t reserved[4];
+ uint8_t group;
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_write_verify_10
+{
+ uint8_t opcode;
+ uint8_t byte2;
+#define SWV_BYTCHK 0x02
+#define SWV_DPO 0x10
+#define SWV_WRPROECT_MASK 0xe0
+ uint8_t addr[4];
+ uint8_t group;
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_write_verify_12
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t addr[4];
+ uint8_t length[4];
+ uint8_t group;
+ uint8_t control;
+};
+
+struct scsi_write_verify_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t addr[8];
+ uint8_t length[4];
+ uint8_t group;
+ uint8_t control;
+};
+
+
struct scsi_start_stop_unit
{
u_int8_t opcode;
@@ -521,12 +894,33 @@ struct scsi_start_stop_unit
u_int8_t how;
#define SSS_START 0x01
#define SSS_LOEJ 0x02
+#define SSS_PC_MASK 0xf0
+#define SSS_PC_START_VALID 0x00
+#define SSS_PC_ACTIVE 0x10
+#define SSS_PC_IDLE 0x20
+#define SSS_PC_STANDBY 0x30
+#define SSS_PC_LU_CONTROL 0x70
+#define SSS_PC_FORCE_IDLE_0 0xa0
+#define SSS_PC_FORCE_STANDBY_0 0xb0
u_int8_t control;
};
struct ata_pass_12 {
u_int8_t opcode;
u_int8_t protocol;
+#define AP_PROTO_HARD_RESET (0x00 << 1)
+#define AP_PROTO_SRST (0x01 << 1)
+#define AP_PROTO_NON_DATA (0x03 << 1)
+#define AP_PROTO_PIO_IN (0x04 << 1)
+#define AP_PROTO_PIO_OUT (0x05 << 1)
+#define AP_PROTO_DMA (0x06 << 1)
+#define AP_PROTO_DMA_QUEUED (0x07 << 1)
+#define AP_PROTO_DEVICE_DIAG (0x08 << 1)
+#define AP_PROTO_DEVICE_RESET (0x09 << 1)
+#define AP_PROTO_UDMA_IN (0x0a << 1)
+#define AP_PROTO_UDMA_OUT (0x0b << 1)
+#define AP_PROTO_FPDMA (0x0c << 1)
+#define AP_PROTO_RESP_INFO (0x0f << 1)
#define AP_MULTI 0xe0
u_int8_t flags;
#define AP_T_LEN 0x03
@@ -545,11 +939,32 @@ struct ata_pass_12 {
u_int8_t control;
};
+struct scsi_maintenance_in
+{
+ uint8_t opcode;
+ uint8_t byte2;
+#define SERVICE_ACTION_MASK 0x1f
+#define SA_RPRT_TRGT_GRP 0x0a
+ uint8_t reserved[4];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
struct ata_pass_16 {
u_int8_t opcode;
u_int8_t protocol;
#define AP_EXTEND 0x01
u_int8_t flags;
+#define AP_FLAG_TLEN_NO_DATA (0 << 0)
+#define AP_FLAG_TLEN_FEAT (1 << 0)
+#define AP_FLAG_TLEN_SECT_CNT (2 << 0)
+#define AP_FLAG_TLEN_STPSIU (3 << 0)
+#define AP_FLAG_BYT_BLOK_BYTES (0 << 2)
+#define AP_FLAG_BYT_BLOK_BLOCKS (1 << 2)
+#define AP_FLAG_TDIR_TO_DEV (0 << 3)
+#define AP_FLAG_TDIR_FROM_DEV (1 << 3)
+#define AP_FLAG_CHK_COND (1 << 5)
u_int8_t features_ext;
u_int8_t features;
u_int8_t sector_count_ext;
@@ -590,18 +1005,29 @@ struct ata_pass_16 {
#define READ_10 0x28
#define WRITE_10 0x2A
#define POSITION_TO_ELEMENT 0x2B
+#define WRITE_VERIFY_10 0x2E
+#define VERIFY_10 0x2F
#define SYNCHRONIZE_CACHE 0x35
#define READ_DEFECT_DATA_10 0x37
#define WRITE_BUFFER 0x3B
#define READ_BUFFER 0x3C
#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME_10 0x41
+#define UNMAP 0x42
#define LOG_SELECT 0x4C
#define LOG_SENSE 0x4D
#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
#define MODE_SENSE_10 0x5A
+#define PERSISTENT_RES_IN 0x5E
+#define PERSISTENT_RES_OUT 0x5F
#define ATA_PASS_16 0x85
#define READ_16 0x88
#define WRITE_16 0x8A
+#define WRITE_VERIFY_16 0x8E
+#define SYNCHRONIZE_CACHE_16 0x91
+#define WRITE_SAME_16 0x93
#define SERVICE_ACTION_IN 0x9E
#define REPORT_LUNS 0xA0
#define ATA_PASS_12 0xA1
@@ -610,6 +1036,7 @@ struct ata_pass_16 {
#define MOVE_MEDIUM 0xA5
#define READ_12 0xA8
#define WRITE_12 0xAA
+#define WRITE_VERIFY_12 0xAE
#define READ_ELEMENT_STATUS 0xB8
#define READ_CD 0xBE
@@ -659,7 +1086,7 @@ struct ata_pass_16 {
/*
* This length is the initial inquiry length used by the probe code, as
- * well as the legnth necessary for scsi_print_inquiry() to function
+ * well as the length necessary for scsi_print_inquiry() to function
* correctly. If either use requires a different length in the future,
* the two values should be de-coupled.
*/
@@ -721,10 +1148,12 @@ struct scsi_inquiry_data
u_int8_t response_format;
#define SID_AENC 0x80
#define SID_TrmIOP 0x40
+#define SID_NormACA 0x20
+#define SID_HiSup 0x10
u_int8_t additional_length;
#define SID_ADDITIONAL_LENGTH(iqd) \
((iqd)->additional_length + \
- offsetof(struct scsi_inquiry_data, additional_length) + 1)
+ __offsetof(struct scsi_inquiry_data, additional_length) + 1)
u_int8_t spc3_flags;
#define SPC3_SID_PROTECT 0x01
#define SPC3_SID_3PC 0x08
@@ -734,6 +1163,7 @@ struct scsi_inquiry_data
#define SPC3_SID_ACC 0x40
#define SPC3_SID_SCCS 0x80
u_int8_t spc2_flags;
+#define SPC2_SID_ADDR16 0x01
#define SPC2_SID_MChngr 0x08
#define SPC2_SID_MultiP 0x10
#define SPC2_SID_EncServ 0x40
@@ -793,17 +1223,37 @@ struct scsi_inquiry_data
u_int8_t vendor_specific1[SID_VENDOR_SPECIFIC_1_SIZE];
};
+/*
+ * This structure is more suited to initiator operation, because the
+ * maximum number of supported pages is already allocated.
+ */
struct scsi_vpd_supported_page_list
{
u_int8_t device;
u_int8_t page_code;
-#define SVPD_SUPPORTED_PAGE_LIST 0x00
+#define SVPD_SUPPORTED_PAGE_LIST 0x00
+#define SVPD_SUPPORTED_PAGES_HDR_LEN 4
u_int8_t reserved;
u_int8_t length; /* number of VPD entries */
#define SVPD_SUPPORTED_PAGES_SIZE 251
u_int8_t list[SVPD_SUPPORTED_PAGES_SIZE];
};
+/*
+ * This structure is more suited to target operation, because the
+ * number of supported pages is left to the user to allocate.
+ */
+struct scsi_vpd_supported_pages
+{
+ u_int8_t device;
+ u_int8_t page_code;
+ u_int8_t reserved;
+#define SVPD_SUPPORTED_PAGES 0x00
+ u_int8_t length;
+ u_int8_t page_list[0];
+};
+
+
struct scsi_vpd_unit_serial_number
{
u_int8_t device;
@@ -815,12 +1265,265 @@ struct scsi_vpd_unit_serial_number
u_int8_t serial_num[SVPD_SERIAL_NUM_SIZE];
};
+struct scsi_vpd_device_id
+{
+ u_int8_t device;
+ u_int8_t page_code;
+#define SVPD_DEVICE_ID 0x83
+#define SVPD_DEVICE_ID_MAX_SIZE 252
+#define SVPD_DEVICE_ID_HDR_LEN \
+ __offsetof(struct scsi_vpd_device_id, desc_list)
+ u_int8_t length[2];
+ u_int8_t desc_list[];
+};
+
+struct scsi_vpd_id_descriptor
+{
+ u_int8_t proto_codeset;
+#define SCSI_PROTO_FC 0x00
+#define SCSI_PROTO_SPI 0x01
+#define SCSI_PROTO_SSA 0x02
+#define SCSI_PROTO_1394 0x03
+#define SCSI_PROTO_RDMA 0x04
+#define SCSI_PROTO_iSCSI 0x05
+#define SCSI_PROTO_SAS 0x06
+#define SCSI_PROTO_ADT 0x07
+#define SCSI_PROTO_ATA 0x08
+#define SVPD_ID_PROTO_SHIFT 4
+#define SVPD_ID_CODESET_BINARY 0x01
+#define SVPD_ID_CODESET_ASCII 0x02
+#define SVPD_ID_CODESET_UTF8 0x03
+#define SVPD_ID_CODESET_MASK 0x0f
+ u_int8_t id_type;
+#define SVPD_ID_PIV 0x80
+#define SVPD_ID_ASSOC_LUN 0x00
+#define SVPD_ID_ASSOC_PORT 0x10
+#define SVPD_ID_ASSOC_TARGET 0x20
+#define SVPD_ID_ASSOC_MASK 0x30
+#define SVPD_ID_TYPE_VENDOR 0x00
+#define SVPD_ID_TYPE_T10 0x01
+#define SVPD_ID_TYPE_EUI64 0x02
+#define SVPD_ID_TYPE_NAA 0x03
+#define SVPD_ID_TYPE_RELTARG 0x04
+#define SVPD_ID_TYPE_TPORTGRP 0x05
+#define SVPD_ID_TYPE_LUNGRP 0x06
+#define SVPD_ID_TYPE_MD5_LUN_ID 0x07
+#define SVPD_ID_TYPE_SCSI_NAME 0x08
+#define SVPD_ID_TYPE_MASK 0x0f
+ u_int8_t reserved;
+ u_int8_t length;
+#define SVPD_DEVICE_ID_DESC_HDR_LEN \
+ __offsetof(struct scsi_vpd_id_descriptor, identifier)
+ u_int8_t identifier[];
+};
+
+struct scsi_vpd_id_t10
+{
+ u_int8_t vendor[8];
+ u_int8_t vendor_spec_id[0];
+};
+
+struct scsi_vpd_id_eui64
+{
+ u_int8_t ieee_company_id[3];
+ u_int8_t extension_id[5];
+};
+
+struct scsi_vpd_id_naa_basic
+{
+ uint8_t naa;
+ /* big endian, packed:
+ uint8_t naa : 4;
+ uint8_t naa_desig : 4;
+ */
+#define SVPD_ID_NAA_NAA_SHIFT 4
+#define SVPD_ID_NAA_IEEE_EXT 0x02
+#define SVPD_ID_NAA_LOCAL_REG 0x03
+#define SVPD_ID_NAA_IEEE_REG 0x05
+#define SVPD_ID_NAA_IEEE_REG_EXT 0x06
+ uint8_t naa_data[];
+};
+
+struct scsi_vpd_id_naa_ieee_extended_id
+{
+ uint8_t naa;
+ uint8_t vendor_specific_id_a;
+ uint8_t ieee_company_id[3];
+ uint8_t vendor_specific_id_b[4];
+};
+
+struct scsi_vpd_id_naa_local_reg
+{
+ uint8_t naa;
+ uint8_t local_value[7];
+};
+
+struct scsi_vpd_id_naa_ieee_reg
+{
+ uint8_t naa;
+ uint8_t reg_value[7];
+ /* big endian, packed:
+ uint8_t naa_basic : 4;
+ uint8_t ieee_company_id_0 : 4;
+ uint8_t ieee_company_id_1[2];
+ uint8_t ieee_company_id_2 : 4;
+ uint8_t vendor_specific_id_0 : 4;
+ uint8_t vendor_specific_id_1[4];
+ */
+};
+
+struct scsi_vpd_id_naa_ieee_reg_extended
+{
+ uint8_t naa;
+ uint8_t reg_value[15];
+ /* big endian, packed:
+ uint8_t naa_basic : 4;
+ uint8_t ieee_company_id_0 : 4;
+ uint8_t ieee_company_id_1[2];
+ uint8_t ieee_company_id_2 : 4;
+ uint8_t vendor_specific_id_0 : 4;
+ uint8_t vendor_specific_id_1[4];
+ uint8_t vendor_specific_id_ext[8];
+ */
+};
+
+struct scsi_vpd_id_rel_trgt_port_id
+{
+ uint8_t obsolete[2];
+ uint8_t rel_trgt_port_id[2];
+};
+
+struct scsi_vpd_id_trgt_port_grp_id
+{
+ uint8_t reserved[2];
+ uint8_t trgt_port_grp[2];
+};
+
+struct scsi_vpd_id_lun_grp_id
+{
+ uint8_t reserved[2];
+ uint8_t log_unit_grp[2];
+};
+
+struct scsi_vpd_id_md5_lun_id
+{
+ uint8_t lun_id[16];
+};
+
+struct scsi_vpd_id_scsi_name
+{
+ uint8_t name_string[256];
+};
+
+struct scsi_service_action_in
+{
+ uint8_t opcode;
+ uint8_t service_action;
+ uint8_t action_dependent[13];
+ uint8_t control;
+};
+
+struct scsi_diag_page {
+ uint8_t page_code;
+ uint8_t page_specific_flags;
+ uint8_t length[2];
+ uint8_t params[0];
+};
+
+/*
+ * ATA Information VPD Page based on
+ * T10/2126-D Revision 04
+ */
+#define SVPD_ATA_INFORMATION 0x89
+
+/*
+ * Block Device Characteristics VPD Page based on
+ * T10/1799-D Revision 31
+ */
+struct scsi_vpd_block_characteristics
+{
+ u_int8_t device;
+ u_int8_t page_code;
+#define SVPD_BDC 0xB1
+ u_int8_t page_length[2];
+ u_int8_t medium_rotation_rate[2];
+#define SVPD_BDC_RATE_NOT_REPORTED 0x00
+#define SVPD_BDC_RATE_NONE_ROTATING 0x01
+ u_int8_t reserved1;
+ u_int8_t nominal_form_factor;
+#define SVPD_BDC_FORM_NOT_REPORTED 0x00
+#define SVPD_BDC_FORM_5_25INCH 0x01
+#define SVPD_BDC_FORM_3_5INCH 0x02
+#define SVPD_BDC_FORM_2_5INCH 0x03
+#define SVPD_BDC_FORM_1_5INCH 0x04
+#define SVPD_BDC_FORM_LESSTHAN_1_5INCH 0x05
+ u_int8_t reserved2[56];
+};
+
+/*
+ * Logical Block Provisioning VPD Page based on
+ * T10/1799-D Revision 31
+ */
+struct scsi_vpd_logical_block_prov
+{
+ u_int8_t device;
+ u_int8_t page_code;
+#define SVPD_LBP 0xB2
+ u_int8_t page_length[2];
+#define SVPD_LBP_PL_BASIC 0x04
+ u_int8_t threshold_exponent;
+ u_int8_t flags;
+#define SVPD_LBP_UNMAP 0x80
+#define SVPD_LBP_WS16 0x40
+#define SVPD_LBP_WS10 0x20
+#define SVPD_LBP_RZ 0x04
+#define SVPD_LBP_ANC_SUP 0x02
+#define SVPD_LBP_DP 0x01
+ u_int8_t prov_type;
+#define SVPD_LBP_RESOURCE 0x01
+#define SVPD_LBP_THIN 0x02
+ u_int8_t reserved;
+ /*
+ * Provisioning Group Descriptor can be here if SVPD_LBP_DP is set
+ * Its size can be determined from page_length - 4
+ */
+};
+
+/*
+ * Block Limits VDP Page based on
+ * T10/1799-D Revision 31
+ */
+struct scsi_vpd_block_limits
+{
+ u_int8_t device;
+ u_int8_t page_code;
+#define SVPD_BLOCK_LIMITS 0xB0
+ u_int8_t page_length[2];
+#define SVPD_BL_PL_BASIC 0x10
+#define SVPD_BL_PL_TP 0x3C
+ u_int8_t reserved1;
+ u_int8_t max_cmp_write_len;
+ u_int8_t opt_txfer_len_grain[2];
+ u_int8_t max_txfer_len[4];
+ u_int8_t opt_txfer_len[4];
+ u_int8_t max_prefetch[4];
+ u_int8_t max_unmap_lba_cnt[4];
+ u_int8_t max_unmap_blk_cnt[4];
+ u_int8_t opt_unmap_grain[4];
+ u_int8_t unmap_grain_align[4];
+ u_int8_t max_write_same_length[8];
+ u_int8_t reserved2[20];
+};
+
struct scsi_read_capacity
{
u_int8_t opcode;
u_int8_t byte2;
+#define SRC_RELADR 0x01
u_int8_t addr[4];
- u_int8_t unused[3];
+ u_int8_t unused[2];
+ u_int8_t pmi;
+#define SRC_PMI 0x01
u_int8_t control;
};
@@ -849,14 +1552,24 @@ struct scsi_read_capacity_data_long
uint8_t length[4];
#define SRC16_PROT_EN 0x01
#define SRC16_P_TYPE 0x0e
+#define SRC16_PTYPE_1 0x00
+#define SRC16_PTYPE_2 0x02
+#define SRC16_PTYPE_3 0x04
uint8_t prot;
#define SRC16_LBPPBE 0x0f
#define SRC16_PI_EXPONENT 0xf0
#define SRC16_PI_EXPONENT_SHIFT 4
uint8_t prot_lbppbe;
-#define SRC16_LALBA 0x3fff
-#define SRC16_LBPRZ 0x4000
-#define SRC16_LBPME 0x8000
+#define SRC16_LALBA 0x3f
+#define SRC16_LBPRZ 0x40
+#define SRC16_LBPME 0x80
+/*
+ * Alternate versions of these macros that are intended for use on a 16-bit
+ * version of the lalba_lbp field instead of the array of 2 8 bit numbers.
+ */
+#define SRC16_LALBA_A 0x3fff
+#define SRC16_LBPRZ_A 0x4000
+#define SRC16_LBPME_A 0x8000
uint8_t lalba_lbp[2];
};
@@ -874,18 +1587,11 @@ struct scsi_report_luns
uint8_t control;
};
-struct scsi_report_luns_data {
- u_int8_t length[4]; /* length of LUN inventory, in bytes */
- u_int8_t reserved[4]; /* unused */
- /*
- * LUN inventory- we only support the type zero form for now.
- */
- struct {
- u_int8_t lundata[8];
- } luns[0];
-};
+struct scsi_report_luns_lundata {
+ uint8_t lundata[8];
#define RPL_LUNDATA_PERIPH_BUS_MASK 0x3f
#define RPL_LUNDATA_FLAT_LUN_MASK 0x3f
+#define RPL_LUNDATA_FLAT_LUN_BITS 0x06
#define RPL_LUNDATA_LUN_TARG_MASK 0x3f
#define RPL_LUNDATA_LUN_BUS_MASK 0xe0
#define RPL_LUNDATA_LUN_LUN_MASK 0x1f
@@ -898,6 +1604,16 @@ struct scsi_report_luns_data {
#define RPL_LUNDATA_ATYP_FLAT 0x40
#define RPL_LUNDATA_ATYP_LUN 0x80
#define RPL_LUNDATA_ATYP_EXTLUN 0xc0
+};
+
+struct scsi_report_luns_data {
+ u_int8_t length[4]; /* length of LUN inventory, in bytes */
+ u_int8_t reserved[4]; /* unused */
+ /*
+ * LUN inventory- we only support the type zero form for now.
+ */
+ struct scsi_report_luns_lundata luns[0];
+};
struct scsi_target_group
{
@@ -939,6 +1655,9 @@ struct scsi_target_port_group_descriptor {
uint8_t target_port_group[2];
uint8_t reserved;
uint8_t status;
+#define TPG_UNAVLBL 0
+#define TPG_SET_BY_STPG 0x01
+#define TPG_IMPLICIT 0x02
uint8_t vendor_specific;
uint8_t target_port_count;
struct scsi_target_port_descriptor descriptors[];
@@ -958,8 +1677,49 @@ struct scsi_target_group_data_extended {
};
+typedef enum {
+ SSD_TYPE_NONE,
+ SSD_TYPE_FIXED,
+ SSD_TYPE_DESC
+} scsi_sense_data_type;
+
+typedef enum {
+ SSD_ELEM_NONE,
+ SSD_ELEM_SKIP,
+ SSD_ELEM_DESC,
+ SSD_ELEM_SKS,
+ SSD_ELEM_COMMAND,
+ SSD_ELEM_INFO,
+ SSD_ELEM_FRU,
+ SSD_ELEM_STREAM,
+ SSD_ELEM_MAX
+} scsi_sense_elem_type;
+
+
struct scsi_sense_data
{
+ uint8_t error_code;
+ /*
+ * SPC-4 says that the maximum length of sense data is 252 bytes.
+ * So this structure is exactly 252 bytes log.
+ */
+#define SSD_FULL_SIZE 252
+ uint8_t sense_buf[SSD_FULL_SIZE - 1];
+ /*
+ * XXX KDM is this still a reasonable minimum size?
+ */
+#define SSD_MIN_SIZE 18
+ /*
+ * Maximum value for the extra_len field in the sense data.
+ */
+#define SSD_EXTRA_MAX 244
+};
+
+/*
+ * Fixed format sense data.
+ */
+struct scsi_sense_data_fixed
+{
u_int8_t error_code;
#define SSD_ERRCODE 0x7F
#define SSD_CURRENT_ERROR 0x70
@@ -983,7 +1743,7 @@ struct scsi_sense_data
#define SSD_KEY_EQUAL 0x0c
#define SSD_KEY_VOLUME_OVERFLOW 0x0d
#define SSD_KEY_MISCOMPARE 0x0e
-#define SSD_KEY_RESERVED 0x0f
+#define SSD_KEY_COMPLETED 0x0f
#define SSD_ILI 0x20
#define SSD_EOM 0x40
#define SSD_FILEMARK 0x80
@@ -998,9 +1758,313 @@ struct scsi_sense_data
#define SSD_FIELDPTR_CMD 0x40
#define SSD_BITPTR_VALID 0x08
#define SSD_BITPTR_VALUE 0x07
-#define SSD_MIN_SIZE 18
u_int8_t extra_bytes[14];
-#define SSD_FULL_SIZE sizeof(struct scsi_sense_data)
+#define SSD_FIXED_IS_PRESENT(sense, length, field) \
+ ((length >= (offsetof(struct scsi_sense_data_fixed, field) + \
+ sizeof(sense->field))) ? 1 :0)
+#define SSD_FIXED_IS_FILLED(sense, field) \
+ ((((offsetof(struct scsi_sense_data_fixed, field) + \
+ sizeof(sense->field)) - \
+ (offsetof(struct scsi_sense_data_fixed, extra_len) + \
+ sizeof(sense->extra_len))) <= sense->extra_len) ? 1 : 0)
+};
+
+/*
+ * Descriptor format sense data definitions.
+ * Introduced in SPC-3.
+ */
+struct scsi_sense_data_desc
+{
+ uint8_t error_code;
+#define SSD_DESC_CURRENT_ERROR 0x72
+#define SSD_DESC_DEFERRED_ERROR 0x73
+ uint8_t sense_key;
+ uint8_t add_sense_code;
+ uint8_t add_sense_code_qual;
+ uint8_t reserved[3];
+ /*
+ * Note that SPC-4, section 4.5.2.1 says that the extra_len field
+ * must be less than or equal to 244.
+ */
+ uint8_t extra_len;
+ uint8_t sense_desc[0];
+#define SSD_DESC_IS_PRESENT(sense, length, field) \
+ ((length >= (offsetof(struct scsi_sense_data_desc, field) + \
+ sizeof(sense->field))) ? 1 :0)
+};
+
+struct scsi_sense_desc_header
+{
+ uint8_t desc_type;
+ uint8_t length;
+};
+/*
+ * The information provide in the Information descriptor is device type or
+ * command specific information, and defined in a command standard.
+ *
+ * Note that any changes to the field names or positions in this structure,
+ * even reserved fields, should be accompanied by an examination of the
+ * code in ctl_set_sense() that uses them.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_info
+{
+ uint8_t desc_type;
+#define SSD_DESC_INFO 0x00
+ uint8_t length;
+ uint8_t byte2;
+#define SSD_INFO_VALID 0x80
+ uint8_t reserved;
+ uint8_t info[8];
+};
+
+/*
+ * Command-specific information depends on the command for which the
+ * reported condition occured.
+ *
+ * Note that any changes to the field names or positions in this structure,
+ * even reserved fields, should be accompanied by an examination of the
+ * code in ctl_set_sense() that uses them.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_command
+{
+ uint8_t desc_type;
+#define SSD_DESC_COMMAND 0x01
+ uint8_t length;
+ uint8_t reserved[2];
+ uint8_t command_info[8];
+};
+
+/*
+ * Sense key specific descriptor. The sense key specific data format
+ * depends on the sense key in question.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_sks
+{
+ uint8_t desc_type;
+#define SSD_DESC_SKS 0x02
+ uint8_t length;
+ uint8_t reserved1[2];
+ uint8_t sense_key_spec[3];
+#define SSD_SKS_VALID 0x80
+ uint8_t reserved2;
+};
+
+/*
+ * This is used for the Illegal Request sense key (0x05) only.
+ */
+struct scsi_sense_sks_field
+{
+ uint8_t byte0;
+#define SSD_SKS_FIELD_VALID 0x80
+#define SSD_SKS_FIELD_CMD 0x40
+#define SSD_SKS_BPV 0x08
+#define SSD_SKS_BIT_VALUE 0x07
+ uint8_t field[2];
+};
+
+
+/*
+ * This is used for the Hardware Error (0x04), Medium Error (0x03) and
+ * Recovered Error (0x01) sense keys.
+ */
+struct scsi_sense_sks_retry
+{
+ uint8_t byte0;
+#define SSD_SKS_RETRY_VALID 0x80
+ uint8_t actual_retry_count[2];
+};
+
+/*
+ * Used with the NO Sense (0x00) or Not Ready (0x02) sense keys.
+ */
+struct scsi_sense_sks_progress
+{
+ uint8_t byte0;
+#define SSD_SKS_PROGRESS_VALID 0x80
+ uint8_t progress[2];
+#define SSD_SKS_PROGRESS_DENOM 0x10000
+};
+
+/*
+ * Used with the Copy Aborted (0x0a) sense key.
+ */
+struct scsi_sense_sks_segment
+{
+ uint8_t byte0;
+#define SSD_SKS_SEGMENT_VALID 0x80
+#define SSD_SKS_SEGMENT_SD 0x20
+#define SSD_SKS_SEGMENT_BPV 0x08
+#define SSD_SKS_SEGMENT_BITPTR 0x07
+ uint8_t field[2];
+};
+
+/*
+ * Used with the Unit Attention (0x06) sense key.
+ *
+ * This is currently used to indicate that the unit attention condition
+ * queue has overflowed (when the overflow bit is set).
+ */
+struct scsi_sense_sks_overflow
+{
+ uint8_t byte0;
+#define SSD_SKS_OVERFLOW_VALID 0x80
+#define SSD_SKS_OVERFLOW_SET 0x01
+ uint8_t reserved[2];
+};
+
+/*
+ * This specifies which component is associated with the sense data. There
+ * is no standard meaning for the fru value.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_fru
+{
+ uint8_t desc_type;
+#define SSD_DESC_FRU 0x03
+ uint8_t length;
+ uint8_t reserved;
+ uint8_t fru;
+};
+
+/*
+ * Used for Stream commands, defined in SSC-4.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+
+struct scsi_sense_stream
+{
+ uint8_t desc_type;
+#define SSD_DESC_STREAM 0x04
+ uint8_t length;
+ uint8_t reserved;
+ uint8_t byte3;
+#define SSD_DESC_STREAM_FM 0x80
+#define SSD_DESC_STREAM_EOM 0x40
+#define SSD_DESC_STREAM_ILI 0x20
+};
+
+/*
+ * Used for Block commands, defined in SBC-3.
+ *
+ * This is currently (as of SBC-3) only used for the Incorrect Length
+ * Indication (ILI) bit, which says that the data length requested in the
+ * READ LONG or WRITE LONG command did not match the length of the logical
+ * block.
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_block
+{
+ uint8_t desc_type;
+#define SSD_DESC_BLOCK 0x05
+ uint8_t length;
+ uint8_t reserved;
+ uint8_t byte3;
+#define SSD_DESC_BLOCK_ILI 0x20
+};
+
+/*
+ * Used for Object-Based Storage Devices (OSD-3).
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_osd_objid
+{
+ uint8_t desc_type;
+#define SSD_DESC_OSD_OBJID 0x06
+ uint8_t length;
+ uint8_t reserved[6];
+ /*
+ * XXX KDM provide the bit definitions here? There are a lot of
+ * them, and we don't have an OSD driver yet.
+ */
+ uint8_t not_init_cmds[4];
+ uint8_t completed_cmds[4];
+ uint8_t partition_id[8];
+ uint8_t object_id[8];
+};
+
+/*
+ * Used for Object-Based Storage Devices (OSD-3).
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_osd_integrity
+{
+ uint8_t desc_type;
+#define SSD_DESC_OSD_INTEGRITY 0x07
+ uint8_t length;
+ uint8_t integ_check_val[32];
+};
+
+/*
+ * Used for Object-Based Storage Devices (OSD-3).
+ *
+ * Maximum descriptors allowed: 1 (as of SPC-4)
+ */
+struct scsi_sense_osd_attr_id
+{
+ uint8_t desc_type;
+#define SSD_DESC_OSD_ATTR_ID 0x08
+ uint8_t length;
+ uint8_t reserved[2];
+ uint8_t attr_desc[0];
+};
+
+/*
+ * Used with Sense keys No Sense (0x00) and Not Ready (0x02).
+ *
+ * Maximum descriptors allowed: 32 (as of SPC-4)
+ */
+struct scsi_sense_progress
+{
+ uint8_t desc_type;
+#define SSD_DESC_PROGRESS 0x0a
+ uint8_t length;
+ uint8_t sense_key;
+ uint8_t add_sense_code;
+ uint8_t add_sense_code_qual;
+ uint8_t reserved;
+ uint8_t progress[2];
+};
+
+/*
+ * This is typically forwarded as the result of an EXTENDED COPY command.
+ *
+ * Maximum descriptors allowed: 2 (as of SPC-4)
+ */
+struct scsi_sense_forwarded
+{
+ uint8_t desc_type;
+#define SSD_DESC_FORWARDED 0x0c
+ uint8_t length;
+ uint8_t byte2;
+#define SSD_FORWARDED_FSDT 0x80
+#define SSD_FORWARDED_SDS_MASK 0x0f
+#define SSD_FORWARDED_SDS_UNK 0x00
+#define SSD_FORWARDED_SDS_EXSRC 0x01
+#define SSD_FORWARDED_SDS_EXDST 0x02
+};
+
+/*
+ * Vendor-specific sense descriptor. The desc_type field will be in the
+ * range bewteen MIN and MAX inclusive.
+ */
+struct scsi_sense_vendor
+{
+ uint8_t desc_type;
+#define SSD_DESC_VENDOR_MIN 0x80
+#define SSD_DESC_VENDOR_MAX 0xff
+ uint8_t length;
+ uint8_t data[0];
};
struct scsi_mode_header_6
@@ -1023,9 +2087,20 @@ struct scsi_mode_header_10
struct scsi_mode_page_header
{
u_int8_t page_code;
+#define SMPH_PS 0x80
+#define SMPH_SPF 0x40
+#define SMPH_PC_MASK 0x3f
u_int8_t page_length;
};
+struct scsi_mode_page_header_sp
+{
+ uint8_t page_code;
+ uint8_t subpage;
+ uint8_t page_length[2];
+};
+
+
struct scsi_mode_blk_desc
{
u_int8_t density;
@@ -1128,6 +2203,84 @@ scsi_sense_action scsi_error_action(struct ccb_scsiio* csio,
struct scsi_inquiry_data *inq_data,
u_int32_t sense_flags);
const char * scsi_status_string(struct ccb_scsiio *csio);
+
+void scsi_desc_iterate(struct scsi_sense_data_desc *sense, u_int sense_len,
+ int (*iter_func)(struct scsi_sense_data_desc *sense,
+ u_int, struct scsi_sense_desc_header *,
+ void *), void *arg);
+uint8_t *scsi_find_desc(struct scsi_sense_data_desc *sense, u_int sense_len,
+ uint8_t desc_type);
+void scsi_set_sense_data(struct scsi_sense_data *sense_data,
+ scsi_sense_data_type sense_format, int current_error,
+ int sense_key, int asc, int ascq, ...) ;
+void scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
+ scsi_sense_data_type sense_format,
+ int current_error, int sense_key, int asc,
+ int ascq, va_list ap);
+int scsi_get_sense_info(struct scsi_sense_data *sense_data, u_int sense_len,
+ uint8_t info_type, uint64_t *info,
+ int64_t *signed_info);
+int scsi_get_sks(struct scsi_sense_data *sense_data, u_int sense_len,
+ uint8_t *sks);
+int scsi_get_block_info(struct scsi_sense_data *sense_data, u_int sense_len,
+ struct scsi_inquiry_data *inq_data,
+ uint8_t *block_bits);
+int scsi_get_stream_info(struct scsi_sense_data *sense_data, u_int sense_len,
+ struct scsi_inquiry_data *inq_data,
+ uint8_t *stream_bits);
+void scsi_info_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data, uint64_t info);
+void scsi_command_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data, uint64_t csi);
+void scsi_progress_sbuf(struct sbuf *sb, uint16_t progress);
+int scsi_sks_sbuf(struct sbuf *sb, int sense_key, uint8_t *sks);
+void scsi_fru_sbuf(struct sbuf *sb, uint64_t fru);
+void scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info);
+void scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits, uint64_t info);
+void scsi_sense_info_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+
+void scsi_sense_command_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_sks_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_fru_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_stream_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_block_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_progress_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_generic_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+void scsi_sense_desc_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
+ u_int sense_len, uint8_t *cdb, int cdb_len,
+ struct scsi_inquiry_data *inq_data,
+ struct scsi_sense_desc_header *header);
+scsi_sense_data_type scsi_sense_type(struct scsi_sense_data *sense_data);
+
+void scsi_sense_only_sbuf(struct scsi_sense_data *sense, u_int sense_len,
+ struct sbuf *sb, char *path_str,
+ struct scsi_inquiry_data *inq_data, uint8_t *cdb,
+ int cdb_len);
+
#ifdef _KERNEL
int scsi_command_string(struct ccb_scsiio *csio, struct sbuf *sb);
int scsi_sense_sbuf(struct ccb_scsiio *csio, struct sbuf *sb,
@@ -1135,6 +2288,8 @@ int scsi_sense_sbuf(struct ccb_scsiio *csio, struct sbuf *sb,
char * scsi_sense_string(struct ccb_scsiio *csio,
char *str, int str_len);
void scsi_sense_print(struct ccb_scsiio *csio);
+int scsi_vpd_supported_page(struct cam_periph *periph,
+ uint8_t page_id);
#else /* _KERNEL */
int scsi_command_string(struct cam_device *device,
struct ccb_scsiio *csio, struct sbuf *sb);
@@ -1148,12 +2303,6 @@ void scsi_sense_print(struct cam_device *device,
struct ccb_scsiio *csio, FILE *ofile);
#endif /* _KERNEL */
-#define SF_RETRY_UA 0x01
-#define SF_NO_PRINT 0x02
-#define SF_QUIET_IR 0x04 /* Be quiet about Illegal Request reponses */
-#define SF_PRINT_ALWAYS 0x08
-
-
const char * scsi_op_desc(u_int16_t opcode,
struct scsi_inquiry_data *inq_data);
char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string,
@@ -1163,7 +2312,18 @@ void scsi_print_inquiry(struct scsi_inquiry_data *inq_data);
u_int scsi_calc_syncsrate(u_int period_factor);
u_int scsi_calc_syncparam(u_int period);
-
+
+typedef int (*scsi_devid_checkfn_t)(uint8_t *);
+int scsi_devid_is_naa_ieee_reg(uint8_t *bufp);
+int scsi_devid_is_sas_target(uint8_t *bufp);
+int scsi_devid_is_lun_eui64(uint8_t *bufp);
+int scsi_devid_is_lun_naa(uint8_t *bufp);
+int scsi_devid_is_lun_name(uint8_t *bufp);
+int scsi_devid_is_lun_t10(uint8_t *bufp);
+struct scsi_vpd_id_descriptor *
+ scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t len,
+ scsi_devid_checkfn_t ck_fn);
+
void scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *,
union ccb *),
@@ -1279,6 +2439,40 @@ void scsi_synchronize_cache(struct ccb_scsiio *csio,
u_int32_t begin_lba, u_int16_t lb_count,
u_int8_t sense_len, u_int32_t timeout);
+void scsi_receive_diagnostic_results(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *,
+ union ccb*),
+ uint8_t tag_action, int pcv,
+ uint8_t page_code, uint8_t *data_ptr,
+ uint16_t allocation_length,
+ uint8_t sense_len, uint32_t timeout);
+
+void scsi_send_diagnostic(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int unit_offline,
+ int device_offline, int self_test, int page_format,
+ int self_test_code, uint8_t *data_ptr,
+ uint16_t param_list_length, uint8_t sense_len,
+ uint32_t timeout);
+
+void scsi_read_buffer(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb*),
+ uint8_t tag_action, int mode,
+ uint8_t buffer_id, u_int32_t offset,
+ uint8_t *data_ptr, uint32_t allocation_length,
+ uint8_t sense_len, uint32_t timeout);
+
+void scsi_write_buffer(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int mode,
+ uint8_t buffer_id, u_int32_t offset,
+ uint8_t *data_ptr, uint32_t param_list_length,
+ uint8_t sense_len, uint32_t timeout);
+
+#define SCSI_RW_READ 0x0001
+#define SCSI_RW_WRITE 0x0002
+#define SCSI_RW_DIRMASK 0x0003
+#define SCSI_RW_BIO 0x1000
void scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int8_t tag_action, int readop, u_int8_t byte2,
@@ -1287,6 +2481,40 @@ void scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries,
u_int32_t dxfer_len, u_int8_t sense_len,
u_int32_t timeout);
+void scsi_write_same(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t byte2,
+ int minimum_cmd_size, u_int64_t lba,
+ u_int32_t block_count, u_int8_t *data_ptr,
+ u_int32_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout);
+
+void scsi_ata_identify(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t *data_ptr,
+ u_int16_t dxfer_len, u_int8_t sense_len,
+ u_int32_t timeout);
+
+void scsi_ata_trim(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int16_t block_count,
+ u_int8_t *data_ptr, u_int16_t dxfer_len,
+ u_int8_t sense_len, u_int32_t timeout);
+
+void scsi_ata_pass_16(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int32_t flags, u_int8_t tag_action,
+ u_int8_t protocol, u_int8_t ata_flags, u_int16_t features,
+ u_int16_t sector_count, uint64_t lba, u_int8_t command,
+ u_int8_t control, u_int8_t *data_ptr, u_int16_t dxfer_len,
+ u_int8_t sense_len, u_int32_t timeout);
+
+void scsi_unmap(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t byte2,
+ u_int8_t *data_ptr, u_int16_t dxfer_len,
+ u_int8_t sense_len, u_int32_t timeout);
+
void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int8_t tag_action, int start, int load_eject,
@@ -1295,32 +2523,34 @@ void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
int scsi_inquiry_match(caddr_t inqbuffer, caddr_t table_entry);
int scsi_static_inquiry_match(caddr_t inqbuffer,
caddr_t table_entry);
-
-static __inline void scsi_extract_sense(struct scsi_sense_data *sense,
- int *error_code, int *sense_key,
- int *asc, int *ascq);
+int scsi_devid_match(uint8_t *rhs, size_t rhs_len,
+ uint8_t *lhs, size_t lhs_len);
+
+void scsi_extract_sense(struct scsi_sense_data *sense, int *error_code,
+ int *sense_key, int *asc, int *ascq);
+int scsi_extract_sense_ccb(union ccb *ccb, int *error_code, int *sense_key,
+ int *asc, int *ascq);
+void scsi_extract_sense_len(struct scsi_sense_data *sense,
+ u_int sense_len, int *error_code, int *sense_key,
+ int *asc, int *ascq, int show_errors);
+int scsi_get_sense_key(struct scsi_sense_data *sense, u_int sense_len,
+ int show_errors);
+int scsi_get_asc(struct scsi_sense_data *sense, u_int sense_len,
+ int show_errors);
+int scsi_get_ascq(struct scsi_sense_data *sense, u_int sense_len,
+ int show_errors);
static __inline void scsi_ulto2b(u_int32_t val, u_int8_t *bytes);
static __inline void scsi_ulto3b(u_int32_t val, u_int8_t *bytes);
static __inline void scsi_ulto4b(u_int32_t val, u_int8_t *bytes);
static __inline void scsi_u64to8b(u_int64_t val, u_int8_t *bytes);
-static __inline u_int32_t scsi_2btoul(u_int8_t *bytes);
-static __inline u_int32_t scsi_3btoul(u_int8_t *bytes);
-static __inline int32_t scsi_3btol(u_int8_t *bytes);
-static __inline u_int32_t scsi_4btoul(u_int8_t *bytes);
-static __inline u_int64_t scsi_8btou64(u_int8_t *bytes);
+static __inline uint32_t scsi_2btoul(const uint8_t *bytes);
+static __inline uint32_t scsi_3btoul(const uint8_t *bytes);
+static __inline int32_t scsi_3btol(const uint8_t *bytes);
+static __inline uint32_t scsi_4btoul(const uint8_t *bytes);
+static __inline uint64_t scsi_8btou64(const uint8_t *bytes);
static __inline void *find_mode_page_6(struct scsi_mode_header_6 *mode_header);
static __inline void *find_mode_page_10(struct scsi_mode_header_10 *mode_header);
-static __inline void scsi_extract_sense(struct scsi_sense_data *sense,
- int *error_code, int *sense_key,
- int *asc, int *ascq)
-{
- *error_code = sense->error_code & SSD_ERRCODE;
- *sense_key = sense->flags & SSD_KEY;
- *asc = (sense->extra_len >= 5) ? sense->add_sense_code : 0;
- *ascq = (sense->extra_len >= 6) ? sense->add_sense_code_qual : 0;
-}
-
static __inline void
scsi_ulto2b(u_int32_t val, u_int8_t *bytes)
{
@@ -1362,20 +2592,20 @@ scsi_u64to8b(u_int64_t val, u_int8_t *bytes)
bytes[7] = val & 0xff;
}
-static __inline u_int32_t
-scsi_2btoul(u_int8_t *bytes)
+static __inline uint32_t
+scsi_2btoul(const uint8_t *bytes)
{
- u_int32_t rv;
+ uint32_t rv;
rv = (bytes[0] << 8) |
bytes[1];
return (rv);
}
-static __inline u_int32_t
-scsi_3btoul(u_int8_t *bytes)
+static __inline uint32_t
+scsi_3btoul(const uint8_t *bytes)
{
- u_int32_t rv;
+ uint32_t rv;
rv = (bytes[0] << 16) |
(bytes[1] << 8) |
@@ -1384,9 +2614,9 @@ scsi_3btoul(u_int8_t *bytes)
}
static __inline int32_t
-scsi_3btol(u_int8_t *bytes)
+scsi_3btol(const uint8_t *bytes)
{
- u_int32_t rc = scsi_3btoul(bytes);
+ uint32_t rc = scsi_3btoul(bytes);
if (rc & 0x00800000)
rc |= 0xff000000;
@@ -1394,10 +2624,10 @@ scsi_3btol(u_int8_t *bytes)
return (int32_t) rc;
}
-static __inline u_int32_t
-scsi_4btoul(u_int8_t *bytes)
+static __inline uint32_t
+scsi_4btoul(const uint8_t *bytes)
{
- u_int32_t rv;
+ uint32_t rv;
rv = (bytes[0] << 24) |
(bytes[1] << 16) |
@@ -1407,7 +2637,7 @@ scsi_4btoul(u_int8_t *bytes)
}
static __inline uint64_t
-scsi_8btou64(uint8_t *bytes)
+scsi_8btou64(const uint8_t *bytes)
{
uint64_t rv;
diff --git a/freebsd/sys/cam/scsi/scsi_da.h b/freebsd/sys/cam/scsi/scsi_da.h
index da099f69..57992381 100644
--- a/freebsd/sys/cam/scsi/scsi_da.h
+++ b/freebsd/sys/cam/scsi/scsi_da.h
@@ -111,6 +111,7 @@ struct scsi_read_defect_data_10
u_int8_t reserved[4];
u_int8_t alloc_length[2];
+#define SRDD10_MAX_LENGTH 0xffff
u_int8_t control;
};
@@ -421,6 +422,56 @@ union disk_pages /* this is the structure copied from osf */
} flexible_disk;
};
+/*
+ * XXX KDM
+ * Here for CTL compatibility, reconcile this.
+ */
+struct scsi_format_page {
+ uint8_t page_code;
+ uint8_t page_length;
+ uint8_t tracks_per_zone[2];
+ uint8_t alt_sectors_per_zone[2];
+ uint8_t alt_tracks_per_zone[2];
+ uint8_t alt_tracks_per_lun[2];
+ uint8_t sectors_per_track[2];
+ uint8_t bytes_per_sector[2];
+ uint8_t interleave[2];
+ uint8_t track_skew[2];
+ uint8_t cylinder_skew[2];
+ uint8_t flags;
+#define SFP_SSEC 0x80
+#define SFP_HSEC 0x40
+#define SFP_RMB 0x20
+#define SFP_SURF 0x10
+ uint8_t reserved[3];
+};
+
+/*
+ * XXX KDM
+ * Here for CTL compatibility, reconcile this.
+ */
+struct scsi_rigid_disk_page {
+ uint8_t page_code;
+#define SMS_RIGID_DISK_PAGE 0x04
+ uint8_t page_length;
+ uint8_t cylinders[3];
+ uint8_t heads;
+ uint8_t start_write_precomp[3];
+ uint8_t start_reduced_current[3];
+ uint8_t step_rate[2];
+ uint8_t landing_zone_cylinder[3];
+ uint8_t rpl;
+#define SRDP_RPL_DISABLED 0x00
+#define SRDP_RPL_SLAVE 0x01
+#define SRDP_RPL_MASTER 0x02
+#define SRDP_RPL_MASTER_CONTROL 0x03
+ uint8_t rotational_offset;
+ uint8_t reserved1;
+ uint8_t rotation_rate[2];
+ uint8_t reserved2[2];
+};
+
+
struct scsi_da_rw_recovery_page {
u_int8_t page_code;
#define SMS_RW_ERROR_RECOVERY_PAGE 0x01
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.c b/freebsd/sys/contrib/altq/altq/altq_cbq.c
index 9dde791c..6ebf6551 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cbq.c
+++ b/freebsd/sys/contrib/altq/altq/altq_cbq.c
@@ -35,12 +35,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
@@ -510,14 +508,8 @@ cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
/* grab class set by classifier */
if ((m->m_flags & M_PKTHDR) == 0) {
/* should not happen */
-#if defined(__NetBSD__) || defined(__OpenBSD__)\
- || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
printf("altq: packet for %s does not have pkthdr\n",
ifq->altq_ifp->if_xname);
-#else
- printf("altq: packet for %s%d does not have pkthdr\n",
- ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
-#endif
m_freem(m);
return (ENOBUFS);
}
@@ -1029,13 +1021,7 @@ cbqclose(dev, flag, fmt, p)
while (cbq_list) {
ifp = cbq_list->ifnp.ifq_->altq_ifp;
-#if defined(__NetBSD__) || defined(__OpenBSD__)\
- || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
-#else
- sprintf(iface.cbq_ifacename,
- "%s%d", ifp->if_name, ifp->if_unit);
-#endif
err = cbq_ifdetach(&iface);
if (err != 0 && error == 0)
error = err;
diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.c b/freebsd/sys/contrib/altq/altq/altq_cdnr.c
index cc37e0b8..ce517318 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cdnr.c
+++ b/freebsd/sys/contrib/altq/altq/altq_cdnr.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/* $FreeBSD$ */
-/* $KAME: altq_cdnr.c,v 1.14 2003/09/05 22:40:36 itojun Exp $ */
+/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
/*
* Copyright (C) 1999-2002
@@ -31,12 +31,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#include <rtems/bsd/sys/param.h>
diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.c b/freebsd/sys/contrib/altq/altq/altq_hfsc.c
index 41f7c6db..64c9d17c 100644
--- a/freebsd/sys/contrib/altq/altq/altq_hfsc.c
+++ b/freebsd/sys/contrib/altq/altq/altq_hfsc.c
@@ -46,12 +46,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
@@ -700,14 +698,8 @@ hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
/* grab class set by classifier */
if ((m->m_flags & M_PKTHDR) == 0) {
/* should not happen */
-#if defined(__NetBSD__) || defined(__OpenBSD__)\
- || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
printf("altq: packet for %s does not have pkthdr\n",
ifq->altq_ifp->if_xname);
-#else
- printf("altq: packet for %s%d does not have pkthdr\n",
- ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
-#endif
m_freem(m);
return (ENOBUFS);
}
diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.c b/freebsd/sys/contrib/altq/altq/altq_priq.c
index e0e2522d..0cb47576 100644
--- a/freebsd/sys/contrib/altq/altq/altq_priq.c
+++ b/freebsd/sys/contrib/altq/altq/altq_priq.c
@@ -33,12 +33,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
@@ -471,14 +469,8 @@ priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
/* grab class set by classifier */
if ((m->m_flags & M_PKTHDR) == 0) {
/* should not happen */
-#if defined(__NetBSD__) || defined(__OpenBSD__)\
- || (defined(__FreeBSD__) && __FreeBSD_version >= 501113)
printf("altq: packet for %s does not have pkthdr\n",
ifq->altq_ifp->if_xname);
-#else
- printf("altq: packet for %s%d does not have pkthdr\n",
- ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
-#endif
m_freem(m);
return (ENOBUFS);
}
diff --git a/freebsd/sys/contrib/altq/altq/altq_red.c b/freebsd/sys/contrib/altq/altq/altq_red.c
index 78b8e6fc..7d1ad735 100644
--- a/freebsd/sys/contrib/altq/altq/altq_red.c
+++ b/freebsd/sys/contrib/altq/altq/altq_red.c
@@ -64,12 +64,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
@@ -518,11 +516,9 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
struct mbuf *m0;
struct pf_mtag *at;
void *hdr;
- int af;
at = pf_find_mtag(m);
if (at != NULL) {
- af = at->af;
hdr = at->hdr;
#ifdef ALTQ3_COMPAT
} else if (pktattr != NULL) {
@@ -532,9 +528,6 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
} else
return (0);
- if (af != AF_INET && af != AF_INET6)
- return (0);
-
/* verify that pattr_hdr is within the mbuf data */
for (m0 = m; m0 != NULL; m0 = m0->m_next)
if (((caddr_t)hdr >= m0->m_data) &&
@@ -545,8 +538,8 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
return (0);
}
- switch (af) {
- case AF_INET:
+ switch (((struct ip *)hdr)->ip_v) {
+ case IPVERSION:
if (flags & REDF_ECN4) {
struct ip *ip = hdr;
u_int8_t otos;
@@ -579,7 +572,7 @@ mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
}
break;
#ifdef INET6
- case AF_INET6:
+ case (IPV6_VERSION >> 4):
if (flags & REDF_ECN6) {
struct ip6_hdr *ip6 = hdr;
u_int32_t flowlabel;
diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.c b/freebsd/sys/contrib/altq/altq/altq_rio.c
index d1fe3f18..c5fb097d 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rio.c
+++ b/freebsd/sys/contrib/altq/altq/altq_rio.c
@@ -63,12 +63,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.c b/freebsd/sys/contrib/altq/altq/altq_rmclass.c
index 6550dda4..b385f78a 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rmclass.c
+++ b/freebsd/sys/contrib/altq/altq/altq_rmclass.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/* $FreeBSD$ */
-/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */
+/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */
/*
* Copyright (c) 1991-1997 Regents of the University of California.
@@ -43,12 +43,10 @@
#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
-#if (__FreeBSD__ != 2)
#include <rtems/bsd/local/opt_inet.h>
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
#endif
-#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
diff --git a/freebsd/sys/contrib/altq/altq/altq_subr.c b/freebsd/sys/contrib/altq/altq/altq_subr.c
index 3168da74..3ebd58d8 100644
--- a/freebsd/sys/contrib/altq/altq/altq_subr.c
+++ b/freebsd/sys/contrib/altq/altq/altq_subr.c
@@ -74,15 +74,12 @@
/* machine dependent clock related includes */
#ifdef __FreeBSD__
-#if __FreeBSD__ < 3
-#include <rtems/bsd/local/opt_cpu.h> /* for FreeBSD-2.2.8 to get i586_ctr_freq */
-#endif
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#endif
-#if defined(__i386__)
+#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h> /* for pentium tsc */
#include <machine/specialreg.h> /* for CPUID_TSC */
#ifdef __FreeBSD__
@@ -90,7 +87,7 @@
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <machine/cpu.h> /* for cpu_feature */
#endif
-#endif /* __i386__ */
+#endif /* __amd64 || __i386__ */
/*
* internal function prototypes
@@ -451,7 +448,7 @@ static void
tbr_timeout(arg)
void *arg;
{
-#if defined(__FreeBSD__)
+#ifdef __FreeBSD__
VNET_ITERATOR_DECL(vnet_iter);
#endif
struct ifnet *ifp;
@@ -463,7 +460,7 @@ tbr_timeout(arg)
#else
s = splimp();
#endif
-#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000)
+#ifdef __FreeBSD__
IFNET_RLOCK_NOSLEEP();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
@@ -479,7 +476,7 @@ tbr_timeout(arg)
ifp->if_start != NULL)
(*ifp->if_start)(ifp);
}
-#if defined(__FreeBSD__) && (__FreeBSD_version >= 500000)
+#ifdef __FreeBSD__
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -490,20 +487,6 @@ tbr_timeout(arg)
CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
else
tbr_timer = 0; /* don't need tbr_timer anymore */
-#if defined(__alpha__) && !defined(ALTQ_NOPCC)
- {
- /*
- * XXX read out the machine dependent clock once a second
- * to detect counter wrap-around.
- */
- static u_int cnt;
-
- if (++cnt >= hz) {
- (void)read_machclk();
- cnt = 0;
- }
- }
-#endif /* __alpha__ && !ALTQ_NOPCC */
}
/*
@@ -901,16 +884,9 @@ int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;
-#ifdef __alpha__
-#ifdef __FreeBSD__
-extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */
-#elif defined(__NetBSD__) || defined(__OpenBSD__)
-extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */
-#endif
-#endif /* __alpha__ */
#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
-#endif /* __alpha__ */
+#endif
#ifndef __rtems__
#if (__FreeBSD_version >= 700035)
@@ -945,7 +921,7 @@ init_machclk_setup(void)
machclk_usepcc = 1;
-#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC)
+#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
@@ -954,16 +930,14 @@ init_machclk_setup(void)
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
machclk_usepcc = 0;
#endif
-#ifdef __i386__
-#ifndef __rtems__
+#if defined(__amd64__) || defined(__i386__)
/* check if TSC is available */
- if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 ||
- tsc_is_broken))
-#else /* __rtems__ */
- /* check if TSC is available */
- if (machclk_usepcc == 1 && ((cpu_feature & CPUID_TSC) == 0 ||
- !(x86_has_tsc()) ))
-#endif /* __rtems__ */
+#ifdef __FreeBSD__
+ if ((cpu_feature & CPUID_TSC) == 0 ||
+ atomic_load_acq_64(&tsc_freq) == 0)
+#else
+ if ((cpu_feature & CPUID_TSC) == 0)
+#endif
machclk_usepcc = 0;
#endif
}
@@ -993,27 +967,15 @@ init_machclk(void)
* if the clock frequency (of Pentium TSC or Alpha PCC) is
* accessible, just use it.
*/
-#ifndef __rtems__
-#ifdef __i386__
+#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
-#if (__FreeBSD_version > 300000)
- machclk_freq = tsc_freq;
-#else
- machclk_freq = i586_ctr_freq;
-#endif
+ machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
machclk_freq = pentium_mhz * 1000000;
#endif
-#elif defined(__alpha__)
-#ifdef __FreeBSD__
- machclk_freq = cycles_per_sec;
-#elif defined(__NetBSD__) || defined(__OpenBSD__)
- machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
#endif
-#endif /* __alpha__ */
-#endif /* __rtems__ */
/*
* if we don't know the clock frequency, measure it.
@@ -1059,25 +1021,8 @@ read_machclk(void)
u_int64_t val;
if (machclk_usepcc) {
-#if defined(__i386__)
+#if defined(__amd64__) || defined(__i386__)
val = rdtsc();
-#elif defined(__alpha__)
- static u_int32_t last_pcc, upper;
- u_int32_t pcc;
-
- /*
- * for alpha, make a 64bit counter value out of the 32bit
- * alpha processor cycle counter.
- * read_machclk must be called within a half of its
- * wrap-around cycle (about 5 sec for 400MHz cpu) to properly
- * detect a counter wrap-around.
- * tbr_timeout calls read_machclk once a second.
- */
- pcc = (u_int32_t)alpha_rpcc();
- if (pcc <= last_pcc)
- upper++;
- last_pcc = pcc;
- val = ((u_int64_t)upper << 32) + pcc;
#else
panic("read_machclk");
#endif
diff --git a/freebsd/sys/contrib/altq/altq/altq_var.h b/freebsd/sys/contrib/altq/altq/altq_var.h
index 6f37f182..b956002a 100644
--- a/freebsd/sys/contrib/altq/altq/altq_var.h
+++ b/freebsd/sys/contrib/altq/altq/altq_var.h
@@ -141,11 +141,7 @@ extern u_int64_t read_machclk(void);
* misc stuff for compatibility
*/
/* ioctl cmd type */
-#if defined(__FreeBSD__) && (__FreeBSD__ < 3)
-typedef int ioctlcmd_t;
-#else
typedef u_long ioctlcmd_t;
-#endif
/*
* queue macros:
diff --git a/freebsd/sys/contrib/altq/altq/if_altq.h b/freebsd/sys/contrib/altq/altq/if_altq.h
index 50462e9e..2d983e9a 100644
--- a/freebsd/sys/contrib/altq/altq/if_altq.h
+++ b/freebsd/sys/contrib/altq/altq/if_altq.h
@@ -1,5 +1,5 @@
/* $FreeBSD$ */
-/* $KAME: if_altq.h,v 1.11 2003/07/10 12:07:50 kjc Exp $ */
+/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */
/*
* Copyright (C) 1997-2003
@@ -29,7 +29,7 @@
#ifndef _ALTQ_IF_ALTQ_H_
#define _ALTQ_IF_ALTQ_H_
-#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+#ifdef __FreeBSD__
#include <rtems/bsd/sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/event.h> /* XXX */
@@ -51,7 +51,7 @@ struct ifaltq {
int ifq_len;
int ifq_maxlen;
int ifq_drops;
-#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+#ifdef __FreeBSD__
struct mtx ifq_mtx;
#endif
diff --git a/freebsd/sys/contrib/pf/net/if_pflog.c b/freebsd/sys/contrib/pf/net/if_pflog.c
index 5efbf76d..51de5cd0 100644
--- a/freebsd/sys/contrib/pf/net/if_pflog.c
+++ b/freebsd/sys/contrib/pf/net/if_pflog.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */
+/* $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $ */
/*
* The authors of this code are John Ioannidis (ji@tla.org),
* Angelos D. Keromytis (kermit@csd.uch.gr) and
@@ -84,28 +84,28 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/bpf.h>
-#ifdef INET
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
+#endif
+#ifdef INET
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif
#ifdef INET6
-#ifndef INET
-#include <netinet/in.h>
-#endif
+#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */
#include <net/pfvar.h>
#include <net/if_pflog.h>
-#ifdef INET
#ifdef __FreeBSD__
+#ifdef INET
#include <machine/in_cksum.h>
-#endif
-#endif
+#endif /* INET */
+#endif /* __FreeBSD__ */
#define PFLOGMTU (32768 + MHLEN + MLEN)
@@ -117,7 +117,11 @@ __FBSDID("$FreeBSD$");
void pflogattach(int);
int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+#ifdef __FreeBSD__
+ struct route *);
+#else
+ struct rtentry *);
+#endif
int pflogioctl(struct ifnet *, u_long, caddr_t);
void pflogstart(struct ifnet *);
#ifdef __FreeBSD__
@@ -130,7 +134,7 @@ int pflog_clone_destroy(struct ifnet *);
LIST_HEAD(, pflog_softc) pflogif_list;
#ifdef __FreeBSD__
-IFC_SIMPLE_DECLARE(pflog, 1);
+IFC_SIMPLE_DECLARE(pflog, 1);
#else
struct if_clone pflog_cloner =
IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy);
@@ -138,10 +142,6 @@ struct if_clone pflog_cloner =
struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */
-#ifndef __FreeBSD__
-extern int ifqmaxlen;
-#endif
-
void
pflogattach(int npflog)
{
@@ -149,9 +149,6 @@ pflogattach(int npflog)
LIST_INIT(&pflogif_list);
for (i = 0; i < PFLOGIFS_MAX; i++)
pflogifs[i] = NULL;
-#ifndef __FreeBSD__
- (void) pflog_clone_create(&pflog_cloner, 0);
-#endif
if_clone_attach(&pflog_cloner);
}
@@ -170,9 +167,9 @@ pflog_clone_create(struct if_clone *ifc, int unit)
if (unit >= PFLOGIFS_MAX)
return (EINVAL);
- if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL)
+ if ((pflogif = malloc(sizeof(*pflogif),
+ M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
return (ENOMEM);
- bzero(pflogif, sizeof(*pflogif));
pflogif->sc_unit = unit;
#ifdef __FreeBSD__
@@ -211,6 +208,7 @@ pflog_clone_create(struct if_clone *ifc, int unit)
s = splnet();
#ifdef __FreeBSD__
+ /* XXX: Why pf(4) lock?! Better add a pflog lock?! */
PF_LOCK();
#endif
LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list);
@@ -291,7 +289,11 @@ pflogstart(struct ifnet *ifp)
int
pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+#ifdef __FreeBSD__
+ struct route *rt)
+#else
+ struct rtentry *rt)
+#endif
{
m_freem(m);
return (0);
@@ -302,9 +304,6 @@ int
pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
switch (cmd) {
- case SIOCSIFADDR:
- case SIOCAIFADDR:
- case SIOCSIFDSTADDR:
case SIOCSIFFLAGS:
#ifdef __FreeBSD__
if (ifp->if_flags & IFF_UP)
@@ -319,7 +318,7 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
#endif
break;
default:
- return (EINVAL);
+ return (ENOTTY);
}
return (0);
@@ -335,7 +334,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
struct pfloghdr hdr;
if (kif == NULL || m == NULL || rm == NULL || pd == NULL)
- return (-1);
+ return ( 1);
if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
return (0);
@@ -349,7 +348,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
if (am == NULL) {
hdr.rulenr = htonl(rm->nr);
- hdr.subrulenr = -1;
+ hdr.subrulenr = 1;
} else {
hdr.rulenr = htonl(am->nr);
hdr.subrulenr = htonl(rm->nr);
@@ -359,11 +358,11 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
}
if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done)
#ifdef __FreeBSD__
- /*
+ /*
* XXX: This should not happen as we force an early lookup
* via debug.pfugidhack
*/
- ; /* empty */
+ ; /* empty */
#else
pd->lookup.done = pf_socket_lookup(dir, pd);
#endif
@@ -432,7 +431,7 @@ static moduledata_t pflog_mod = { "pflog", pflog_modevent, 0 };
#define PFLOG_MODVER 1
-DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(pflog, PFLOG_MODVER);
MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/if_pflog.h b/freebsd/sys/contrib/pf/net/if_pflog.h
index a3c74d16..5f48f6c7 100644
--- a/freebsd/sys/contrib/pf/net/if_pflog.h
+++ b/freebsd/sys/contrib/pf/net/if_pflog.h
@@ -1,5 +1,4 @@
-/* $FreeBSD$ */
-/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */
+/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */
/*
* Copyright 2001 Niels Provos <provos@citi.umich.edu>
* All rights reserved.
@@ -26,11 +25,10 @@
*/
#ifndef _NET_IF_PFLOG_H_
-#define _NET_IF_PFLOG_H_
+#define _NET_IF_PFLOG_H_
#define PFLOGIFS_MAX 16
-#ifdef _KERNEL
struct pflog_softc {
#ifdef __FreeBSD__
struct ifnet *sc_ifp; /* the interface pointer */
@@ -40,9 +38,8 @@ struct pflog_softc {
int sc_unit;
LIST_ENTRY(pflog_softc) sc_list;
};
-#endif /* _KERNEL */
-#define PFLOG_RULESET_NAME_SIZE 16
+#define PFLOG_RULESET_NAME_SIZE 16
struct pfloghdr {
u_int8_t length;
@@ -61,9 +58,9 @@ struct pfloghdr {
u_int8_t pad[3];
};
-#define PFLOG_HDRLEN sizeof(struct pfloghdr)
+#define PFLOG_HDRLEN sizeof(struct pfloghdr)
/* minus pad, also used as a signature */
-#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
+#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
/* XXX remove later when old format logs are no longer needed */
struct old_pfloghdr {
@@ -74,23 +71,24 @@ struct old_pfloghdr {
u_short action;
u_short dir;
};
-#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr)
+#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr)
#ifdef _KERNEL
-
#ifdef __FreeBSD__
struct pf_rule;
struct pf_ruleset;
struct pfi_kif;
struct pf_pdesc;
+#if 0
typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
struct pf_ruleset *, struct pf_pdesc *);
extern pflog_packet_t *pflog_packet_ptr;
-#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \
- if (pflog_packet_ptr != NULL) \
- pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \
+#endif
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) do { \
+ if (pflog_packet_ptr != NULL) \
+ pflog_packet_ptr(i,a,b,c,d,e,f,g,h); \
} while (0)
#else /* ! __FreeBSD__ */
#if NPFLOG > 0
@@ -98,6 +96,6 @@ extern pflog_packet_t *pflog_packet_ptr;
#else
#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0)
#endif /* NPFLOG > 0 */
-#endif /* __FreeBSD__ */
+#endif
#endif /* _KERNEL */
#endif /* _NET_IF_PFLOG_H_ */
diff --git a/freebsd/sys/contrib/pf/net/if_pflow.h b/freebsd/sys/contrib/pf/net/if_pflow.h
new file mode 100644
index 00000000..35ccbeb4
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/if_pflow.h
@@ -0,0 +1,126 @@
+/* $OpenBSD: if_pflow.h,v 1.5 2009/02/27 11:09:36 gollo Exp $ */
+
+/*
+ * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
+ * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IF_PFLOW_H_
+#define _NET_IF_PFLOW_H_
+
+#define PFLOW_ID_LEN sizeof(u_int64_t)
+
+#define PFLOW_MAXFLOWS 30
+#define PFLOW_VERSION 5
+#define PFLOW_ENGINE_TYPE 42
+#define PFLOW_ENGINE_ID 42
+#define PFLOW_MAXBYTES 0xffffffff
+#define PFLOW_TIMEOUT 30
+
+struct pflow_flow {
+ u_int32_t src_ip;
+ u_int32_t dest_ip;
+ u_int32_t nexthop_ip;
+ u_int16_t if_index_in;
+ u_int16_t if_index_out;
+ u_int32_t flow_packets;
+ u_int32_t flow_octets;
+ u_int32_t flow_start;
+ u_int32_t flow_finish;
+ u_int16_t src_port;
+ u_int16_t dest_port;
+ u_int8_t pad1;
+ u_int8_t tcp_flags;
+ u_int8_t protocol;
+ u_int8_t tos;
+ u_int16_t src_as;
+ u_int16_t dest_as;
+ u_int8_t src_mask;
+ u_int8_t dest_mask;
+ u_int16_t pad2;
+} __packed;
+
+#ifdef _KERNEL
+
+extern int pflow_ok;
+
+struct pflow_softc {
+ struct ifnet sc_if;
+ struct ifnet *sc_pflow_ifp;
+
+ unsigned int sc_count;
+ unsigned int sc_maxcount;
+ u_int64_t sc_gcounter;
+ struct ip_moptions sc_imo;
+#ifdef __FreeBSD__
+ struct callout sc_tmo;
+#else
+ struct timeout sc_tmo;
+#endif
+ struct in_addr sc_sender_ip;
+ u_int16_t sc_sender_port;
+ struct in_addr sc_receiver_ip;
+ u_int16_t sc_receiver_port;
+ struct mbuf *sc_mbuf; /* current cumulative mbuf */
+ SLIST_ENTRY(pflow_softc) sc_next;
+};
+
+extern struct pflow_softc *pflowif;
+
+#endif /* _KERNEL */
+
+struct pflow_header {
+ u_int16_t version;
+ u_int16_t count;
+ u_int32_t uptime_ms;
+ u_int32_t time_sec;
+ u_int32_t time_nanosec;
+ u_int32_t flow_sequence;
+ u_int8_t engine_type;
+ u_int8_t engine_id;
+ u_int8_t reserved1;
+ u_int8_t reserved2;
+} __packed;
+
+#define PFLOW_HDRLEN sizeof(struct pflow_header)
+
+struct pflowstats {
+ u_int64_t pflow_flows;
+ u_int64_t pflow_packets;
+ u_int64_t pflow_onomem;
+ u_int64_t pflow_oerrors;
+};
+
+/*
+ * Configuration structure for SIOCSETPFLOW SIOCGETPFLOW
+ */
+struct pflowreq {
+ struct in_addr sender_ip;
+ struct in_addr receiver_ip;
+ u_int16_t receiver_port;
+ u_int16_t addrmask;
+#define PFLOW_MASK_SRCIP 0x01
+#define PFLOW_MASK_DSTIP 0x02
+#define PFLOW_MASK_DSTPRT 0x04
+};
+
+#ifdef _KERNEL
+int export_pflow(struct pf_state *);
+int pflow_sysctl(int *, u_int, void *, size_t *, void *, size_t);
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_PFLOW_H_ */
diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.c b/freebsd/sys/contrib/pf/net/if_pfsync.c
index 24c80e3d..36cb1573 100644
--- a/freebsd/sys/contrib/pf/net/if_pfsync.c
+++ b/freebsd/sys/contrib/pf/net/if_pfsync.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */
+/* $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff
@@ -28,27 +28,41 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Revisions picked from OpenBSD after revision 1.110 import:
+ * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
+ * 1.120, 1.175 - use monotonic time_uptime
+ * 1.122 - reduce number of updates for non-TCP sessions
+ * 1.128 - cleanups
+ * 1.146 - bzero() mbuf before sparsely filling it with data
+ * 1.170 - SIOCSIFMTU checks
+ */
+
#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_carp.h>
-#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_pf.h>
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#ifdef DEV_BPF
-#define NBPFILTER DEV_BPF
-#else
-#define NBPFILTER 0
-#endif
-
-#ifdef DEV_PFSYNC
-#define NPFSYNC DEV_PFSYNC
-#else
-#define NPFSYNC 0
-#endif
+#define NBPFILTER 1
#ifdef DEV_CARP
#define NCARP DEV_CARP
@@ -58,7 +72,10 @@ __FBSDID("$FreeBSD$");
#endif /* __FreeBSD__ */
#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
#ifdef __FreeBSD__
+#include <sys/bus.h>
+#include <sys/interrupt.h>
#include <sys/priv.h>
#endif
#include <sys/proc.h>
@@ -74,12 +91,15 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
-#include <sys/sysctl.h>
+#include <sys/protosw.h>
#else
#include <sys/ioctl.h>
#include <sys/timeout.h>
#endif
-#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#ifndef __FreeBSD__
+#include <sys/pool.h>
+#endif
#include <net/if.h>
#ifdef __FreeBSD__
@@ -88,6 +108,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
+#include <net/netisr.h>
+#ifdef __FreeBSD__
+#include <net/vnet.h>
+#endif
+
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
@@ -119,23 +144,188 @@ __FBSDID("$FreeBSD$");
#include "pfsync.h"
#endif
-#define PFSYNC_MINMTU \
- (sizeof(struct pfsync_header) + sizeof(struct pf_state))
+#define PFSYNC_MINPKT ( \
+ sizeof(struct ip) + \
+ sizeof(struct pfsync_header) + \
+ sizeof(struct pfsync_subheader) + \
+ sizeof(struct pfsync_eof))
+
+struct pfsync_pkt {
+ struct ip *ip;
+ struct in_addr src;
+ u_int8_t flags;
+};
+
+int pfsync_input_hmac(struct mbuf *, int);
+
+int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
+ struct pfsync_state_peer *);
+
+int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
+int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
+
+int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
+
+int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
+ pfsync_in_clr, /* PFSYNC_ACT_CLR */
+ pfsync_in_ins, /* PFSYNC_ACT_INS */
+ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */
+ pfsync_in_upd, /* PFSYNC_ACT_UPD */
+ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */
+ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */
+ pfsync_in_del, /* PFSYNC_ACT_DEL */
+ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */
+ pfsync_in_error, /* PFSYNC_ACT_INS_F */
+ pfsync_in_error, /* PFSYNC_ACT_DEL_F */
+ pfsync_in_bus, /* PFSYNC_ACT_BUS */
+ pfsync_in_tdb, /* PFSYNC_ACT_TDB */
+ pfsync_in_eof /* PFSYNC_ACT_EOF */
+};
+
+struct pfsync_q {
+ int (*write)(struct pf_state *, struct mbuf *, int);
+ size_t len;
+ u_int8_t action;
+};
+
+/* we have one of these for every PFSYNC_S_ */
+int pfsync_out_state(struct pf_state *, struct mbuf *, int);
+int pfsync_out_iack(struct pf_state *, struct mbuf *, int);
+int pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
+int pfsync_out_del(struct pf_state *, struct mbuf *, int);
+
+struct pfsync_q pfsync_qs[] = {
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
+ { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD },
+ { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
+ { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }
+};
+
+void pfsync_q_ins(struct pf_state *, int);
+void pfsync_q_del(struct pf_state *);
+
+struct pfsync_upd_req_item {
+ TAILQ_ENTRY(pfsync_upd_req_item) ur_entry;
+ struct pfsync_upd_req ur_msg;
+};
+TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
+
+struct pfsync_deferral {
+ TAILQ_ENTRY(pfsync_deferral) pd_entry;
+ struct pf_state *pd_st;
+ struct mbuf *pd_m;
+#ifdef __FreeBSD__
+ struct callout pd_tmo;
+#else
+ struct timeout pd_tmo;
+#endif
+};
+TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
+
+#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \
+ sizeof(struct pfsync_deferral))
-#ifdef PFSYNCDEBUG
-#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0)
-int pfsyncdebug;
+#ifdef notyet
+int pfsync_out_tdb(struct tdb *, struct mbuf *, int);
+#endif
+
+struct pfsync_softc {
+#ifdef __FreeBSD__
+ struct ifnet *sc_ifp;
#else
-#define DPRINTF(x)
+ struct ifnet sc_if;
#endif
+ struct ifnet *sc_sync_if;
-struct pfsync_softc *pfsyncif = NULL;
-struct pfsyncstats pfsyncstats;
#ifdef __FreeBSD__
-SYSCTL_DECL(_net_inet_pfsync);
-SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
- &pfsyncstats, pfsyncstats,
+ uma_zone_t sc_pool;
+#else
+ struct pool sc_pool;
+#endif
+
+ struct ip_moptions sc_imo;
+
+ struct in_addr sc_sync_peer;
+ u_int8_t sc_maxupdates;
+#ifdef __FreeBSD__
+ int pfsync_sync_ok;
+#endif
+
+ struct ip sc_template;
+
+ struct pf_state_queue sc_qs[PFSYNC_S_COUNT];
+ size_t sc_len;
+
+ struct pfsync_upd_reqs sc_upd_req_list;
+
+ struct pfsync_deferrals sc_deferrals;
+ u_int sc_deferred;
+
+ void *sc_plus;
+ size_t sc_pluslen;
+
+ u_int32_t sc_ureq_sent;
+ int sc_bulk_tries;
+#ifdef __FreeBSD__
+ struct callout sc_bulkfail_tmo;
+#else
+ struct timeout sc_bulkfail_tmo;
+#endif
+
+ u_int32_t sc_ureq_received;
+ struct pf_state *sc_bulk_next;
+ struct pf_state *sc_bulk_last;
+#ifdef __FreeBSD__
+ struct callout sc_bulk_tmo;
+#else
+ struct timeout sc_bulk_tmo;
+#endif
+
+ TAILQ_HEAD(, tdb) sc_tdb_q;
+
+#ifdef __FreeBSD__
+ struct callout sc_tmo;
+#else
+ struct timeout sc_tmo;
+#endif
+};
+
+#ifdef __FreeBSD__
+static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data");
+static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
+#define V_pfsyncif VNET(pfsyncif)
+static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
+#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie)
+static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
+#define V_pfsyncstats VNET(pfsyncstats)
+
+static void pfsyncintr(void *);
+static int pfsync_multicast_setup(struct pfsync_softc *);
+static void pfsync_multicast_cleanup(struct pfsync_softc *);
+static int pfsync_init(void);
+static void pfsync_uninit(void);
+static void pfsync_sendout1(int);
+
+#define schednetisr(NETISR_PFSYNC) swi_sched(V_pfsync_swi_cookie, 0)
+
+SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
+ &VNET_NAME(pfsyncstats), pfsyncstats,
"PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
+#else
+struct pfsync_softc *pfsyncif = NULL;
+struct pfsyncstats pfsyncstats;
+#define V_pfsyncstats pfsyncstats
#endif
void pfsyncattach(int);
@@ -146,48 +336,54 @@ void pfsync_clone_destroy(struct ifnet *);
int pfsync_clone_create(struct if_clone *, int);
int pfsync_clone_destroy(struct ifnet *);
#endif
-void pfsync_setmtu(struct pfsync_softc *, int);
int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
struct pf_state_peer *);
-int pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
-#ifdef PFSYNC_TDB
void pfsync_update_net_tdb(struct pfsync_tdb *);
-#endif
int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+#ifdef __FreeBSD__
struct route *);
+#else
+ struct rtentry *);
+#endif
int pfsyncioctl(struct ifnet *, u_long, caddr_t);
void pfsyncstart(struct ifnet *);
-struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
-int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
-int pfsync_sendout(struct pfsync_softc *);
-#ifdef PFSYNC_TDB
-int pfsync_tdb_sendout(struct pfsync_softc *);
-#endif
-int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
+struct mbuf *pfsync_if_dequeue(struct ifnet *);
+
+void pfsync_deferred(struct pf_state *, int);
+void pfsync_undefer(struct pfsync_deferral *, int);
+void pfsync_defer_tmo(void *);
+
+void pfsync_request_update(u_int32_t, u_int64_t);
+void pfsync_update_state_req(struct pf_state *);
+
+void pfsync_drop(struct pfsync_softc *);
+void pfsync_sendout(void);
+void pfsync_send_plus(void *, size_t);
void pfsync_timeout(void *);
-#ifdef PFSYNC_TDB
void pfsync_tdb_timeout(void *);
-#endif
-void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
+
+void pfsync_bulk_start(void);
+void pfsync_bulk_status(u_int8_t);
void pfsync_bulk_update(void *);
-void pfsync_bulkfail(void *);
+void pfsync_bulk_fail(void *);
#ifdef __FreeBSD__
-void pfsync_ifdetach(void *, struct ifnet *);
-void pfsync_senddef(void *, int);
-
/* XXX: ugly */
#define betoh64 (unsigned long long)be64toh
#define timeout_del callout_stop
#endif
-int pfsync_sync_ok;
+#define PFSYNC_MAX_BULKTRIES 12
#ifndef __FreeBSD__
-extern int ifqmaxlen;
+int pfsync_sync_ok;
#endif
#ifdef __FreeBSD__
+VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data);
+VNET_DEFINE(struct if_clone, pfsync_cloner);
+#define V_pfsync_cloner_data VNET(pfsync_cloner_data)
+#define V_pfsync_cloner VNET(pfsync_cloner)
IFC_SIMPLE_DECLARE(pfsync, 1);
#else
struct if_clone pfsync_cloner =
@@ -199,7 +395,6 @@ pfsyncattach(int npfsync)
{
if_clone_attach(&pfsync_cloner);
}
-
int
#ifdef __FreeBSD__
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
@@ -207,95 +402,76 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
pfsync_clone_create(struct if_clone *ifc, int unit)
#endif
{
+ struct pfsync_softc *sc;
struct ifnet *ifp;
+ int q;
if (unit != 0)
return (EINVAL);
+#ifdef __FreeBSD__
+ sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
+ sc->pfsync_sync_ok = 1;
+#else
pfsync_sync_ok = 1;
- if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL)
- return (ENOMEM);
- bzero(pfsyncif, sizeof(*pfsyncif));
-#ifdef __FreeBSD__
- if ((pfsyncif->sc_imo.imo_membership = (struct in_multi **)malloc(
- (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_DEVBUF,
- M_NOWAIT)) == NULL) {
- free(pfsyncif, M_DEVBUF);
- return (ENOSPC);
- }
- pfsyncif->sc_imo.imo_mfilters = NULL;
- pfsyncif->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
- pfsyncif->sc_imo.imo_multicast_vif = -1;
+ sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
+#endif
- ifp = pfsyncif->sc_ifp = if_alloc(IFT_PFSYNC);
- if (ifp == NULL) {
- free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
- free(pfsyncif, M_DEVBUF);
- return (ENOSPC);
- }
- if_initname(ifp, ifc->ifc_name, unit);
+ for (q = 0; q < PFSYNC_S_COUNT; q++)
+ TAILQ_INIT(&sc->sc_qs[q]);
- pfsyncif->sc_detachtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
- pfsync_ifdetach, pfsyncif, EVENTHANDLER_PRI_ANY);
- if (pfsyncif->sc_detachtag == NULL) {
- if_free(ifp);
- free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
- free(pfsyncif, M_DEVBUF);
- return (ENOSPC);
- }
-
- pfsyncif->sc_ifq.ifq_maxlen = ifqmaxlen;
- mtx_init(&pfsyncif->sc_ifq.ifq_mtx, ifp->if_xname,
- "pfsync send queue", MTX_DEF);
- TASK_INIT(&pfsyncif->sc_send_task, 0, pfsync_senddef, pfsyncif);
-#endif
- pfsyncif->sc_mbuf = NULL;
- pfsyncif->sc_mbuf_net = NULL;
-#ifdef PFSYNC_TDB
- pfsyncif->sc_mbuf_tdb = NULL;
-#endif
- pfsyncif->sc_statep.s = NULL;
- pfsyncif->sc_statep_net.s = NULL;
-#ifdef PFSYNC_TDB
- pfsyncif->sc_statep_tdb.t = NULL;
-#endif
- pfsyncif->sc_maxupdates = 128;
#ifdef __FreeBSD__
- pfsyncif->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
- pfsyncif->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
+ sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL,
+ NULL, UMA_ALIGN_PTR, 0);
#else
- pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
- pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
+ pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
#endif
- pfsyncif->sc_ureq_received = 0;
- pfsyncif->sc_ureq_sent = 0;
- pfsyncif->sc_bulk_send_next = NULL;
- pfsyncif->sc_bulk_terminator = NULL;
+ TAILQ_INIT(&sc->sc_upd_req_list);
+ TAILQ_INIT(&sc->sc_deferrals);
+ sc->sc_deferred = 0;
+
+ TAILQ_INIT(&sc->sc_tdb_q);
+
+ sc->sc_len = PFSYNC_MINPKT;
+ sc->sc_maxupdates = 128;
+
#ifndef __FreeBSD__
- ifp = &pfsyncif->sc_if;
+ sc->sc_imo.imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
+ M_WAITOK | M_ZERO);
+ sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+#endif
+
+#ifdef __FreeBSD__
+ ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
+ if (ifp == NULL) {
+ uma_zdestroy(sc->sc_pool);
+ free(sc, M_PFSYNC);
+ return (ENOSPC);
+ }
+ if_initname(ifp, ifc->ifc_name, unit);
+#else
+ ifp = &sc->sc_if;
snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
#endif
- ifp->if_softc = pfsyncif;
+ ifp->if_softc = sc;
ifp->if_ioctl = pfsyncioctl;
ifp->if_output = pfsyncoutput;
ifp->if_start = pfsyncstart;
ifp->if_type = IFT_PFSYNC;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_hdrlen = PFSYNC_HDRLEN;
- pfsync_setmtu(pfsyncif, ETHERMTU);
+ ifp->if_hdrlen = sizeof(struct pfsync_header);
+ ifp->if_mtu = ETHERMTU;
#ifdef __FreeBSD__
- callout_init(&pfsyncif->sc_tmo, CALLOUT_MPSAFE);
-#ifdef PFSYNC_TDB
- callout_init(&pfsyncif->sc_tdb_tmo, CALLOUT_MPSAFE);
-#endif
- callout_init(&pfsyncif->sc_bulk_tmo, CALLOUT_MPSAFE);
- callout_init(&pfsyncif->sc_bulkfail_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
+ callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0);
+ callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE);
#else
- timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
- timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
- timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
- timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
+ timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
+ timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
+ timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
#endif
+
if_attach(ifp);
#ifndef __FreeBSD__
if_alloc_sadl(ifp);
@@ -309,8 +485,14 @@ pfsync_clone_create(struct if_clone *ifc, int unit)
#ifdef __FreeBSD__
bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#else
- bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+ bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+#endif
#endif
+
+#ifdef __FreeBSD__
+ V_pfsyncif = sc;
+#else
+ pfsyncif = sc;
#endif
return (0);
@@ -323,60 +505,98 @@ int
#endif
pfsync_clone_destroy(struct ifnet *ifp)
{
+ struct pfsync_softc *sc = ifp->if_softc;
+
#ifdef __FreeBSD__
- EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfsyncif->sc_detachtag);
- callout_stop(&pfsyncif->sc_tmo);
-#ifdef PFSYNC_TDB
- callout_stop(&pfsyncif->sc_tdb_tmo);
+ PF_LOCK();
+#endif
+ timeout_del(&sc->sc_bulkfail_tmo);
+ timeout_del(&sc->sc_bulk_tmo);
+ timeout_del(&sc->sc_tmo);
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+#if NCARP > 0
+#ifdef notyet
+#ifdef __FreeBSD__
+ if (!sc->pfsync_sync_ok)
+#else
+ if (!pfsync_sync_ok)
+#endif
+ carp_group_demote_adj(&sc->sc_if, -1);
#endif
- callout_stop(&pfsyncif->sc_bulk_tmo);
- callout_stop(&pfsyncif->sc_bulkfail_tmo);
- /* XXX: more? */
#endif
-
#if NBPFILTER > 0
bpfdetach(ifp);
#endif
if_detach(ifp);
+
+ pfsync_drop(sc);
+
+ while (sc->sc_deferred > 0)
+ pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+
+#ifdef __FreeBSD__
+ UMA_DESTROY(sc->sc_pool);
+#else
+ pool_destroy(&sc->sc_pool);
+#endif
#ifdef __FreeBSD__
if_free(ifp);
- free(pfsyncif->sc_imo.imo_membership, M_DEVBUF);
+ if (sc->sc_imo.imo_membership)
+ pfsync_multicast_cleanup(sc);
+ free(sc, M_PFSYNC);
+#else
+ free(sc->sc_imo.imo_membership, M_IPMOPTS);
+ free(sc, M_DEVBUF);
#endif
- free(pfsyncif, M_DEVBUF);
+
+#ifdef __FreeBSD__
+ V_pfsyncif = NULL;
+#else
pfsyncif = NULL;
+#endif
+
#ifndef __FreeBSD__
return (0);
#endif
}
-/*
- * Start output on the pfsync interface.
- */
-void
-pfsyncstart(struct ifnet *ifp)
+struct mbuf *
+pfsync_if_dequeue(struct ifnet *ifp)
{
struct mbuf *m;
#ifndef __FreeBSD__
int s;
#endif
- for (;;) {
#ifdef __FreeBSD__
- IF_LOCK(&ifp->if_snd);
- _IF_DROP(&ifp->if_snd);
- _IF_DEQUEUE(&ifp->if_snd, m);
- IF_UNLOCK(&ifp->if_snd);
+ IF_LOCK(&ifp->if_snd);
+ _IF_DROP(&ifp->if_snd);
+ _IF_DEQUEUE(&ifp->if_snd, m);
+ IF_UNLOCK(&ifp->if_snd);
#else
- s = splnet();
- IF_DROP(&ifp->if_snd);
- IF_DEQUEUE(&ifp->if_snd, m);
- splx(s);
+ s = splnet();
+ IF_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
#endif
- if (m == NULL)
- return;
- else
- m_freem(m);
+ return (m);
+}
+
+/*
+ * Start output on the pfsync interface.
+ */
+void
+pfsyncstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+
+ while ((m = pfsync_if_dequeue(ifp)) != NULL) {
+#ifndef __FreeBSD__
+ IF_DROP(&ifp->if_snd);
+#endif
+ m_freem(m);
}
}
@@ -385,85 +605,198 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
struct pf_state_peer *d)
{
if (s->scrub.scrub_flag && d->scrub == NULL) {
- d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
+#else
+ d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
+#endif
if (d->scrub == NULL)
return (ENOMEM);
- bzero(d->scrub, sizeof(*d->scrub));
}
return (0);
}
+#ifndef __FreeBSD__
+void
+pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
+{
+ bzero(sp, sizeof(struct pfsync_state));
+
+ /* copy from state key */
+ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
+ sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
+ sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
+ sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
+ sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
+ sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
+ sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
+ sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
+ sp->proto = st->key[PF_SK_WIRE]->proto;
+ sp->af = st->key[PF_SK_WIRE]->af;
+
+ /* copy from state */
+ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+ bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
+ sp->creation = htonl(time_uptime - st->creation);
+ sp->expire = pf_state_expires(st);
+ if (sp->expire <= time_second)
+ sp->expire = htonl(0);
+ else
+ sp->expire = htonl(sp->expire - time_second);
+
+ sp->direction = st->direction;
+ sp->log = st->log;
+ sp->timeout = st->timeout;
+ sp->state_flags = st->state_flags;
+ if (st->src_node)
+ sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
+ if (st->nat_src_node)
+ sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
+
+ bcopy(&st->id, &sp->id, sizeof(sp->id));
+ sp->creatorid = st->creatorid;
+ pf_state_peer_hton(&st->src, &sp->src);
+ pf_state_peer_hton(&st->dst, &sp->dst);
+
+ if (st->rule.ptr == NULL)
+ sp->rule = htonl(-1);
+ else
+ sp->rule = htonl(st->rule.ptr->nr);
+ if (st->anchor.ptr == NULL)
+ sp->anchor = htonl(-1);
+ else
+ sp->anchor = htonl(st->anchor.ptr->nr);
+ if (st->nat_rule.ptr == NULL)
+ sp->nat_rule = htonl(-1);
+ else
+ sp->nat_rule = htonl(st->nat_rule.ptr->nr);
+
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+
+}
+#endif
+
int
-pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
+pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
struct pf_state *st = NULL;
+ struct pf_state_key *skw = NULL, *sks = NULL;
struct pf_rule *r = NULL;
struct pfi_kif *kif;
+ int pool_flags;
+ int error;
+
+#ifdef __FreeBSD__
+ PF_LOCK_ASSERT();
+ if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
- printf("pfsync_insert_net_state: invalid creator id:"
+#endif
+ printf("pfsync_state_import: invalid creator id:"
" %08x\n", ntohl(sp->creatorid));
return (EINVAL);
}
- kif = pfi_kif_get(sp->ifname);
- if (kif == NULL) {
+ if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync_insert_net_state: "
+#endif
+ printf("pfsync_state_import: "
"unknown interface: %s\n", sp->ifname);
- /* skip this state */
- return (0);
+ if (flags & PFSYNC_SI_IOCTL)
+ return (EINVAL);
+ return (0); /* skip this state */
}
/*
- * If the ruleset checksums match, it's safe to associate the state
- * with the rule of that number.
+ * If the ruleset checksums match or the state is coming from the ioctl,
+ * it's safe to associate the state with the rule of that number.
*/
- if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
+ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
+ (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
+ pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
r = pf_main_ruleset.rules[
PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
else
+#ifdef __FreeBSD__
+ r = &V_pf_default_rule;
+#else
r = &pf_default_rule;
+#endif
- if (!r->max_states || r->states < r->max_states)
- st = pool_get(&pf_state_pl, PR_NOWAIT);
- if (st == NULL) {
- pfi_kif_unref(kif, PFI_KIF_REF_NONE);
- return (ENOMEM);
- }
- bzero(st, sizeof(*st));
+ if ((r->max_states && r->states_cur >= r->max_states))
+ goto cleanup;
- /* allocate memory for scrub info */
- if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
- pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
- pfi_kif_unref(kif, PFI_KIF_REF_NONE);
- if (st->src.scrub)
- pool_put(&pf_state_scrub_pl, st->src.scrub);
- pool_put(&pf_state_pl, st);
- return (ENOMEM);
- }
+#ifdef __FreeBSD__
+ if (flags & PFSYNC_SI_IOCTL)
+ pool_flags = PR_WAITOK | PR_ZERO;
+ else
+ pool_flags = PR_NOWAIT | PR_ZERO;
- st->rule.ptr = r;
- /* XXX get pointers to nat_rule and anchor */
+ if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL)
+ goto cleanup;
+#else
+ if (flags & PFSYNC_SI_IOCTL)
+ pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
+ else
+ pool_flags = PR_LIMITFAIL | PR_ZERO;
- /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
- r->states++;
+ if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
+ goto cleanup;
+#endif
- /* fill in the rest of the state entry */
- pf_state_host_ntoh(&sp->lan, &st->lan);
- pf_state_host_ntoh(&sp->gwy, &st->gwy);
- pf_state_host_ntoh(&sp->ext, &st->ext);
+ if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
+ goto cleanup;
- pf_state_peer_ntoh(&sp->src, &st->src);
- pf_state_peer_ntoh(&sp->dst, &st->dst);
+ if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
+ &sp->key[PF_SK_STACK].addr[0], sp->af) ||
+ PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
+ &sp->key[PF_SK_STACK].addr[1], sp->af) ||
+ sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
+ sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
+ if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
+ goto cleanup;
+ } else
+ sks = skw;
+
+ /* allocate memory for scrub info */
+ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
+ goto cleanup;
+
+ /* copy to state key(s) */
+ skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
+ skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
+ skw->port[0] = sp->key[PF_SK_WIRE].port[0];
+ skw->port[1] = sp->key[PF_SK_WIRE].port[1];
+ skw->proto = sp->proto;
+ skw->af = sp->af;
+ if (sks != skw) {
+ sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
+ sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
+ sks->port[0] = sp->key[PF_SK_STACK].port[0];
+ sks->port[1] = sp->key[PF_SK_STACK].port[1];
+ sks->proto = sp->proto;
+ sks->af = sp->af;
+ }
+ /* copy to state */
bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
- st->creation = time_second - ntohl(sp->creation);
- st->expire = ntohl(sp->expire) + time_second;
+ st->creation = time_uptime - ntohl(sp->creation);
+ st->expire = time_second;
+ if (sp->expire) {
+ /* XXX No adaptive scaling. */
+ st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
+ }
- st->af = sp->af;
- st->proto = sp->proto;
+ st->expire = ntohl(sp->expire) + time_second;
st->direction = sp->direction;
st->log = sp->log;
st->timeout = sp->timeout;
@@ -471,21 +804,74 @@ pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
bcopy(sp->id, &st->id, sizeof(st->id));
st->creatorid = sp->creatorid;
- st->sync_flags = PFSTATE_FROMSYNC;
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ pf_state_peer_ntoh(&sp->dst, &st->dst);
+
+ st->rule.ptr = r;
+ st->nat_rule.ptr = NULL;
+ st->anchor.ptr = NULL;
+ st->rt_kif = NULL;
+
+ st->pfsync_time = time_uptime;
+ st->sync_state = PFSYNC_S_NONE;
- if (pf_insert_state(kif, st)) {
- pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+ r->states_cur++;
+ r->states_tot++;
+
+ if (!ISSET(flags, PFSYNC_SI_IOCTL))
+ SET(st->state_flags, PFSTATE_NOSYNC);
+
+ if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
- r->states--;
+ r->states_cur--;
+ goto cleanup_state;
+ }
+
+ if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
+ CLR(st->state_flags, PFSTATE_NOSYNC);
+ if (ISSET(st->state_flags, PFSTATE_ACK)) {
+ pfsync_q_ins(st, PFSYNC_S_IACK);
+ schednetisr(NETISR_PFSYNC);
+ }
+ }
+ CLR(st->state_flags, PFSTATE_ACK);
+
+ return (0);
+
+cleanup:
+ error = ENOMEM;
+ if (skw == sks)
+ sks = NULL;
+#ifdef __FreeBSD__
+ if (skw != NULL)
+ pool_put(&V_pf_state_key_pl, skw);
+ if (sks != NULL)
+ pool_put(&V_pf_state_key_pl, sks);
+#else
+ if (skw != NULL)
+ pool_put(&pf_state_key_pl, skw);
+ if (sks != NULL)
+ pool_put(&pf_state_key_pl, sks);
+#endif
+
+cleanup_state: /* pf_state_insert frees the state keys */
+ if (st) {
+#ifdef __FreeBSD__
+ if (st->dst.scrub)
+ pool_put(&V_pf_state_scrub_pl, st->dst.scrub);
+ if (st->src.scrub)
+ pool_put(&V_pf_state_scrub_pl, st->src.scrub);
+ pool_put(&V_pf_state_pl, st);
+#else
if (st->dst.scrub)
pool_put(&pf_state_scrub_pl, st->dst.scrub);
if (st->src.scrub)
pool_put(&pf_state_scrub_pl, st->src.scrub);
pool_put(&pf_state_pl, st);
- return (EINVAL);
+#endif
}
-
- return (0);
+ return (error);
}
void
@@ -495,597 +881,873 @@ pfsync_input(struct mbuf *m, __unused int off)
pfsync_input(struct mbuf *m, ...)
#endif
{
- struct ip *ip = mtod(m, struct ip *);
- struct pfsync_header *ph;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
struct pfsync_softc *sc = pfsyncif;
- struct pf_state *st;
- struct pf_state_cmp key;
- struct pfsync_state *sp;
- struct pfsync_state_upd *up;
- struct pfsync_state_del *dp;
- struct pfsync_state_clr *cp;
- struct pfsync_state_upd_req *rup;
- struct pfsync_state_bus *bus;
-#ifdef PFSYNC_TDB
- struct pfsync_tdb *pt;
#endif
- struct in_addr src;
- struct mbuf *mp;
- int iplen, action, error, i, s, count, offp, sfail, stale = 0;
- u_int8_t chksum_flag = 0;
+ struct pfsync_pkt pkt;
+ struct ip *ip = mtod(m, struct ip *);
+ struct pfsync_header *ph;
+ struct pfsync_subheader subh;
+
+ int offset;
+ int rv;
- pfsyncstats.pfsyncs_ipackets++;
+ V_pfsyncstats.pfsyncs_ipackets++;
/* verify that we have a sync interface configured */
- if (!sc || !sc->sc_sync_ifp || !pf_status.running)
+#ifdef __FreeBSD__
+ if (!sc || !sc->sc_sync_if || !V_pf_status.running)
+#else
+ if (!sc || !sc->sc_sync_if || !pf_status.running)
+#endif
goto done;
/* verify that the packet came in on the right interface */
- if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
- pfsyncstats.pfsyncs_badif++;
+ if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
+ V_pfsyncstats.pfsyncs_badif++;
goto done;
}
- /* verify that the IP TTL is 255. */
+#ifdef __FreeBSD__
+ sc->sc_ifp->if_ipackets++;
+ sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
+#else
+ sc->sc_if.if_ipackets++;
+ sc->sc_if.if_ibytes += m->m_pkthdr.len;
+#endif
+ /* verify that the IP TTL is 255. */
if (ip->ip_ttl != PFSYNC_DFLTTL) {
- pfsyncstats.pfsyncs_badttl++;
+ V_pfsyncstats.pfsyncs_badttl++;
goto done;
}
- iplen = ip->ip_hl << 2;
-
- if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
- pfsyncstats.pfsyncs_hdrops++;
+ offset = ip->ip_hl << 2;
+ if (m->m_pkthdr.len < offset + sizeof(*ph)) {
+ V_pfsyncstats.pfsyncs_hdrops++;
goto done;
}
- if (iplen + sizeof(*ph) > m->m_len) {
- if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
- pfsyncstats.pfsyncs_hdrops++;
- goto done;
+ if (offset + sizeof(*ph) > m->m_len) {
+ if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
+ V_pfsyncstats.pfsyncs_hdrops++;
+ return;
}
ip = mtod(m, struct ip *);
}
- ph = (struct pfsync_header *)((char *)ip + iplen);
+ ph = (struct pfsync_header *)((char *)ip + offset);
/* verify the version */
if (ph->version != PFSYNC_VERSION) {
- pfsyncstats.pfsyncs_badver++;
+ V_pfsyncstats.pfsyncs_badver++;
goto done;
}
- action = ph->action;
- count = ph->count;
-
- /* make sure it's a valid action code */
- if (action >= PFSYNC_ACT_MAX) {
- pfsyncstats.pfsyncs_badact++;
+#if 0
+ if (pfsync_input_hmac(m, offset) != 0) {
+ /* XXX stats */
goto done;
}
+#endif
/* Cheaper to grab this now than having to mess with mbufs later */
- src = ip->ip_src;
-
- if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
- chksum_flag++;
-
- switch (action) {
- case PFSYNC_ACT_CLR: {
- struct pf_state *nexts;
- struct pfi_kif *kif;
- u_int32_t creatorid;
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- sizeof(*cp), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
+ pkt.ip = ip;
+ pkt.src = ip->ip_src;
+ pkt.flags = 0;
+
+#ifdef __FreeBSD__
+ if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+#else
+ if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+#endif
+ pkt.flags |= PFSYNC_SI_CKSUM;
+
+ offset += sizeof(*ph);
+ for (;;) {
+ m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
+ offset += sizeof(subh);
+
+ if (subh.action >= PFSYNC_ACT_MAX) {
+ V_pfsyncstats.pfsyncs_badact++;
+ goto done;
}
- cp = (struct pfsync_state_clr *)(mp->m_data + offp);
- creatorid = cp->creatorid;
- s = splsoftnet();
+ rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
+ ntohs(subh.count));
+ if (rv == -1)
+ return;
+
+ offset += rv;
+ }
+
+done:
+ m_freem(m);
+}
+
+int
+pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_clr *clr;
+ struct mbuf *mp;
+ int len = sizeof(*clr) * count;
+ int i, offp;
+
+ struct pf_state *st, *nexts;
+ struct pf_state_key *sk, *nextsk;
+ struct pf_state_item *si;
+ u_int32_t creatorid;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ clr = (struct pfsync_clr *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- if (cp->ifname[0] == '\0') {
+ for (i = 0; i < count; i++) {
+ creatorid = clr[i].creatorid;
+
+ if (clr[i].ifname[0] == '\0') {
+#ifdef __FreeBSD__
+ for (st = RB_MIN(pf_state_tree_id, &V_tree_id);
+ st; st = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st);
+#else
for (st = RB_MIN(pf_state_tree_id, &tree_id);
st; st = nexts) {
nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
+#endif
if (st->creatorid == creatorid) {
- st->sync_flags |= PFSTATE_FROMSYNC;
+ SET(st->state_flags, PFSTATE_NOSYNC);
pf_unlink_state(st);
}
}
} else {
- if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
+ if (pfi_kif_get(clr[i].ifname) == NULL)
+ continue;
+
+ /* XXX correct? */
#ifdef __FreeBSD__
- PF_UNLOCK();
+ for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl);
+#else
+ for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
#endif
- splx(s);
- return;
- }
- for (st = RB_MIN(pf_state_tree_lan_ext,
- &kif->pfik_lan_ext); st; st = nexts) {
- nexts = RB_NEXT(pf_state_tree_lan_ext,
- &kif->pfik_lan_ext, st);
- if (st->creatorid == creatorid) {
- st->sync_flags |= PFSTATE_FROMSYNC;
- pf_unlink_state(st);
+ sk; sk = nextsk) {
+ nextsk = RB_NEXT(pf_state_tree,
+#ifdef __FreeBSD__
+ &V_pf_statetbl, sk);
+#else
+ &pf_statetbl, sk);
+#endif
+ TAILQ_FOREACH(si, &sk->states, entry) {
+ if (si->s->creatorid == creatorid) {
+ SET(si->s->state_flags,
+ PFSTATE_NOSYNC);
+ pf_unlink_state(si->s);
+ }
}
}
}
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
+ splx(s);
- break;
+ return (len);
+}
+
+int
+pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct mbuf *mp;
+ struct pfsync_state *sa, *sp;
+ int len = sizeof(*sp) * count;
+ int i, offp;
+
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
}
- case PFSYNC_ACT_INS:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*sp), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
+ sa = (struct pfsync_state *)(mp->m_data + offp);
- s = splsoftnet();
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
- i < count; i++, sp++) {
- /* check for invalid values */
- if (sp->timeout >= PFTM_MAX ||
- sp->src.state > PF_TCPS_PROXY_DST ||
- sp->dst.state > PF_TCPS_PROXY_DST ||
- sp->direction > PF_OUT ||
- (sp->af != AF_INET && sp->af != AF_INET6)) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync_insert: PFSYNC_ACT_INS: "
- "invalid value\n");
- pfsyncstats.pfsyncs_badstate++;
- continue;
- }
+ for (i = 0; i < count; i++) {
+ sp = &sa[i];
- if ((error = pfsync_insert_net_state(sp,
- chksum_flag))) {
- if (error == ENOMEM) {
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST ||
+ sp->direction > PF_OUT ||
+ (sp->af != AF_INET && sp->af != AF_INET6)) {
#ifdef __FreeBSD__
- PF_UNLOCK();
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
- splx(s);
- goto done;
- }
- continue;
+ printf("pfsync_input: PFSYNC5_ACT_INS: "
+ "invalid value\n");
}
+ V_pfsyncstats.pfsyncs_badval++;
+ continue;
}
+
+ if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
+ /* drop out, but process the rest of the actions */
+ break;
+ }
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
- case PFSYNC_ACT_UPD:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*sp), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
+ splx(s);
+
+ return (len);
+}
- s = splsoftnet();
+int
+pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_ins_ack *ia, *iaa;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+
+ struct mbuf *mp;
+ int len = count * sizeof(*ia);
+ int offp, i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
- i < count; i++, sp++) {
- int flags = PFSYNC_FLAG_STALE;
-
- /* check for invalid values */
- if (sp->timeout >= PFTM_MAX ||
- sp->src.state > PF_TCPS_PROXY_DST ||
- sp->dst.state > PF_TCPS_PROXY_DST) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync_insert: PFSYNC_ACT_UPD: "
- "invalid value\n");
- pfsyncstats.pfsyncs_badstate++;
- continue;
- }
+ for (i = 0; i < count; i++) {
+ ia = &iaa[i];
- bcopy(sp->id, &key.id, sizeof(key.id));
- key.creatorid = sp->creatorid;
+ bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = ia->creatorid;
- st = pf_find_state_byid(&key);
- if (st == NULL) {
- /* insert the update */
- if (pfsync_insert_net_state(sp, chksum_flag))
- pfsyncstats.pfsyncs_badstate++;
- continue;
- }
- sfail = 0;
- if (st->proto == IPPROTO_TCP) {
- /*
- * The state should never go backwards except
- * for syn-proxy states. Neither should the
- * sequence window slide backwards.
- */
- if (st->src.state > sp->src.state &&
- (st->src.state < PF_TCPS_PROXY_SRC ||
- sp->src.state >= PF_TCPS_PROXY_SRC))
- sfail = 1;
- else if (SEQ_GT(st->src.seqlo,
- ntohl(sp->src.seqlo)))
- sfail = 3;
- else if (st->dst.state > sp->dst.state) {
- /* There might still be useful
- * information about the src state here,
- * so import that part of the update,
- * then "fail" so we send the updated
- * state back to the peer who is missing
- * our what we know. */
- pf_state_peer_ntoh(&sp->src, &st->src);
- /* XXX do anything with timeouts? */
- sfail = 7;
- flags = 0;
- } else if (st->dst.state >= TCPS_SYN_SENT &&
- SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
- sfail = 4;
- } else {
- /*
- * Non-TCP protocol state machine always go
- * forwards
- */
- if (st->src.state > sp->src.state)
- sfail = 5;
- else if (st->dst.state > sp->dst.state)
- sfail = 6;
- }
- if (sfail) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: %s stale update "
- "(%d) id: %016llx "
- "creatorid: %08x\n",
- (sfail < 7 ? "ignoring"
- : "partial"), sfail,
- betoh64(st->id),
- ntohl(st->creatorid));
- pfsyncstats.pfsyncs_badstate++;
-
- if (!(sp->sync_flags & PFSTATE_STALE)) {
- /* we have a better state, send it */
- if (sc->sc_mbuf != NULL && !stale)
- pfsync_sendout(sc);
- stale++;
- if (!st->sync_flags)
- pfsync_pack_state(
- PFSYNC_ACT_UPD, st, flags);
- }
- continue;
- }
- pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
- pf_state_peer_ntoh(&sp->src, &st->src);
- pf_state_peer_ntoh(&sp->dst, &st->dst);
- st->expire = ntohl(sp->expire) + time_second;
- st->timeout = sp->timeout;
- }
- if (stale && sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL)
+ continue;
+
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ pfsync_deferred(st, 0);
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
+ splx(s);
/*
- * It's not strictly necessary for us to support the "uncompressed"
- * delete action, but it's relatively simple and maintains consistency.
+ * XXX this is not yet implemented, but we know the size of the
+ * message so we can skip it.
*/
- case PFSYNC_ACT_DEL:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*sp), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
- s = splsoftnet();
+ return (count * sizeof(struct pfsync_ins_ack));
+}
+
+int
+pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
+ struct pfsync_state_peer *dst)
+{
+ int sfail = 0;
+
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if (st->src.state > src->state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ src->state >= PF_TCPS_PROXY_SRC))
+ sfail = 1;
+ else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
+ sfail = 3;
+ else if (st->dst.state > dst->state) {
+ /* There might still be useful
+ * information about the src state here,
+ * so import that part of the update,
+ * then "fail" so we send the updated
+ * state back to the peer who is missing
+ * our what we know. */
+ pf_state_peer_ntoh(src, &st->src);
+ /* XXX do anything with timeouts? */
+ sfail = 7;
+ } else if (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
+ sfail = 4;
+
+ return (sfail);
+}
+
+int
+pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_state *sa, *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state_key *sk;
+ struct pf_state *st;
+ int sfail;
+
+ struct mbuf *mp;
+ int len = count * sizeof(*sp);
+ int offp, i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ sa = (struct pfsync_state *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
- i < count; i++, sp++) {
- bcopy(sp->id, &key.id, sizeof(key.id));
- key.creatorid = sp->creatorid;
+ for (i = 0; i < count; i++) {
+ sp = &sa[i];
- st = pf_find_state_byid(&key);
- if (st == NULL) {
- pfsyncstats.pfsyncs_badstate++;
- continue;
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+ printf("pfsync_input: PFSYNC_ACT_UPD: "
+ "invalid value\n");
}
- st->sync_flags |= PFSTATE_FROMSYNC;
- pf_unlink_state(st);
+ V_pfsyncstats.pfsyncs_badval++;
+ continue;
}
+
+ bcopy(sp->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ /* insert the update */
+ if (pfsync_state_import(sp, 0))
+ V_pfsyncstats.pfsyncs_badstate++;
+ continue;
+ }
+
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ pfsync_deferred(st, 1);
+
+ sk = st->key[PF_SK_WIRE]; /* XXX right one? */
+ sfail = 0;
+ if (sk->proto == IPPROTO_TCP)
+ sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
+ else {
+ /*
+ * Non-TCP protocol state machine always go
+ * forwards
+ */
+ if (st->src.state > sp->src.state)
+ sfail = 5;
+ else if (st->dst.state > sp->dst.state)
+ sfail = 6;
+ }
+
+ if (sfail) {
#ifdef __FreeBSD__
- PF_UNLOCK();
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
- splx(s);
- break;
- case PFSYNC_ACT_UPD_C: {
- int update_requested = 0;
+ printf("pfsync: %s stale update (%d)"
+ " id: %016llx creatorid: %08x\n",
+ (sfail < 7 ? "ignoring" : "partial"),
+ sfail, betoh64(st->id),
+ ntohl(st->creatorid));
+ }
+ V_pfsyncstats.pfsyncs_stale++;
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*up), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
+ pfsync_update_state(st);
+ schednetisr(NETISR_PFSYNC);
+ continue;
}
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ pf_state_peer_ntoh(&sp->dst, &st->dst);
+ st->expire = ntohl(sp->expire) + time_second;
+ st->timeout = sp->timeout;
+ st->pfsync_time = time_uptime;
+ }
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+ return (len);
+}
- s = splsoftnet();
+int
+pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_upd_c *ua, *up;
+ struct pf_state_key *sk;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+
+ int len = count * sizeof(*up);
+ int sfail;
+
+ struct mbuf *mp;
+ int offp, i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ ua = (struct pfsync_upd_c *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
- i < count; i++, up++) {
- /* check for invalid values */
- if (up->timeout >= PFTM_MAX ||
- up->src.state > PF_TCPS_PROXY_DST ||
- up->dst.state > PF_TCPS_PROXY_DST) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync_insert: "
- "PFSYNC_ACT_UPD_C: "
- "invalid value\n");
- pfsyncstats.pfsyncs_badstate++;
- continue;
+ for (i = 0; i < count; i++) {
+ up = &ua[i];
+
+ /* check for invalid values */
+ if (up->timeout >= PFTM_MAX ||
+ up->src.state > PF_TCPS_PROXY_DST ||
+ up->dst.state > PF_TCPS_PROXY_DST) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+ printf("pfsync_input: "
+ "PFSYNC_ACT_UPD_C: "
+ "invalid value\n");
}
+ V_pfsyncstats.pfsyncs_badval++;
+ continue;
+ }
- bcopy(up->id, &key.id, sizeof(key.id));
- key.creatorid = up->creatorid;
+ bcopy(&up->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = up->creatorid;
- st = pf_find_state_byid(&key);
- if (st == NULL) {
- /* We don't have this state. Ask for it. */
- error = pfsync_request_update(up, &src);
- if (error == ENOMEM) {
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ /* We don't have this state. Ask for it. */
+ pfsync_request_update(id_key.creatorid, id_key.id);
+ continue;
+ }
+
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ pfsync_deferred(st, 1);
+
+ sk = st->key[PF_SK_WIRE]; /* XXX right one? */
+ sfail = 0;
+ if (sk->proto == IPPROTO_TCP)
+ sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
+ else {
+ /*
+ * Non-TCP protocol state machine always go forwards
+ */
+ if (st->src.state > up->src.state)
+ sfail = 5;
+ else if (st->dst.state > up->dst.state)
+ sfail = 6;
+ }
+
+ if (sfail) {
#ifdef __FreeBSD__
- PF_UNLOCK();
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
- splx(s);
- goto done;
- }
- update_requested = 1;
- pfsyncstats.pfsyncs_badstate++;
- continue;
+ printf("pfsync: ignoring stale update "
+ "(%d) id: %016llx "
+ "creatorid: %08x\n", sfail,
+ betoh64(st->id),
+ ntohl(st->creatorid));
}
- sfail = 0;
- if (st->proto == IPPROTO_TCP) {
- /*
- * The state should never go backwards except
- * for syn-proxy states. Neither should the
- * sequence window slide backwards.
- */
- if (st->src.state > up->src.state &&
- (st->src.state < PF_TCPS_PROXY_SRC ||
- up->src.state >= PF_TCPS_PROXY_SRC))
- sfail = 1;
- else if (st->dst.state > up->dst.state)
- sfail = 2;
- else if (SEQ_GT(st->src.seqlo,
- ntohl(up->src.seqlo)))
- sfail = 3;
- else if (st->dst.state >= TCPS_SYN_SENT &&
- SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
- sfail = 4;
- } else {
- /*
- * Non-TCP protocol state machine always go
- * forwards
- */
- if (st->src.state > up->src.state)
- sfail = 5;
- else if (st->dst.state > up->dst.state)
- sfail = 6;
- }
- if (sfail) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: ignoring stale update "
- "(%d) id: %016llx "
- "creatorid: %08x\n", sfail,
- betoh64(st->id),
- ntohl(st->creatorid));
- pfsyncstats.pfsyncs_badstate++;
-
- /* we have a better state, send it out */
- if ((!stale || update_requested) &&
- sc->sc_mbuf != NULL) {
- pfsync_sendout(sc);
- update_requested = 0;
- }
- stale++;
- if (!st->sync_flags)
- pfsync_pack_state(PFSYNC_ACT_UPD, st,
- PFSYNC_FLAG_STALE);
- continue;
- }
- pfsync_alloc_scrub_memory(&up->dst, &st->dst);
- pf_state_peer_ntoh(&up->src, &st->src);
- pf_state_peer_ntoh(&up->dst, &st->dst);
- st->expire = ntohl(up->expire) + time_second;
- st->timeout = up->timeout;
+ V_pfsyncstats.pfsyncs_stale++;
+
+ pfsync_update_state(st);
+ schednetisr(NETISR_PFSYNC);
+ continue;
}
- if ((update_requested || stale) && sc->sc_mbuf)
- pfsync_sendout(sc);
+ pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+ pf_state_peer_ntoh(&up->src, &st->src);
+ pf_state_peer_ntoh(&up->dst, &st->dst);
+ st->expire = ntohl(up->expire) + time_second;
+ st->timeout = up->timeout;
+ st->pfsync_time = time_uptime;
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
+ splx(s);
+
+ return (len);
+}
+
+int
+pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_upd_req *ur, *ura;
+ struct mbuf *mp;
+ int len = count * sizeof(*ur);
+ int i, offp;
+
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
}
- case PFSYNC_ACT_DEL_C:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*dp), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
+ ura = (struct pfsync_upd_req *)(mp->m_data + offp);
- s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
- i < count; i++, dp++) {
- bcopy(dp->id, &key.id, sizeof(key.id));
- key.creatorid = dp->creatorid;
+ for (i = 0; i < count; i++) {
+ ur = &ura[i];
+
+ bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = ur->creatorid;
- st = pf_find_state_byid(&key);
+ if (id_key.id == 0 && id_key.creatorid == 0)
+ pfsync_bulk_start();
+ else {
+ st = pf_find_state_byid(&id_key);
if (st == NULL) {
- pfsyncstats.pfsyncs_badstate++;
+ V_pfsyncstats.pfsyncs_badstate++;
continue;
}
- st->sync_flags |= PFSTATE_FROMSYNC;
- pf_unlink_state(st);
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC))
+ continue;
+
+ pfsync_update_state_req(st);
}
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
- case PFSYNC_ACT_INS_F:
- case PFSYNC_ACT_DEL_F:
- /* not implemented */
- break;
- case PFSYNC_ACT_UREQ:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*rup), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
- s = splsoftnet();
+ return (len);
+}
+
+int
+pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct mbuf *mp;
+ struct pfsync_state *sa, *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ int len = count * sizeof(*sp);
+ int offp, i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ sa = (struct pfsync_state *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
- for (i = 0,
- rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
- i < count; i++, rup++) {
- bcopy(rup->id, &key.id, sizeof(key.id));
- key.creatorid = rup->creatorid;
-
- if (key.id == 0 && key.creatorid == 0) {
- sc->sc_ureq_received = time_uptime;
- if (sc->sc_bulk_send_next == NULL)
- sc->sc_bulk_send_next =
- TAILQ_FIRST(&state_list);
- sc->sc_bulk_terminator = sc->sc_bulk_send_next;
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: received "
- "bulk update request\n");
- pfsync_send_bus(sc, PFSYNC_BUS_START);
-#ifdef __FreeBSD__
- callout_reset(&sc->sc_bulk_tmo, 1 * hz,
- pfsync_bulk_update, pfsyncif);
-#else
- timeout_add(&sc->sc_bulk_tmo, 1 * hz);
-#endif
- } else {
- st = pf_find_state_byid(&key);
- if (st == NULL) {
- pfsyncstats.pfsyncs_badstate++;
- continue;
- }
- if (!st->sync_flags)
- pfsync_pack_state(PFSYNC_ACT_UPD,
- st, 0);
- }
+ for (i = 0; i < count; i++) {
+ sp = &sa[i];
+
+ bcopy(sp->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ V_pfsyncstats.pfsyncs_badstate++;
+ continue;
}
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ pf_unlink_state(st);
+ }
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
- case PFSYNC_ACT_BUS:
- /* If we're not waiting for a bulk update, who cares. */
- if (sc->sc_ureq_sent == 0)
- break;
+ splx(s);
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- sizeof(*bus), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
+ return (len);
+}
+
+int
+pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct mbuf *mp;
+ struct pfsync_del_c *sa, *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ int len = count * sizeof(*sp);
+ int offp, i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ sa = (struct pfsync_del_c *)(mp->m_data + offp);
+
+ s = splsoftnet();
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ for (i = 0; i < count; i++) {
+ sp = &sa[i];
+
+ bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ V_pfsyncstats.pfsyncs_badstate++;
+ continue;
}
- bus = (struct pfsync_state_bus *)(mp->m_data + offp);
- switch (bus->status) {
- case PFSYNC_BUS_START:
+
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ pf_unlink_state(st);
+ }
#ifdef __FreeBSD__
- callout_reset(&sc->sc_bulkfail_tmo,
- pf_pool_limits[PF_LIMIT_STATES].limit /
- (PFSYNC_BULKPACKETS * sc->sc_maxcount),
- pfsync_bulkfail, pfsyncif);
+ PF_UNLOCK();
+#endif
+ splx(s);
+
+ return (len);
+}
+
+int
+pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
#else
- timeout_add(&sc->sc_bulkfail_tmo,
- pf_pool_limits[PF_LIMIT_STATES].limit /
- (PFSYNC_BULKPACKETS * sc->sc_maxcount));
+ struct pfsync_softc *sc = pfsyncif;
#endif
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: received bulk "
- "update start\n");
- break;
- case PFSYNC_BUS_END:
- if (time_uptime - ntohl(bus->endtime) >=
- sc->sc_ureq_sent) {
- /* that's it, we're happy */
- sc->sc_ureq_sent = 0;
- sc->sc_bulk_tries = 0;
- timeout_del(&sc->sc_bulkfail_tmo);
+ struct pfsync_bus *bus;
+ struct mbuf *mp;
+ int len = count * sizeof(*bus);
+ int offp;
+
+ /* If we're not waiting for a bulk update, who cares. */
+ if (sc->sc_ureq_sent == 0)
+ return (len);
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ bus = (struct pfsync_bus *)(mp->m_data + offp);
+
+ switch (bus->status) {
+ case PFSYNC_BUS_START:
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
+ V_pf_pool_limits[PF_LIMIT_STATES].limit /
+ ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
+ sizeof(struct pfsync_state)),
+ pfsync_bulk_fail, V_pfsyncif);
+#else
+ timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
+ pf_pool_limits[PF_LIMIT_STATES].limit /
+ ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
+ sizeof(struct pfsync_state)));
+#endif
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
+ printf("pfsync: received bulk update start\n");
+ break;
+
+ case PFSYNC_BUS_END:
+ if (time_uptime - ntohl(bus->endtime) >=
+ sc->sc_ureq_sent) {
+ /* that's it, we're happy */
+ sc->sc_ureq_sent = 0;
+ sc->sc_bulk_tries = 0;
+ timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
- if (!pfsync_sync_ok)
+#ifdef notyet
#ifdef __FreeBSD__
-#ifdef CARP_ADVANCED
- carp_group_demote_adj(sc->sc_ifp, -1);
+ if (!sc->pfsync_sync_ok)
+#else
+ if (!pfsync_sync_ok)
+#endif
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
#endif
+#ifdef __FreeBSD__
+ sc->pfsync_sync_ok = 1;
#else
- carp_group_demote_adj(&sc->sc_if, -1);
+ pfsync_sync_ok = 1;
#endif
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
#endif
- pfsync_sync_ok = 1;
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: received valid "
- "bulk update end\n");
- } else {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: received invalid "
- "bulk update end: bad timestamp\n");
- }
- break;
+ printf("pfsync: received valid "
+ "bulk update end\n");
+ } else {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
+ printf("pfsync: received invalid "
+ "bulk update end: bad timestamp\n");
}
break;
-#ifdef PFSYNC_TDB
- case PFSYNC_ACT_TDB_UPD:
- if ((mp = m_pulldown(m, iplen + sizeof(*ph),
- count * sizeof(*pt), &offp)) == NULL) {
- pfsyncstats.pfsyncs_badlen++;
- return;
- }
- s = splsoftnet();
+ }
+
+ return (len);
+}
+
+int
+pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ int len = count * sizeof(struct pfsync_tdb);
+
+#if defined(IPSEC)
+ struct pfsync_tdb *tp;
+ struct mbuf *mp;
+ int offp;
+ int i;
+ int s;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ tp = (struct pfsync_tdb *)(mp->m_data + offp);
+
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
- i < count; i++, pt++)
- pfsync_update_net_tdb(pt);
+ for (i = 0; i < count; i++)
+ pfsync_update_net_tdb(&tp[i]);
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
- break;
+ splx(s);
#endif
+
+ return (len);
+}
+
+#if defined(IPSEC)
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
+{
+ struct tdb *tdb;
+ int s;
+
+ /* check for invalid values */
+ if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+ (pt->dst.sa.sa_family != AF_INET &&
+ pt->dst.sa.sa_family != AF_INET6))
+ goto bad;
+
+ s = spltdb();
+ tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+ if (tdb) {
+ pt->rpl = ntohl(pt->rpl);
+ pt->cur_bytes = betoh64(pt->cur_bytes);
+
+ /* Neither replay nor byte counter should ever decrease. */
+ if (pt->rpl < tdb->tdb_rpl ||
+ pt->cur_bytes < tdb->tdb_cur_bytes) {
+ splx(s);
+ goto bad;
+ }
+
+ tdb->tdb_rpl = pt->rpl;
+ tdb->tdb_cur_bytes = pt->cur_bytes;
}
+ splx(s);
+ return;
-done:
- if (m)
- m_freem(m);
+bad:
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
+ printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+ "invalid value\n");
+ V_pfsyncstats.pfsyncs_badstate++;
+ return;
+}
+#endif
+
+
+int
+pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ /* check if we are at the right place in the packet */
+ if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
+ V_pfsyncstats.pfsyncs_badact++;
+
+ /* we're done. free and let the caller return */
+ m_freem(m);
+ return (-1);
+}
+
+int
+pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ V_pfsyncstats.pfsyncs_badact++;
+
+ m_freem(m);
+ return (-1);
}
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+#ifdef __FreeBSD__
+ struct route *rt)
+#else
+ struct rtentry *rt)
+#endif
{
m_freem(m);
return (0);
@@ -1103,12 +1765,15 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ip_moptions *imo = &sc->sc_imo;
struct pfsyncreq pfsyncr;
struct ifnet *sifp;
+ struct ip *ip;
int s, error;
switch (cmd) {
+#if 0
case SIOCSIFADDR:
case SIOCAIFADDR:
case SIOCSIFDSTADDR:
+#endif
case SIOCSIFFLAGS:
#ifdef __FreeBSD__
if (ifp->if_flags & IFF_UP)
@@ -1123,32 +1788,33 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
#endif
break;
case SIOCSIFMTU:
- if (ifr->ifr_mtu < PFSYNC_MINMTU)
+ if (!sc->sc_sync_if ||
+ ifr->ifr_mtu <= PFSYNC_MINPKT ||
+ ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
return (EINVAL);
- if (ifr->ifr_mtu > MCLBYTES)
- ifr->ifr_mtu = MCLBYTES;
- s = splnet();
+ if (ifr->ifr_mtu < ifp->if_mtu) {
+ s = splnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK();
#endif
- if (ifr->ifr_mtu < ifp->if_mtu)
- pfsync_sendout(sc);
- pfsync_setmtu(sc, ifr->ifr_mtu);
+ pfsync_sendout();
#ifdef __FreeBSD__
- PF_UNLOCK();
+ PF_UNLOCK();
#endif
- splx(s);
+ splx(s);
+ }
+ ifp->if_mtu = ifr->ifr_mtu;
break;
case SIOCGETPFSYNC:
bzero(&pfsyncr, sizeof(pfsyncr));
- if (sc->sc_sync_ifp)
+ if (sc->sc_sync_if) {
strlcpy(pfsyncr.pfsyncr_syncdev,
- sc->sc_sync_ifp->if_xname, IFNAMSIZ);
+ sc->sc_sync_if->if_xname, IFNAMSIZ);
+ }
pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
- if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
- return (error);
- break;
+ return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
+
case SIOCSETPFSYNC:
#ifdef __FreeBSD__
if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
@@ -1184,22 +1850,18 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
if (pfsyncr.pfsyncr_syncdev[0] == 0) {
- sc->sc_sync_ifp = NULL;
- if (sc->sc_mbuf_net != NULL) {
- /* Don't keep stale pfsync packets around. */
- s = splnet();
- m_freem(sc->sc_mbuf_net);
- sc->sc_mbuf_net = NULL;
- sc->sc_statep_net.s = NULL;
- splx(s);
- }
+ sc->sc_sync_if = NULL;
#ifdef __FreeBSD__
PF_UNLOCK();
-#endif
+ if (imo->imo_membership)
+ pfsync_multicast_cleanup(sc);
+#else
if (imo->imo_num_memberships > 0) {
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+ in_delmulti(imo->imo_membership[
+ --imo->imo_num_memberships]);
imo->imo_multicast_ifp = NULL;
}
+#endif
break;
}
@@ -1208,116 +1870,117 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
#endif
if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
return (EINVAL);
+
#ifdef __FreeBSD__
PF_LOCK();
#endif
-
s = splnet();
#ifdef __FreeBSD__
if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
#else
if (sifp->if_mtu < sc->sc_if.if_mtu ||
#endif
- (sc->sc_sync_ifp != NULL &&
- sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
+ (sc->sc_sync_if != NULL &&
+ sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
sifp->if_mtu < MCLBYTES - sizeof(struct ip))
- pfsync_sendout(sc);
- sc->sc_sync_ifp = sifp;
+ pfsync_sendout();
+ sc->sc_sync_if = sifp;
#ifdef __FreeBSD__
- pfsync_setmtu(sc, sc->sc_ifp->if_mtu);
+ if (imo->imo_membership) {
+ PF_UNLOCK();
+ pfsync_multicast_cleanup(sc);
+ PF_LOCK();
+ }
#else
- pfsync_setmtu(sc, sc->sc_if.if_mtu);
-#endif
-
if (imo->imo_num_memberships > 0) {
-#ifdef __FreeBSD__
- PF_UNLOCK();
-#endif
in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
-#ifdef __FreeBSD__
- PF_LOCK();
-#endif
imo->imo_multicast_ifp = NULL;
}
+#endif
- if (sc->sc_sync_ifp &&
#ifdef __FreeBSD__
+ if (sc->sc_sync_if &&
sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+ PF_UNLOCK();
+ error = pfsync_multicast_setup(sc);
+ if (error)
+ return (error);
+ PF_LOCK();
+ }
#else
+ if (sc->sc_sync_if &&
sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
-#endif
struct in_addr addr;
- if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
- sc->sc_sync_ifp = NULL;
-#ifdef __FreeBSD__
- PF_UNLOCK();
-#endif
+ if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
+ sc->sc_sync_if = NULL;
splx(s);
return (EADDRNOTAVAIL);
}
-#ifdef __FreeBSD__
- addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
-#else
addr.s_addr = INADDR_PFSYNC_GROUP;
-#endif
-#ifdef __FreeBSD__
- PF_UNLOCK();
-#endif
if ((imo->imo_membership[0] =
- in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
- sc->sc_sync_ifp = NULL;
+ in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
+ sc->sc_sync_if = NULL;
splx(s);
return (ENOBUFS);
}
-#ifdef __FreeBSD__
- PF_LOCK();
-#endif
imo->imo_num_memberships++;
- imo->imo_multicast_ifp = sc->sc_sync_ifp;
+ imo->imo_multicast_ifp = sc->sc_sync_if;
imo->imo_multicast_ttl = PFSYNC_DFLTTL;
imo->imo_multicast_loop = 0;
}
+#endif /* !__FreeBSD__ */
- if (sc->sc_sync_ifp ||
+ ip = &sc->sc_template;
+ bzero(ip, sizeof(*ip));
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(sc->sc_template) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ /* len and id are set later */
#ifdef __FreeBSD__
- sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
+ ip->ip_off = IP_DF;
#else
- sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
+ ip->ip_off = htons(IP_DF);
#endif
+ ip->ip_ttl = PFSYNC_DFLTTL;
+ ip->ip_p = IPPROTO_PFSYNC;
+ ip->ip_src.s_addr = INADDR_ANY;
+ ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
+
+ if (sc->sc_sync_if) {
/* Request a full state table update. */
sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
- if (pfsync_sync_ok)
+#ifdef notyet
#ifdef __FreeBSD__
-#ifdef CARP_ADVANCED
- carp_group_demote_adj(sc->sc_ifp, 1);
-#endif
+ if (sc->pfsync_sync_ok)
#else
+ if (pfsync_sync_ok)
+#endif
carp_group_demote_adj(&sc->sc_if, 1);
#endif
#endif
+#ifdef __FreeBSD__
+ sc->pfsync_sync_ok = 0;
+#else
pfsync_sync_ok = 0;
+#endif
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
printf("pfsync: requesting bulk update\n");
#ifdef __FreeBSD__
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
- pfsync_bulkfail, pfsyncif);
+ pfsync_bulk_fail, V_pfsyncif);
#else
- timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
-#endif
- error = pfsync_request_update(NULL, NULL);
- if (error == ENOMEM) {
-#ifdef __FreeBSD__
- PF_UNLOCK();
+ timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
#endif
- splx(s);
- return (ENOMEM);
- }
- pfsync_sendout(sc);
+ pfsync_request_update(0, 0);
}
#ifdef __FreeBSD__
PF_UNLOCK();
@@ -1333,34 +1996,165 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (0);
}
-void
-pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
+int
+pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
+{
+ struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);
+
+ pfsync_state_export(sp, st);
+
+ return (sizeof(*sp));
+}
+
+int
+pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
+{
+ struct pfsync_ins_ack *iack =
+ (struct pfsync_ins_ack *)(m->m_data + offset);
+
+ iack->id = st->id;
+ iack->creatorid = st->creatorid;
+
+ return (sizeof(*iack));
+}
+
+int
+pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
{
- int mtu;
+ struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);
+
+ bzero(up, sizeof(*up));
+ up->id = st->id;
+ pf_state_peer_hton(&st->src, &up->src);
+ pf_state_peer_hton(&st->dst, &up->dst);
+ up->creatorid = st->creatorid;
- if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
- mtu = sc->sc_sync_ifp->if_mtu;
+ up->expire = pf_state_expires(st);
+ if (up->expire <= time_second)
+ up->expire = htonl(0);
else
- mtu = mtu_req;
+ up->expire = htonl(up->expire - time_second);
+ up->timeout = st->timeout;
+
+ return (sizeof(*up));
+}
+
+int
+pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
+{
+ struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);
+
+ dp->id = st->id;
+ dp->creatorid = st->creatorid;
- sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
- sizeof(struct pfsync_state);
- if (sc->sc_maxcount > 254)
- sc->sc_maxcount = 254;
+ SET(st->state_flags, PFSTATE_NOSYNC);
+
+ return (sizeof(*dp));
+}
+
+void
+pfsync_drop(struct pfsync_softc *sc)
+{
+ struct pf_state *st;
+ struct pfsync_upd_req_item *ur;
+#ifdef notyet
+ struct tdb *t;
+#endif
+ int q;
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ continue;
+
+ TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
+#ifdef PFSYNC_DEBUG
#ifdef __FreeBSD__
- sc->sc_ifp->if_mtu = sizeof(struct pfsync_header) +
+ KASSERT(st->sync_state == q,
+ ("%s: st->sync_state == q",
+ __FUNCTION__));
#else
- sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
+ KASSERT(st->sync_state == q);
+#endif
#endif
- sc->sc_maxcount * sizeof(struct pfsync_state);
+ st->sync_state = PFSYNC_S_NONE;
+ }
+ TAILQ_INIT(&sc->sc_qs[q]);
+ }
+
+ while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+ pool_put(&sc->sc_pool, ur);
+ }
+
+ sc->sc_plus = NULL;
+
+#ifdef notyet
+ if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
+ TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
+ CLR(t->tdb_flags, TDBF_PFSYNC);
+
+ TAILQ_INIT(&sc->sc_tdb_q);
+ }
+#endif
+
+ sc->sc_len = PFSYNC_MINPKT;
}
-struct mbuf *
-pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
+#ifdef __FreeBSD__
+void pfsync_sendout()
{
- struct pfsync_header *h;
+ pfsync_sendout1(1);
+}
+
+static void
+pfsync_sendout1(int schedswi)
+{
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+void
+pfsync_sendout(void)
+{
+ struct pfsync_softc *sc = pfsyncif;
+#endif
+#if NBPFILTER > 0
+#ifdef __FreeBSD__
+ struct ifnet *ifp = sc->sc_ifp;
+#else
+ struct ifnet *ifp = &sc->sc_if;
+#endif
+#endif
struct mbuf *m;
- int len;
+ struct ip *ip;
+ struct pfsync_header *ph;
+ struct pfsync_subheader *subh;
+ struct pf_state *st;
+ struct pfsync_upd_req_item *ur;
+#ifdef notyet
+ struct tdb *t;
+#endif
+#ifdef __FreeBSD__
+ size_t pktlen;
+#endif
+ int offset;
+ int q, count = 0;
+
+#ifdef __FreeBSD__
+ PF_LOCK_ASSERT();
+#else
+ splassert(IPL_NET);
+#endif
+
+ if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
+ return;
+
+#if NBPFILTER > 0
+ if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
+#else
+ if (sc->sc_sync_if == NULL) {
+#endif
+ pfsync_drop(sc);
+ return;
+ }
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
@@ -1369,932 +2163,1293 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
#else
sc->sc_if.if_oerrors++;
#endif
- return (NULL);
+ V_pfsyncstats.pfsyncs_onomem++;
+ pfsync_drop(sc);
+ return;
}
- switch (action) {
- case PFSYNC_ACT_CLR:
- len = sizeof(struct pfsync_header) +
- sizeof(struct pfsync_state_clr);
- break;
- case PFSYNC_ACT_UPD_C:
- len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
- sizeof(struct pfsync_header);
- break;
- case PFSYNC_ACT_DEL_C:
- len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
- sizeof(struct pfsync_header);
- break;
- case PFSYNC_ACT_UREQ:
- len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
- sizeof(struct pfsync_header);
- break;
- case PFSYNC_ACT_BUS:
- len = sizeof(struct pfsync_header) +
- sizeof(struct pfsync_state_bus);
- break;
-#ifdef PFSYNC_TDB
- case PFSYNC_ACT_TDB_UPD:
- len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
- sizeof(struct pfsync_header);
- break;
+#ifdef __FreeBSD__
+ pktlen = max_linkhdr + sc->sc_len;
+ if (pktlen > MHLEN) {
+ /* Find the right pool to allocate from. */
+ /* XXX: This is ugly. */
+ m_cljget(m, M_DONTWAIT, pktlen <= MCLBYTES ? MCLBYTES :
+#if MJUMPAGESIZE != MCLBYTES
+ pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE :
#endif
- default:
- len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
- sizeof(struct pfsync_header);
- break;
- }
-
- if (len > MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
+ pktlen <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES);
+#else
+ if (max_linkhdr + sc->sc_len > MHLEN) {
+ MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
+#endif
+ if (!ISSET(m->m_flags, M_EXT)) {
m_free(m);
#ifdef __FreeBSD__
sc->sc_ifp->if_oerrors++;
#else
sc->sc_if.if_oerrors++;
#endif
- return (NULL);
+ V_pfsyncstats.pfsyncs_onomem++;
+ pfsync_drop(sc);
+ return;
}
- m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
- } else
- MH_ALIGN(m, len);
+ }
+ m->m_data += max_linkhdr;
+ m->m_len = m->m_pkthdr.len = sc->sc_len;
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
- h = mtod(m, struct pfsync_header *);
- h->version = PFSYNC_VERSION;
- h->af = 0;
- h->count = 0;
- h->action = action;
-#ifndef PFSYNC_TDB
- if (action != PFSYNC_ACT_TDB_UPD)
-#endif
- bcopy(&pf_status.pf_chksum, &h->pf_chksum,
- PF_MD5_DIGEST_LENGTH);
+ /* build the ip header */
+ ip = (struct ip *)m->m_data;
+ bcopy(&sc->sc_template, ip, sizeof(*ip));
+ offset = sizeof(*ip);
- *sp = (void *)((char *)h + PFSYNC_HDRLEN);
-#ifdef PFSYNC_TDB
- if (action == PFSYNC_ACT_TDB_UPD)
#ifdef __FreeBSD__
- callout_reset(&sc->sc_tdb_tmo, hz, pfsync_tdb_timeout,
- pfsyncif);
+ ip->ip_len = m->m_pkthdr.len;
#else
- timeout_add(&sc->sc_tdb_tmo, hz);
-#endif
- else
+ ip->ip_len = htons(m->m_pkthdr.len);
#endif
-#ifdef __FreeBSD__
- callout_reset(&sc->sc_tmo, hz, pfsync_timeout, pfsyncif);
-#else
- timeout_add(&sc->sc_tmo, hz);
-#endif
- return (m);
-}
+ ip->ip_id = htons(ip_randomid());
-int
-pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
-{
- struct ifnet *ifp = NULL;
- struct pfsync_softc *sc = pfsyncif;
- struct pfsync_header *h, *h_net;
- struct pfsync_state *sp = NULL;
- struct pfsync_state_upd *up = NULL;
- struct pfsync_state_del *dp = NULL;
- struct pf_rule *r;
- u_long secs;
- int s, ret = 0;
- u_int8_t i = 255, newaction = 0;
+ /* build the pfsync header */
+ ph = (struct pfsync_header *)(m->m_data + offset);
+ bzero(ph, sizeof(*ph));
+ offset += sizeof(*ph);
- if (sc == NULL)
- return (0);
+ ph->version = PFSYNC_VERSION;
+ ph->len = htons(sc->sc_len - sizeof(*ip));
#ifdef __FreeBSD__
- ifp = sc->sc_ifp;
+ bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
#else
- ifp = &sc->sc_if;
+ bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
#endif
- /*
- * If a packet falls in the forest and there's nobody around to
- * hear, does it make a sound?
- */
- if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+ /* walk the queues */
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ continue;
+
+ subh = (struct pfsync_subheader *)(m->m_data + offset);
+ offset += sizeof(*subh);
+
+ count = 0;
+ TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
+#ifdef PFSYNC_DEBUG
#ifdef __FreeBSD__
- sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+ KASSERT(st->sync_state == q,
+ ("%s: st->sync_state == q",
+ __FUNCTION__));
#else
- sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+ KASSERT(st->sync_state == q);
#endif
- /* Don't leave any stale pfsync packets hanging around. */
- if (sc->sc_mbuf != NULL) {
- m_freem(sc->sc_mbuf);
- sc->sc_mbuf = NULL;
- sc->sc_statep.s = NULL;
+#endif
+
+ offset += pfsync_qs[q].write(st, m, offset);
+ st->sync_state = PFSYNC_S_NONE;
+ count++;
}
- return (0);
+ TAILQ_INIT(&sc->sc_qs[q]);
+
+ bzero(subh, sizeof(*subh));
+ subh->action = pfsync_qs[q].action;
+ subh->count = htons(count);
}
- if (action >= PFSYNC_ACT_MAX)
- return (EINVAL);
+ if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+ subh = (struct pfsync_subheader *)(m->m_data + offset);
+ offset += sizeof(*subh);
- s = splnet();
-#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
-#endif
- if (sc->sc_mbuf == NULL) {
- if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
- (void *)&sc->sc_statep.s)) == NULL) {
- splx(s);
- return (ENOMEM);
- }
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- } else {
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- if (h->action != action) {
- pfsync_sendout(sc);
- if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
- (void *)&sc->sc_statep.s)) == NULL) {
- splx(s);
- return (ENOMEM);
- }
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- } else {
- /*
- * If it's an update, look in the packet to see if
- * we already have an update for the state.
- */
- if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
- struct pfsync_state *usp =
- (void *)((char *)h + PFSYNC_HDRLEN);
-
- for (i = 0; i < h->count; i++) {
- if (!memcmp(usp->id, &st->id,
- PFSYNC_ID_LEN) &&
- usp->creatorid == st->creatorid) {
- sp = usp;
- sp->updates++;
- break;
- }
- usp++;
- }
- }
+ count = 0;
+ while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+
+ bcopy(&ur->ur_msg, m->m_data + offset,
+ sizeof(ur->ur_msg));
+ offset += sizeof(ur->ur_msg);
+
+ pool_put(&sc->sc_pool, ur);
+
+ count++;
}
+
+ bzero(subh, sizeof(*subh));
+ subh->action = PFSYNC_ACT_UPD_REQ;
+ subh->count = htons(count);
}
- secs = time_second;
+ /* has someone built a custom region for us to add? */
+ if (sc->sc_plus != NULL) {
+ bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
+ offset += sc->sc_pluslen;
- st->pfsync_time = time_uptime;
+ sc->sc_plus = NULL;
+ }
- if (sp == NULL) {
- /* not a "duplicate" update */
- i = 255;
- sp = sc->sc_statep.s++;
- sc->sc_mbuf->m_pkthdr.len =
- sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
- h->count++;
- bzero(sp, sizeof(*sp));
-
- bcopy(&st->id, sp->id, sizeof(sp->id));
- sp->creatorid = st->creatorid;
-
- strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
- pf_state_host_hton(&st->lan, &sp->lan);
- pf_state_host_hton(&st->gwy, &sp->gwy);
- pf_state_host_hton(&st->ext, &sp->ext);
-
- bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
-
- sp->creation = htonl(secs - st->creation);
- pf_state_counter_hton(st->packets[0], sp->packets[0]);
- pf_state_counter_hton(st->packets[1], sp->packets[1]);
- pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
- pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
- if ((r = st->rule.ptr) == NULL)
- sp->rule = htonl(-1);
- else
- sp->rule = htonl(r->nr);
- if ((r = st->anchor.ptr) == NULL)
- sp->anchor = htonl(-1);
- else
- sp->anchor = htonl(r->nr);
- sp->af = st->af;
- sp->proto = st->proto;
- sp->direction = st->direction;
- sp->log = st->log;
- sp->state_flags = st->state_flags;
- sp->timeout = st->timeout;
+#ifdef notyet
+ if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
+ subh = (struct pfsync_subheader *)(m->m_data + offset);
+ offset += sizeof(*subh);
+
+ count = 0;
+ TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
+ offset += pfsync_out_tdb(t, m, offset);
+ CLR(t->tdb_flags, TDBF_PFSYNC);
- if (flags & PFSYNC_FLAG_STALE)
- sp->sync_flags |= PFSTATE_STALE;
+ count++;
+ }
+ TAILQ_INIT(&sc->sc_tdb_q);
+
+ bzero(subh, sizeof(*subh));
+ subh->action = PFSYNC_ACT_TDB;
+ subh->count = htons(count);
}
+#endif
- pf_state_peer_hton(&st->src, &sp->src);
- pf_state_peer_hton(&st->dst, &sp->dst);
+ subh = (struct pfsync_subheader *)(m->m_data + offset);
+ offset += sizeof(*subh);
- if (st->expire <= secs)
- sp->expire = htonl(0);
- else
- sp->expire = htonl(st->expire - secs);
+ bzero(subh, sizeof(*subh));
+ subh->action = PFSYNC_ACT_EOF;
+ subh->count = htons(1);
- /* do we need to build "compressed" actions for network transfer? */
- if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
- switch (action) {
- case PFSYNC_ACT_UPD:
- newaction = PFSYNC_ACT_UPD_C;
- break;
- case PFSYNC_ACT_DEL:
- newaction = PFSYNC_ACT_DEL_C;
- break;
- default:
- /* by default we just send the uncompressed states */
- break;
- }
+ /* XXX write checksum in EOF here */
+
+ /* we're done, let's put it on the wire */
+#if NBPFILTER > 0
+ if (ifp->if_bpf) {
+ m->m_data += sizeof(*ip);
+ m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+#ifdef __FreeBSD__
+ BPF_MTAP(ifp, m);
+#else
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+ m->m_data -= sizeof(*ip);
+ m->m_len = m->m_pkthdr.len = sc->sc_len;
}
- if (newaction) {
- if (sc->sc_mbuf_net == NULL) {
- if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
- (void *)&sc->sc_statep_net.s)) == NULL) {
- splx(s);
- return (ENOMEM);
- }
- }
- h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
-
- switch (newaction) {
- case PFSYNC_ACT_UPD_C:
- if (i != 255) {
- up = (void *)((char *)h_net +
- PFSYNC_HDRLEN + (i * sizeof(*up)));
- up->updates++;
- } else {
- h_net->count++;
- sc->sc_mbuf_net->m_pkthdr.len =
- sc->sc_mbuf_net->m_len += sizeof(*up);
- up = sc->sc_statep_net.u++;
-
- bzero(up, sizeof(*up));
- bcopy(&st->id, up->id, sizeof(up->id));
- up->creatorid = st->creatorid;
- }
- up->timeout = st->timeout;
- up->expire = sp->expire;
- up->src = sp->src;
- up->dst = sp->dst;
- break;
- case PFSYNC_ACT_DEL_C:
- sc->sc_mbuf_net->m_pkthdr.len =
- sc->sc_mbuf_net->m_len += sizeof(*dp);
- dp = sc->sc_statep_net.d++;
- h_net->count++;
-
- bzero(dp, sizeof(*dp));
- bcopy(&st->id, dp->id, sizeof(dp->id));
- dp->creatorid = st->creatorid;
- break;
- }
+ if (sc->sc_sync_if == NULL) {
+ sc->sc_len = PFSYNC_MINPKT;
+ m_freem(m);
+ return;
}
+#endif
- if (h->count == sc->sc_maxcount ||
- (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
- ret = pfsync_sendout(sc);
+#ifdef __FreeBSD__
+ sc->sc_ifp->if_opackets++;
+ sc->sc_ifp->if_obytes += m->m_pkthdr.len;
+ sc->sc_len = PFSYNC_MINPKT;
- splx(s);
- return (ret);
+ if (!_IF_QFULL(&sc->sc_ifp->if_snd))
+ _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+ else {
+ m_freem(m);
+ sc->sc_ifp->if_snd.ifq_drops++;
+ }
+ if (schedswi)
+ swi_sched(V_pfsync_swi_cookie, 0);
+#else
+ sc->sc_if.if_opackets++;
+ sc->sc_if.if_obytes += m->m_pkthdr.len;
+
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
+ pfsyncstats.pfsyncs_opackets++;
+ else
+ pfsyncstats.pfsyncs_oerrors++;
+
+ /* start again */
+ sc->sc_len = PFSYNC_MINPKT;
+#endif
}
-/* This must be called in splnet() */
-int
-pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
+void
+pfsync_insert_state(struct pf_state *st)
{
- struct ifnet *ifp = NULL;
- struct pfsync_header *h;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
struct pfsync_softc *sc = pfsyncif;
- struct pfsync_state_upd_req *rup;
- int ret = 0;
-
- if (sc == NULL)
- return (0);
+#endif
#ifdef __FreeBSD__
- ifp = sc->sc_ifp;
+ PF_LOCK_ASSERT();
#else
- ifp = &sc->sc_if;
+ splassert(IPL_SOFTNET);
#endif
- if (sc->sc_mbuf == NULL) {
- if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
- (void *)&sc->sc_statep.s)) == NULL)
- return (ENOMEM);
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- } else {
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- if (h->action != PFSYNC_ACT_UREQ) {
- pfsync_sendout(sc);
- if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
- (void *)&sc->sc_statep.s)) == NULL)
- return (ENOMEM);
- h = mtod(sc->sc_mbuf, struct pfsync_header *);
- }
- }
- if (src != NULL)
- sc->sc_sendaddr = *src;
- sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
- h->count++;
- rup = sc->sc_statep.r++;
- bzero(rup, sizeof(*rup));
- if (up != NULL) {
- bcopy(up->id, rup->id, sizeof(rup->id));
- rup->creatorid = up->creatorid;
+ if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
+ st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ return;
}
- if (h->count == sc->sc_maxcount)
- ret = pfsync_sendout(sc);
+ if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC))
+ return;
+
+#ifdef PFSYNC_DEBUG
+#ifdef __FreeBSD__
+ KASSERT(st->sync_state == PFSYNC_S_NONE,
+ ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
+#else
+ KASSERT(st->sync_state == PFSYNC_S_NONE);
+#endif
+#endif
+
+ if (sc->sc_len == PFSYNC_MINPKT)
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
+ V_pfsyncif);
+#else
+ timeout_add_sec(&sc->sc_tmo, 1);
+#endif
+
+ pfsync_q_ins(st, PFSYNC_S_INS);
- return (ret);
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ schednetisr(NETISR_PFSYNC);
+ else
+ st->sync_updates = 0;
}
+int defer = 10;
+
int
-pfsync_clear_states(u_int32_t creatorid, char *ifname)
+pfsync_defer(struct pf_state *st, struct mbuf *m)
{
- struct ifnet *ifp = NULL;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
struct pfsync_softc *sc = pfsyncif;
- struct pfsync_state_clr *cp;
- int s, ret;
+#endif
+ struct pfsync_deferral *pd;
- if (sc == NULL)
+#ifdef __FreeBSD__
+ PF_LOCK_ASSERT();
+#else
+ splassert(IPL_SOFTNET);
+#endif
+
+ if (sc->sc_deferred >= 128)
+ pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+
+ pd = pool_get(&sc->sc_pool, M_NOWAIT);
+ if (pd == NULL)
return (0);
+ sc->sc_deferred++;
#ifdef __FreeBSD__
- ifp = sc->sc_ifp;
+ m->m_flags |= M_SKIP_FIREWALL;
#else
- ifp = &sc->sc_if;
+ m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
#endif
+ SET(st->state_flags, PFSTATE_ACK);
+
+ pd->pd_st = st;
+ pd->pd_m = m;
+
+ TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ callout_init(&pd->pd_tmo, CALLOUT_MPSAFE);
+ callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo,
+ pd);
+#else
+ timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
+ timeout_add(&pd->pd_tmo, defer);
#endif
- s = splnet();
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
- if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
- (void *)&sc->sc_statep.c)) == NULL) {
- splx(s);
- return (ENOMEM);
- }
- sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
- cp = sc->sc_statep.c;
- cp->creatorid = creatorid;
- if (ifname != NULL)
- strlcpy(cp->ifname, ifname, IFNAMSIZ);
- ret = (pfsync_sendout(sc));
- splx(s);
- return (ret);
+ return (1);
}
void
-pfsync_timeout(void *v)
+pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
- struct pfsync_softc *sc = v;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
+#endif
int s;
- s = splnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ PF_LOCK_ASSERT();
+#else
+ splassert(IPL_SOFTNET);
#endif
- pfsync_sendout(sc);
+
+ TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+ sc->sc_deferred--;
+
+ CLR(pd->pd_st->state_flags, PFSTATE_ACK);
+ timeout_del(&pd->pd_tmo); /* bah */
+ if (drop)
+ m_freem(pd->pd_m);
+ else {
+ s = splnet();
#ifdef __FreeBSD__
- PF_UNLOCK();
+ /* XXX: use pf_defered?! */
+ PF_UNLOCK();
#endif
- splx(s);
+ ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
+ (void *)NULL, (void *)NULL);
+#ifdef __FreeBSD__
+ PF_LOCK();
+#endif
+ splx(s);
+ }
+
+ pool_put(&sc->sc_pool, pd);
}
-#ifdef PFSYNC_TDB
void
-pfsync_tdb_timeout(void *v)
+pfsync_defer_tmo(void *arg)
{
- struct pfsync_softc *sc = v;
+#if defined(__FreeBSD__) && defined(VIMAGE)
+ struct pfsync_deferral *pd = arg;
+#endif
int s;
- s = splnet();
+ s = splsoftnet();
#ifdef __FreeBSD__
+ CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */
PF_LOCK();
#endif
- pfsync_tdb_sendout(sc);
+ pfsync_undefer(arg, 0);
#ifdef __FreeBSD__
PF_UNLOCK();
+ CURVNET_RESTORE();
#endif
splx(s);
}
+
+void
+pfsync_deferred(struct pf_state *st, int drop)
+{
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
+ struct pfsync_deferral *pd;
+
+ TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+ if (pd->pd_st == st) {
+ pfsync_undefer(pd, drop);
+ return;
+ }
+ }
+
+ panic("pfsync_send_deferred: unable to find deferred state");
+}
+
+u_int pfsync_upds = 0;
-/* This must be called in splnet() */
void
-pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
+pfsync_update_state(struct pf_state *st)
{
- struct pfsync_state_bus *bus;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
+#endif
+ int sync = 0;
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ PF_LOCK_ASSERT();
+#else
+ splassert(IPL_SOFTNET);
#endif
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
- if (pfsync_sync_ok &&
- (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
- (void *)&sc->sc_statep.b)) != NULL) {
- sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
- bus = sc->sc_statep.b;
- bus->creatorid = pf_status.hostid;
- bus->status = status;
- bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
- pfsync_sendout(sc);
+ if (sc == NULL)
+ return;
+
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ pfsync_deferred(st, 0);
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
+ if (st->sync_state != PFSYNC_S_NONE)
+ pfsync_q_del(st);
+ return;
+ }
+
+ if (sc->sc_len == PFSYNC_MINPKT)
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
+ V_pfsyncif);
+#else
+ timeout_add_sec(&sc->sc_tmo, 1);
+#endif
+
+ switch (st->sync_state) {
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_UPD:
+ case PFSYNC_S_INS:
+ /* we're already handling it */
+
+ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
+ st->sync_updates++;
+ if (st->sync_updates >= sc->sc_maxupdates)
+ sync = 1;
+ }
+ break;
+
+ case PFSYNC_S_IACK:
+ pfsync_q_del(st);
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(st, PFSYNC_S_UPD_C);
+ st->sync_updates = 0;
+ break;
+
+ default:
+ panic("pfsync_update_state: unexpected sync state %d",
+ st->sync_state);
+ }
+
+ if (sync || (time_uptime - st->pfsync_time) < 2) {
+ pfsync_upds++;
+ schednetisr(NETISR_PFSYNC);
}
}
void
-pfsync_bulk_update(void *v)
+pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
- struct pfsync_softc *sc = v;
- int s, i = 0;
- struct pf_state *state;
-
- s = splnet();
#ifdef __FreeBSD__
- PF_LOCK();
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
+ struct pfsync_upd_req_item *item;
+ size_t nlen = sizeof(struct pfsync_upd_req);
+ int s;
+
+ PF_LOCK_ASSERT();
/*
- * Grab at most PFSYNC_BULKPACKETS worth of states which have not
- * been sent since the latest request was made.
+ * this code does nothing to prevent multiple update requests for the
+ * same state being generated.
*/
- state = sc->sc_bulk_send_next;
- if (state)
- do {
- /* send state update if syncable and not already sent */
- if (!state->sync_flags
- && state->timeout < PFTM_MAX
- && state->pfsync_time <= sc->sc_ureq_received) {
- pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
- i++;
- }
- /* figure next state to send */
- state = TAILQ_NEXT(state, u.s.entry_list);
-
- /* wrap to start of list if we hit the end */
- if (!state)
- state = TAILQ_FIRST(&state_list);
- } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
- state != sc->sc_bulk_terminator);
-
- if (!state || state == sc->sc_bulk_terminator) {
- /* we're done */
- pfsync_send_bus(sc, PFSYNC_BUS_END);
- sc->sc_ureq_received = 0;
- sc->sc_bulk_send_next = NULL;
- sc->sc_bulk_terminator = NULL;
- timeout_del(&sc->sc_bulk_tmo);
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: bulk update complete\n");
- } else {
- /* look again for more in a bit */
+ item = pool_get(&sc->sc_pool, PR_NOWAIT);
+ if (item == NULL) {
+ /* XXX stats */
+ return;
+ }
+
+ item->ur_msg.id = id;
+ item->ur_msg.creatorid = creatorid;
+
+ if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+ nlen += sizeof(struct pfsync_subheader);
+
#ifdef __FreeBSD__
- callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update,
- pfsyncif);
+ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
#else
- timeout_add(&sc->sc_bulk_tmo, 1);
+ if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
#endif
- sc->sc_bulk_send_next = state;
+ s = splnet();
+ pfsync_sendout();
+ splx(s);
+
+ nlen = sizeof(struct pfsync_subheader) +
+ sizeof(struct pfsync_upd_req);
}
- if (sc->sc_mbuf != NULL)
- pfsync_sendout(sc);
- splx(s);
+
+ TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
+ sc->sc_len += nlen;
+
+ schednetisr(NETISR_PFSYNC);
+}
+
+void
+pfsync_update_state_req(struct pf_state *st)
+{
#ifdef __FreeBSD__
- PF_UNLOCK();
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
+
+ PF_LOCK_ASSERT();
+
+ if (sc == NULL)
+ panic("pfsync_update_state_req: nonexistant instance");
+
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
+ if (st->sync_state != PFSYNC_S_NONE)
+ pfsync_q_del(st);
+ return;
+ }
+
+ switch (st->sync_state) {
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_IACK:
+ pfsync_q_del(st);
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(st, PFSYNC_S_UPD);
+ schednetisr(NETISR_PFSYNC);
+ return;
+
+ case PFSYNC_S_INS:
+ case PFSYNC_S_UPD:
+ case PFSYNC_S_DEL:
+ /* we're already handling it */
+ return;
+
+ default:
+ panic("pfsync_update_state_req: unexpected sync state %d",
+ st->sync_state);
+ }
}
void
-pfsync_bulkfail(void *v)
+pfsync_delete_state(struct pf_state *st)
{
- struct pfsync_softc *sc = v;
- int s, error;
-
#ifdef __FreeBSD__
- PF_LOCK();
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
- if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
- /* Try again in a bit */
+
#ifdef __FreeBSD__
- callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
- pfsyncif);
+ PF_LOCK_ASSERT();
#else
- timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
+ splassert(IPL_SOFTNET);
#endif
- s = splnet();
- error = pfsync_request_update(NULL, NULL);
- if (error == ENOMEM) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: cannot allocate mbufs for "
- "bulk update\n");
- } else
- pfsync_sendout(sc);
- splx(s);
- } else {
- /* Pretend like the transfer was ok */
- sc->sc_ureq_sent = 0;
- sc->sc_bulk_tries = 0;
-#if NCARP > 0
- if (!pfsync_sync_ok)
+
+ if (sc == NULL)
+ return;
+
+ if (ISSET(st->state_flags, PFSTATE_ACK))
+ pfsync_deferred(st, 1);
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
+ if (st->sync_state != PFSYNC_S_NONE)
+ pfsync_q_del(st);
+ return;
+ }
+
+ if (sc->sc_len == PFSYNC_MINPKT)
#ifdef __FreeBSD__
-#ifdef CARP_ADVANCED
- carp_group_demote_adj(sc->sc_ifp, -1);
-#endif
+ callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
+ V_pfsyncif);
#else
- carp_group_demote_adj(&sc->sc_if, -1);
-#endif
+ timeout_add_sec(&sc->sc_tmo, 1);
#endif
- pfsync_sync_ok = 1;
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync: failed to receive "
- "bulk update status\n");
- timeout_del(&sc->sc_bulkfail_tmo);
+
+ switch (st->sync_state) {
+ case PFSYNC_S_INS:
+ /* we never got to tell the world so just forget about it */
+ pfsync_q_del(st);
+ return;
+
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_UPD:
+ case PFSYNC_S_IACK:
+ pfsync_q_del(st);
+ /* FALLTHROUGH to putting it on the del list */
+
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(st, PFSYNC_S_DEL);
+ return;
+
+ default:
+ panic("pfsync_delete_state: unexpected sync state %d",
+ st->sync_state);
}
+}
+
+void
+pfsync_clear_states(u_int32_t creatorid, const char *ifname)
+{
+ struct {
+ struct pfsync_subheader subh;
+ struct pfsync_clr clr;
+ } __packed r;
+
#ifdef __FreeBSD__
- PF_UNLOCK();
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
+
+#ifdef __FreeBSD__
+ PF_LOCK_ASSERT();
+#else
+ splassert(IPL_SOFTNET);
+#endif
+
+ if (sc == NULL)
+ return;
+
+ bzero(&r, sizeof(r));
+
+ r.subh.action = PFSYNC_ACT_CLR;
+ r.subh.count = htons(1);
+
+ strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
+ r.clr.creatorid = creatorid;
+
+ pfsync_send_plus(&r, sizeof(r));
}
-/* This must be called in splnet() */
-int
-pfsync_sendout(struct pfsync_softc *sc)
+void
+pfsync_q_ins(struct pf_state *st, int q)
{
-#if NBPFILTER > 0
#ifdef __FreeBSD__
- struct ifnet *ifp = sc->sc_ifp;
+ struct pfsync_softc *sc = V_pfsyncif;
#else
- struct ifnet *ifp = &sc->sc_if;
+ struct pfsync_softc *sc = pfsyncif;
#endif
+ size_t nlen = pfsync_qs[q].len;
+ int s;
+
+ PF_LOCK_ASSERT();
+
+#ifdef __FreeBSD__
+ KASSERT(st->sync_state == PFSYNC_S_NONE,
+ ("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
+#else
+ KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif
- struct mbuf *m;
+#if 1 || defined(PFSYNC_DEBUG)
+ if (sc->sc_len < PFSYNC_MINPKT)
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ panic("pfsync pkt len is too low %zu", sc->sc_len);
+#else
+ panic("pfsync pkt len is too low %d", sc->sc_len);
#endif
- timeout_del(&sc->sc_tmo);
+#endif
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ nlen += sizeof(struct pfsync_subheader);
- if (sc->sc_mbuf == NULL)
- return (0);
- m = sc->sc_mbuf;
- sc->sc_mbuf = NULL;
- sc->sc_statep.s = NULL;
+#ifdef __FreeBSD__
+ if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+#else
+ if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
+#endif
+ s = splnet();
+ pfsync_sendout();
+ splx(s);
-#if NBPFILTER > 0
- if (ifp->if_bpf)
+ nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
+ }
+
+ sc->sc_len += nlen;
+ TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+ st->sync_state = q;
+}
+
+void
+pfsync_q_del(struct pf_state *st)
+{
#ifdef __FreeBSD__
- BPF_MTAP(ifp, m);
+ struct pfsync_softc *sc = V_pfsyncif;
#else
- bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+ struct pfsync_softc *sc = pfsyncif;
#endif
+ int q = st->sync_state;
+
+#ifdef __FreeBSD__
+ KASSERT(st->sync_state != PFSYNC_S_NONE,
+ ("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
+#else
+ KASSERT(st->sync_state != PFSYNC_S_NONE);
#endif
- if (sc->sc_mbuf_net) {
- m_freem(m);
- m = sc->sc_mbuf_net;
- sc->sc_mbuf_net = NULL;
- sc->sc_statep_net.s = NULL;
+ sc->sc_len -= pfsync_qs[q].len;
+ TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ st->sync_state = PFSYNC_S_NONE;
+
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ sc->sc_len -= sizeof(struct pfsync_subheader);
+}
+
+#ifdef notyet
+void
+pfsync_update_tdb(struct tdb *t, int output)
+{
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
+#endif
+ size_t nlen = sizeof(struct pfsync_tdb);
+ int s;
+
+ if (sc == NULL)
+ return;
+
+ if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
+ if (TAILQ_EMPTY(&sc->sc_tdb_q))
+ nlen += sizeof(struct pfsync_subheader);
+
+ if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
+ s = splnet();
+ PF_LOCK();
+ pfsync_sendout();
+ PF_UNLOCK();
+ splx(s);
+
+ nlen = sizeof(struct pfsync_subheader) +
+ sizeof(struct pfsync_tdb);
+ }
+
+ sc->sc_len += nlen;
+ TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
+ SET(t->tdb_flags, TDBF_PFSYNC);
+ t->tdb_updates = 0;
+ } else {
+ if (++t->tdb_updates >= sc->sc_maxupdates)
+ schednetisr(NETISR_PFSYNC);
}
- return pfsync_sendout_mbuf(sc, m);
+ if (output)
+ SET(t->tdb_flags, TDBF_PFSYNC_RPL);
+ else
+ CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}
-#ifdef PFSYNC_TDB
-int
-pfsync_tdb_sendout(struct pfsync_softc *sc)
+void
+pfsync_delete_tdb(struct tdb *t)
{
-#if NBPFILTER > 0
#ifdef __FreeBSD__
- struct ifnet *ifp = sc->sc_ifp;
+ struct pfsync_softc *sc = V_pfsyncif;
#else
- struct ifnet *ifp = &sc->sc_if;
+ struct pfsync_softc *sc = pfsyncif;
#endif
+
+ if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
+ return;
+
+ sc->sc_len -= sizeof(struct pfsync_tdb);
+ TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+ CLR(t->tdb_flags, TDBF_PFSYNC);
+
+ if (TAILQ_EMPTY(&sc->sc_tdb_q))
+ sc->sc_len -= sizeof(struct pfsync_subheader);
+}
+
+int
+pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset)
+{
+ struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset);
+
+ bzero(ut, sizeof(*ut));
+ ut->spi = t->tdb_spi;
+ bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
+ /*
+ * When a failover happens, the master's rpl is probably above
+ * what we see here (we may be up to a second late), so
+ * increase it a bit for outbound tdbs to manage most such
+ * situations.
+ *
+ * For now, just add an offset that is likely to be larger
+ * than the number of packets we can see in one second. The RFC
+ * just says the next packet must have a higher seq value.
+ *
+ * XXX What is a good algorithm for this? We could use
+ * a rate-determined increase, but to know it, we would have
+ * to extend struct tdb.
+ * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
+ * will soon be replaced anyway. For now, just don't handle
+ * this edge case.
+ */
+#define RPL_INCR 16384
+ ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
+ RPL_INCR : 0));
+ ut->cur_bytes = htobe64(t->tdb_cur_bytes);
+ ut->sproto = t->tdb_sproto;
+
+ return (sizeof(*ut));
+}
#endif
- struct mbuf *m;
+void
+pfsync_bulk_start(void)
+{
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
#endif
- timeout_del(&sc->sc_tdb_tmo);
- if (sc->sc_mbuf_tdb == NULL)
- return (0);
- m = sc->sc_mbuf_tdb;
- sc->sc_mbuf_tdb = NULL;
- sc->sc_statep_tdb.t = NULL;
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
+ printf("pfsync: received bulk update request\n");
-#if NBPFILTER > 0
- if (ifp->if_bpf)
#ifdef __FreeBSD__
- BPF_MTAP(ifp, m);
+ PF_LOCK_ASSERT();
+ if (TAILQ_EMPTY(&V_state_list))
#else
- bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+ if (TAILQ_EMPTY(&state_list))
#endif
+ pfsync_bulk_status(PFSYNC_BUS_END);
+ else {
+ sc->sc_ureq_received = time_uptime;
+ if (sc->sc_bulk_next == NULL)
+#ifdef __FreeBSD__
+ sc->sc_bulk_next = TAILQ_FIRST(&V_state_list);
+#else
+ sc->sc_bulk_next = TAILQ_FIRST(&state_list);
#endif
+ sc->sc_bulk_last = sc->sc_bulk_next;
- return pfsync_sendout_mbuf(sc, m);
+ pfsync_bulk_status(PFSYNC_BUS_START);
+ callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
+ }
}
-#endif
-int
-pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
+void
+pfsync_bulk_update(void *arg)
{
- struct sockaddr sa;
- struct ip *ip;
+ struct pfsync_softc *sc = arg;
+ struct pf_state *st = sc->sc_bulk_next;
+ int i = 0;
+ int s;
+
+ PF_LOCK_ASSERT();
+ s = splsoftnet();
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ CURVNET_SET(sc->sc_ifp->if_vnet);
#endif
- if (sc->sc_sync_ifp ||
+ for (;;) {
+ if (st->sync_state == PFSYNC_S_NONE &&
+ st->timeout < PFTM_MAX &&
+ st->pfsync_time <= sc->sc_ureq_received) {
+ pfsync_update_state_req(st);
+ i++;
+ }
+
+ st = TAILQ_NEXT(st, entry_list);
+ if (st == NULL)
#ifdef __FreeBSD__
- sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
+ st = TAILQ_FIRST(&V_state_list);
#else
- sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
+ st = TAILQ_FIRST(&state_list);
#endif
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
- if (m == NULL) {
- pfsyncstats.pfsyncs_onomem++;
- return (0);
+
+ if (st == sc->sc_bulk_last) {
+ /* we're done */
+ sc->sc_bulk_next = NULL;
+ sc->sc_bulk_last = NULL;
+ pfsync_bulk_status(PFSYNC_BUS_END);
+ break;
}
- ip = mtod(m, struct ip *);
- ip->ip_v = IPVERSION;
- ip->ip_hl = sizeof(*ip) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
+
#ifdef __FreeBSD__
- ip->ip_len = m->m_pkthdr.len;
+ if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
#else
- ip->ip_len = htons(m->m_pkthdr.len);
+ if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
#endif
- ip->ip_id = htons(ip_randomid());
+ sizeof(struct pfsync_state)) {
+ /* we've filled a packet */
+ sc->sc_bulk_next = st;
#ifdef __FreeBSD__
- ip->ip_off = IP_DF;
+ callout_reset(&sc->sc_bulk_tmo, 1,
+ pfsync_bulk_update, sc);
#else
- ip->ip_off = htons(IP_DF);
+ timeout_add(&sc->sc_bulk_tmo, 1);
#endif
- ip->ip_ttl = PFSYNC_DFLTTL;
- ip->ip_p = IPPROTO_PFSYNC;
- ip->ip_sum = 0;
+ break;
+ }
+ }
- bzero(&sa, sizeof(sa));
- ip->ip_src.s_addr = INADDR_ANY;
+#ifdef __FreeBSD__
+ CURVNET_RESTORE();
+#endif
+ splx(s);
+}
+
+void
+pfsync_bulk_status(u_int8_t status)
+{
+ struct {
+ struct pfsync_subheader subh;
+ struct pfsync_bus bus;
+ } __packed r;
#ifdef __FreeBSD__
- if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
+ struct pfsync_softc *sc = V_pfsyncif;
#else
- if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
+ struct pfsync_softc *sc = pfsyncif;
#endif
- m->m_flags |= M_MCAST;
- ip->ip_dst = sc->sc_sendaddr;
- sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
- pfsyncstats.pfsyncs_opackets++;
+ PF_LOCK_ASSERT();
+
+ bzero(&r, sizeof(r));
+
+ r.subh.action = PFSYNC_ACT_BUS;
+ r.subh.count = htons(1);
#ifdef __FreeBSD__
- if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
- pfsyncstats.pfsyncs_oerrors++;
- taskqueue_enqueue(taskqueue_thread, &pfsyncif->sc_send_task);
+ r.bus.creatorid = V_pf_status.hostid;
#else
- if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
- pfsyncstats.pfsyncs_oerrors++;
+ r.bus.creatorid = pf_status.hostid;
#endif
- } else
- m_freem(m);
+ r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
+ r.bus.status = status;
- return (0);
+ pfsync_send_plus(&r, sizeof(r));
}
-#ifdef PFSYNC_TDB
-/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
-pfsync_update_net_tdb(struct pfsync_tdb *pt)
+pfsync_bulk_fail(void *arg)
{
- struct tdb *tdb;
- int s;
+ struct pfsync_softc *sc = arg;
- /* check for invalid values */
- if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
- (pt->dst.sa.sa_family != AF_INET &&
- pt->dst.sa.sa_family != AF_INET6))
- goto bad;
+#ifdef __FreeBSD__
+ CURVNET_SET(sc->sc_ifp->if_vnet);
+#endif
- s = spltdb();
- tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
- if (tdb) {
- pt->rpl = ntohl(pt->rpl);
- pt->cur_bytes = betoh64(pt->cur_bytes);
+ if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
+ /* Try again */
+#ifdef __FreeBSD__
+ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
+ pfsync_bulk_fail, V_pfsyncif);
+#else
+ timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
+#endif
+ PF_LOCK();
+ pfsync_request_update(0, 0);
+ PF_UNLOCK();
+ } else {
+ /* Pretend like the transfer was ok */
+ sc->sc_ureq_sent = 0;
+ sc->sc_bulk_tries = 0;
+#if NCARP > 0
+#ifdef notyet
+#ifdef __FreeBSD__
+ if (!sc->pfsync_sync_ok)
+#else
+ if (!pfsync_sync_ok)
+#endif
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
+#endif
+#ifdef __FreeBSD__
+ sc->pfsync_sync_ok = 1;
+#else
+ pfsync_sync_ok = 1;
+#endif
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
+ printf("pfsync: failed to receive bulk update\n");
+ }
- /* Neither replay nor byte counter should ever decrease. */
- if (pt->rpl < tdb->tdb_rpl ||
- pt->cur_bytes < tdb->tdb_cur_bytes) {
- splx(s);
- goto bad;
- }
+#ifdef __FreeBSD__
+ CURVNET_RESTORE();
+#endif
+}
- tdb->tdb_rpl = pt->rpl;
- tdb->tdb_cur_bytes = pt->cur_bytes;
+void
+pfsync_send_plus(void *plus, size_t pluslen)
+{
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
+#endif
+ int s;
+
+ PF_LOCK_ASSERT();
+
+#ifdef __FreeBSD__
+ if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
+#else
+ if (sc->sc_len + pluslen > sc->sc_if.if_mtu) {
+#endif
+ s = splnet();
+ pfsync_sendout();
+ splx(s);
}
+
+ sc->sc_plus = plus;
+ sc->sc_len += (sc->sc_pluslen = pluslen);
+
+ s = splnet();
+ pfsync_sendout();
splx(s);
- return;
+}
- bad:
- if (pf_status.debug >= PF_DEBUG_MISC)
- printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
- "invalid value\n");
- pfsyncstats.pfsyncs_badstate++;
- return;
+int
+pfsync_up(void)
+{
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
+ struct pfsync_softc *sc = pfsyncif;
+#endif
+
+#ifdef __FreeBSD__
+ if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING))
+#else
+ if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
+#endif
+ return (0);
+
+ return (1);
}
-/* One of our local tdbs have been updated, need to sync rpl with others */
int
-pfsync_update_tdb(struct tdb *tdb, int output)
+pfsync_state_in_use(struct pf_state *st)
{
- struct ifnet *ifp = NULL;
+#ifdef __FreeBSD__
+ struct pfsync_softc *sc = V_pfsyncif;
+#else
struct pfsync_softc *sc = pfsyncif;
- struct pfsync_header *h;
- struct pfsync_tdb *pt = NULL;
- int s, i, ret;
+#endif
if (sc == NULL)
return (0);
+ if (st->sync_state != PFSYNC_S_NONE ||
+ st == sc->sc_bulk_next ||
+ st == sc->sc_bulk_last)
+ return (1);
+
+ return (0);
+}
+
+u_int pfsync_ints;
+u_int pfsync_tmos;
+
+void
+pfsync_timeout(void *arg)
+{
+#if defined(__FreeBSD__) && defined(VIMAGE)
+ struct pfsync_softc *sc = arg;
+#endif
+ int s;
+
#ifdef __FreeBSD__
- ifp = sc->sc_ifp;
-#else
- ifp = &sc->sc_if;
+ CURVNET_SET(sc->sc_ifp->if_vnet);
#endif
- if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+
+ pfsync_tmos++;
+
+ s = splnet();
#ifdef __FreeBSD__
- sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
-#else
- sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+ PF_LOCK();
#endif
- /* Don't leave any stale pfsync packets hanging around. */
- if (sc->sc_mbuf_tdb != NULL) {
- m_freem(sc->sc_mbuf_tdb);
- sc->sc_mbuf_tdb = NULL;
- sc->sc_statep_tdb.t = NULL;
- }
- return (0);
- }
+ pfsync_sendout();
+#ifdef __FreeBSD__
+ PF_UNLOCK();
+#endif
+ splx(s);
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ CURVNET_RESTORE();
#endif
+}
+
+/* this is a softnet/netisr handler */
+void
+#ifdef __FreeBSD__
+pfsyncintr(void *arg)
+{
+ struct pfsync_softc *sc = arg;
+ struct mbuf *m, *n;
+
+ CURVNET_SET(sc->sc_ifp->if_vnet);
+ pfsync_ints++;
+
+ PF_LOCK();
+ if (sc->sc_len > PFSYNC_MINPKT)
+ pfsync_sendout1(0);
+ _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
+ PF_UNLOCK();
+
+ for (; m != NULL; m = n) {
+
+ n = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)
+ == 0)
+ V_pfsyncstats.pfsyncs_opackets++;
+ else
+ V_pfsyncstats.pfsyncs_oerrors++;
+ }
+ CURVNET_RESTORE();
+}
+#else
+pfsyncintr(void)
+{
+ int s;
+
+ pfsync_ints++;
+
s = splnet();
- if (sc->sc_mbuf_tdb == NULL) {
- if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD,
- (void *)&sc->sc_statep_tdb.t)) == NULL) {
- splx(s);
- return (ENOMEM);
- }
- h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
- } else {
- h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
- if (h->action != PFSYNC_ACT_TDB_UPD) {
- /*
- * XXX will never happen as long as there's
- * only one "TDB action".
- */
- pfsync_tdb_sendout(sc);
- sc->sc_mbuf_tdb = pfsync_get_mbuf(sc,
- PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t);
- if (sc->sc_mbuf_tdb == NULL) {
- splx(s);
- return (ENOMEM);
- }
- h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
- } else if (sc->sc_maxupdates) {
- /*
- * If it's an update, look in the packet to see if
- * we already have an update for the state.
- */
- struct pfsync_tdb *u =
- (void *)((char *)h + PFSYNC_HDRLEN);
-
- for (i = 0; !pt && i < h->count; i++) {
- if (tdb->tdb_spi == u->spi &&
- tdb->tdb_sproto == u->sproto &&
- !bcmp(&tdb->tdb_dst, &u->dst,
- SA_LEN(&u->dst.sa))) {
- pt = u;
- pt->updates++;
- }
- u++;
- }
- }
+ pfsync_sendout();
+ splx(s);
+}
+#endif
+
+int
+pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen)
+{
+
+#ifdef notyet
+ /* All sysctl names at this level are terminal. */
+ if (namelen != 1)
+ return (ENOTDIR);
+
+ switch (name[0]) {
+ case PFSYNCCTL_STATS:
+ if (newp != NULL)
+ return (EPERM);
+ return (sysctl_struct(oldp, oldlenp, newp, newlen,
+ &V_pfsyncstats, sizeof(V_pfsyncstats)));
}
+#endif
+ return (ENOPROTOOPT);
+}
- if (pt == NULL) {
- /* not a "duplicate" update */
- pt = sc->sc_statep_tdb.t++;
- sc->sc_mbuf_tdb->m_pkthdr.len =
- sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb);
- h->count++;
- bzero(pt, sizeof(*pt));
+#ifdef __FreeBSD__
+static int
+pfsync_multicast_setup(struct pfsync_softc *sc)
+{
+ struct ip_moptions *imo = &sc->sc_imo;
+ int error;
- pt->spi = tdb->tdb_spi;
- memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst);
- pt->sproto = tdb->tdb_sproto;
+ if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
+ sc->sc_sync_if = NULL;
+ return (EADDRNOTAVAIL);
}
- /*
- * When a failover happens, the master's rpl is probably above
- * what we see here (we may be up to a second late), so
- * increase it a bit for outbound tdbs to manage most such
- * situations.
- *
- * For now, just add an offset that is likely to be larger
- * than the number of packets we can see in one second. The RFC
- * just says the next packet must have a higher seq value.
- *
- * XXX What is a good algorithm for this? We could use
- * a rate-determined increase, but to know it, we would have
- * to extend struct tdb.
- * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
- * will soon be replaced anyway. For now, just don't handle
- * this edge case.
- */
-#define RPL_INCR 16384
- pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0));
- pt->cur_bytes = htobe64(tdb->tdb_cur_bytes);
+ imo->imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
+ M_WAITOK | M_ZERO);
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_multicast_vif = -1;
- if (h->count == sc->sc_maxcount ||
- (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates)))
- ret = pfsync_tdb_sendout(sc);
+ if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL,
+ &imo->imo_membership[0])) != 0) {
+ free(imo->imo_membership, M_PFSYNC);
+ return (error);
+ }
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = sc->sc_sync_if;
+ imo->imo_multicast_ttl = PFSYNC_DFLTTL;
+ imo->imo_multicast_loop = 0;
- splx(s);
- return (ret);
+ return (0);
}
-#endif /* PFSYNC_TDB */
-#ifdef __FreeBSD__
-void
-pfsync_ifdetach(void *arg, struct ifnet *ifp)
+static void
+pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
- struct pfsync_softc *sc = (struct pfsync_softc *)arg;
- struct ip_moptions *imo;
+ struct ip_moptions *imo = &sc->sc_imo;
+
+ in_leavegroup(imo->imo_membership[0], NULL);
+ free(imo->imo_membership, M_PFSYNC);
+ imo->imo_membership = NULL;
+ imo->imo_multicast_ifp = NULL;
+}
- if (sc == NULL || sc->sc_sync_ifp != ifp)
- return; /* not for us; unlocked read */
+#ifdef INET
+extern struct domain inetdomain;
+static struct protosw in_pfsync_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PFSYNC,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = pfsync_input,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+#endif
+static int
+pfsync_init()
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ int error = 0;
+
+ VNET_LIST_RLOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_pfsync_cloner = pfsync_cloner;
+ V_pfsync_cloner_data = pfsync_cloner_data;
+ V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data;
+ if_clone_attach(&V_pfsync_cloner);
+ error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif,
+ SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
+ CURVNET_RESTORE();
+ if (error)
+ goto fail_locked;
+ }
+ VNET_LIST_RUNLOCK();
+#ifdef INET
+ error = pf_proto_register(PF_INET, &in_pfsync_protosw);
+ if (error)
+ goto fail;
+ error = ipproto_register(IPPROTO_PFSYNC);
+ if (error) {
+ pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
+ goto fail;
+ }
+#endif
PF_LOCK();
+ pfsync_state_import_ptr = pfsync_state_import;
+ pfsync_up_ptr = pfsync_up;
+ pfsync_insert_state_ptr = pfsync_insert_state;
+ pfsync_update_state_ptr = pfsync_update_state;
+ pfsync_delete_state_ptr = pfsync_delete_state;
+ pfsync_clear_states_ptr = pfsync_clear_states;
+ pfsync_state_in_use_ptr = pfsync_state_in_use;
+ pfsync_defer_ptr = pfsync_defer;
+ PF_UNLOCK();
- /* Deal with a member interface going away from under us. */
- sc->sc_sync_ifp = NULL;
- if (sc->sc_mbuf_net != NULL) {
- m_freem(sc->sc_mbuf_net);
- sc->sc_mbuf_net = NULL;
- sc->sc_statep_net.s = NULL;
- }
- imo = &sc->sc_imo;
- if (imo->imo_num_memberships > 0) {
- KASSERT(imo->imo_num_memberships == 1,
- ("%s: imo_num_memberships != 1", __func__));
- /*
- * Our event handler is always called after protocol
- * domains have been detached from the underlying ifnet.
- * Do not call in_delmulti(); we held a single reference
- * which the protocol domain has purged in in_purgemaddrs().
- */
- PF_UNLOCK();
- imo->imo_membership[--imo->imo_num_memberships] = NULL;
- PF_LOCK();
- imo->imo_multicast_ifp = NULL;
+ return (0);
+
+fail:
+ VNET_LIST_RLOCK();
+fail_locked:
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ if (V_pfsync_swi_cookie) {
+ swi_remove(V_pfsync_swi_cookie);
+ if_clone_detach(&V_pfsync_cloner);
+ }
+ CURVNET_RESTORE();
}
+ VNET_LIST_RUNLOCK();
- PF_UNLOCK();
+ return (error);
}
-void
-pfsync_senddef(void *arg, __unused int pending)
+static void
+pfsync_uninit()
{
- struct pfsync_softc *sc = (struct pfsync_softc *)arg;
- struct mbuf *m;
+ VNET_ITERATOR_DECL(vnet_iter);
- for(;;) {
- IF_DEQUEUE(&sc->sc_ifq, m);
- if (m == NULL)
- break;
- /* Deal with a member interface going away from under us. */
- if (sc->sc_sync_ifp == NULL) {
- pfsyncstats.pfsyncs_oerrors++;
- m_freem(m);
- continue;
- }
- if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
- pfsyncstats.pfsyncs_oerrors++;
+ PF_LOCK();
+ pfsync_state_import_ptr = NULL;
+ pfsync_up_ptr = NULL;
+ pfsync_insert_state_ptr = NULL;
+ pfsync_update_state_ptr = NULL;
+ pfsync_delete_state_ptr = NULL;
+ pfsync_clear_states_ptr = NULL;
+ pfsync_state_in_use_ptr = NULL;
+ pfsync_defer_ptr = NULL;
+ PF_UNLOCK();
+
+ ipproto_unregister(IPPROTO_PFSYNC);
+ pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
+ VNET_LIST_RLOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ swi_remove(V_pfsync_swi_cookie);
+ if_clone_detach(&V_pfsync_cloner);
+ CURVNET_RESTORE();
}
+ VNET_LIST_RUNLOCK();
}
static int
@@ -2304,17 +3459,23 @@ pfsync_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- pfsyncattach(0);
+ error = pfsync_init();
+ break;
+ case MOD_QUIESCE:
+ /*
+ * Module should not be unloaded due to race conditions.
+ */
+ error = EPERM;
break;
case MOD_UNLOAD:
- if_clone_detach(&pfsync_cloner);
+ pfsync_uninit();
break;
default:
error = EINVAL;
break;
}
- return error;
+ return (error);
}
static moduledata_t pfsync_mod = {
@@ -2325,7 +3486,7 @@ static moduledata_t pfsync_mod = {
#define PFSYNC_MODVER 1
-DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
-MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
+MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/if_pfsync.h b/freebsd/sys/contrib/pf/net/if_pfsync.h
index f306610f..17259b78 100644
--- a/freebsd/sys/contrib/pf/net/if_pfsync.h
+++ b/freebsd/sys/contrib/pf/net/if_pfsync.h
@@ -1,5 +1,4 @@
-/* $FreeBSD$ */
-/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */
+/* $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ */
/*
* Copyright (c) 2001 Michael Shalayeff
@@ -27,227 +26,217 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * Copyright (c) 2008 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
#ifndef _NET_IF_PFSYNC_H_
-#define _NET_IF_PFSYNC_H_
+#define _NET_IF_PFSYNC_H_
+#define PFSYNC_VERSION 5
+#define PFSYNC_DFLTTL 255
-#define PFSYNC_ID_LEN sizeof(u_int64_t)
+#define PFSYNC_ACT_CLR 0 /* clear all states */
+#define PFSYNC_ACT_INS 1 /* insert state */
+#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */
+#define PFSYNC_ACT_UPD 3 /* update state */
+#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */
+#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */
+#define PFSYNC_ACT_DEL 6 /* delete state */
+#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */
+#define PFSYNC_ACT_INS_F 8 /* insert fragment */
+#define PFSYNC_ACT_DEL_F 9 /* delete fragments */
+#define PFSYNC_ACT_BUS 10 /* bulk update status */
+#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */
+#define PFSYNC_ACT_EOF 12 /* end of frame */
+#define PFSYNC_ACT_MAX 13
+
+#define PFSYNC_ACTIONS "CLR ST", \
+ "INS ST", \
+ "INS ST ACK", \
+ "UPD ST", \
+ "UPD ST COMP", \
+ "UPD ST REQ", \
+ "DEL ST", \
+ "DEL ST COMP", \
+ "INS FR", \
+ "DEL FR", \
+ "BULK UPD STAT", \
+ "TDB UPD", \
+ "EOF"
+
+#define PFSYNC_HMAC_LEN 20
-struct pfsync_state_scrub {
- u_int16_t pfss_flags;
- u_int8_t pfss_ttl; /* stashed TTL */
-#define PFSYNC_SCRUB_FLAG_VALID 0x01
- u_int8_t scrub_flag;
- u_int32_t pfss_ts_mod; /* timestamp modulation */
-} __packed;
+/*
+ * A pfsync frame is built from a header followed by several sections which
+ * are all prefixed with their own subheaders. Frames must be terminated with
+ * an EOF subheader.
+ *
+ * | ... |
+ * | IP header |
+ * +============================+
+ * | pfsync_header |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | first action fields |
+ * | ... |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | second action fields |
+ * | ... |
+ * +----------------------------+
+ * | EOF pfsync_subheader |
+ * +----------------------------+
+ * | HMAC |
+ * +============================+
+ */
-struct pfsync_state_host {
- struct pf_addr addr;
- u_int16_t port;
- u_int16_t pad[3];
+/*
+ * Frame header
+ */
+
+struct pfsync_header {
+ u_int8_t version;
+ u_int8_t _pad;
+ u_int16_t len;
+ u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH];
} __packed;
-struct pfsync_state_peer {
- struct pfsync_state_scrub scrub; /* state is scrubbed */
- u_int32_t seqlo; /* Max sequence number sent */
- u_int32_t seqhi; /* Max the other end ACKd + win */
- u_int32_t seqdiff; /* Sequence number modulator */
- u_int16_t max_win; /* largest window (pre scaling) */
- u_int16_t mss; /* Maximum segment size option */
- u_int8_t state; /* active state level */
- u_int8_t wscale; /* window scaling factor */
- u_int8_t pad[6];
+/*
+ * Frame region subheader
+ */
+
+struct pfsync_subheader {
+ u_int8_t action;
+ u_int8_t _pad;
+ u_int16_t count;
} __packed;
-struct pfsync_state {
- u_int32_t id[2];
- char ifname[IFNAMSIZ];
- struct pfsync_state_host lan;
- struct pfsync_state_host gwy;
- struct pfsync_state_host ext;
- struct pfsync_state_peer src;
- struct pfsync_state_peer dst;
- struct pf_addr rt_addr;
- u_int32_t rule;
- u_int32_t anchor;
- u_int32_t nat_rule;
- u_int32_t creation;
- u_int32_t expire;
- u_int32_t packets[2][2];
- u_int32_t bytes[2][2];
- u_int32_t creatorid;
- sa_family_t af;
- u_int8_t proto;
- u_int8_t direction;
- u_int8_t log;
- u_int8_t state_flags;
- u_int8_t timeout;
- u_int8_t sync_flags;
- u_int8_t updates;
+/*
+ * CLR
+ */
+
+struct pfsync_clr {
+ char ifname[IFNAMSIZ];
+ u_int32_t creatorid;
} __packed;
-#define PFSYNC_FLAG_COMPRESS 0x01
-#define PFSYNC_FLAG_STALE 0x02
+/*
+ * INS, UPD, DEL
+ */
+
+/* these use struct pfsync_state in pfvar.h */
-#ifdef PFSYNC_TDB
-struct pfsync_tdb {
- u_int32_t spi;
- union sockaddr_union dst;
- u_int32_t rpl;
- u_int64_t cur_bytes;
- u_int8_t sproto;
- u_int8_t updates;
- u_int8_t pad[2];
+/*
+ * INS_ACK
+ */
+
+struct pfsync_ins_ack {
+ u_int64_t id;
+ u_int32_t creatorid;
} __packed;
-#endif
-struct pfsync_state_upd {
- u_int32_t id[2];
+/*
+ * UPD_C
+ */
+
+struct pfsync_upd_c {
+ u_int64_t id;
struct pfsync_state_peer src;
struct pfsync_state_peer dst;
- u_int32_t creatorid;
- u_int32_t expire;
- u_int8_t timeout;
- u_int8_t updates;
- u_int8_t pad[6];
+ u_int32_t creatorid;
+ u_int32_t expire;
+ u_int8_t timeout;
+ u_int8_t _pad[3];
} __packed;
-struct pfsync_state_del {
- u_int32_t id[2];
- u_int32_t creatorid;
- struct {
- u_int8_t state;
- } src;
- struct {
- u_int8_t state;
- } dst;
- u_int8_t pad[2];
-} __packed;
+/*
+ * UPD_REQ
+ */
-struct pfsync_state_upd_req {
- u_int32_t id[2];
- u_int32_t creatorid;
- u_int32_t pad;
+struct pfsync_upd_req {
+ u_int64_t id;
+ u_int32_t creatorid;
} __packed;
-struct pfsync_state_clr {
- char ifname[IFNAMSIZ];
- u_int32_t creatorid;
- u_int32_t pad;
-} __packed;
+/*
+ * DEL_C
+ */
-struct pfsync_state_bus {
- u_int32_t creatorid;
- u_int32_t endtime;
- u_int8_t status;
-#define PFSYNC_BUS_START 1
-#define PFSYNC_BUS_END 2
- u_int8_t pad[7];
+struct pfsync_del_c {
+ u_int64_t id;
+ u_int32_t creatorid;
} __packed;
-#ifdef _KERNEL
-
-union sc_statep {
- struct pfsync_state *s;
- struct pfsync_state_upd *u;
- struct pfsync_state_del *d;
- struct pfsync_state_clr *c;
- struct pfsync_state_bus *b;
- struct pfsync_state_upd_req *r;
-};
+/*
+ * INS_F, DEL_F
+ */
-#ifdef PFSYNC_TDB
-union sc_tdb_statep {
- struct pfsync_tdb *t;
-};
-#endif
+/* not implemented (yet) */
-extern int pfsync_sync_ok;
+/*
+ * BUS
+ */
-struct pfsync_softc {
-#ifdef __FreeBSD__
- struct ifnet *sc_ifp;
-#else
- struct ifnet sc_if;
-#endif
- struct ifnet *sc_sync_ifp;
+struct pfsync_bus {
+ u_int32_t creatorid;
+ u_int32_t endtime;
+ u_int8_t status;
+#define PFSYNC_BUS_START 1
+#define PFSYNC_BUS_END 2
+ u_int8_t _pad[3];
+} __packed;
- struct ip_moptions sc_imo;
-#ifdef __FreeBSD__
- struct callout sc_tmo;
-#ifdef PFSYNC_TDB
- struct callout sc_tdb_tmo;
-#endif
- struct callout sc_bulk_tmo;
- struct callout sc_bulkfail_tmo;
-#else
- struct timeout sc_tmo;
- struct timeout sc_tdb_tmo;
- struct timeout sc_bulk_tmo;
- struct timeout sc_bulkfail_tmo;
-#endif
- struct in_addr sc_sync_peer;
- struct in_addr sc_sendaddr;
- struct mbuf *sc_mbuf; /* current cumulative mbuf */
- struct mbuf *sc_mbuf_net; /* current cumulative mbuf */
-#ifdef PFSYNC_TDB
- struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */
-#endif
-#ifdef __FreeBSD__
- struct ifqueue sc_ifq;
- struct task sc_send_task;
-#endif
- union sc_statep sc_statep;
- union sc_statep sc_statep_net;
-#ifdef PFSYNC_TDB
- union sc_tdb_statep sc_statep_tdb;
-#endif
- u_int32_t sc_ureq_received;
- u_int32_t sc_ureq_sent;
- struct pf_state *sc_bulk_send_next;
- struct pf_state *sc_bulk_terminator;
- int sc_bulk_tries;
- int sc_maxcount; /* number of states in mtu */
- int sc_maxupdates; /* number of updates/state */
-#ifdef __FreeBSD__
- eventhandler_tag sc_detachtag;
-#endif
-};
+/*
+ * TDB
+ */
-extern struct pfsync_softc *pfsyncif;
-#endif
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t _pad[2];
+} __packed;
+/*
+ * EOF
+ */
-struct pfsync_header {
- u_int8_t version;
-#define PFSYNC_VERSION 3
- u_int8_t af;
- u_int8_t action;
-#define PFSYNC_ACT_CLR 0 /* clear all states */
-#define PFSYNC_ACT_INS 1 /* insert state */
-#define PFSYNC_ACT_UPD 2 /* update state */
-#define PFSYNC_ACT_DEL 3 /* delete state */
-#define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */
-#define PFSYNC_ACT_DEL_C 5 /* "compressed" state delete */
-#define PFSYNC_ACT_INS_F 6 /* insert fragment */
-#define PFSYNC_ACT_DEL_F 7 /* delete fragments */
-#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */
-#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */
-#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */
-#define PFSYNC_ACT_MAX 11
- u_int8_t count;
- u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+struct pfsync_eof {
+ u_int8_t hmac[PFSYNC_HMAC_LEN];
} __packed;
-#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */
-#define PFSYNC_MAX_BULKTRIES 12
-#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
-#define PFSYNC_ACTIONS \
- "CLR ST", "INS ST", "UPD ST", "DEL ST", \
- "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \
- "UPD REQ", "BLK UPD STAT", "TDB UPD"
+#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
+
-#define PFSYNC_DFLTTL 255
+
+/*
+ * Names for PFSYNC sysctl objects
+ */
+#define PFSYNCCTL_STATS 1 /* PFSYNC stats */
+#define PFSYNCCTL_MAXID 2
+
+#define PFSYNCCTL_NAMES { \
+ { 0, 0 }, \
+ { "stats", CTLTYPE_STRUCT }, \
+}
struct pfsyncstats {
u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
@@ -280,96 +269,56 @@ struct pfsyncreq {
};
#ifdef __FreeBSD__
-#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
-#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
+#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
+#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
#endif
-#define pf_state_peer_hton(s,d) do { \
- (d)->seqlo = htonl((s)->seqlo); \
- (d)->seqhi = htonl((s)->seqhi); \
- (d)->seqdiff = htonl((s)->seqdiff); \
- (d)->max_win = htons((s)->max_win); \
- (d)->mss = htons((s)->mss); \
- (d)->state = (s)->state; \
- (d)->wscale = (s)->wscale; \
- if ((s)->scrub) { \
- (d)->scrub.pfss_flags = \
- htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
- (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
- (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
- (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
- } \
-} while (0)
-
-#define pf_state_peer_ntoh(s,d) do { \
- (d)->seqlo = ntohl((s)->seqlo); \
- (d)->seqhi = ntohl((s)->seqhi); \
- (d)->seqdiff = ntohl((s)->seqdiff); \
- (d)->max_win = ntohs((s)->max_win); \
- (d)->mss = ntohs((s)->mss); \
- (d)->state = (s)->state; \
- (d)->wscale = (s)->wscale; \
- if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
- (d)->scrub != NULL) { \
- (d)->scrub->pfss_flags = \
- ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
- (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
- (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
- } \
-} while (0)
-
-#define pf_state_host_hton(s,d) do { \
- bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \
- (d)->port = (s)->port; \
-} while (0)
-
-#define pf_state_host_ntoh(s,d) do { \
- bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \
- (d)->port = (s)->port; \
-} while (0)
-
-#define pf_state_counter_hton(s,d) do { \
- d[0] = htonl((s>>32)&0xffffffff); \
- d[1] = htonl(s&0xffffffff); \
-} while (0)
-
-#define pf_state_counter_ntoh(s,d) do { \
- d = ntohl(s[0]); \
- d = d<<32; \
- d += ntohl(s[1]); \
-} while (0)
-
#ifdef _KERNEL
+
+/*
+ * this shows where a pf state is with respect to the syncing.
+ */
+#define PFSYNC_S_INS 0x00
+#define PFSYNC_S_IACK 0x01
+#define PFSYNC_S_UPD 0x02
+#define PFSYNC_S_UPD_C 0x03
+#define PFSYNC_S_DEL 0x04
+#define PFSYNC_S_COUNT 0x05
+
+#define PFSYNC_S_DEFER 0xfe
+#define PFSYNC_S_NONE 0xff
+
#ifdef __FreeBSD__
-void pfsync_input(struct mbuf *, __unused int);
+void pfsync_input(struct mbuf *, __unused int);
#else
-void pfsync_input(struct mbuf *, ...);
+void pfsync_input(struct mbuf *, ...);
#endif
-int pfsync_clear_states(u_int32_t, char *);
-int pfsync_pack_state(u_int8_t, struct pf_state *, int);
-#define pfsync_insert_state(st) do { \
- if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \
- (st->proto == IPPROTO_PFSYNC)) \
- st->sync_flags |= PFSTATE_NOSYNC; \
- else if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_INS, (st), \
- PFSYNC_FLAG_COMPRESS); \
- st->sync_flags &= ~PFSTATE_FROMSYNC; \
-} while (0)
-#define pfsync_update_state(st) do { \
- if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_UPD, (st), \
- PFSYNC_FLAG_COMPRESS); \
- st->sync_flags &= ~PFSTATE_FROMSYNC; \
-} while (0)
-#define pfsync_delete_state(st) do { \
- if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_DEL, (st), \
- PFSYNC_FLAG_COMPRESS); \
-} while (0)
-#ifdef PFSYNC_TDB
-int pfsync_update_tdb(struct tdb *, int);
+int pfsync_sysctl(int *, u_int, void *, size_t *,
+ void *, size_t);
+
+#define PFSYNC_SI_IOCTL 0x01
+#define PFSYNC_SI_CKSUM 0x02
+#define PFSYNC_SI_ACK 0x04
+int pfsync_state_import(struct pfsync_state *, u_int8_t);
+#ifndef __FreeBSD__
+void pfsync_state_export(struct pfsync_state *,
+ struct pf_state *);
#endif
+
+void pfsync_insert_state(struct pf_state *);
+void pfsync_update_state(struct pf_state *);
+void pfsync_delete_state(struct pf_state *);
+void pfsync_clear_states(u_int32_t, const char *);
+
+#ifdef notyet
+void pfsync_update_tdb(struct tdb *, int);
+void pfsync_delete_tdb(struct tdb *);
+#endif
+
+int pfsync_defer(struct pf_state *, struct mbuf *);
+
+int pfsync_up(void);
+int pfsync_state_in_use(struct pf_state *);
#endif
#endif /* _NET_IF_PFSYNC_H_ */
diff --git a/freebsd/sys/contrib/pf/net/pf.c b/freebsd/sys/contrib/pf/net/pf.c
index 70123329..edb4b2e9 100644
--- a/freebsd/sys/contrib/pf/net/pf.c
+++ b/freebsd/sys/contrib/pf/net/pf.c
@@ -1,11 +1,10 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
-/* add: $OpenBSD: pf.c,v 1.559 2007/09/18 18:45:59 markus Exp $ */
+/* $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
- * Copyright (c) 2002,2003 Henning Brauer
+ * Copyright (c) 2002 - 2008 Henning Brauer
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,28 +49,19 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_pf.h>
-#ifdef DEV_BPF
-#define NBPFILTER DEV_BPF
-#else
-#define NBPFILTER 0
-#endif
-
-#ifdef DEV_PFLOG
-#define NPFLOG DEV_PFLOG
-#else
-#define NPFLOG 0
-#endif
+#define NPFSYNC 1
-#ifdef DEV_PFSYNC
-#define NPFSYNC DEV_PFSYNC
+#ifdef DEV_PFLOW
+#define NPFLOW DEV_PFLOW
#else
-#define NPFSYNC 0
+#define NPFLOW 0
#endif
#else
#include "bpfilter.h"
#include "pflog.h"
#include "pfsync.h"
+#include "pflow.h"
#endif
#include <rtems/bsd/sys/param.h>
@@ -83,8 +73,10 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <rtems/bsd/sys/time.h>
#ifdef __FreeBSD__
+#include <sys/random.h>
#include <sys/sysctl.h>
#include <sys/endian.h>
+#define betoh64 be64toh
#else
#include <sys/pool.h>
#endif
@@ -97,11 +89,21 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#endif
+#ifdef __FreeBSD__
+#include <sys/md5.h>
+#else
+#include <crypto/md5.h>
+#endif
+
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
-#ifndef __FreeBSD__
+#ifdef __FreeBSD__
+#ifdef RADIX_MPATH
+#include <net/radix_mpath.h>
+#endif
+#else
#include <net/radix_mpath.h>
#endif
@@ -120,16 +122,18 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp_var.h>
#include <netinet/icmp_var.h>
#include <netinet/if_ether.h>
+#ifdef __FreeBSD__
+#include <netinet/ip_fw.h>
+#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
+#endif
#ifndef __FreeBSD__
#include <dev/rndvar.h>
#endif
#include <net/pfvar.h>
#include <net/if_pflog.h>
-
-#if NPFSYNC > 0
+#include <net/if_pflow.h>
#include <net/if_pfsync.h>
-#endif /* NPFSYNC > 0 */
#ifdef INET6
#include <netinet/ip6.h>
@@ -149,15 +153,61 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
extern int ip_optcopy(struct ip *, struct ip *);
-extern int debug_pfugidhack;
#endif
-#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+#ifdef __FreeBSD__
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+#else
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+#endif
/*
* Global variables
*/
+/* state tables */
+#ifdef __FreeBSD__
+VNET_DEFINE(struct pf_state_tree, pf_statetbl);
+
+VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]);
+VNET_DEFINE(struct pf_palist, pf_pabuf);
+VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
+VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
+VNET_DEFINE(struct pf_status, pf_status);
+
+VNET_DEFINE(u_int32_t, ticket_altqs_active);
+VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
+VNET_DEFINE(int, altqs_inactive_open);
+VNET_DEFINE(u_int32_t, ticket_pabuf);
+
+VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx);
+#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
+VNET_DEFINE(u_char, pf_tcp_secret[16]);
+#define V_pf_tcp_secret VNET(pf_tcp_secret)
+VNET_DEFINE(int, pf_tcp_secret_init);
+#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
+VNET_DEFINE(int, pf_tcp_iss_off);
+#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
+
+struct pf_anchor_stackframe {
+ struct pf_ruleset *rs;
+ struct pf_rule *r;
+ struct pf_anchor_node *parent;
+ struct pf_anchor *child;
+};
+VNET_DEFINE(struct pf_anchor_stackframe, pf_anchor_stack[64]);
+#define V_pf_anchor_stack VNET(pf_anchor_stack)
+
+VNET_DEFINE(uma_zone_t, pf_src_tree_pl);
+VNET_DEFINE(uma_zone_t, pf_rule_pl);
+VNET_DEFINE(uma_zone_t, pf_pooladdr_pl);
+VNET_DEFINE(uma_zone_t, pf_state_pl);
+VNET_DEFINE(uma_zone_t, pf_state_key_pl);
+VNET_DEFINE(uma_zone_t, pf_state_item_pl);
+VNET_DEFINE(uma_zone_t, pf_altq_pl);
+#else
+struct pf_state_tree pf_statetbl;
+
struct pf_altqqueue pf_altqs[2];
struct pf_palist pf_pabuf;
struct pf_altqqueue *pf_altqs_active;
@@ -169,6 +219,11 @@ u_int32_t ticket_altqs_inactive;
int altqs_inactive_open;
u_int32_t ticket_pabuf;
+MD5_CTX pf_tcp_secret_ctx;
+u_char pf_tcp_secret[16];
+int pf_tcp_secret_init;
+int pf_tcp_iss_off;
+
struct pf_anchor_stackframe {
struct pf_ruleset *rs;
struct pf_rule *r;
@@ -176,16 +231,11 @@ struct pf_anchor_stackframe {
struct pf_anchor *child;
} pf_anchor_stack[64];
-#ifdef __FreeBSD__
-uma_zone_t pf_src_tree_pl, pf_rule_pl;
-uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
-#else
-struct pool pf_src_tree_pl, pf_rule_pl;
-struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
+struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
+struct pool pf_altq_pl;
#endif
-void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
-
void pf_init_threshold(struct pf_threshold *, u_int32_t,
u_int32_t);
void pf_add_threshold(struct pf_threshold *);
@@ -214,18 +264,12 @@ void pf_send_tcp(const struct pf_rule *, sa_family_t,
u_int16_t, u_int16_t, u_int32_t, u_int32_t,
u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
u_int16_t, struct ether_header *, struct ifnet *);
-void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
+static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
sa_family_t, struct pf_rule *);
-struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
- int, int, struct pfi_kif *,
- struct pf_addr *, u_int16_t, struct pf_addr *,
- u_int16_t, int);
-struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
- int, int, struct pfi_kif *, struct pf_src_node **,
- struct pf_addr *, u_int16_t,
- struct pf_addr *, u_int16_t,
- struct pf_addr *, u_int16_t *);
-int pf_test_tcp(struct pf_rule **, struct pf_state **,
+void pf_detach_state(struct pf_state *);
+void pf_state_key_detach(struct pf_state *, int);
+u_int32_t pf_tcp_iss(struct pf_pdesc *);
+int pf_test_rule(struct pf_rule **, struct pf_state **,
int, struct pfi_kif *, struct mbuf *, int,
void *, struct pf_pdesc *, struct pf_rule **,
#ifdef __FreeBSD__
@@ -234,23 +278,14 @@ int pf_test_tcp(struct pf_rule **, struct pf_state **,
#else
struct pf_ruleset **, struct ifqueue *);
#endif
-int pf_test_udp(struct pf_rule **, struct pf_state **,
- int, struct pfi_kif *, struct mbuf *, int,
- void *, struct pf_pdesc *, struct pf_rule **,
-#ifdef __FreeBSD__
- struct pf_ruleset **, struct ifqueue *,
- struct inpcb *);
-#else
- struct pf_ruleset **, struct ifqueue *);
-#endif
-int pf_test_icmp(struct pf_rule **, struct pf_state **,
- int, struct pfi_kif *, struct mbuf *, int,
- void *, struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **, struct ifqueue *);
-int pf_test_other(struct pf_rule **, struct pf_state **,
- int, struct pfi_kif *, struct mbuf *, int, void *,
- struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **, struct ifqueue *);
+static __inline int pf_create_state(struct pf_rule *, struct pf_rule *,
+ struct pf_rule *, struct pf_pdesc *,
+ struct pf_src_node *, struct pf_state_key *,
+ struct pf_state_key *, struct pf_state_key *,
+ struct pf_state_key *, struct mbuf *, int,
+ u_int16_t, u_int16_t, int *, struct pfi_kif *,
+ struct pf_state **, int, u_int16_t, u_int16_t,
+ int);
int pf_test_fragment(struct pf_rule **, int,
struct pfi_kif *, struct mbuf *, void *,
struct pf_pdesc *, struct pf_rule **,
@@ -259,7 +294,7 @@ int pf_tcp_track_full(struct pf_state_peer *,
struct pf_state_peer *, struct pf_state **,
struct pfi_kif *, struct mbuf *, int,
struct pf_pdesc *, u_short *, int *);
-int pf_tcp_track_sloppy(struct pf_state_peer *,
+int pf_tcp_track_sloppy(struct pf_state_peer *,
struct pf_state_peer *, struct pf_state **,
struct pf_pdesc *, u_short *);
int pf_test_state_tcp(struct pf_state **, int,
@@ -272,30 +307,14 @@ int pf_test_state_icmp(struct pf_state **, int,
struct pfi_kif *, struct mbuf *, int,
void *, struct pf_pdesc *, u_short *);
int pf_test_state_other(struct pf_state **, int,
- struct pfi_kif *, struct pf_pdesc *);
-int pf_match_tag(struct mbuf *, struct pf_rule *,
- struct pf_mtag *, int *);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
- int, struct pf_rule **, struct pf_rule **,
- int *);
-void pf_hash(struct pf_addr *, struct pf_addr *,
- struct pf_poolhashkey *, sa_family_t);
-int pf_map_addr(u_int8_t, struct pf_rule *,
- struct pf_addr *, struct pf_addr *,
- struct pf_addr *, struct pf_src_node **);
-int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
- struct pf_addr *, struct pf_addr *, u_int16_t,
- struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
- struct pf_src_node **);
+ struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
void pf_route(struct mbuf **, struct pf_rule *, int,
struct ifnet *, struct pf_state *,
struct pf_pdesc *);
void pf_route6(struct mbuf **, struct pf_rule *, int,
struct ifnet *, struct pf_state *,
struct pf_pdesc *);
-#ifdef __FreeBSD__
-/* XXX: import */
-#else
+#ifndef __FreeBSD__
int pf_socket_lookup(int, struct pf_pdesc *);
#endif
u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
@@ -303,24 +322,37 @@ u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
sa_family_t);
u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
- u_int16_t);
+ int, u_int16_t);
void pf_set_rt_ifp(struct pf_state *,
struct pf_addr *);
int pf_check_proto_cksum(struct mbuf *, int, int,
u_int8_t, sa_family_t);
+#ifndef __FreeBSD__
+struct pf_divert *pf_get_divert(struct mbuf *);
+#endif
+void pf_print_state_parts(struct pf_state *,
+ struct pf_state_key *, struct pf_state_key *);
int pf_addr_wrap_neq(struct pf_addr_wrap *,
struct pf_addr_wrap *);
-struct pf_state *pf_find_state_recurse(struct pfi_kif *,
- struct pf_state_cmp *, u_int8_t);
+int pf_compare_state_keys(struct pf_state_key *,
+ struct pf_state_key *, struct pfi_kif *, u_int);
+#ifdef __FreeBSD__
+struct pf_state *pf_find_state(struct pfi_kif *,
+ struct pf_state_key_cmp *, u_int, struct mbuf *,
+ struct pf_mtag *);
+#else
+struct pf_state *pf_find_state(struct pfi_kif *,
+ struct pf_state_key_cmp *, u_int, struct mbuf *);
+#endif
int pf_src_connlimit(struct pf_state **);
int pf_check_congestion(struct ifqueue *);
#ifdef __FreeBSD__
int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
-extern int pf_end_threads;
+VNET_DECLARE(int, pf_end_threads);
-struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
+VNET_DEFINE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]);
#else
extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;
@@ -334,80 +366,101 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
};
#endif
-#define STATE_LOOKUP() \
+#ifdef __FreeBSD__
+#define PPACKET_LOOPED() \
+ (pd->pf_mtag->flags & PF_PACKET_LOOPED)
+
+#define PACKET_LOOPED() \
+ (pd.pf_mtag->flags & PF_PACKET_LOOPED)
+
+#define STATE_LOOKUP(i, k, d, s, m, pt) \
do { \
- if (direction == PF_IN) \
- *state = pf_find_state_recurse( \
- kif, &key, PF_EXT_GWY); \
- else \
- *state = pf_find_state_recurse( \
- kif, &key, PF_LAN_EXT); \
- if (*state == NULL || (*state)->timeout == PFTM_PURGE) \
+ s = pf_find_state(i, k, d, m, pt); \
+ if (s == NULL || (s)->timeout == PFTM_PURGE) \
return (PF_DROP); \
- if (direction == PF_OUT && \
- (((*state)->rule.ptr->rt == PF_ROUTETO && \
- (*state)->rule.ptr->direction == PF_OUT) || \
- ((*state)->rule.ptr->rt == PF_REPLYTO && \
- (*state)->rule.ptr->direction == PF_IN)) && \
- (*state)->rt_kif != NULL && \
- (*state)->rt_kif != kif) \
+ if (PPACKET_LOOPED()) \
+ return (PF_PASS); \
+ if (d == PF_OUT && \
+ (((s)->rule.ptr->rt == PF_ROUTETO && \
+ (s)->rule.ptr->direction == PF_OUT) || \
+ ((s)->rule.ptr->rt == PF_REPLYTO && \
+ (s)->rule.ptr->direction == PF_IN)) && \
+ (s)->rt_kif != NULL && \
+ (s)->rt_kif != i) \
return (PF_PASS); \
} while (0)
+#else
+#define STATE_LOOKUP(i, k, d, s, m) \
+ do { \
+ s = pf_find_state(i, k, d, m); \
+ if (s == NULL || (s)->timeout == PFTM_PURGE) \
+ return (PF_DROP); \
+ if (d == PF_OUT && \
+ (((s)->rule.ptr->rt == PF_ROUTETO && \
+ (s)->rule.ptr->direction == PF_OUT) || \
+ ((s)->rule.ptr->rt == PF_REPLYTO && \
+ (s)->rule.ptr->direction == PF_IN)) && \
+ (s)->rt_kif != NULL && \
+ (s)->rt_kif != i) \
+ return (PF_PASS); \
+ } while (0)
+#endif
-#define STATE_TRANSLATE(s) \
- (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
- ((s)->af == AF_INET6 && \
- ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
- (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
- (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
- (s)->lan.port != (s)->gwy.port
-
-#define BOUND_IFACE(r, k) \
+#ifdef __FreeBSD__
+#define BOUND_IFACE(r, k) \
+ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all
+#else
+#define BOUND_IFACE(r, k) \
((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
+#endif
-#define STATE_INC_COUNTERS(s) \
+#define STATE_INC_COUNTERS(s) \
do { \
- s->rule.ptr->states++; \
- if (s->anchor.ptr != NULL) \
- s->anchor.ptr->states++; \
- if (s->nat_rule.ptr != NULL) \
- s->nat_rule.ptr->states++; \
+ s->rule.ptr->states_cur++; \
+ s->rule.ptr->states_tot++; \
+ if (s->anchor.ptr != NULL) { \
+ s->anchor.ptr->states_cur++; \
+ s->anchor.ptr->states_tot++; \
+ } \
+ if (s->nat_rule.ptr != NULL) { \
+ s->nat_rule.ptr->states_cur++; \
+ s->nat_rule.ptr->states_tot++; \
+ } \
} while (0)
-#define STATE_DEC_COUNTERS(s) \
+#define STATE_DEC_COUNTERS(s) \
do { \
if (s->nat_rule.ptr != NULL) \
- s->nat_rule.ptr->states--; \
+ s->nat_rule.ptr->states_cur--; \
if (s->anchor.ptr != NULL) \
- s->anchor.ptr->states--; \
- s->rule.ptr->states--; \
+ s->anchor.ptr->states_cur--; \
+ s->rule.ptr->states_cur--; \
} while (0)
+static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
+static __inline int pf_state_compare_key(struct pf_state_key *,
+ struct pf_state_key *);
+static __inline int pf_state_compare_id(struct pf_state *,
+ struct pf_state *);
+
+#ifdef __FreeBSD__
+VNET_DEFINE(struct pf_src_tree, tree_src_tracking);
+
+VNET_DEFINE(struct pf_state_tree_id, tree_id);
+VNET_DEFINE(struct pf_state_queue, state_list);
+#else
struct pf_src_tree tree_src_tracking;
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;
-
-#ifdef __FreeBSD__
-static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
-static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *);
-static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *);
-static int pf_state_compare_id(struct pf_state *, struct pf_state *);
#endif
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
-RB_GENERATE(pf_state_tree_lan_ext, pf_state,
- u.s.entry_lan_ext, pf_state_compare_lan_ext);
-RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
- u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
+RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
- u.s.entry_id, pf_state_compare_id);
+ entry_id, pf_state_compare_id);
-#ifdef __FreeBSD__
-static int
-#else
static __inline int
-#endif
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
int diff;
@@ -451,169 +504,6 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
return (0);
}
-#ifdef __FreeBSD__
-static int
-#else
-static __inline int
-#endif
-pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
-{
- int diff;
-
- if ((diff = a->proto - b->proto) != 0)
- return (diff);
- if ((diff = a->af - b->af) != 0)
- return (diff);
- switch (a->af) {
-#ifdef INET
- case AF_INET:
- if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
- return (1);
- if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
- return (-1);
- if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
- return (1);
- if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
- return (-1);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
- return (1);
- if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
- return (-1);
- if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
- return (1);
- if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
- return (-1);
- if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
- return (1);
- if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
- return (-1);
- if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
- return (1);
- if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
- return (-1);
- if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
- return (1);
- if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
- return (-1);
- if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
- return (1);
- if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
- return (-1);
- if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
- return (1);
- if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
- return (-1);
- if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
- return (1);
- if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
- return (-1);
- break;
-#endif /* INET6 */
- }
-
- if ((diff = a->lan.port - b->lan.port) != 0)
- return (diff);
- if ((diff = a->ext.port - b->ext.port) != 0)
- return (diff);
-
- return (0);
-}
-
-#ifdef __FreeBSD__
-static int
-#else
-static __inline int
-#endif
-pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
-{
- int diff;
-
- if ((diff = a->proto - b->proto) != 0)
- return (diff);
- if ((diff = a->af - b->af) != 0)
- return (diff);
- switch (a->af) {
-#ifdef INET
- case AF_INET:
- if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
- return (1);
- if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
- return (-1);
- if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
- return (1);
- if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
- return (-1);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
- return (1);
- if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
- return (-1);
- if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
- return (1);
- if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
- return (-1);
- if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
- return (1);
- if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
- return (-1);
- if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
- return (1);
- if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
- return (-1);
- if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
- return (1);
- if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
- return (-1);
- if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
- return (1);
- if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
- return (-1);
- if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
- return (1);
- if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
- return (-1);
- if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
- return (1);
- if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
- return (-1);
- break;
-#endif /* INET6 */
- }
-
- if ((diff = a->ext.port - b->ext.port) != 0)
- return (diff);
- if ((diff = a->gwy.port - b->gwy.port) != 0)
- return (diff);
-
- return (0);
-}
-
-#ifdef __FreeBSD__
-static int
-#else
-static __inline int
-#endif
-pf_state_compare_id(struct pf_state *a, struct pf_state *b)
-{
- if (a->id > b->id)
- return (1);
- if (a->id < b->id)
- return (-1);
- if (a->creatorid > b->creatorid)
- return (1);
- if (a->creatorid < b->creatorid)
- return (-1);
-
- return (0);
-}
-
#ifdef INET6
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
@@ -634,80 +524,6 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
}
#endif /* INET6 */
-struct pf_state *
-pf_find_state_byid(struct pf_state_cmp *key)
-{
- pf_status.fcounters[FCNT_STATE_SEARCH]++;
- return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
-}
-
-struct pf_state *
-pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
-{
- struct pf_state *s;
-
- pf_status.fcounters[FCNT_STATE_SEARCH]++;
-
- switch (tree) {
- case PF_LAN_EXT:
- if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
- (struct pf_state *)key)) != NULL)
- return (s);
- if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext,
- (struct pf_state *)key)) != NULL)
- return (s);
- return (NULL);
- case PF_EXT_GWY:
- if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
- (struct pf_state *)key)) != NULL)
- return (s);
- if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy,
- (struct pf_state *)key)) != NULL)
- return (s);
- return (NULL);
- default:
- panic("pf_find_state_recurse");
- }
-}
-
-struct pf_state *
-pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
-{
- struct pf_state *s, *ss = NULL;
- struct pfi_kif *kif;
-
- pf_status.fcounters[FCNT_STATE_SEARCH]++;
-
- switch (tree) {
- case PF_LAN_EXT:
- TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
- s = RB_FIND(pf_state_tree_lan_ext,
- &kif->pfik_lan_ext, (struct pf_state *)key);
- if (s == NULL)
- continue;
- if (more == NULL)
- return (s);
- ss = s;
- (*more)++;
- }
- return (ss);
- case PF_EXT_GWY:
- TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
- s = RB_FIND(pf_state_tree_ext_gwy,
- &kif->pfik_ext_gwy, (struct pf_state *)key);
- if (s == NULL)
- continue;
- if (more == NULL)
- return (s);
- ss = s;
- (*more)++;
- }
- return (ss);
- default:
- panic("pf_find_state_all");
- }
-}
-
void
pf_init_threshold(struct pf_threshold *threshold,
u_int32_t limit, u_int32_t seconds)
@@ -741,7 +557,6 @@ pf_check_threshold(struct pf_threshold *threshold)
int
pf_src_connlimit(struct pf_state **state)
{
- struct pf_state *s;
int bad = 0;
(*state)->src_node->conn++;
@@ -751,13 +566,21 @@ pf_src_connlimit(struct pf_state **state)
if ((*state)->rule.ptr->max_src_conn &&
(*state)->rule.ptr->max_src_conn <
(*state)->src_node->conn) {
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_SRCCONN]++;
+#else
pf_status.lcounters[LCNT_SRCCONN]++;
+#endif
bad++;
}
if ((*state)->rule.ptr->max_src_conn_rate.limit &&
pf_check_threshold(&(*state)->src_node->conn_rate)) {
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_SRCCONNRATE]++;
+#else
pf_status.lcounters[LCNT_SRCCONNRATE]++;
+#endif
bad++;
}
@@ -768,16 +591,21 @@ pf_src_connlimit(struct pf_state **state)
struct pfr_addr p;
u_int32_t killed = 0;
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf_src_connlimit: blocking address ");
pf_print_host(&(*state)->src_node->addr, 0,
- (*state)->af);
+ (*state)->key[PF_SK_WIRE]->af);
}
bzero(&p, sizeof(p));
- p.pfra_af = (*state)->af;
- switch ((*state)->af) {
+ p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
+ switch ((*state)->key[PF_SK_WIRE]->af) {
#ifdef INET
case AF_INET:
p.pfra_net = 32;
@@ -797,34 +625,51 @@ pf_src_connlimit(struct pf_state **state)
/* kill existing states if that's required. */
if ((*state)->rule.ptr->flush) {
- pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+ struct pf_state_key *sk;
+ struct pf_state *st;
- RB_FOREACH(s, pf_state_tree_id, &tree_id) {
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+ RB_FOREACH(st, pf_state_tree_id, &V_tree_id) {
+#else
+ pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+ RB_FOREACH(st, pf_state_tree_id, &tree_id) {
+#endif
+ sk = st->key[PF_SK_WIRE];
/*
* Kill states from this source. (Only those
* from the same rule if PF_FLUSH_GLOBAL is not
* set)
*/
- if (s->af == (*state)->af &&
+ if (sk->af ==
+ (*state)->key[PF_SK_WIRE]->af &&
(((*state)->direction == PF_OUT &&
PF_AEQ(&(*state)->src_node->addr,
- &s->lan.addr, s->af)) ||
+ &sk->addr[1], sk->af)) ||
((*state)->direction == PF_IN &&
PF_AEQ(&(*state)->src_node->addr,
- &s->ext.addr, s->af))) &&
+ &sk->addr[0], sk->af))) &&
((*state)->rule.ptr->flush &
PF_FLUSH_GLOBAL ||
- (*state)->rule.ptr == s->rule.ptr)) {
- s->timeout = PFTM_PURGE;
- s->src.state = s->dst.state =
+ (*state)->rule.ptr == st->rule.ptr)) {
+ st->timeout = PFTM_PURGE;
+ st->src.state = st->dst.state =
TCPS_CLOSED;
killed++;
}
}
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
printf(", %u states killed", killed);
}
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+#else
if (pf_status.debug >= PF_DEBUG_MISC)
+#endif
printf("\n");
}
@@ -848,18 +693,30 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
k.rule.ptr = rule;
else
k.rule.ptr = NULL;
+#ifdef __FreeBSD__
+ V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+ *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
+#else
pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+#endif
}
if (*sn == NULL) {
if (!rule->max_src_nodes ||
rule->src_nodes < rule->max_src_nodes)
- (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ (*sn) = pool_get(&V_pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
+#else
+ (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
+#endif
else
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_SRCNODES]++;
+#else
pf_status.lcounters[LCNT_SRCNODES]++;
+#endif
if ((*sn) == NULL)
return (-1);
- bzero(*sn, sizeof(struct pf_src_node));
pf_init_threshold(&(*sn)->conn_rate,
rule->max_src_conn_rate.limit,
@@ -873,109 +730,590 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
(*sn)->rule.ptr = NULL;
PF_ACPY(&(*sn)->addr, src, af);
if (RB_INSERT(pf_src_tree,
+#ifdef __FreeBSD__
+ &V_tree_src_tracking, *sn) != NULL) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
&tree_src_tracking, *sn) != NULL) {
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: src_tree insert failed: ");
pf_print_host(&(*sn)->addr, 0, af);
printf("\n");
}
+#ifdef __FreeBSD__
+ pool_put(&V_pf_src_tree_pl, *sn);
+#else
pool_put(&pf_src_tree_pl, *sn);
+#endif
return (-1);
}
(*sn)->creation = time_second;
(*sn)->ruletype = rule->action;
if ((*sn)->rule.ptr != NULL)
(*sn)->rule.ptr->src_nodes++;
+#ifdef __FreeBSD__
+ V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
+ V_pf_status.src_nodes++;
+#else
pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
pf_status.src_nodes++;
+#endif
} else {
if (rule->max_src_states &&
(*sn)->states >= rule->max_src_states) {
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_SRCSTATES]++;
+#else
pf_status.lcounters[LCNT_SRCSTATES]++;
+#endif
return (-1);
}
}
return (0);
}
+/* state table stuff */
+
+static __inline int
+pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
+{
+ int diff;
+
+ if ((diff = a->proto - b->proto) != 0)
+ return (diff);
+ if ((diff = a->af - b->af) != 0)
+ return (diff);
+ switch (a->af) {
+#ifdef INET
+ case AF_INET:
+ if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
+ return (1);
+ if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
+ return (-1);
+ if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
+ return (1);
+ if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
+ return (1);
+ if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
+ return (-1);
+ if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
+ return (1);
+ if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
+ return (-1);
+ if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
+ return (1);
+ if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
+ return (-1);
+ if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
+ return (1);
+ if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
+ return (-1);
+ if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
+ return (1);
+ if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
+ return (-1);
+ if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
+ return (1);
+ if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
+ return (-1);
+ if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
+ return (1);
+ if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
+ return (-1);
+ if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
+ return (1);
+ if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ }
+
+ if ((diff = a->port[0] - b->port[0]) != 0)
+ return (diff);
+ if ((diff = a->port[1] - b->port[1]) != 0)
+ return (diff);
+
+ return (0);
+}
+
+static __inline int
+pf_state_compare_id(struct pf_state *a, struct pf_state *b)
+{
+ if (a->id > b->id)
+ return (1);
+ if (a->id < b->id)
+ return (-1);
+ if (a->creatorid > b->creatorid)
+ return (1);
+ if (a->creatorid < b->creatorid)
+ return (-1);
+
+ return (0);
+}
+
int
-pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
+pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
- /* Thou MUST NOT insert multiple duplicate keys */
- state->u.s.kif = kif;
- if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
- if (pf_status.debug >= PF_DEBUG_MISC) {
- printf("pf: state insert failed: tree_lan_ext");
- printf(" lan: ");
- pf_print_host(&state->lan.addr, state->lan.port,
- state->af);
- printf(" gwy: ");
- pf_print_host(&state->gwy.addr, state->gwy.port,
- state->af);
- printf(" ext: ");
- pf_print_host(&state->ext.addr, state->ext.port,
- state->af);
- if (state->sync_flags & PFSTATE_FROMSYNC)
- printf(" (from sync)");
- printf("\n");
- }
+ struct pf_state_item *si;
+ struct pf_state_key *cur;
+ struct pf_state *olds = NULL;
+
+#ifdef __FreeBSD__
+ KASSERT(s->key[idx] == NULL, ("%s: key is null!", __FUNCTION__));
+#else
+ KASSERT(s->key[idx] == NULL); /* XXX handle this? */
+#endif
+
+#ifdef __FreeBSD__
+ if ((cur = RB_INSERT(pf_state_tree, &V_pf_statetbl, sk)) != NULL) {
+#else
+ if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
+#endif
+ /* key exists. check for same kif, if none, add to key */
+ TAILQ_FOREACH(si, &cur->states, entry)
+ if (si->s->kif == s->kif &&
+ si->s->direction == s->direction) {
+ if (sk->proto == IPPROTO_TCP &&
+ si->s->src.state >= TCPS_FIN_WAIT_2 &&
+ si->s->dst.state >= TCPS_FIN_WAIT_2) {
+ si->s->src.state = si->s->dst.state =
+ TCPS_CLOSED;
+ /* unlink late or sks can go away */
+ olds = si->s;
+ } else {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+ printf("pf: %s key attach "
+ "failed on %s: ",
+ (idx == PF_SK_WIRE) ?
+ "wire" : "stack",
+ s->kif->pfik_name);
+ pf_print_state_parts(s,
+ (idx == PF_SK_WIRE) ?
+ sk : NULL,
+ (idx == PF_SK_STACK) ?
+ sk : NULL);
+ printf(", existing: ");
+ pf_print_state_parts(si->s,
+ (idx == PF_SK_WIRE) ?
+ sk : NULL,
+ (idx == PF_SK_STACK) ?
+ sk : NULL);
+ printf("\n");
+ }
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_key_pl, sk);
+#else
+ pool_put(&pf_state_key_pl, sk);
+#endif
+ return (-1); /* collision! */
+ }
+ }
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_key_pl, sk);
+#else
+ pool_put(&pf_state_key_pl, sk);
+#endif
+ s->key[idx] = cur;
+ } else
+ s->key[idx] = sk;
+
+#ifdef __FreeBSD__
+ if ((si = pool_get(&V_pf_state_item_pl, PR_NOWAIT)) == NULL) {
+#else
+ if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
+#endif
+ pf_state_key_detach(s, idx);
return (-1);
}
+ si->s = s;
- if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
- if (pf_status.debug >= PF_DEBUG_MISC) {
- printf("pf: state insert failed: tree_ext_gwy");
- printf(" lan: ");
- pf_print_host(&state->lan.addr, state->lan.port,
- state->af);
- printf(" gwy: ");
- pf_print_host(&state->gwy.addr, state->gwy.port,
- state->af);
- printf(" ext: ");
- pf_print_host(&state->ext.addr, state->ext.port,
- state->af);
- if (state->sync_flags & PFSTATE_FROMSYNC)
- printf(" (from sync)");
- printf("\n");
+ /* list is sorted, if-bound states before floating */
+#ifdef __FreeBSD__
+ if (s->kif == V_pfi_all)
+#else
+ if (s->kif == pfi_all)
+#endif
+ TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
+ else
+ TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
+
+ if (olds)
+ pf_unlink_state(olds);
+
+ return (0);
+}
+
+void
+pf_detach_state(struct pf_state *s)
+{
+ if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
+ s->key[PF_SK_WIRE] = NULL;
+
+ if (s->key[PF_SK_STACK] != NULL)
+ pf_state_key_detach(s, PF_SK_STACK);
+
+ if (s->key[PF_SK_WIRE] != NULL)
+ pf_state_key_detach(s, PF_SK_WIRE);
+}
+
+void
+pf_state_key_detach(struct pf_state *s, int idx)
+{
+ struct pf_state_item *si;
+
+ si = TAILQ_FIRST(&s->key[idx]->states);
+ while (si && si->s != s)
+ si = TAILQ_NEXT(si, entry);
+
+ if (si) {
+ TAILQ_REMOVE(&s->key[idx]->states, si, entry);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_item_pl, si);
+#else
+ pool_put(&pf_state_item_pl, si);
+#endif
+ }
+
+ if (TAILQ_EMPTY(&s->key[idx]->states)) {
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_state_tree, &V_pf_statetbl, s->key[idx]);
+#else
+ RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
+#endif
+ if (s->key[idx]->reverse)
+ s->key[idx]->reverse->reverse = NULL;
+#ifdef __FreeBSD__
+ /* XXX: implement this */
+#else
+ if (s->key[idx]->inp)
+ s->key[idx]->inp->inp_pf_sk = NULL;
+#endif
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_key_pl, s->key[idx]);
+#else
+ pool_put(&pf_state_key_pl, s->key[idx]);
+#endif
+ }
+ s->key[idx] = NULL;
+}
+
+struct pf_state_key *
+pf_alloc_state_key(int pool_flags)
+{
+ struct pf_state_key *sk;
+
+#ifdef __FreeBSD__
+ if ((sk = pool_get(&V_pf_state_key_pl, pool_flags)) == NULL)
+#else
+ if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
+#endif
+ return (NULL);
+ TAILQ_INIT(&sk->states);
+
+ return (sk);
+}
+
+int
+pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
+ struct pf_state_key **skw, struct pf_state_key **sks,
+ struct pf_state_key **skp, struct pf_state_key **nkp,
+ struct pf_addr *saddr, struct pf_addr *daddr,
+ u_int16_t sport, u_int16_t dport)
+{
+#ifdef __FreeBSD__
+ KASSERT((*skp == NULL && *nkp == NULL),
+ ("%s: skp == NULL && nkp == NULL", __FUNCTION__));
+#else
+ KASSERT((*skp == NULL && *nkp == NULL));
+#endif
+
+ if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
+ return (ENOMEM);
+
+ PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
+ PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
+ (*skp)->port[pd->sidx] = sport;
+ (*skp)->port[pd->didx] = dport;
+ (*skp)->proto = pd->proto;
+ (*skp)->af = pd->af;
+
+ if (nr != NULL) {
+ if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
+ return (ENOMEM); /* caller must handle cleanup */
+
+ /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
+ PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
+ PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
+ (*nkp)->port[0] = (*skp)->port[0];
+ (*nkp)->port[1] = (*skp)->port[1];
+ (*nkp)->proto = pd->proto;
+ (*nkp)->af = pd->af;
+ } else
+ *nkp = *skp;
+
+ if (pd->dir == PF_IN) {
+ *skw = *skp;
+ *sks = *nkp;
+ } else {
+ *sks = *skp;
+ *skw = *nkp;
+ }
+ return (0);
+}
+
+
+int
+pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
+ struct pf_state_key *sks, struct pf_state *s)
+{
+#ifndef __FreeBSD__
+ splassert(IPL_SOFTNET);
+#endif
+
+ s->kif = kif;
+
+ if (skw == sks) {
+ if (pf_state_key_attach(skw, s, PF_SK_WIRE))
+ return (-1);
+ s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
+ } else {
+ if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_key_pl, sks);
+#else
+ pool_put(&pf_state_key_pl, sks);
+#endif
+ return (-1);
+ }
+ if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
+ pf_state_key_detach(s, PF_SK_WIRE);
+ return (-1);
}
- RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
- return (-1);
}
- if (state->id == 0 && state->creatorid == 0) {
- state->id = htobe64(pf_status.stateid++);
- state->creatorid = pf_status.hostid;
+ if (s->id == 0 && s->creatorid == 0) {
+#ifdef __FreeBSD__
+ s->id = htobe64(V_pf_status.stateid++);
+ s->creatorid = V_pf_status.hostid;
+#else
+ s->id = htobe64(pf_status.stateid++);
+ s->creatorid = pf_status.hostid;
+#endif
}
- if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
- if (pf_status.debug >= PF_DEBUG_MISC) {
#ifdef __FreeBSD__
- printf("pf: state insert failed: "
- "id: %016llx creatorid: %08x",
- (long long)be64toh(state->id),
- ntohl(state->creatorid));
+ if (RB_INSERT(pf_state_tree_id, &V_tree_id, s) != NULL) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
+ if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: state insert failed: "
"id: %016llx creatorid: %08x",
- betoh64(state->id), ntohl(state->creatorid));
+#ifdef __FreeBSD__
+ (unsigned long long)betoh64(s->id), ntohl(s->creatorid));
+#else
+ betoh64(s->id), ntohl(s->creatorid));
#endif
- if (state->sync_flags & PFSTATE_FROMSYNC)
- printf(" (from sync)");
printf("\n");
}
- RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
- RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
+ pf_detach_state(s);
return (-1);
}
- TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
+#ifdef __FreeBSD__
+ TAILQ_INSERT_TAIL(&V_state_list, s, entry_list);
+ V_pf_status.fcounters[FCNT_STATE_INSERT]++;
+ V_pf_status.states++;
+#else
+ TAILQ_INSERT_TAIL(&state_list, s, entry_list);
pf_status.fcounters[FCNT_STATE_INSERT]++;
pf_status.states++;
+#endif
pfi_kif_ref(kif, PFI_KIF_REF_STATE);
-#if NPFSYNC
- pfsync_insert_state(state);
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_insert_state_ptr != NULL)
+ pfsync_insert_state_ptr(s);
+#else
+ pfsync_insert_state(s);
+#endif
#endif
return (0);
}
+struct pf_state *
+pf_find_state_byid(struct pf_state_cmp *key)
+{
+#ifdef __FreeBSD__
+ V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
+
+ return (RB_FIND(pf_state_tree_id, &V_tree_id, (struct pf_state *)key));
+#else
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+
+ return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
+#endif
+}
+
+/* XXX debug function, intended to be removed one day */
+int
+pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
+ struct pfi_kif *kif, u_int dir)
+{
+ /* a (from hdr) and b (new) must be exact opposites of each other */
+ if (a->af == b->af && a->proto == b->proto &&
+ PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
+ PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
+ a->port[0] == b->port[1] &&
+ a->port[1] == b->port[0])
+ return (0);
+ else {
+ /* mismatch. must not happen. */
+ printf("pf: state key linking mismatch! dir=%s, "
+ "if=%s, stored af=%u, a0: ",
+ dir == PF_OUT ? "OUT" : "IN", kif->pfik_name, a->af);
+ pf_print_host(&a->addr[0], a->port[0], a->af);
+ printf(", a1: ");
+ pf_print_host(&a->addr[1], a->port[1], a->af);
+ printf(", proto=%u", a->proto);
+ printf(", found af=%u, a0: ", b->af);
+ pf_print_host(&b->addr[0], b->port[0], b->af);
+ printf(", a1: ");
+ pf_print_host(&b->addr[1], b->port[1], b->af);
+ printf(", proto=%u", b->proto);
+ printf(".\n");
+ return (-1);
+ }
+}
+
+struct pf_state *
+#ifdef __FreeBSD__
+pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
+ struct mbuf *m, struct pf_mtag *pftag)
+#else
+pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
+ struct mbuf *m)
+#endif
+{
+ struct pf_state_key *sk;
+ struct pf_state_item *si;
+
+#ifdef __FreeBSD__
+ V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
+#else
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+#endif
+
+#ifdef __FreeBSD__
+ if (dir == PF_OUT && pftag->statekey &&
+ ((struct pf_state_key *)pftag->statekey)->reverse)
+ sk = ((struct pf_state_key *)pftag->statekey)->reverse;
+ else {
+#ifdef __FreeBSD__
+ if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
+#else
+ if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
+#endif
+ (struct pf_state_key *)key)) == NULL)
+ return (NULL);
+ if (dir == PF_OUT && pftag->statekey &&
+ pf_compare_state_keys(pftag->statekey, sk,
+ kif, dir) == 0) {
+ ((struct pf_state_key *)
+ pftag->statekey)->reverse = sk;
+ sk->reverse = pftag->statekey;
+ }
+ }
+#else
+ if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
+ ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
+ sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
+ else {
+#ifdef __FreeBSD__
+ if ((sk = RB_FIND(pf_state_tree, &V_pf_statetbl,
+#else
+ if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
+#endif
+ (struct pf_state_key *)key)) == NULL)
+ return (NULL);
+ if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
+ pf_compare_state_keys(m->m_pkthdr.pf.statekey, sk,
+ kif, dir) == 0) {
+ ((struct pf_state_key *)
+ m->m_pkthdr.pf.statekey)->reverse = sk;
+ sk->reverse = m->m_pkthdr.pf.statekey;
+ }
+ }
+#endif
+
+ if (dir == PF_OUT)
+#ifdef __FreeBSD__
+ pftag->statekey = NULL;
+#else
+ m->m_pkthdr.pf.statekey = NULL;
+#endif
+
+ /* list is sorted, if-bound states before floating ones */
+ TAILQ_FOREACH(si, &sk->states, entry)
+#ifdef __FreeBSD__
+ if ((si->s->kif == V_pfi_all || si->s->kif == kif) &&
+#else
+ if ((si->s->kif == pfi_all || si->s->kif == kif) &&
+#endif
+ sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
+ si->s->key[PF_SK_STACK]))
+ return (si->s);
+
+ return (NULL);
+}
+
+struct pf_state *
+pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
+{
+ struct pf_state_key *sk;
+ struct pf_state_item *si, *ret = NULL;
+
+#ifdef __FreeBSD__
+ V_pf_status.fcounters[FCNT_STATE_SEARCH]++;
+#else
+ pf_status.fcounters[FCNT_STATE_SEARCH]++;
+#endif
+
+#ifdef __FreeBSD__
+ sk = RB_FIND(pf_state_tree, &V_pf_statetbl, (struct pf_state_key *)key);
+#else
+ sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
+#endif
+ if (sk != NULL) {
+ TAILQ_FOREACH(si, &sk->states, entry)
+ if (dir == PF_INOUT ||
+ (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
+ si->s->key[PF_SK_STACK]))) {
+ if (more == NULL)
+ return (si->s);
+
+ if (ret)
+ (*more)++;
+ else
+ ret = si;
+ }
+ }
+ return (ret ? ret->s : NULL);
+}
+
+/* END state table stuff */
+
+
void
pf_purge_thread(void *v)
{
@@ -984,25 +1322,28 @@ pf_purge_thread(void *v)
int locked;
#endif
+ CURVNET_SET((struct vnet *)v);
+
for (;;) {
tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
#ifdef __FreeBSD__
- sx_slock(&pf_consistency_lock);
+ sx_slock(&V_pf_consistency_lock);
PF_LOCK();
locked = 0;
- if (pf_end_threads) {
+ if (V_pf_end_threads) {
PF_UNLOCK();
- sx_sunlock(&pf_consistency_lock);
- sx_xlock(&pf_consistency_lock);
+ sx_sunlock(&V_pf_consistency_lock);
+ sx_xlock(&V_pf_consistency_lock);
PF_LOCK();
- pf_purge_expired_states(pf_status.states, 1);
+
+ pf_purge_expired_states(V_pf_status.states, 1);
pf_purge_expired_fragments();
pf_purge_expired_src_nodes(1);
- pf_end_threads++;
+ V_pf_end_threads++;
- sx_xunlock(&pf_consistency_lock);
+ sx_xunlock(&V_pf_consistency_lock);
PF_UNLOCK();
wakeup(pf_purge_thread);
kproc_exit(0);
@@ -1012,16 +1353,16 @@ pf_purge_thread(void *v)
/* process a fraction of the state table every second */
#ifdef __FreeBSD__
- if(!pf_purge_expired_states(1 + (pf_status.states
- / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
+ if (!pf_purge_expired_states(1 + (V_pf_status.states /
+ V_pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
PF_UNLOCK();
- sx_sunlock(&pf_consistency_lock);
- sx_xlock(&pf_consistency_lock);
+ sx_sunlock(&V_pf_consistency_lock);
+ sx_xlock(&V_pf_consistency_lock);
PF_LOCK();
locked = 1;
- pf_purge_expired_states(1 + (pf_status.states
- / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
+ pf_purge_expired_states(1 + (V_pf_status.states /
+ V_pf_default_rule.timeout[PFTM_INTERVAL]), 1);
}
#else
pf_purge_expired_states(1 + (pf_status.states
@@ -1029,16 +1370,13 @@ pf_purge_thread(void *v)
#endif
/* purge other expired types every PFTM_INTERVAL seconds */
+#ifdef __FreeBSD__
+ if (++nloops >= V_pf_default_rule.timeout[PFTM_INTERVAL]) {
+#else
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+#endif
pf_purge_expired_fragments();
- if (!pf_purge_expired_src_nodes(locked)) {
- PF_UNLOCK();
- sx_sunlock(&pf_consistency_lock);
- sx_xlock(&pf_consistency_lock);
- PF_LOCK();
- locked = 1;
- pf_purge_expired_src_nodes(1);
- }
+ pf_purge_expired_src_nodes(0);
nloops = 0;
}
@@ -1046,11 +1384,12 @@ pf_purge_thread(void *v)
#ifdef __FreeBSD__
PF_UNLOCK();
if (locked)
- sx_xunlock(&pf_consistency_lock);
+ sx_xunlock(&V_pf_consistency_lock);
else
- sx_sunlock(&pf_consistency_lock);
+ sx_sunlock(&V_pf_consistency_lock);
#endif
}
+ CURVNET_RESTORE();
}
u_int32_t
@@ -1066,7 +1405,7 @@ pf_state_expires(const struct pf_state *state)
return (time_second);
if (state->timeout == PFTM_UNTIL_PACKET)
return (0);
-#ifdef __FreeBSD__
+#ifdef __FreeBSD__
KASSERT(state->timeout != PFTM_UNLINKED,
("pf_state_expires: timeout == PFTM_UNLINKED"));
KASSERT((state->timeout < PFTM_MAX),
@@ -1077,15 +1416,25 @@ pf_state_expires(const struct pf_state *state)
#endif
timeout = state->rule.ptr->timeout[state->timeout];
if (!timeout)
+#ifdef __FreeBSD__
+ timeout = V_pf_default_rule.timeout[state->timeout];
+#else
timeout = pf_default_rule.timeout[state->timeout];
+#endif
start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
if (start) {
end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
- states = state->rule.ptr->states;
+ states = state->rule.ptr->states_cur;
} else {
+#ifdef __FreeBSD__
+ start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
+ end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
+ states = V_pf_status.states;
+#else
start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
states = pf_status.states;
+#endif
}
if (end && states > start && start < end) {
if (states < end)
@@ -1105,46 +1454,61 @@ void
pf_purge_expired_src_nodes(int waslocked)
#endif
{
- struct pf_src_node *cur, *next;
- int locked = waslocked;
-
- for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
- next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
+ struct pf_src_node *cur, *next;
+ int locked = waslocked;
- if (cur->states <= 0 && cur->expire <= time_second) {
- if (! locked) {
#ifdef __FreeBSD__
- if (!sx_try_upgrade(&pf_consistency_lock))
- return (0);
+ for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) {
+ next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur);
#else
- rw_enter_write(&pf_consistency_lock);
+ for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
+ next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
#endif
- next = RB_NEXT(pf_src_tree,
- &tree_src_tracking, cur);
- locked = 1;
- }
- if (cur->rule.ptr != NULL) {
- cur->rule.ptr->src_nodes--;
- if (cur->rule.ptr->states <= 0 &&
- cur->rule.ptr->max_src_nodes <= 0)
- pf_rm_rule(NULL, cur->rule.ptr);
- }
- RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, cur);
- }
- }
- if (locked && !waslocked)
+ if (cur->states <= 0 && cur->expire <= time_second) {
+ if (! locked) {
#ifdef __FreeBSD__
- sx_downgrade(&pf_consistency_lock);
+ if (!sx_try_upgrade(&V_pf_consistency_lock))
+ return (0);
#else
- rw_exit_write(&pf_consistency_lock);
+ rw_enter_write(&pf_consistency_lock);
#endif
+ next = RB_NEXT(pf_src_tree,
+#ifdef __FreeBSD__
+ &V_tree_src_tracking, cur);
+#else
+ &tree_src_tracking, cur);
+#endif
+ locked = 1;
+ }
+ if (cur->rule.ptr != NULL) {
+ cur->rule.ptr->src_nodes--;
+ if (cur->rule.ptr->states_cur <= 0 &&
+ cur->rule.ptr->max_src_nodes <= 0)
+ pf_rm_rule(NULL, cur->rule.ptr);
+ }
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur);
+ V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ V_pf_status.src_nodes--;
+ pool_put(&V_pf_src_tree_pl, cur);
+#else
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, cur);
+#endif
+ }
+ }
+ if (locked && !waslocked)
#ifdef __FreeBSD__
+ {
+ sx_downgrade(&V_pf_consistency_lock);
+ }
return (1);
+#else
+ rw_exit_write(&pf_consistency_lock);
#endif
}
@@ -1154,15 +1518,17 @@ pf_src_tree_remove_state(struct pf_state *s)
u_int32_t timeout;
if (s->src_node != NULL) {
- if (s->proto == IPPROTO_TCP) {
- if (s->src.tcp_est)
- --s->src_node->conn;
- }
+ if (s->src.tcp_est)
+ --s->src_node->conn;
if (--s->src_node->states <= 0) {
timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
if (!timeout)
timeout =
+#ifdef __FreeBSD__
+ V_pf_default_rule.timeout[PFTM_SRC_NODE];
+#else
pf_default_rule.timeout[PFTM_SRC_NODE];
+#endif
s->src_node->expire = time_second + timeout;
}
}
@@ -1171,7 +1537,11 @@ pf_src_tree_remove_state(struct pf_state *s)
timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
if (!timeout)
timeout =
+#ifdef __FreeBSD__
+ V_pf_default_rule.timeout[PFTM_SRC_NODE];
+#else
pf_default_rule.timeout[PFTM_SRC_NODE];
+#endif
s->nat_src_node->expire = time_second + timeout;
}
}
@@ -1186,29 +1556,49 @@ pf_unlink_state(struct pf_state *cur)
if (cur->local_flags & PFSTATE_EXPIRING)
return;
cur->local_flags |= PFSTATE_EXPIRING;
+#else
+ splassert(IPL_SOFTNET);
#endif
+
if (cur->src.state == PF_TCPS_PROXY_DST) {
+ /* XXX wire key the right one? */
#ifdef __FreeBSD__
- pf_send_tcp(NULL, cur->rule.ptr, cur->af,
+ pf_send_tcp(NULL, cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
#else
- pf_send_tcp(cur->rule.ptr, cur->af,
+ pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
#endif
- &cur->ext.addr, &cur->lan.addr,
- cur->ext.port, cur->lan.port,
+ &cur->key[PF_SK_WIRE]->addr[1],
+ &cur->key[PF_SK_WIRE]->addr[0],
+ cur->key[PF_SK_WIRE]->port[1],
+ cur->key[PF_SK_WIRE]->port[0],
cur->src.seqhi, cur->src.seqlo + 1,
TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
}
- RB_REMOVE(pf_state_tree_ext_gwy,
- &cur->u.s.kif->pfik_ext_gwy, cur);
- RB_REMOVE(pf_state_tree_lan_ext,
- &cur->u.s.kif->pfik_lan_ext, cur);
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_state_tree_id, &V_tree_id, cur);
+#else
RB_REMOVE(pf_state_tree_id, &tree_id, cur);
-#if NPFSYNC
- if (cur->creatorid == pf_status.hostid)
- pfsync_delete_state(cur);
+#endif
+#if NPFLOW > 0
+ if (cur->state_flags & PFSTATE_PFLOW)
+#ifdef __FreeBSD__
+ if (export_pflow_ptr != NULL)
+ export_pflow_ptr(cur);
+#else
+ export_pflow(cur);
+#endif
+#endif
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_delete_state_ptr != NULL)
+ pfsync_delete_state_ptr(cur);
+#else
+ pfsync_delete_state(cur);
+#endif
#endif
cur->timeout = PFTM_UNLINKED;
pf_src_tree_remove_state(cur);
+ pf_detach_state(cur);
}
/* callers should be at splsoftnet and hold the
@@ -1216,10 +1606,17 @@ pf_unlink_state(struct pf_state *cur)
void
pf_free_state(struct pf_state *cur)
{
-#if NPFSYNC
- if (pfsyncif != NULL &&
- (pfsyncif->sc_bulk_send_next == cur ||
- pfsyncif->sc_bulk_terminator == cur))
+#ifndef __FreeBSD__
+ splassert(IPL_SOFTNET);
+#endif
+
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_state_in_use_ptr != NULL &&
+ pfsync_state_in_use_ptr(cur))
+#else
+ if (pfsync_state_in_use(cur))
+#endif
return;
#endif
#ifdef __FreeBSD__
@@ -1228,24 +1625,34 @@ pf_free_state(struct pf_state *cur)
#else
KASSERT(cur->timeout == PFTM_UNLINKED);
#endif
- if (--cur->rule.ptr->states <= 0 &&
+ if (--cur->rule.ptr->states_cur <= 0 &&
cur->rule.ptr->src_nodes <= 0)
pf_rm_rule(NULL, cur->rule.ptr);
if (cur->nat_rule.ptr != NULL)
- if (--cur->nat_rule.ptr->states <= 0 &&
+ if (--cur->nat_rule.ptr->states_cur <= 0 &&
cur->nat_rule.ptr->src_nodes <= 0)
pf_rm_rule(NULL, cur->nat_rule.ptr);
if (cur->anchor.ptr != NULL)
- if (--cur->anchor.ptr->states <= 0)
+ if (--cur->anchor.ptr->states_cur <= 0)
pf_rm_rule(NULL, cur->anchor.ptr);
pf_normalize_tcp_cleanup(cur);
- pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
- TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
+ pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
+#ifdef __FreeBSD__
+ TAILQ_REMOVE(&V_state_list, cur, entry_list);
+#else
+ TAILQ_REMOVE(&state_list, cur, entry_list);
+#endif
if (cur->tag)
pf_tag_unref(cur->tag);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_pl, cur);
+ V_pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+ V_pf_status.states--;
+#else
pool_put(&pf_state_pl, cur);
pf_status.fcounters[FCNT_STATE_REMOVALS]++;
pf_status.states--;
+#endif
}
#ifdef __FreeBSD__
@@ -1259,28 +1666,32 @@ pf_purge_expired_states(u_int32_t maxcheck)
static struct pf_state *cur = NULL;
struct pf_state *next;
#ifdef __FreeBSD__
- int locked = waslocked;
+ int locked = waslocked;
#else
- int locked = 0;
+ int locked = 0;
#endif
while (maxcheck--) {
/* wrap to start of list when we hit the end */
if (cur == NULL) {
+#ifdef __FreeBSD__
+ cur = TAILQ_FIRST(&V_state_list);
+#else
cur = TAILQ_FIRST(&state_list);
+#endif
if (cur == NULL)
break; /* list empty */
}
/* get next state, as cur may get deleted */
- next = TAILQ_NEXT(cur, u.s.entry_list);
+ next = TAILQ_NEXT(cur, entry_list);
if (cur->timeout == PFTM_UNLINKED) {
/* free unlinked state */
if (! locked) {
#ifdef __FreeBSD__
- if (!sx_try_upgrade(&pf_consistency_lock))
- return (0);
+ if (!sx_try_upgrade(&V_pf_consistency_lock))
+ return (0);
#else
rw_enter_write(&pf_consistency_lock);
#endif
@@ -1292,8 +1703,8 @@ pf_purge_expired_states(u_int32_t maxcheck)
pf_unlink_state(cur);
if (! locked) {
#ifdef __FreeBSD__
- if (!sx_try_upgrade(&pf_consistency_lock))
- return (0);
+ if (!sx_try_upgrade(&V_pf_consistency_lock))
+ return (0);
#else
rw_enter_write(&pf_consistency_lock);
#endif
@@ -1306,7 +1717,7 @@ pf_purge_expired_states(u_int32_t maxcheck)
#ifdef __FreeBSD__
if (!waslocked && locked)
- sx_downgrade(&pf_consistency_lock);
+ sx_downgrade(&V_pf_consistency_lock);
return (1);
#else
@@ -1320,7 +1731,7 @@ pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
if (aw->type != PF_ADDR_TABLE)
return (0);
- if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
+ if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
return (1);
return (0);
}
@@ -1367,34 +1778,33 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
#ifdef INET6
case AF_INET6: {
u_int16_t b;
- u_int8_t i, curstart = 255, curend = 0,
- maxstart = 0, maxend = 0;
+ u_int8_t i, curstart, curend, maxstart, maxend;
+ curstart = curend = maxstart = maxend = 255;
for (i = 0; i < 8; i++) {
if (!addr->addr16[i]) {
if (curstart == 255)
curstart = i;
- else
- curend = i;
+ curend = i;
} else {
- if (curstart) {
- if ((curend - curstart) >
- (maxend - maxstart)) {
- maxstart = curstart;
- maxend = curend;
- curstart = 255;
- }
+ if ((curend - curstart) >
+ (maxend - maxstart)) {
+ maxstart = curstart;
+ maxend = curend;
}
+ curstart = curend = 255;
}
}
+ if ((curend - curstart) >
+ (maxend - maxstart)) {
+ maxstart = curstart;
+ maxend = curend;
+ }
for (i = 0; i < 8; i++) {
if (i >= maxstart && i <= maxend) {
- if (maxend != 7) {
- if (i == maxstart)
- printf(":");
- } else {
- if (i == maxend)
- printf(":");
- }
+ if (i == 0)
+ printf(":");
+ if (i == maxend)
+ printf(":");
} else {
b = ntohs(addr->addr16[i]);
printf("%x", b);
@@ -1415,39 +1825,87 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
void
pf_print_state(struct pf_state *s)
{
- switch (s->proto) {
+ pf_print_state_parts(s, NULL, NULL);
+}
+
+void
+pf_print_state_parts(struct pf_state *s,
+ struct pf_state_key *skwp, struct pf_state_key *sksp)
+{
+ struct pf_state_key *skw, *sks;
+ u_int8_t proto, dir;
+
+ /* Do our best to fill these, but they're skipped if NULL */
+ skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
+ sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
+ proto = skw ? skw->proto : (sks ? sks->proto : 0);
+ dir = s ? s->direction : 0;
+
+ switch (proto) {
+ case IPPROTO_IPV4:
+ printf("IPv4");
+ break;
+ case IPPROTO_IPV6:
+ printf("IPv6");
+ break;
case IPPROTO_TCP:
- printf("TCP ");
+ printf("TCP");
break;
case IPPROTO_UDP:
- printf("UDP ");
+ printf("UDP");
break;
case IPPROTO_ICMP:
- printf("ICMP ");
+ printf("ICMP");
break;
case IPPROTO_ICMPV6:
- printf("ICMPV6 ");
+ printf("ICMPv6");
break;
default:
- printf("%u ", s->proto);
+ printf("%u", skw->proto);
break;
}
- pf_print_host(&s->lan.addr, s->lan.port, s->af);
- printf(" ");
- pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
- printf(" ");
- pf_print_host(&s->ext.addr, s->ext.port, s->af);
- printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
- s->src.seqhi, s->src.max_win, s->src.seqdiff);
- if (s->src.wscale && s->dst.wscale)
- printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
- printf("]");
- printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
- s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
- if (s->src.wscale && s->dst.wscale)
- printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
- printf("]");
- printf(" %u:%u", s->src.state, s->dst.state);
+ switch (dir) {
+ case PF_IN:
+ printf(" in");
+ break;
+ case PF_OUT:
+ printf(" out");
+ break;
+ }
+ if (skw) {
+ printf(" wire: ");
+ pf_print_host(&skw->addr[0], skw->port[0], skw->af);
+ printf(" ");
+ pf_print_host(&skw->addr[1], skw->port[1], skw->af);
+ }
+ if (sks) {
+ printf(" stack: ");
+ if (sks != skw) {
+ pf_print_host(&sks->addr[0], sks->port[0], sks->af);
+ printf(" ");
+ pf_print_host(&sks->addr[1], sks->port[1], sks->af);
+ } else
+ printf("-");
+ }
+ if (s) {
+ if (proto == IPPROTO_TCP) {
+ printf(" [lo=%u high=%u win=%u modulator=%u",
+ s->src.seqlo, s->src.seqhi,
+ s->src.max_win, s->src.seqdiff);
+ if (s->src.wscale && s->dst.wscale)
+ printf(" wscale=%u",
+ s->src.wscale & PF_WSCALE_MASK);
+ printf("]");
+ printf(" [lo=%u high=%u win=%u modulator=%u",
+ s->dst.seqlo, s->dst.seqhi,
+ s->dst.max_win, s->dst.seqdiff);
+ if (s->src.wscale && s->dst.wscale)
+ printf(" wscale=%u",
+ s->dst.wscale & PF_WSCALE_MASK);
+ printf("]");
+ }
+ printf(" %u:%u", s->src.state, s->dst.state);
+ }
}
void
@@ -1530,6 +1988,7 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
return (1);
switch (aw1->type) {
case PF_ADDR_ADDRMASK:
+ case PF_ADDR_RANGE:
if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
return (1);
if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
@@ -1653,12 +2112,13 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
struct pf_addr oia, ooa;
PF_ACPY(&oia, ia, af);
- PF_ACPY(&ooa, oa, af);
+ if (oa)
+ PF_ACPY(&ooa, oa, af);
/* Change inner protocol port, fix inner protocol checksum. */
if (ip != NULL) {
u_int16_t oip = *ip;
- u_int32_t opc = 0; /* make the compiler happy */
+ u_int32_t opc;
if (pc != NULL)
opc = *pc;
@@ -1702,31 +2162,33 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
break;
#endif /* INET6 */
}
- /* Change outer ip address, fix outer ip or icmpv6 checksum. */
- PF_ACPY(oa, na, af);
- switch (af) {
+ /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
+ if (oa) {
+ PF_ACPY(oa, na, af);
+ switch (af) {
#ifdef INET
- case AF_INET:
- *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
- ooa.addr16[0], oa->addr16[0], 0),
- ooa.addr16[1], oa->addr16[1], 0);
- break;
+ case AF_INET:
+ *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
+ ooa.addr16[0], oa->addr16[0], 0),
+ ooa.addr16[1], oa->addr16[1], 0);
+ break;
#endif /* INET */
#ifdef INET6
- case AF_INET6:
- *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
- pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
- pf_cksum_fixup(pf_cksum_fixup(*ic,
- ooa.addr16[0], oa->addr16[0], u),
- ooa.addr16[1], oa->addr16[1], u),
- ooa.addr16[2], oa->addr16[2], u),
- ooa.addr16[3], oa->addr16[3], u),
- ooa.addr16[4], oa->addr16[4], u),
- ooa.addr16[5], oa->addr16[5], u),
- ooa.addr16[6], oa->addr16[6], u),
- ooa.addr16[7], oa->addr16[7], u);
- break;
+ case AF_INET6:
+ *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+ pf_cksum_fixup(pf_cksum_fixup(*ic,
+ ooa.addr16[0], oa->addr16[0], u),
+ ooa.addr16[1], oa->addr16[1], u),
+ ooa.addr16[2], oa->addr16[2], u),
+ ooa.addr16[3], oa->addr16[3], u),
+ ooa.addr16[4], oa->addr16[4], u),
+ ooa.addr16[5], oa->addr16[5], u),
+ ooa.addr16[6], oa->addr16[6], u),
+ ooa.addr16[7], oa->addr16[7], u);
+ break;
#endif /* INET6 */
+ }
}
}
@@ -1748,7 +2210,7 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
int copyback = 0, i, olen;
struct sackblk sack;
-#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
+#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
if (hlen < TCPOLEN_SACKLEN ||
!pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
return 0;
@@ -1817,9 +2279,9 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#endif /* INET6 */
struct tcphdr *th;
char *opt;
- struct pf_mtag *pf_mtag;
-
#ifdef __FreeBSD__
+ struct pf_mtag *pf_mtag;
+
KASSERT(
#ifdef INET
af == AF_INET
@@ -1841,7 +2303,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#ifdef INET6
h6 = NULL;
#endif
-#endif
+#endif /* __FreeBSD__ */
/* maximum segment size tcp option */
tlen = sizeof(struct tcphdr);
@@ -1867,42 +2329,46 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
return;
#ifdef __FreeBSD__
#ifdef MAC
- if (replyto)
- mac_netinet_firewall_reply(replyto, m);
- else
- mac_netinet_firewall_send(m);
-#else
- (void)replyto;
-#endif
+ mac_netinet_firewall_send(m);
#endif
if ((pf_mtag = pf_get_mtag(m)) == NULL) {
m_freem(m);
return;
}
+#endif
if (tag)
#ifdef __FreeBSD__
m->m_flags |= M_SKIP_FIREWALL;
+ pf_mtag->tag = rtag;
#else
- pf_mtag->flags |= PF_TAG_GENERATED;
+ m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+ m->m_pkthdr.pf.tag = rtag;
#endif
- pf_mtag->tag = rtag;
-
if (r != NULL && r->rtableid >= 0)
#ifdef __FreeBSD__
{
M_SETFIB(m, r->rtableid);
-#endif
pf_mtag->rtableid = r->rtableid;
+#else
+ m->m_pkthdr.pf.rtableid = r->rtableid;
+#endif
#ifdef __FreeBSD__
}
#endif
+
#ifdef ALTQ
if (r != NULL && r->qid) {
+#ifdef __FreeBSD__
pf_mtag->qid = r->qid;
+
/* add hints for ecn */
- pf_mtag->af = af;
pf_mtag->hdr = mtod(m, struct ip *);
+#else
+ m->m_pkthdr.pf.qid = r->qid;
+ /* add hints for ecn */
+ m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
+#endif
}
#endif /* ALTQ */
m->m_data += max_linkhdr;
@@ -1968,18 +2434,19 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
#ifdef __FreeBSD__
h->ip_off = V_path_mtu_discovery ? IP_DF : 0;
h->ip_len = len;
+ h->ip_ttl = ttl ? ttl : V_ip_defttl;
#else
- h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
h->ip_len = htons(len);
+ h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
+ h->ip_ttl = ttl ? ttl : ip_defttl;
#endif
- h->ip_ttl = ttl ? ttl : V_ip_defttl;
h->ip_sum = 0;
if (eh == NULL) {
#ifdef __FreeBSD__
- PF_UNLOCK();
- ip_output(m, (void *)NULL, (void *)NULL, 0,
- (void *)NULL, (void *)NULL);
- PF_LOCK();
+ PF_UNLOCK();
+ ip_output(m, (void *)NULL, (void *)NULL, 0,
+ (void *)NULL, (void *)NULL);
+ PF_LOCK();
#else /* ! __FreeBSD__ */
ip_output(m, (void *)NULL, (void *)NULL, 0,
(void *)NULL, (void *)NULL);
@@ -2027,55 +2494,66 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
PF_LOCK();
#else
- ip6_output(m, NULL, NULL, 0, NULL, NULL);
+ ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
#endif
break;
#endif /* INET6 */
}
}
-void
+static void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
struct pf_rule *r)
{
- struct pf_mtag *pf_mtag;
struct mbuf *m0;
#ifdef __FreeBSD__
+#ifdef INET
struct ip *ip;
#endif
+ struct pf_mtag *pf_mtag;
+#endif
#ifdef __FreeBSD__
m0 = m_copypacket(m, M_DONTWAIT);
if (m0 == NULL)
return;
#else
- m0 = m_copy(m, 0, M_COPYALL);
+ if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
+ return;
#endif
+
+#ifdef __FreeBSD__
if ((pf_mtag = pf_get_mtag(m0)) == NULL)
return;
-#ifdef __FreeBSD__
/* XXX: revisit */
m0->m_flags |= M_SKIP_FIREWALL;
#else
- pf_mtag->flags |= PF_TAG_GENERATED;
+ m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
#endif
if (r->rtableid >= 0)
#ifdef __FreeBSD__
{
M_SETFIB(m0, r->rtableid);
-#endif
pf_mtag->rtableid = r->rtableid;
+#else
+ m0->m_pkthdr.pf.rtableid = r->rtableid;
+#endif
#ifdef __FreeBSD__
}
#endif
#ifdef ALTQ
if (r->qid) {
+#ifdef __FreeBSD__
pf_mtag->qid = r->qid;
/* add hints for ecn */
- pf_mtag->af = af;
pf_mtag->hdr = mtod(m0, struct ip *);
+#else
+ m0->m_pkthdr.pf.qid = r->qid;
+ /* add hints for ecn */
+ m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
+#endif
}
#endif /* ALTQ */
@@ -2155,6 +2633,44 @@ pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
}
}
+/*
+ * Return 1 if b <= a <= e, otherwise return 0.
+ */
+int
+pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
+ struct pf_addr *a, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if ((a->addr32[0] < b->addr32[0]) ||
+ (a->addr32[0] > e->addr32[0]))
+ return (0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6: {
+ int i;
+
+ /* check a >= b */
+ for (i = 0; i < 4; ++i)
+ if (a->addr32[i] > b->addr32[i])
+ break;
+ else if (a->addr32[i] < b->addr32[i])
+ return (0);
+ /* check a <= e */
+ for (i = 0; i < 4; ++i)
+ if (a->addr32[i] < e->addr32[i])
+ break;
+ else if (a->addr32[i] > e->addr32[i])
+ return (0);
+ break;
+ }
+#endif /* INET6 */
+ }
+ return (1);
+}
+
int
pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
{
@@ -2206,88 +2722,80 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
return (pf_match(op, a1, a2, g));
}
-#ifndef __FreeBSD__
-struct pf_mtag *
-pf_find_mtag(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL)
- return (NULL);
-
- return ((struct pf_mtag *)(mtag + 1));
-}
-
-struct pf_mtag *
-pf_get_mtag(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) {
- mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag),
- M_NOWAIT);
- if (mtag == NULL)
- return (NULL);
- bzero(mtag + 1, sizeof(struct pf_mtag));
- m_tag_prepend(m, mtag);
- }
-
- return ((struct pf_mtag *)(mtag + 1));
-}
-#endif
-
int
-pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
- int *tag)
+#ifdef __FreeBSD__
+pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag,
+ struct pf_mtag *pf_mtag)
+#else
+pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
+#endif
{
if (*tag == -1)
+#ifdef __FreeBSD__
*tag = pf_mtag->tag;
+#else
+ *tag = m->m_pkthdr.pf.tag;
+#endif
return ((!r->match_tag_not && r->match_tag == *tag) ||
(r->match_tag_not && r->match_tag != *tag));
}
int
-pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
+#ifdef __FreeBSD__
+pf_tag_packet(struct mbuf *m, int tag, int rtableid,
+ struct pf_mtag *pf_mtag)
+#else
+pf_tag_packet(struct mbuf *m, int tag, int rtableid)
+#endif
{
if (tag <= 0 && rtableid < 0)
return (0);
- if (pf_mtag == NULL)
- if ((pf_mtag = pf_get_mtag(m)) == NULL)
- return (1);
if (tag > 0)
+#ifdef __FreeBSD__
pf_mtag->tag = tag;
+#else
+ m->m_pkthdr.pf.tag = tag;
+#endif
if (rtableid >= 0)
#ifdef __FreeBSD__
{
M_SETFIB(m, rtableid);
-#endif
- pf_mtag->rtableid = rtableid;
-#ifdef __FreeBSD__
}
+#else
+ m->m_pkthdr.pf.rtableid = rtableid;
#endif
return (0);
}
-static void
+void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
- struct pf_rule **r, struct pf_rule **a, int *match)
+ struct pf_rule **r, struct pf_rule **a, int *match)
{
struct pf_anchor_stackframe *f;
(*r)->anchor->match = 0;
if (match)
*match = 0;
+#ifdef __FreeBSD__
+ if (*depth >= sizeof(V_pf_anchor_stack) /
+ sizeof(V_pf_anchor_stack[0])) {
+#else
if (*depth >= sizeof(pf_anchor_stack) /
sizeof(pf_anchor_stack[0])) {
+#endif
printf("pf_step_into_anchor: stack overflow\n");
*r = TAILQ_NEXT(*r, entries);
return;
} else if (*depth == 0 && a != NULL)
*a = *r;
+#ifdef __FreeBSD__
+ f = V_pf_anchor_stack + (*depth)++;
+#else
f = pf_anchor_stack + (*depth)++;
+#endif
f->rs = *rs;
f->r = *r;
if ((*r)->anchor_wildcard) {
@@ -2316,7 +2824,11 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
do {
if (*depth <= 0)
break;
+#ifdef __FreeBSD__
+ f = V_pf_anchor_stack + *depth - 1;
+#else
f = pf_anchor_stack + *depth - 1;
+#endif
if (f->parent != NULL && f->child != NULL) {
if (f->child->match ||
(match != NULL && *match)) {
@@ -2337,7 +2849,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
if (*depth == 0 && a != NULL)
*a = NULL;
*rs = f->rs;
- if (f->r->anchor->match || (match != NULL && *match))
+ if (f->r->anchor->match || (match != NULL && *match))
quick = f->r->quick;
*r = TAILQ_NEXT(f->r, entries);
} while (*r == NULL);
@@ -2402,567 +2914,6 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af)
}
#endif /* INET6 */
-#define mix(a,b,c) \
- do { \
- a -= b; a -= c; a ^= (c >> 13); \
- b -= c; b -= a; b ^= (a << 8); \
- c -= a; c -= b; c ^= (b >> 13); \
- a -= b; a -= c; a ^= (c >> 12); \
- b -= c; b -= a; b ^= (a << 16); \
- c -= a; c -= b; c ^= (b >> 5); \
- a -= b; a -= c; a ^= (c >> 3); \
- b -= c; b -= a; b ^= (a << 10); \
- c -= a; c -= b; c ^= (b >> 15); \
- } while (0)
-
-/*
- * hash function based on bridge_hash in if_bridge.c
- */
-void
-pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
- struct pf_poolhashkey *key, sa_family_t af)
-{
- u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
-
- switch (af) {
-#ifdef INET
- case AF_INET:
- a += inaddr->addr32[0];
- b += key->key32[1];
- mix(a, b, c);
- hash->addr32[0] = c + key->key32[2];
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- a += inaddr->addr32[0];
- b += inaddr->addr32[2];
- mix(a, b, c);
- hash->addr32[0] = c;
- a += inaddr->addr32[1];
- b += inaddr->addr32[3];
- c += key->key32[1];
- mix(a, b, c);
- hash->addr32[1] = c;
- a += inaddr->addr32[2];
- b += inaddr->addr32[1];
- c += key->key32[2];
- mix(a, b, c);
- hash->addr32[2] = c;
- a += inaddr->addr32[3];
- b += inaddr->addr32[0];
- c += key->key32[3];
- mix(a, b, c);
- hash->addr32[3] = c;
- break;
-#endif /* INET6 */
- }
-}
-
-int
-pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
- struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
-{
- unsigned char hash[16];
- struct pf_pool *rpool = &r->rpool;
- struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
- struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
- struct pf_pooladdr *acur = rpool->cur;
- struct pf_src_node k;
-
- if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
- (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
- k.af = af;
- PF_ACPY(&k.addr, saddr, af);
- if (r->rule_flag & PFRULE_RULESRCTRACK ||
- r->rpool.opts & PF_POOL_STICKYADDR)
- k.rule.ptr = r;
- else
- k.rule.ptr = NULL;
- pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
- *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
- if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
- PF_ACPY(naddr, &(*sn)->raddr, af);
- if (pf_status.debug >= PF_DEBUG_MISC) {
- printf("pf_map_addr: src tracking maps ");
- pf_print_host(&k.addr, 0, af);
- printf(" to ");
- pf_print_host(naddr, 0, af);
- printf("\n");
- }
- return (0);
- }
- }
-
- if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
- return (1);
- if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- switch (af) {
-#ifdef INET
- case AF_INET:
- if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
- (rpool->opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN)
- return (1);
- raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
- rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
- (rpool->opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN)
- return (1);
- raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
- rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
- break;
-#endif /* INET6 */
- }
- } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
- return (1); /* unsupported */
- } else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- }
-
- switch (rpool->opts & PF_POOL_TYPEMASK) {
- case PF_POOL_NONE:
- PF_ACPY(naddr, raddr, af);
- break;
- case PF_POOL_BITMASK:
- PF_POOLMASK(naddr, raddr, rmask, saddr, af);
- break;
- case PF_POOL_RANDOM:
- if (init_addr != NULL && PF_AZERO(init_addr, af)) {
- switch (af) {
-#ifdef INET
- case AF_INET:
- rpool->counter.addr32[0] = htonl(arc4random());
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (rmask->addr32[3] != 0xffffffff)
- rpool->counter.addr32[3] =
- htonl(arc4random());
- else
- break;
- if (rmask->addr32[2] != 0xffffffff)
- rpool->counter.addr32[2] =
- htonl(arc4random());
- else
- break;
- if (rmask->addr32[1] != 0xffffffff)
- rpool->counter.addr32[1] =
- htonl(arc4random());
- else
- break;
- if (rmask->addr32[0] != 0xffffffff)
- rpool->counter.addr32[0] =
- htonl(arc4random());
- break;
-#endif /* INET6 */
- }
- PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
- PF_ACPY(init_addr, naddr, af);
-
- } else {
- PF_AINC(&rpool->counter, af);
- PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
- }
- break;
- case PF_POOL_SRCHASH:
- pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
- PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
- break;
- case PF_POOL_ROUNDROBIN:
- if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if (!pfr_pool_get(rpool->cur->addr.p.tbl,
- &rpool->tblidx, &rpool->counter,
- &raddr, &rmask, af))
- goto get_addr;
- } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter,
- &raddr, &rmask, af))
- goto get_addr;
- } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
- goto get_addr;
-
- try_next:
- if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
- rpool->cur = TAILQ_FIRST(&rpool->list);
- if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- rpool->tblidx = -1;
- if (pfr_pool_get(rpool->cur->addr.p.tbl,
- &rpool->tblidx, &rpool->counter,
- &raddr, &rmask, af)) {
- /* table contains no address of type 'af' */
- if (rpool->cur != acur)
- goto try_next;
- return (1);
- }
- } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- rpool->tblidx = -1;
- if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter,
- &raddr, &rmask, af)) {
- /* table contains no address of type 'af' */
- if (rpool->cur != acur)
- goto try_next;
- return (1);
- }
- } else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- PF_ACPY(&rpool->counter, raddr, af);
- }
-
- get_addr:
- PF_ACPY(naddr, &rpool->counter, af);
- if (init_addr != NULL && PF_AZERO(init_addr, af))
- PF_ACPY(init_addr, naddr, af);
- PF_AINC(&rpool->counter, af);
- break;
- }
- if (*sn != NULL)
- PF_ACPY(&(*sn)->raddr, naddr, af);
-
- if (pf_status.debug >= PF_DEBUG_MISC &&
- (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
- printf("pf_map_addr: selected address ");
- pf_print_host(naddr, 0, af);
- printf("\n");
- }
-
- return (0);
-}
-
-int
-pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
- struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
- struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
- struct pf_src_node **sn)
-{
- struct pf_state_cmp key;
- struct pf_addr init_addr;
- u_int16_t cut;
-
- bzero(&init_addr, sizeof(init_addr));
- if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
- return (1);
-
- if (proto == IPPROTO_ICMP) {
- low = 1;
- high = 65535;
- }
-
- do {
- key.af = af;
- key.proto = proto;
- PF_ACPY(&key.ext.addr, daddr, key.af);
- PF_ACPY(&key.gwy.addr, naddr, key.af);
- key.ext.port = dport;
-
- /*
- * port search; start random, step;
- * similar 2 portloop in in_pcbbind
- */
- if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
- proto == IPPROTO_ICMP)) {
- key.gwy.port = dport;
- if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
- return (0);
- } else if (low == 0 && high == 0) {
- key.gwy.port = *nport;
- if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
- return (0);
- } else if (low == high) {
- key.gwy.port = htons(low);
- if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
- *nport = htons(low);
- return (0);
- }
- } else {
- u_int16_t tmp;
-
- if (low > high) {
- tmp = low;
- low = high;
- high = tmp;
- }
- /* low < high */
- cut = htonl(arc4random()) % (1 + high - low) + low;
- /* low <= cut <= high */
- for (tmp = cut; tmp <= high; ++(tmp)) {
- key.gwy.port = htons(tmp);
- if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
- NULL) {
- *nport = htons(tmp);
- return (0);
- }
- }
- for (tmp = cut - 1; tmp >= low; --(tmp)) {
- key.gwy.port = htons(tmp);
- if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
- NULL) {
- *nport = htons(tmp);
- return (0);
- }
- }
- }
-
- switch (r->rpool.opts & PF_POOL_TYPEMASK) {
- case PF_POOL_RANDOM:
- case PF_POOL_ROUNDROBIN:
- if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
- return (1);
- break;
- case PF_POOL_NONE:
- case PF_POOL_SRCHASH:
- case PF_POOL_BITMASK:
- default:
- return (1);
- }
- } while (! PF_AEQ(&init_addr, naddr, af) );
-
- return (1); /* none available */
-}
-
-struct pf_rule *
-pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
- int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
- struct pf_addr *daddr, u_int16_t dport, int rs_num)
-{
- struct pf_rule *r, *rm = NULL;
- struct pf_ruleset *ruleset = NULL;
- int tag = -1;
- int rtableid = -1;
- int asd = 0;
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
- while (r && rm == NULL) {
- struct pf_rule_addr *src = NULL, *dst = NULL;
- struct pf_addr_wrap *xdst = NULL;
-
- if (r->action == PF_BINAT && direction == PF_IN) {
- src = &r->dst;
- if (r->rpool.cur != NULL)
- xdst = &r->rpool.cur->addr;
- } else {
- src = &r->src;
- dst = &r->dst;
- }
-
- r->evaluations++;
- if (pfi_kif_match(r->kif, kif) == r->ifnot)
- r = r->skip[PF_SKIP_IFP].ptr;
- else if (r->direction && r->direction != direction)
- r = r->skip[PF_SKIP_DIR].ptr;
- else if (r->af && r->af != pd->af)
- r = r->skip[PF_SKIP_AF].ptr;
- else if (r->proto && r->proto != pd->proto)
- r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
- src->neg, kif))
- r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
- PF_SKIP_DST_ADDR].ptr;
- else if (src->port_op && !pf_match_port(src->port_op,
- src->port[0], src->port[1], sport))
- r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
- PF_SKIP_DST_PORT].ptr;
- else if (dst != NULL &&
- PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
- r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
- 0, NULL))
- r = TAILQ_NEXT(r, entries);
- else if (dst != NULL && dst->port_op &&
- !pf_match_port(dst->port_op, dst->port[0],
- dst->port[1], dport))
- r = r->skip[PF_SKIP_DST_PORT].ptr;
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
- IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
- off, pd->hdr.tcp), r->os_fingerprint)))
- r = TAILQ_NEXT(r, entries);
- else {
- if (r->tag)
- tag = r->tag;
- if (r->rtableid >= 0)
- rtableid = r->rtableid;
- if (r->anchor == NULL) {
- rm = r;
- } else
- pf_step_into_anchor(&asd, &ruleset, rs_num,
- &r, NULL, NULL);
- }
- if (r == NULL)
- pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
- NULL, NULL);
- }
- if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
- return (NULL);
- if (rm != NULL && (rm->action == PF_NONAT ||
- rm->action == PF_NORDR || rm->action == PF_NOBINAT))
- return (NULL);
- return (rm);
-}
-
-struct pf_rule *
-pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
- struct pfi_kif *kif, struct pf_src_node **sn,
- struct pf_addr *saddr, u_int16_t sport,
- struct pf_addr *daddr, u_int16_t dport,
- struct pf_addr *naddr, u_int16_t *nport)
-{
- struct pf_rule *r = NULL;
-
- if (direction == PF_OUT) {
- r = pf_match_translation(pd, m, off, direction, kif, saddr,
- sport, daddr, dport, PF_RULESET_BINAT);
- if (r == NULL)
- r = pf_match_translation(pd, m, off, direction, kif,
- saddr, sport, daddr, dport, PF_RULESET_NAT);
- } else {
- r = pf_match_translation(pd, m, off, direction, kif, saddr,
- sport, daddr, dport, PF_RULESET_RDR);
- if (r == NULL)
- r = pf_match_translation(pd, m, off, direction, kif,
- saddr, sport, daddr, dport, PF_RULESET_BINAT);
- }
-
- if (r != NULL) {
- switch (r->action) {
- case PF_NONAT:
- case PF_NOBINAT:
- case PF_NORDR:
- return (NULL);
- case PF_NAT:
- if (pf_get_sport(pd->af, pd->proto, r, saddr,
- daddr, dport, naddr, nport, r->rpool.proxy_port[0],
- r->rpool.proxy_port[1], sn)) {
- DPFPRINTF(PF_DEBUG_MISC,
- ("pf: NAT proxy port allocation "
- "(%u-%u) failed\n",
- r->rpool.proxy_port[0],
- r->rpool.proxy_port[1]));
- return (NULL);
- }
- break;
- case PF_BINAT:
- switch (direction) {
- case PF_OUT:
- if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
- switch (pd->af) {
-#ifdef INET
- case AF_INET:
- if (r->rpool.cur->addr.p.dyn->
- pfid_acnt4 < 1)
- return (NULL);
- PF_POOLMASK(naddr,
- &r->rpool.cur->addr.p.dyn->
- pfid_addr4,
- &r->rpool.cur->addr.p.dyn->
- pfid_mask4,
- saddr, AF_INET);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (r->rpool.cur->addr.p.dyn->
- pfid_acnt6 < 1)
- return (NULL);
- PF_POOLMASK(naddr,
- &r->rpool.cur->addr.p.dyn->
- pfid_addr6,
- &r->rpool.cur->addr.p.dyn->
- pfid_mask6,
- saddr, AF_INET6);
- break;
-#endif /* INET6 */
- }
- } else
- PF_POOLMASK(naddr,
- &r->rpool.cur->addr.v.a.addr,
- &r->rpool.cur->addr.v.a.mask,
- saddr, pd->af);
- break;
- case PF_IN:
- if (r->src.addr.type == PF_ADDR_DYNIFTL) {
- switch (pd->af) {
-#ifdef INET
- case AF_INET:
- if (r->src.addr.p.dyn->
- pfid_acnt4 < 1)
- return (NULL);
- PF_POOLMASK(naddr,
- &r->src.addr.p.dyn->
- pfid_addr4,
- &r->src.addr.p.dyn->
- pfid_mask4,
- daddr, AF_INET);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- if (r->src.addr.p.dyn->
- pfid_acnt6 < 1)
- return (NULL);
- PF_POOLMASK(naddr,
- &r->src.addr.p.dyn->
- pfid_addr6,
- &r->src.addr.p.dyn->
- pfid_mask6,
- daddr, AF_INET6);
- break;
-#endif /* INET6 */
- }
- } else
- PF_POOLMASK(naddr,
- &r->src.addr.v.a.addr,
- &r->src.addr.v.a.mask, daddr,
- pd->af);
- break;
- }
- break;
- case PF_RDR: {
- if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
- return (NULL);
- if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
- PF_POOL_BITMASK)
- PF_POOLMASK(naddr, naddr,
- &r->rpool.cur->addr.v.a.mask, daddr,
- pd->af);
-
- if (r->rpool.proxy_port[1]) {
- u_int32_t tmp_nport;
-
- tmp_nport = ((ntohs(dport) -
- ntohs(r->dst.port[0])) %
- (r->rpool.proxy_port[1] -
- r->rpool.proxy_port[0] + 1)) +
- r->rpool.proxy_port[0];
-
- /* wrap around if necessary */
- if (tmp_nport > 65535)
- tmp_nport -= 65535;
- *nport = htons((u_int16_t)tmp_nport);
- } else if (r->rpool.proxy_port[0])
- *nport = htons(r->rpool.proxy_port[0]);
- break;
- }
- default:
- return (NULL);
- }
- }
-
- return (r);
-}
-
int
#ifdef __FreeBSD__
pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
@@ -2983,7 +2934,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
return (-1);
pd->lookup.uid = UID_MAX;
pd->lookup.gid = GID_MAX;
- pd->lookup.pid = NO_PID; /* XXX: revisit */
+ pd->lookup.pid = NO_PID;
+
#ifdef __FreeBSD__
if (inp_arg != NULL) {
INP_LOCK_ASSERT(inp_arg);
@@ -2997,6 +2949,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
return (1);
}
#endif
+
switch (pd->proto) {
case IPPROTO_TCP:
if (pd->hdr.tcp == NULL)
@@ -3039,21 +2992,24 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
#ifdef INET
case AF_INET:
#ifdef __FreeBSD__
- INP_INFO_RLOCK(pi); /* XXX LOR */
- inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
- dport, 0, NULL);
+ /*
+ * XXXRW: would be nice if we had an mbuf here so that we
+ * could use in_pcblookup_mbuf().
+ */
+ inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4,
+ dport, INPLOOKUP_RLOCKPCB, NULL);
if (inp == NULL) {
- inp = in_pcblookup_hash(pi, saddr->v4, sport,
- daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
- if(inp == NULL) {
- INP_INFO_RUNLOCK(pi);
+ inp = in_pcblookup(pi, saddr->v4, sport,
+ daddr->v4, dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp == NULL)
return (-1);
- }
}
#else
inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
if (inp == NULL) {
- inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
+ inp = in_pcblookup_listen(tb, daddr->v4, dport, 0,
+ NULL);
if (inp == NULL)
return (-1);
}
@@ -3063,22 +3019,25 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
#ifdef INET6
case AF_INET6:
#ifdef __FreeBSD__
- INP_INFO_RLOCK(pi);
- inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
- &daddr->v6, dport, 0, NULL);
+ /*
+ * XXXRW: would be nice if we had an mbuf here so that we
+ * could use in6_pcblookup_mbuf().
+ */
+ inp = in6_pcblookup(pi, &saddr->v6, sport,
+ &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL);
if (inp == NULL) {
- inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
- &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
- if (inp == NULL) {
- INP_INFO_RUNLOCK(pi);
+ inp = in6_pcblookup(pi, &saddr->v6, sport,
+ &daddr->v6, dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp == NULL)
return (-1);
- }
}
#else
inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
dport);
if (inp == NULL) {
- inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
+ inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0,
+ NULL);
if (inp == NULL)
return (-1);
}
@@ -3090,6 +3049,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
return (-1);
}
#ifdef __FreeBSD__
+ INP_RLOCK_ASSERT(inp);
#ifndef __rtems__
pd->lookup.uid = inp->inp_cred->cr_uid;
pd->lookup.gid = inp->inp_cred->cr_groups[0];
@@ -3097,7 +3057,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
pd->lookup.uid = BSD_DEFAULT_UID;
pd->lookup.gid = BSD_DEFAULT_GID;
#endif /* __rtems__ */
- INP_INFO_RUNLOCK(pi);
+ INP_RUNLOCK(inp);
#else
pd->lookup.uid = inp->inp_socket->so_euid;
pd->lookup.gid = inp->inp_socket->so_egid;
@@ -3152,7 +3112,11 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
int hlen;
u_int8_t hdr[60];
u_int8_t *opt, optlen;
+#ifdef __FreeBSD__
u_int16_t mss = V_tcp_mssdflt;
+#else
+ u_int16_t mss = tcp_mssdflt;
+#endif
hlen = th_off << 2; /* hlen <= sizeof(hdr) */
if (hlen <= sizeof(struct tcphdr))
@@ -3185,7 +3149,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
}
u_int16_t
-pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
+pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
{
#ifdef INET
struct sockaddr_in *dst;
@@ -3196,8 +3160,13 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
struct route_in6 ro6;
#endif /* INET6 */
struct rtentry *rt = NULL;
- int hlen = 0; /* make the compiler happy */
+#ifdef __FreeBSD__
+ int hlen = 0;
u_int16_t mss = V_tcp_mssdflt;
+#else
+ int hlen;
+ u_int16_t mss = tcp_mssdflt;
+#endif
switch (af) {
#ifdef INET
@@ -3209,7 +3178,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
dst->sin_len = sizeof(*dst);
dst->sin_addr = addr->v4;
#ifdef __FreeBSD__
- in_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB);
+ in_rtalloc_ign(&ro, 0, rtableid);
#else /* ! __FreeBSD__ */
rtalloc_noclone(&ro, NO_CLONING);
#endif
@@ -3225,7 +3194,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
dst6->sin6_len = sizeof(*dst6);
dst6->sin6_addr = addr->v6;
#ifdef __FreeBSD__
- in6_rtalloc_ign(&ro6, 0, RT_DEFAULT_FIB);
+ in6_rtalloc_ign(&ro6, 0, rtableid);
#else /* ! __FreeBSD__ */
rtalloc_noclone((struct route *)&ro6, NO_CLONING);
#endif
@@ -3236,7 +3205,11 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
if (rt && rt->rt_ifp) {
mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
+#ifdef __FreeBSD__
mss = max(V_tcp_mssdflt, mss);
+#else
+ mss = max(tcp_mssdflt, mss);
+#endif
RTFREE(rt);
}
mss = min(mss, offer);
@@ -3248,55 +3221,113 @@ void
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
{
struct pf_rule *r = s->rule.ptr;
+ struct pf_src_node *sn = NULL;
s->rt_kif = NULL;
if (!r->rt || r->rt == PF_FASTROUTE)
return;
- switch (s->af) {
+ switch (s->key[PF_SK_WIRE]->af) {
#ifdef INET
case AF_INET:
- pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
- &s->nat_src_node);
+ pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &sn);
s->rt_kif = r->rpool.cur->kif;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
- pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
- &s->nat_src_node);
+ pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &sn);
s->rt_kif = r->rpool.cur->kif;
break;
#endif /* INET6 */
}
}
+u_int32_t
+pf_tcp_iss(struct pf_pdesc *pd)
+{
+ MD5_CTX ctx;
+ u_int32_t digest[4];
+
+#ifdef __FreeBSD__
+ if (V_pf_tcp_secret_init == 0) {
+ read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
+ MD5Init(&V_pf_tcp_secret_ctx);
+ MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
+ sizeof(V_pf_tcp_secret));
+ V_pf_tcp_secret_init = 1;
+ }
+
+ ctx = V_pf_tcp_secret_ctx;
+#else
+ if (pf_tcp_secret_init == 0) {
+ arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
+ MD5Init(&pf_tcp_secret_ctx);
+ MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
+ sizeof(pf_tcp_secret));
+ pf_tcp_secret_init = 1;
+ }
+
+ ctx = pf_tcp_secret_ctx;
+#endif
+
+ MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
+ MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
+ if (pd->af == AF_INET6) {
+ MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
+ MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
+ } else {
+ MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
+ MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
+ }
+ MD5Final((u_char *)digest, &ctx);
+#ifdef __FreeBSD__
+ V_pf_tcp_iss_off += 4096;
+#define ISN_RANDOM_INCREMENT (4096 - 1)
+ return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
+ V_pf_tcp_iss_off);
+#undef ISN_RANDOM_INCREMENT
+#else
+ pf_tcp_iss_off += 4096;
+ return (digest[0] + tcp_iss + pf_tcp_iss_off);
+#endif
+}
+
int
-pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
+pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
struct pfi_kif *kif, struct mbuf *m, int off, void *h,
-#ifdef __FreeBSD__
struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+#ifdef __FreeBSD__
struct ifqueue *ifq, struct inpcb *inp)
#else
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
struct ifqueue *ifq)
#endif
{
struct pf_rule *nr = NULL;
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
- struct tcphdr *th = pd->hdr.tcp;
- u_int16_t bport, nport = 0;
sa_family_t af = pd->af;
struct pf_rule *r, *a = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_src_node *nsn = NULL;
+ struct tcphdr *th = pd->hdr.tcp;
+ struct pf_state_key *skw = NULL, *sks = NULL;
+ struct pf_state_key *sk = NULL, *nk = NULL;
u_short reason;
- int rewrite = 0;
+ int rewrite = 0, hdrlen = 0;
int tag = -1, rtableid = -1;
- u_int16_t mss = V_tcp_mssdflt;
int asd = 0;
int match = 0;
+ int state_icmp = 0;
+#ifdef __FreeBSD__
+ u_int16_t sport = 0, dport = 0;
+ u_int16_t bproto_sum = 0, bip_sum = 0;
+#else
+ u_int16_t sport, dport;
+ u_int16_t bproto_sum = 0, bip_sum;
+#endif
+ u_int8_t icmptype = 0, icmpcode = 0;
- if (pf_check_congestion(ifq)) {
+
+ if (direction == PF_IN && pf_check_congestion(ifq)) {
REASON_SET(&reason, PFRES_CONGEST);
return (PF_DROP);
}
@@ -3304,44 +3335,193 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
#ifdef __FreeBSD__
if (inp != NULL)
pd->lookup.done = pf_socket_lookup(direction, pd, inp);
- else if (debug_pfugidhack) {
+ else if (V_debug_pfugidhack) {
PF_UNLOCK();
DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
- pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
PF_LOCK();
}
#endif
+ switch (pd->proto) {
+ case IPPROTO_TCP:
+ sport = th->th_sport;
+ dport = th->th_dport;
+ hdrlen = sizeof(*th);
+ break;
+ case IPPROTO_UDP:
+ sport = pd->hdr.udp->uh_sport;
+ dport = pd->hdr.udp->uh_dport;
+ hdrlen = sizeof(*pd->hdr.udp);
+ break;
+#ifdef INET
+ case IPPROTO_ICMP:
+ if (pd->af != AF_INET)
+ break;
+ sport = dport = pd->hdr.icmp->icmp_id;
+ hdrlen = sizeof(*pd->hdr.icmp);
+ icmptype = pd->hdr.icmp->icmp_type;
+ icmpcode = pd->hdr.icmp->icmp_code;
+
+ if (icmptype == ICMP_UNREACH ||
+ icmptype == ICMP_SOURCEQUENCH ||
+ icmptype == ICMP_REDIRECT ||
+ icmptype == ICMP_TIMXCEED ||
+ icmptype == ICMP_PARAMPROB)
+ state_icmp++;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ if (af != AF_INET6)
+ break;
+ sport = dport = pd->hdr.icmp6->icmp6_id;
+ hdrlen = sizeof(*pd->hdr.icmp6);
+ icmptype = pd->hdr.icmp6->icmp6_type;
+ icmpcode = pd->hdr.icmp6->icmp6_code;
+
+ if (icmptype == ICMP6_DST_UNREACH ||
+ icmptype == ICMP6_PACKET_TOO_BIG ||
+ icmptype == ICMP6_TIME_EXCEEDED ||
+ icmptype == ICMP6_PARAM_PROB)
+ state_icmp++;
+ break;
+#endif /* INET6 */
+ default:
+ sport = dport = hdrlen = 0;
+ break;
+ }
+
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
- if (direction == PF_OUT) {
- bport = nport = th->th_sport;
- /* check outgoing packet for BINAT/NAT */
- if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
- saddr, th->th_sport, daddr, th->th_dport,
- &pd->naddr, &nport)) != NULL) {
- PF_ACPY(&pd->baddr, saddr, af);
- pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
- &th->th_sum, &pd->naddr, nport, 0, af);
- rewrite++;
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
+ /* check packet for BINAT/NAT/RDR */
+ if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
+ &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
+ if (nk == NULL || sk == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ goto cleanup;
}
- } else {
- bport = nport = th->th_dport;
- /* check incoming packet for BINAT/RDR */
- if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
- saddr, th->th_sport, daddr, th->th_dport,
- &pd->naddr, &nport)) != NULL) {
- PF_ACPY(&pd->baddr, daddr, af);
- pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
- &th->th_sum, &pd->naddr, nport, 0, af);
+
+ if (pd->ip_sum)
+ bip_sum = *pd->ip_sum;
+
+ switch (pd->proto) {
+ case IPPROTO_TCP:
+ bproto_sum = th->th_sum;
+ pd->proto_sum = &th->th_sum;
+
+ if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
+ nk->port[pd->sidx] != sport) {
+ pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
+ &th->th_sum, &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 0, af);
+ pd->sport = &th->th_sport;
+ sport = th->th_sport;
+ }
+
+ if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
+ nk->port[pd->didx] != dport) {
+ pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
+ &th->th_sum, &nk->addr[pd->didx],
+ nk->port[pd->didx], 0, af);
+ dport = th->th_dport;
+ pd->dport = &th->th_dport;
+ }
+ rewrite++;
+ break;
+ case IPPROTO_UDP:
+ bproto_sum = pd->hdr.udp->uh_sum;
+ pd->proto_sum = &pd->hdr.udp->uh_sum;
+
+ if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
+ nk->port[pd->sidx] != sport) {
+ pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
+ pd->ip_sum, &pd->hdr.udp->uh_sum,
+ &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 1, af);
+ sport = pd->hdr.udp->uh_sport;
+ pd->sport = &pd->hdr.udp->uh_sport;
+ }
+
+ if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
+ nk->port[pd->didx] != dport) {
+ pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
+ pd->ip_sum, &pd->hdr.udp->uh_sum,
+ &nk->addr[pd->didx],
+ nk->port[pd->didx], 1, af);
+ dport = pd->hdr.udp->uh_dport;
+ pd->dport = &pd->hdr.udp->uh_dport;
+ }
+ rewrite++;
+ break;
+#ifdef INET
+ case IPPROTO_ICMP:
+ nk->port[0] = nk->port[1];
+ if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
+ pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+ nk->addr[pd->sidx].v4.s_addr, 0);
+
+ if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
+ pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
+ nk->addr[pd->didx].v4.s_addr, 0);
+
+ if (nk->port[1] != pd->hdr.icmp->icmp_id) {
+ pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, sport,
+ nk->port[1], 0);
+ pd->hdr.icmp->icmp_id = nk->port[1];
+ pd->sport = &pd->hdr.icmp->icmp_id;
+ }
+ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ nk->port[0] = nk->port[1];
+ if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
+ pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
+ &nk->addr[pd->sidx], 0);
+
+ if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
+ pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
+ &nk->addr[pd->didx], 0);
rewrite++;
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
+ break;
+#endif /* INET */
+ default:
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (PF_ANEQ(saddr,
+ &nk->addr[pd->sidx], AF_INET))
+ pf_change_a(&saddr->v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->sidx].v4.s_addr, 0);
+
+ if (PF_ANEQ(daddr,
+ &nk->addr[pd->didx], AF_INET))
+ pf_change_a(&daddr->v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->didx].v4.s_addr, 0);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (PF_ANEQ(saddr,
+ &nk->addr[pd->sidx], AF_INET6))
+ PF_ACPY(saddr, &nk->addr[pd->sidx], af);
+
+ if (PF_ANEQ(daddr,
+ &nk->addr[pd->didx], AF_INET6))
+ PF_ACPY(saddr, &nk->addr[pd->didx], af);
+ break;
+#endif /* INET */
+ }
+ break;
}
+ if (nr->natpass)
+ r = NULL;
+ pd->nat_rule = nr;
}
while (r != NULL) {
@@ -3352,26 +3532,36 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
r = r->skip[PF_SKIP_DIR].ptr;
else if (r->af && r->af != af)
r = r->skip[PF_SKIP_AF].ptr;
- else if (r->proto && r->proto != IPPROTO_TCP)
+ else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
- r->src.neg, kif))
+ r->src.neg, kif, M_GETFIB(m)))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ /* tcp/udp only. port_op always 0 in other cases */
else if (r->src.port_op && !pf_match_port(r->src.port_op,
- r->src.port[0], r->src.port[1], th->th_sport))
+ r->src.port[0], r->src.port[1], sport))
r = r->skip[PF_SKIP_SRC_PORT].ptr;
else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
- r->dst.neg, NULL))
+ r->dst.neg, NULL, M_GETFIB(m)))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ /* tcp/udp only. port_op always 0 in other cases */
else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
- r->dst.port[0], r->dst.port[1], th->th_dport))
+ r->dst.port[0], r->dst.port[1], dport))
r = r->skip[PF_SKIP_DST_PORT].ptr;
+ /* icmp only. type always 0 in other cases */
+ else if (r->type && r->type != icmptype + 1)
+ r = TAILQ_NEXT(r, entries);
+ /* icmp only. type always 0 in other cases */
+ else if (r->code && r->code != icmpcode + 1)
+ r = TAILQ_NEXT(r, entries);
else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
else if (r->rule_flag & PFRULE_FRAGMENT)
r = TAILQ_NEXT(r, entries);
- else if ((r->flagset & th->th_flags) != r->flags)
+ else if (pd->proto == IPPROTO_TCP &&
+ (r->flagset & th->th_flags) != r->flags)
r = TAILQ_NEXT(r, entries);
+ /* tcp/udp only. uid.op always 0 in other cases */
else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
#ifdef __FreeBSD__
pf_socket_lookup(direction, pd, inp), 1)) &&
@@ -3381,6 +3571,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
pd->lookup.uid))
r = TAILQ_NEXT(r, entries);
+ /* tcp/udp only. gid.op always 0 in other cases */
else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
#ifdef __FreeBSD__
pf_socket_lookup(direction, pd, inp), 1)) &&
@@ -3390,12 +3581,23 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
pd->lookup.gid))
r = TAILQ_NEXT(r, entries);
- else if (r->prob && r->prob <= arc4random())
+ else if (r->prob &&
+#ifdef __FreeBSD__
+ r->prob <= arc4random())
+#else
+ r->prob <= arc4random_uniform(UINT_MAX - 1) + 1)
+#endif
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+#ifdef __FreeBSD__
+ else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
+#else
+ else if (r->match_tag && !pf_match_tag(m, r, &tag))
+#endif
r = TAILQ_NEXT(r, entries);
- else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
- pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
+ else if (r->os_fingerprint != PF_OSFP_ANY &&
+ (pd->proto != IPPROTO_TCP || !pf_osfp_match(
+ pf_osfp_fingerprint(pd, m, off, th),
+ r->os_fingerprint)))
r = TAILQ_NEXT(r, entries);
else {
if (r->tag)
@@ -3424,13 +3626,9 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
REASON_SET(&reason, PFRES_MATCH);
- if (r->log || (nr != NULL && nr->natpass && nr->log)) {
+ if (r->log || (nr != NULL && nr->log)) {
if (rewrite)
-#ifdef __FreeBSD__
- m_copyback(m, off, sizeof(*th), (caddr_t)th);
-#else
- m_copyback(m, off, sizeof(*th), th);
-#endif
+ m_copyback(m, off, hdrlen, pd->hdr.any);
PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
a, ruleset, pd);
}
@@ -3441,161 +3639,233 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
(r->rule_flag & PFRULE_RETURN))) {
/* undo NAT changes, if they have taken place */
if (nr != NULL) {
- if (direction == PF_OUT) {
- pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
- &th->th_sum, &pd->baddr, bport, 0, af);
- rewrite++;
- } else {
- pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
- &th->th_sum, &pd->baddr, bport, 0, af);
- rewrite++;
- }
- }
- if (((r->rule_flag & PFRULE_RETURNRST) ||
+ PF_ACPY(saddr, &sk->addr[pd->sidx], af);
+ PF_ACPY(daddr, &sk->addr[pd->didx], af);
+ if (pd->sport)
+ *pd->sport = sk->port[pd->sidx];
+ if (pd->dport)
+ *pd->dport = sk->port[pd->didx];
+ if (pd->proto_sum)
+ *pd->proto_sum = bproto_sum;
+ if (pd->ip_sum)
+ *pd->ip_sum = bip_sum;
+ m_copyback(m, off, hdrlen, pd->hdr.any);
+ }
+ if (pd->proto == IPPROTO_TCP &&
+ ((r->rule_flag & PFRULE_RETURNRST) ||
(r->rule_flag & PFRULE_RETURN)) &&
!(th->th_flags & TH_RST)) {
- u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+ u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
+ int len = 0;
+#ifdef INET
+ struct ip *h4;
+#endif
+#ifdef INET6
+ struct ip6_hdr *h6;
+#endif
- if (th->th_flags & TH_SYN)
- ack++;
- if (th->th_flags & TH_FIN)
- ack++;
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ h4 = mtod(m, struct ip *);
+ len = ntohs(h4->ip_len) - off;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ h6 = mtod(m, struct ip6_hdr *);
+ len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
+ break;
+#endif
+ }
+
+ if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
+ REASON_SET(&reason, PFRES_PROTCKSUM);
+ else {
+ if (th->th_flags & TH_SYN)
+ ack++;
+ if (th->th_flags & TH_FIN)
+ ack++;
#ifdef __FreeBSD__
- pf_send_tcp(m, r, af, pd->dst,
+ pf_send_tcp(m, r, af, pd->dst,
#else
- pf_send_tcp(r, af, pd->dst,
+ pf_send_tcp(r, af, pd->dst,
#endif
- pd->src, th->th_dport, th->th_sport,
- ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
- r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
- } else if ((af == AF_INET) && r->return_icmp)
+ pd->src, th->th_dport, th->th_sport,
+ ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
+ r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
+ }
+ } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
+ r->return_icmp)
pf_send_icmp(m, r->return_icmp >> 8,
r->return_icmp & 255, af, r);
- else if ((af == AF_INET6) && r->return_icmp6)
+ else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
+ r->return_icmp6)
pf_send_icmp(m, r->return_icmp6 >> 8,
r->return_icmp6 & 255, af, r);
}
if (r->action == PF_DROP)
- return (PF_DROP);
+ goto cleanup;
- if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+#ifdef __FreeBSD__
+ if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag)) {
+#else
+ if (pf_tag_packet(m, tag, rtableid)) {
+#endif
REASON_SET(&reason, PFRES_MEMORY);
- return (PF_DROP);
+ goto cleanup;
}
- if (r->keep_state || nr != NULL ||
- (pd->flags & PFDESC_TCP_NORM)) {
- /* create new state */
- u_int16_t len;
- struct pf_state *s = NULL;
- struct pf_src_node *sn = NULL;
+ if (!state_icmp && (r->keep_state || nr != NULL ||
+ (pd->flags & PFDESC_TCP_NORM))) {
+ int action;
+ action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
+ off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
+ bip_sum, hdrlen);
+ if (action != PF_PASS)
+ return (action);
+ } else {
+#ifdef __FreeBSD__
+ if (sk != NULL)
+ pool_put(&V_pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&V_pf_state_key_pl, nk);
+#else
+ if (sk != NULL)
+ pool_put(&pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&pf_state_key_pl, nk);
+#endif
+ }
- len = pd->tot_len - off - (th->th_off << 2);
+ /* copy back packet headers if we performed NAT operations */
+ if (rewrite)
+ m_copyback(m, off, hdrlen, pd->hdr.any);
+
+#if NPFSYNC > 0
+ if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
+#ifdef __FreeBSD__
+ direction == PF_OUT && pfsync_up_ptr != NULL && pfsync_up_ptr()) {
+#else
+ direction == PF_OUT && pfsync_up()) {
+#endif
+ /*
+ * We want the state created, but we dont
+ * want to send this in case a partner
+ * firewall has to know about it to allow
+ * replies through it.
+ */
+#ifdef __FreeBSD__
+ if (pfsync_defer_ptr != NULL &&
+ pfsync_defer_ptr(*sm, m))
+#else
+ if (pfsync_defer(*sm, m))
+#endif
+ return (PF_DEFER);
+ }
+#endif
+
+ return (PF_PASS);
- /* check maximums */
- if (r->max_states && (r->states >= r->max_states)) {
- pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(&reason, PFRES_MAXSTATES);
- goto cleanup;
- }
- /* src node for filter rule */
- if ((r->rule_flag & PFRULE_SRCTRACK ||
- r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- /* src node for translation rule */
- if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
- ((direction == PF_OUT &&
- pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- s = pool_get(&pf_state_pl, PR_NOWAIT);
- if (s == NULL) {
- REASON_SET(&reason, PFRES_MEMORY);
cleanup:
- if (sn != NULL && sn->states == 0 && sn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, sn);
- }
- if (nsn != sn && nsn != NULL && nsn->states == 0 &&
- nsn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, nsn);
- }
- return (PF_DROP);
- }
- bzero(s, sizeof(*s));
- s->rule.ptr = r;
- s->nat_rule.ptr = nr;
- s->anchor.ptr = a;
- STATE_INC_COUNTERS(s);
- if (r->allow_opts)
- s->state_flags |= PFSTATE_ALLOWOPTS;
- if (r->rule_flag & PFRULE_STATESLOPPY)
- s->state_flags |= PFSTATE_SLOPPY;
- s->log = r->log & PF_LOG_ALL;
- if (nr != NULL)
- s->log |= nr->log & PF_LOG_ALL;
- s->proto = IPPROTO_TCP;
- s->direction = direction;
- s->af = af;
- if (direction == PF_OUT) {
- PF_ACPY(&s->gwy.addr, saddr, af);
- s->gwy.port = th->th_sport; /* sport */
- PF_ACPY(&s->ext.addr, daddr, af);
- s->ext.port = th->th_dport;
- if (nr != NULL) {
- PF_ACPY(&s->lan.addr, &pd->baddr, af);
- s->lan.port = bport;
- } else {
- PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
- s->lan.port = s->gwy.port;
- }
- } else {
- PF_ACPY(&s->lan.addr, daddr, af);
- s->lan.port = th->th_dport;
- PF_ACPY(&s->ext.addr, saddr, af);
- s->ext.port = th->th_sport;
- if (nr != NULL) {
- PF_ACPY(&s->gwy.addr, &pd->baddr, af);
- s->gwy.port = bport;
- } else {
- PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
- s->gwy.port = s->lan.port;
- }
- }
+#ifdef __FreeBSD__
+ if (sk != NULL)
+ pool_put(&V_pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&V_pf_state_key_pl, nk);
+#else
+ if (sk != NULL)
+ pool_put(&pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&pf_state_key_pl, nk);
+#endif
+ return (PF_DROP);
+}
+static __inline int
+pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
+ struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
+ struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
+ struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
+ struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
+ u_int16_t bip_sum, int hdrlen)
+{
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+ struct tcphdr *th = pd->hdr.tcp;
+#ifdef __FreeBSD__
+ u_int16_t mss = V_tcp_mssdflt;
+#else
+ u_int16_t mss = tcp_mssdflt;
+#endif
+ u_short reason;
+
+ /* check maximums */
+ if (r->max_states && (r->states_cur >= r->max_states)) {
+#ifdef __FreeBSD__
+ V_pf_status.lcounters[LCNT_STATES]++;
+#else
+ pf_status.lcounters[LCNT_STATES]++;
+#endif
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ return (PF_DROP);
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto csfailed;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto csfailed;
+ }
+#ifdef __FreeBSD__
+ s = pool_get(&V_pf_state_pl, PR_NOWAIT | PR_ZERO);
+#else
+ s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
+#endif
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ goto csfailed;
+ }
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ if (r->rule_flag & PFRULE_PFLOW)
+ s->state_flags |= PFSTATE_PFLOW;
+ s->log = r->log & PF_LOG_ALL;
+ s->sync_state = PFSYNC_S_NONE;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ switch (pd->proto) {
+ case IPPROTO_TCP:
s->src.seqlo = ntohl(th->th_seq);
- s->src.seqhi = s->src.seqlo + len + 1;
+ s->src.seqhi = s->src.seqlo + pd->p_len + 1;
if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
r->keep_state == PF_STATE_MODULATE) {
/* Generate sequence number modulator */
-#ifdef __FreeBSD__
- while ((s->src.seqdiff =
- pf_new_isn(s) - s->src.seqlo) == 0)
- ;
-#else
- while ((s->src.seqdiff =
- tcp_rndiss_next() - s->src.seqlo) == 0)
- ;
-#endif
+ if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
+ 0)
+ s->src.seqdiff = 1;
pf_change_a(&th->th_seq, &th->th_sum,
htonl(s->src.seqlo + s->src.seqdiff), 0);
- rewrite = 1;
+ *rewrite = 1;
} else
s->src.seqdiff = 0;
if (th->th_flags & TH_SYN) {
s->src.seqhi++;
- s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
+ s->src.wscale = pf_get_wscale(m, off,
+ th->th_off, pd->af);
}
s->src.max_win = MAX(ntohs(th->th_win), 1);
if (s->src.wscale & PF_WSCALE_MASK) {
@@ -3611,994 +3881,174 @@ cleanup:
s->dst.max_win = 1;
s->src.state = TCPS_SYN_SENT;
s->dst.state = TCPS_CLOSED;
- s->creation = time_second;
- s->expire = time_second;
s->timeout = PFTM_TCP_FIRST_PACKET;
- pf_set_rt_ifp(s, saddr);
- if (sn != NULL) {
- s->src_node = sn;
- s->src_node->states++;
- }
- if (nsn != NULL) {
- PF_ACPY(&nsn->raddr, &pd->naddr, af);
- s->nat_src_node = nsn;
- s->nat_src_node->states++;
- }
+ break;
+ case IPPROTO_UDP:
+ s->src.state = PFUDPS_SINGLE;
+ s->dst.state = PFUDPS_NO_TRAFFIC;
+ s->timeout = PFTM_UDP_FIRST_PACKET;
+ break;
+ case IPPROTO_ICMP:
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+#endif
+ s->timeout = PFTM_ICMP_FIRST_PACKET;
+ break;
+ default:
+ s->src.state = PFOTHERS_SINGLE;
+ s->dst.state = PFOTHERS_NO_TRAFFIC;
+ s->timeout = PFTM_OTHER_FIRST_PACKET;
+ }
+
+ s->creation = time_second;
+ s->expire = time_second;
+
+ if (sn != NULL) {
+ s->src_node = sn;
+ s->src_node->states++;
+ }
+ if (nsn != NULL) {
+ /* XXX We only modify one side for now. */
+ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
+ s->nat_src_node = nsn;
+ s->nat_src_node->states++;
+ }
+ if (pd->proto == IPPROTO_TCP) {
if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
off, pd, th, &s->src, &s->dst)) {
REASON_SET(&reason, PFRES_MEMORY);
pf_src_tree_remove_state(s);
STATE_DEC_COUNTERS(s);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_pl, s);
+#else
pool_put(&pf_state_pl, s);
+#endif
return (PF_DROP);
}
if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
- &s->src, &s->dst, &rewrite)) {
+ &s->src, &s->dst, rewrite)) {
/* This really shouldn't happen!!! */
DPFPRINTF(PF_DEBUG_URGENT,
("pf_normalize_tcp_stateful failed on first pkt"));
pf_normalize_tcp_cleanup(s);
pf_src_tree_remove_state(s);
STATE_DEC_COUNTERS(s);
- pool_put(&pf_state_pl, s);
- return (PF_DROP);
- }
- if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- pf_normalize_tcp_cleanup(s);
- REASON_SET(&reason, PFRES_STATEINS);
- pf_src_tree_remove_state(s);
- STATE_DEC_COUNTERS(s);
- pool_put(&pf_state_pl, s);
- return (PF_DROP);
- } else
- *sm = s;
- if (tag > 0) {
- pf_tag_ref(tag);
- s->tag = tag;
- }
- if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
- r->keep_state == PF_STATE_SYNPROXY) {
- s->src.state = PF_TCPS_PROXY_SRC;
- if (nr != NULL) {
- if (direction == PF_OUT) {
- pf_change_ap(saddr, &th->th_sport,
- pd->ip_sum, &th->th_sum, &pd->baddr,
- bport, 0, af);
- } else {
- pf_change_ap(daddr, &th->th_dport,
- pd->ip_sum, &th->th_sum, &pd->baddr,
- bport, 0, af);
- }
- }
- s->src.seqhi = htonl(arc4random());
- /* Find mss option */
- mss = pf_get_mss(m, off, th->th_off, af);
- mss = pf_calc_mss(saddr, af, mss);
- mss = pf_calc_mss(daddr, af, mss);
- s->src.mss = mss;
#ifdef __FreeBSD__
- pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport,
+ pool_put(&V_pf_state_pl, s);
#else
- pf_send_tcp(r, af, daddr, saddr, th->th_dport,
+ pool_put(&pf_state_pl, s);
#endif
- th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
- TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
- REASON_SET(&reason, PFRES_SYNPROXY);
- return (PF_SYNPROXY_DROP);
+ return (PF_DROP);
}
}
+ s->direction = pd->dir;
- /* copy back packet headers if we performed NAT operations */
- if (rewrite)
- m_copyback(m, off, sizeof(*th), (caddr_t)th);
-
- return (PF_PASS);
-}
+ if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
+ pd->src, pd->dst, sport, dport))
+ goto csfailed;
-int
-pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
- struct pfi_kif *kif, struct mbuf *m, int off, void *h,
+ if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
+ if (pd->proto == IPPROTO_TCP)
+ pf_normalize_tcp_cleanup(s);
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
#ifdef __FreeBSD__
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
- struct ifqueue *ifq, struct inpcb *inp)
+ pool_put(&V_pf_state_pl, s);
#else
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
- struct ifqueue *ifq)
+ pool_put(&pf_state_pl, s);
#endif
-{
- struct pf_rule *nr = NULL;
- struct pf_addr *saddr = pd->src, *daddr = pd->dst;
- struct udphdr *uh = pd->hdr.udp;
- u_int16_t bport, nport = 0;
- sa_family_t af = pd->af;
- struct pf_rule *r, *a = NULL;
- struct pf_ruleset *ruleset = NULL;
- struct pf_src_node *nsn = NULL;
- u_short reason;
- int rewrite = 0;
- int tag = -1, rtableid = -1;
- int asd = 0;
- int match = 0;
-
- if (pf_check_congestion(ifq)) {
- REASON_SET(&reason, PFRES_CONGEST);
return (PF_DROP);
- }
+ } else
+ *sm = s;
-#ifdef __FreeBSD__
- if (inp != NULL)
- pd->lookup.done = pf_socket_lookup(direction, pd, inp);
- else if (debug_pfugidhack) {
- PF_UNLOCK();
- DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
- pd->lookup.done = pf_socket_lookup(direction, pd, inp);
- PF_LOCK();
+ pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
}
+ if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
+ TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
+ s->src.state = PF_TCPS_PROXY_SRC;
+ /* undo NAT changes, if they have taken place */
+ if (nr != NULL) {
+ struct pf_state_key *skt = s->key[PF_SK_WIRE];
+ if (pd->dir == PF_OUT)
+ skt = s->key[PF_SK_STACK];
+ PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
+ PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
+ if (pd->sport)
+ *pd->sport = skt->port[pd->sidx];
+ if (pd->dport)
+ *pd->dport = skt->port[pd->didx];
+ if (pd->proto_sum)
+ *pd->proto_sum = bproto_sum;
+ if (pd->ip_sum)
+ *pd->ip_sum = bip_sum;
+ m_copyback(m, off, hdrlen, pd->hdr.any);
+ }
+ s->src.seqhi = htonl(arc4random());
+ /* Find mss option */
+ int rtid = M_GETFIB(m);
+ mss = pf_get_mss(m, off, th->th_off, pd->af);
+ mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
+ mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
+ s->src.mss = mss;
+#ifdef __FreeBSD__
+ pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
+#else
+ pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
#endif
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
-
- if (direction == PF_OUT) {
- bport = nport = uh->uh_sport;
- /* check outgoing packet for BINAT/NAT */
- if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
- saddr, uh->uh_sport, daddr, uh->uh_dport,
- &pd->naddr, &nport)) != NULL) {
- PF_ACPY(&pd->baddr, saddr, af);
- pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
- &uh->uh_sum, &pd->naddr, nport, 1, af);
- rewrite++;
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
- } else {
- bport = nport = uh->uh_dport;
- /* check incoming packet for BINAT/RDR */
- if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
- saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
- &nport)) != NULL) {
- PF_ACPY(&pd->baddr, daddr, af);
- pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
- &uh->uh_sum, &pd->naddr, nport, 1, af);
- rewrite++;
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
+ th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
+ REASON_SET(&reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
}
- while (r != NULL) {
- r->evaluations++;
- if (pfi_kif_match(r->kif, kif) == r->ifnot)
- r = r->skip[PF_SKIP_IFP].ptr;
- else if (r->direction && r->direction != direction)
- r = r->skip[PF_SKIP_DIR].ptr;
- else if (r->af && r->af != af)
- r = r->skip[PF_SKIP_AF].ptr;
- else if (r->proto && r->proto != IPPROTO_UDP)
- r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
- r->src.neg, kif))
- r = r->skip[PF_SKIP_SRC_ADDR].ptr;
- else if (r->src.port_op && !pf_match_port(r->src.port_op,
- r->src.port[0], r->src.port[1], uh->uh_sport))
- r = r->skip[PF_SKIP_SRC_PORT].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
- r->dst.neg, NULL))
- r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
- r->dst.port[0], r->dst.port[1], uh->uh_dport))
- r = r->skip[PF_SKIP_DST_PORT].ptr;
- else if (r->tos && !(r->tos == pd->tos))
- r = TAILQ_NEXT(r, entries);
- else if (r->rule_flag & PFRULE_FRAGMENT)
- r = TAILQ_NEXT(r, entries);
- else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+ return (PF_PASS);
+
+csfailed:
#ifdef __FreeBSD__
- pf_socket_lookup(direction, pd, inp), 1)) &&
+ if (sk != NULL)
+ pool_put(&V_pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&V_pf_state_key_pl, nk);
#else
- pf_socket_lookup(direction, pd), 1)) &&
+ if (sk != NULL)
+ pool_put(&pf_state_key_pl, sk);
+ if (nk != NULL)
+ pool_put(&pf_state_key_pl, nk);
#endif
- !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
- pd->lookup.uid))
- r = TAILQ_NEXT(r, entries);
- else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+
+ if (sn != NULL && sn->states == 0 && sn->expire == 0) {
#ifdef __FreeBSD__
- pf_socket_lookup(direction, pd, inp), 1)) &&
+ RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn);
+ V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ V_pf_status.src_nodes--;
+ pool_put(&V_pf_src_tree_pl, sn);
#else
- pf_socket_lookup(direction, pd), 1)) &&
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, sn);
#endif
- !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
- pd->lookup.gid))
- r = TAILQ_NEXT(r, entries);
- else if (r->prob && r->prob <= arc4random())
- r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->os_fingerprint != PF_OSFP_ANY)
- r = TAILQ_NEXT(r, entries);
- else {
- if (r->tag)
- tag = r->tag;
- if (r->rtableid >= 0)
- rtableid = r->rtableid;
- if (r->anchor == NULL) {
- match = 1;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- if ((*rm)->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else
- pf_step_into_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match);
- }
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match))
- break;
}
- r = *rm;
- a = *am;
- ruleset = *rsm;
-
- REASON_SET(&reason, PFRES_MATCH);
-
- if (r->log || (nr != NULL && nr->natpass && nr->log)) {
- if (rewrite)
+ if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
#ifdef __FreeBSD__
- m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+ RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn);
+ V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ V_pf_status.src_nodes--;
+ pool_put(&V_pf_src_tree_pl, nsn);
#else
- m_copyback(m, off, sizeof(*uh), uh);
+ RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+ pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+ pf_status.src_nodes--;
+ pool_put(&pf_src_tree_pl, nsn);
#endif
- PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
- a, ruleset, pd);
- }
-
- if ((r->action == PF_DROP) &&
- ((r->rule_flag & PFRULE_RETURNICMP) ||
- (r->rule_flag & PFRULE_RETURN))) {
- /* undo NAT changes, if they have taken place */
- if (nr != NULL) {
- if (direction == PF_OUT) {
- pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
- &uh->uh_sum, &pd->baddr, bport, 1, af);
- rewrite++;
- } else {
- pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
- &uh->uh_sum, &pd->baddr, bport, 1, af);
- rewrite++;
- }
- }
- if ((af == AF_INET) && r->return_icmp)
- pf_send_icmp(m, r->return_icmp >> 8,
- r->return_icmp & 255, af, r);
- else if ((af == AF_INET6) && r->return_icmp6)
- pf_send_icmp(m, r->return_icmp6 >> 8,
- r->return_icmp6 & 255, af, r);
- }
-
- if (r->action == PF_DROP)
- return (PF_DROP);
-
- if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
- REASON_SET(&reason, PFRES_MEMORY);
- return (PF_DROP);
- }
-
- if (r->keep_state || nr != NULL) {
- /* create new state */
- struct pf_state *s = NULL;
- struct pf_src_node *sn = NULL;
-
- /* check maximums */
- if (r->max_states && (r->states >= r->max_states)) {
- pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(&reason, PFRES_MAXSTATES);
- goto cleanup;
- }
- /* src node for filter rule */
- if ((r->rule_flag & PFRULE_SRCTRACK ||
- r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- /* src node for translation rule */
- if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
- ((direction == PF_OUT &&
- pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- s = pool_get(&pf_state_pl, PR_NOWAIT);
- if (s == NULL) {
- REASON_SET(&reason, PFRES_MEMORY);
-cleanup:
- if (sn != NULL && sn->states == 0 && sn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, sn);
- }
- if (nsn != sn && nsn != NULL && nsn->states == 0 &&
- nsn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, nsn);
- }
- return (PF_DROP);
- }
- bzero(s, sizeof(*s));
- s->rule.ptr = r;
- s->nat_rule.ptr = nr;
- s->anchor.ptr = a;
- STATE_INC_COUNTERS(s);
- if (r->allow_opts)
- s->state_flags |= PFSTATE_ALLOWOPTS;
- if (r->rule_flag & PFRULE_STATESLOPPY)
- s->state_flags |= PFSTATE_SLOPPY;
- s->log = r->log & PF_LOG_ALL;
- if (nr != NULL)
- s->log |= nr->log & PF_LOG_ALL;
- s->proto = IPPROTO_UDP;
- s->direction = direction;
- s->af = af;
- if (direction == PF_OUT) {
- PF_ACPY(&s->gwy.addr, saddr, af);
- s->gwy.port = uh->uh_sport;
- PF_ACPY(&s->ext.addr, daddr, af);
- s->ext.port = uh->uh_dport;
- if (nr != NULL) {
- PF_ACPY(&s->lan.addr, &pd->baddr, af);
- s->lan.port = bport;
- } else {
- PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
- s->lan.port = s->gwy.port;
- }
- } else {
- PF_ACPY(&s->lan.addr, daddr, af);
- s->lan.port = uh->uh_dport;
- PF_ACPY(&s->ext.addr, saddr, af);
- s->ext.port = uh->uh_sport;
- if (nr != NULL) {
- PF_ACPY(&s->gwy.addr, &pd->baddr, af);
- s->gwy.port = bport;
- } else {
- PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
- s->gwy.port = s->lan.port;
- }
- }
- s->src.state = PFUDPS_SINGLE;
- s->dst.state = PFUDPS_NO_TRAFFIC;
- s->creation = time_second;
- s->expire = time_second;
- s->timeout = PFTM_UDP_FIRST_PACKET;
- pf_set_rt_ifp(s, saddr);
- if (sn != NULL) {
- s->src_node = sn;
- s->src_node->states++;
- }
- if (nsn != NULL) {
- PF_ACPY(&nsn->raddr, &pd->naddr, af);
- s->nat_src_node = nsn;
- s->nat_src_node->states++;
- }
- if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- REASON_SET(&reason, PFRES_STATEINS);
- pf_src_tree_remove_state(s);
- STATE_DEC_COUNTERS(s);
- pool_put(&pf_state_pl, s);
- return (PF_DROP);
- } else
- *sm = s;
- if (tag > 0) {
- pf_tag_ref(tag);
- s->tag = tag;
- }
- }
-
- /* copy back packet headers if we performed NAT operations */
- if (rewrite)
- m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
-
- return (PF_PASS);
-}
-
-int
-pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
- struct pfi_kif *kif, struct mbuf *m, int off, void *h,
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
- struct ifqueue *ifq)
-{
- struct pf_rule *nr = NULL;
- struct pf_addr *saddr = pd->src, *daddr = pd->dst;
- struct pf_rule *r, *a = NULL;
- struct pf_ruleset *ruleset = NULL;
- struct pf_src_node *nsn = NULL;
- u_short reason;
- u_int16_t icmpid = 0, bport, nport = 0;
- sa_family_t af = pd->af;
- u_int8_t icmptype = 0; /* make the compiler happy */
- u_int8_t icmpcode = 0; /* make the compiler happy */
- int state_icmp = 0;
- int tag = -1, rtableid = -1;
-#ifdef INET6
- int rewrite = 0;
-#endif /* INET6 */
- int asd = 0;
- int match = 0;
-
- if (pf_check_congestion(ifq)) {
- REASON_SET(&reason, PFRES_CONGEST);
- return (PF_DROP);
- }
-
- switch (pd->proto) {
-#ifdef INET
- case IPPROTO_ICMP:
- icmptype = pd->hdr.icmp->icmp_type;
- icmpcode = pd->hdr.icmp->icmp_code;
- icmpid = pd->hdr.icmp->icmp_id;
-
- if (icmptype == ICMP_UNREACH ||
- icmptype == ICMP_SOURCEQUENCH ||
- icmptype == ICMP_REDIRECT ||
- icmptype == ICMP_TIMXCEED ||
- icmptype == ICMP_PARAMPROB)
- state_icmp++;
- break;
-#endif /* INET */
-#ifdef INET6
- case IPPROTO_ICMPV6:
- icmptype = pd->hdr.icmp6->icmp6_type;
- icmpcode = pd->hdr.icmp6->icmp6_code;
- icmpid = pd->hdr.icmp6->icmp6_id;
-
- if (icmptype == ICMP6_DST_UNREACH ||
- icmptype == ICMP6_PACKET_TOO_BIG ||
- icmptype == ICMP6_TIME_EXCEEDED ||
- icmptype == ICMP6_PARAM_PROB)
- state_icmp++;
- break;
-#endif /* INET6 */
- }
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
-
- if (direction == PF_OUT) {
- bport = nport = icmpid;
- /* check outgoing packet for BINAT/NAT */
- if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
- saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
- NULL) {
- PF_ACPY(&pd->baddr, saddr, af);
- switch (af) {
-#ifdef INET
- case AF_INET:
- pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
- pd->naddr.v4.s_addr, 0);
- pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
- pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
- pd->hdr.icmp->icmp_id = nport;
- m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
- &pd->naddr, 0);
- rewrite++;
- break;
-#endif /* INET6 */
- }
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
- } else {
- bport = nport = icmpid;
- /* check incoming packet for BINAT/RDR */
- if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
- saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
- NULL) {
- PF_ACPY(&pd->baddr, daddr, af);
- switch (af) {
-#ifdef INET
- case AF_INET:
- pf_change_a(&daddr->v4.s_addr,
- pd->ip_sum, pd->naddr.v4.s_addr, 0);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
- &pd->naddr, 0);
- rewrite++;
- break;
-#endif /* INET6 */
- }
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
- }
-
- while (r != NULL) {
- r->evaluations++;
- if (pfi_kif_match(r->kif, kif) == r->ifnot)
- r = r->skip[PF_SKIP_IFP].ptr;
- else if (r->direction && r->direction != direction)
- r = r->skip[PF_SKIP_DIR].ptr;
- else if (r->af && r->af != af)
- r = r->skip[PF_SKIP_AF].ptr;
- else if (r->proto && r->proto != pd->proto)
- r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
- r->src.neg, kif))
- r = r->skip[PF_SKIP_SRC_ADDR].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
- r->dst.neg, NULL))
- r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (r->type && r->type != icmptype + 1)
- r = TAILQ_NEXT(r, entries);
- else if (r->code && r->code != icmpcode + 1)
- r = TAILQ_NEXT(r, entries);
- else if (r->tos && !(r->tos == pd->tos))
- r = TAILQ_NEXT(r, entries);
- else if (r->rule_flag & PFRULE_FRAGMENT)
- r = TAILQ_NEXT(r, entries);
- else if (r->prob && r->prob <= arc4random())
- r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->os_fingerprint != PF_OSFP_ANY)
- r = TAILQ_NEXT(r, entries);
- else {
- if (r->tag)
- tag = r->tag;
- if (r->rtableid >= 0)
- rtableid = r->rtableid;
- if (r->anchor == NULL) {
- match = 1;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- if ((*rm)->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else
- pf_step_into_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match);
- }
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match))
- break;
- }
- r = *rm;
- a = *am;
- ruleset = *rsm;
-
- REASON_SET(&reason, PFRES_MATCH);
-
- if (r->log || (nr != NULL && nr->natpass && nr->log)) {
-#ifdef INET6
- if (rewrite)
- m_copyback(m, off, sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
-#endif /* INET6 */
- PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
- a, ruleset, pd);
- }
-
- if (r->action != PF_PASS)
- return (PF_DROP);
-
- if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
- REASON_SET(&reason, PFRES_MEMORY);
- return (PF_DROP);
- }
-
- if (!state_icmp && (r->keep_state || nr != NULL)) {
- /* create new state */
- struct pf_state *s = NULL;
- struct pf_src_node *sn = NULL;
-
- /* check maximums */
- if (r->max_states && (r->states >= r->max_states)) {
- pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(&reason, PFRES_MAXSTATES);
- goto cleanup;
- }
- /* src node for filter rule */
- if ((r->rule_flag & PFRULE_SRCTRACK ||
- r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- /* src node for translation rule */
- if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
- ((direction == PF_OUT &&
- pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- s = pool_get(&pf_state_pl, PR_NOWAIT);
- if (s == NULL) {
- REASON_SET(&reason, PFRES_MEMORY);
-cleanup:
- if (sn != NULL && sn->states == 0 && sn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, sn);
- }
- if (nsn != sn && nsn != NULL && nsn->states == 0 &&
- nsn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, nsn);
- }
- return (PF_DROP);
- }
- bzero(s, sizeof(*s));
- s->rule.ptr = r;
- s->nat_rule.ptr = nr;
- s->anchor.ptr = a;
- STATE_INC_COUNTERS(s);
- if (r->allow_opts)
- s->state_flags |= PFSTATE_ALLOWOPTS;
- if (r->rule_flag & PFRULE_STATESLOPPY)
- s->state_flags |= PFSTATE_SLOPPY;
- s->log = r->log & PF_LOG_ALL;
- if (nr != NULL)
- s->log |= nr->log & PF_LOG_ALL;
- s->proto = pd->proto;
- s->direction = direction;
- s->af = af;
- if (direction == PF_OUT) {
- PF_ACPY(&s->gwy.addr, saddr, af);
- s->gwy.port = nport;
- PF_ACPY(&s->ext.addr, daddr, af);
- s->ext.port = 0;
- if (nr != NULL) {
- PF_ACPY(&s->lan.addr, &pd->baddr, af);
- s->lan.port = bport;
- } else {
- PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
- s->lan.port = s->gwy.port;
- }
- } else {
- PF_ACPY(&s->lan.addr, daddr, af);
- s->lan.port = nport;
- PF_ACPY(&s->ext.addr, saddr, af);
- s->ext.port = 0;
- if (nr != NULL) {
- PF_ACPY(&s->gwy.addr, &pd->baddr, af);
- s->gwy.port = bport;
- } else {
- PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
- s->gwy.port = s->lan.port;
- }
- }
- s->creation = time_second;
- s->expire = time_second;
- s->timeout = PFTM_ICMP_FIRST_PACKET;
- pf_set_rt_ifp(s, saddr);
- if (sn != NULL) {
- s->src_node = sn;
- s->src_node->states++;
- }
- if (nsn != NULL) {
- PF_ACPY(&nsn->raddr, &pd->naddr, af);
- s->nat_src_node = nsn;
- s->nat_src_node->states++;
- }
- if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- REASON_SET(&reason, PFRES_STATEINS);
- pf_src_tree_remove_state(s);
- STATE_DEC_COUNTERS(s);
- pool_put(&pf_state_pl, s);
- return (PF_DROP);
- } else
- *sm = s;
- if (tag > 0) {
- pf_tag_ref(tag);
- s->tag = tag;
- }
- }
-
-#ifdef INET6
- /* copy back packet headers if we performed IPv6 NAT operations */
- if (rewrite)
- m_copyback(m, off, sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
-#endif /* INET6 */
-
- return (PF_PASS);
-}
-
-int
-pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
- struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
- struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
-{
- struct pf_rule *nr = NULL;
- struct pf_rule *r, *a = NULL;
- struct pf_ruleset *ruleset = NULL;
- struct pf_src_node *nsn = NULL;
- struct pf_addr *saddr = pd->src, *daddr = pd->dst;
- sa_family_t af = pd->af;
- u_short reason;
- int tag = -1, rtableid = -1;
- int asd = 0;
- int match = 0;
-
- if (pf_check_congestion(ifq)) {
- REASON_SET(&reason, PFRES_CONGEST);
- return (PF_DROP);
- }
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
-
- if (direction == PF_OUT) {
- /* check outgoing packet for BINAT/NAT */
- if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
- saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
- PF_ACPY(&pd->baddr, saddr, af);
- switch (af) {
-#ifdef INET
- case AF_INET:
- pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
- pd->naddr.v4.s_addr, 0);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- PF_ACPY(saddr, &pd->naddr, af);
- break;
-#endif /* INET6 */
- }
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
- } else {
- /* check incoming packet for BINAT/RDR */
- if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
- saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
- PF_ACPY(&pd->baddr, daddr, af);
- switch (af) {
-#ifdef INET
- case AF_INET:
- pf_change_a(&daddr->v4.s_addr,
- pd->ip_sum, pd->naddr.v4.s_addr, 0);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- PF_ACPY(daddr, &pd->naddr, af);
- break;
-#endif /* INET6 */
- }
- if (nr->natpass)
- r = NULL;
- pd->nat_rule = nr;
- }
}
-
- while (r != NULL) {
- r->evaluations++;
- if (pfi_kif_match(r->kif, kif) == r->ifnot)
- r = r->skip[PF_SKIP_IFP].ptr;
- else if (r->direction && r->direction != direction)
- r = r->skip[PF_SKIP_DIR].ptr;
- else if (r->af && r->af != af)
- r = r->skip[PF_SKIP_AF].ptr;
- else if (r->proto && r->proto != pd->proto)
- r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
- r->src.neg, kif))
- r = r->skip[PF_SKIP_SRC_ADDR].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
- r->dst.neg, NULL))
- r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (r->tos && !(r->tos == pd->tos))
- r = TAILQ_NEXT(r, entries);
- else if (r->rule_flag & PFRULE_FRAGMENT)
- r = TAILQ_NEXT(r, entries);
- else if (r->prob && r->prob <= arc4random())
- r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->os_fingerprint != PF_OSFP_ANY)
- r = TAILQ_NEXT(r, entries);
- else {
- if (r->tag)
- tag = r->tag;
- if (r->rtableid >= 0)
- rtableid = r->rtableid;
- if (r->anchor == NULL) {
- match = 1;
- *rm = r;
- *am = a;
- *rsm = ruleset;
- if ((*rm)->quick)
- break;
- r = TAILQ_NEXT(r, entries);
- } else
- pf_step_into_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match);
- }
- if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
- PF_RULESET_FILTER, &r, &a, &match))
- break;
- }
- r = *rm;
- a = *am;
- ruleset = *rsm;
-
- REASON_SET(&reason, PFRES_MATCH);
-
- if (r->log || (nr != NULL && nr->natpass && nr->log))
- PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
- a, ruleset, pd);
-
- if ((r->action == PF_DROP) &&
- ((r->rule_flag & PFRULE_RETURNICMP) ||
- (r->rule_flag & PFRULE_RETURN))) {
- struct pf_addr *a = NULL;
-
- if (nr != NULL) {
- if (direction == PF_OUT)
- a = saddr;
- else
- a = daddr;
- }
- if (a != NULL) {
- switch (af) {
-#ifdef INET
- case AF_INET:
- pf_change_a(&a->v4.s_addr, pd->ip_sum,
- pd->baddr.v4.s_addr, 0);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- PF_ACPY(a, &pd->baddr, af);
- break;
-#endif /* INET6 */
- }
- }
- if ((af == AF_INET) && r->return_icmp)
- pf_send_icmp(m, r->return_icmp >> 8,
- r->return_icmp & 255, af, r);
- else if ((af == AF_INET6) && r->return_icmp6)
- pf_send_icmp(m, r->return_icmp6 >> 8,
- r->return_icmp6 & 255, af, r);
- }
-
- if (r->action != PF_PASS)
- return (PF_DROP);
-
- if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
- REASON_SET(&reason, PFRES_MEMORY);
- return (PF_DROP);
- }
-
- if (r->keep_state || nr != NULL) {
- /* create new state */
- struct pf_state *s = NULL;
- struct pf_src_node *sn = NULL;
-
- /* check maximums */
- if (r->max_states && (r->states >= r->max_states)) {
- pf_status.lcounters[LCNT_STATES]++;
- REASON_SET(&reason, PFRES_MAXSTATES);
- goto cleanup;
- }
- /* src node for filter rule */
- if ((r->rule_flag & PFRULE_SRCTRACK ||
- r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- /* src node for translation rule */
- if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
- ((direction == PF_OUT &&
- pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
- REASON_SET(&reason, PFRES_SRCLIMIT);
- goto cleanup;
- }
- s = pool_get(&pf_state_pl, PR_NOWAIT);
- if (s == NULL) {
- REASON_SET(&reason, PFRES_MEMORY);
-cleanup:
- if (sn != NULL && sn->states == 0 && sn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, sn);
- }
- if (nsn != sn && nsn != NULL && nsn->states == 0 &&
- nsn->expire == 0) {
- RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
- pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
- pf_status.src_nodes--;
- pool_put(&pf_src_tree_pl, nsn);
- }
- return (PF_DROP);
- }
- bzero(s, sizeof(*s));
- s->rule.ptr = r;
- s->nat_rule.ptr = nr;
- s->anchor.ptr = a;
- STATE_INC_COUNTERS(s);
- if (r->allow_opts)
- s->state_flags |= PFSTATE_ALLOWOPTS;
- if (r->rule_flag & PFRULE_STATESLOPPY)
- s->state_flags |= PFSTATE_SLOPPY;
- s->log = r->log & PF_LOG_ALL;
- if (nr != NULL)
- s->log |= nr->log & PF_LOG_ALL;
- s->proto = pd->proto;
- s->direction = direction;
- s->af = af;
- if (direction == PF_OUT) {
- PF_ACPY(&s->gwy.addr, saddr, af);
- PF_ACPY(&s->ext.addr, daddr, af);
- if (nr != NULL)
- PF_ACPY(&s->lan.addr, &pd->baddr, af);
- else
- PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
- } else {
- PF_ACPY(&s->lan.addr, daddr, af);
- PF_ACPY(&s->ext.addr, saddr, af);
- if (nr != NULL)
- PF_ACPY(&s->gwy.addr, &pd->baddr, af);
- else
- PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
- }
- s->src.state = PFOTHERS_SINGLE;
- s->dst.state = PFOTHERS_NO_TRAFFIC;
- s->creation = time_second;
- s->expire = time_second;
- s->timeout = PFTM_OTHER_FIRST_PACKET;
- pf_set_rt_ifp(s, saddr);
- if (sn != NULL) {
- s->src_node = sn;
- s->src_node->states++;
- }
- if (nsn != NULL) {
- PF_ACPY(&nsn->raddr, &pd->naddr, af);
- s->nat_src_node = nsn;
- s->nat_src_node->states++;
- }
- if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- REASON_SET(&reason, PFRES_STATEINS);
- pf_src_tree_remove_state(s);
- STATE_DEC_COUNTERS(s);
- pool_put(&pf_state_pl, s);
- return (PF_DROP);
- } else
- *sm = s;
- if (tag > 0) {
- pf_tag_ref(tag);
- s->tag = tag;
- }
- }
-
- return (PF_PASS);
+ return (PF_DROP);
}
int
@@ -4626,10 +4076,10 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
- r->src.neg, kif))
+ r->src.neg, kif, M_GETFIB(m)))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
- r->dst.neg, NULL))
+ r->dst.neg, NULL, M_GETFIB(m)))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
@@ -4645,9 +4095,14 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
pd->proto == IPPROTO_ICMPV6) &&
(r->type || r->code))
r = TAILQ_NEXT(r, entries);
- else if (r->prob && r->prob <= arc4random())
+ else if (r->prob && r->prob <=
+ (arc4random() % (UINT_MAX - 1) + 1))
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+#ifdef __FreeBSD__
+ else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
+#else
+ else if (r->match_tag && !pf_match_tag(m, r, &tag))
+#endif
r = TAILQ_NEXT(r, entries);
else {
if (r->anchor == NULL) {
@@ -4679,7 +4134,11 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
if (r->action != PF_PASS)
return (PF_DROP);
- if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) {
+#ifdef __FreeBSD__
+ if (pf_tag_packet(m, tag, -1, pd->pf_mtag)) {
+#else
+ if (pf_tag_packet(m, tag, -1)) {
+#endif
REASON_SET(&reason, PFRES_MEMORY);
return (PF_DROP);
}
@@ -4692,11 +4151,11 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
struct pf_pdesc *pd, u_short *reason, int *copyback)
{
- struct tcphdr *th = pd->hdr.tcp;
- u_int16_t win = ntohs(th->th_win);
- u_int32_t ack, end, seq, orig_seq;
- u_int8_t sws, dws;
- int ackskew;
+ struct tcphdr *th = pd->hdr.tcp;
+ u_int16_t win = ntohs(th->th_win);
+ u_int32_t ack, end, seq, orig_seq;
+ u_int8_t sws, dws;
+ int ackskew;
if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
sws = src->wscale & PF_WSCALE_MASK;
@@ -4724,13 +4183,9 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
/* Deferred generation of sequence number modulator */
if (dst->seqdiff && !src->seqdiff) {
-#ifdef __FreeBSD__
- while ((src->seqdiff = pf_new_isn(*state) - seq) == 0)
- ;
-#else
- while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
+ /* use random iss for the TCP server */
+ while ((src->seqdiff = arc4random() - seq) == 0)
;
-#endif
ack = ntohl(th->th_ack) - dst->seqdiff;
pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
src->seqdiff), 0);
@@ -4837,7 +4292,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
}
-#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
+#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
if (SEQ_GEQ(src->seqhi, end) &&
/* Last octet inside other's window space */
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
@@ -4847,7 +4302,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
(ackskew <= (MAXACKWINDOW << sws)) &&
/* Acking not more than one window forward */
((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
- (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
+ (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
+ (pd->flags & PFDESC_IP_REAS) == 0)) {
/* Require an exact/+1 sequence match on resets when possible */
if (dst->scrub || src->scrub) {
@@ -4937,19 +4393,25 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
* and keep updating the state TTL.
*/
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: loose state match: ");
pf_print_state(*state);
pf_print_flags(th->th_flags);
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
- "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
+ "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
#ifdef __FreeBSD__
- ackskew, (unsigned long long)(*state)->packets[0],
- (unsigned long long)(*state)->packets[1]);
+ pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
+ (unsigned long long)(*state)->packets[1],
#else
- ackskew, (*state)->packets[0],
- (*state)->packets[1]);
+ pd->p_len, ackskew, (*state)->packets[0],
+ (*state)->packets[1],
#endif
+ pd->dir == PF_IN ? "in" : "out",
+ pd->dir == (*state)->direction ? "fwd" : "rev");
}
if (dst->scrub || src->scrub) {
@@ -4987,7 +4449,7 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
/* Send RST for state mismatches during handshake */
if (!(th->th_flags & TH_RST))
#ifdef __FreeBSD__
- pf_send_tcp(m, (*state)->rule.ptr, pd->af,
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
#else
pf_send_tcp((*state)->rule.ptr, pd->af,
#endif
@@ -4999,16 +4461,16 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
src->seqlo = 0;
src->seqhi = 1;
src->max_win = 1;
+#ifdef __FreeBSD__
+ } else if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
} else if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: BAD state: ");
pf_print_state(*state);
pf_print_flags(th->th_flags);
printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
-#ifdef notyet
"pkts=%llu:%llu dir=%s,%s\n",
-#else
- "pkts=%llu:%llu%s\n",
-#endif
seq, orig_seq, ack, pd->p_len, ackskew,
#ifdef __FreeBSD__
(unsigned long long)(*state)->packets[0],
@@ -5016,12 +4478,8 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
#else
(*state)->packets[0], (*state)->packets[1],
#endif
-#ifdef notyet
- direction == PF_IN ? "in" : "out",
- direction == (*state)->direction ? "fwd" : "rev");
-#else
- "");
-#endif
+ pd->dir == PF_IN ? "in" : "out",
+ pd->dir == (*state)->direction ? "fwd" : "rev");
printf("pf: State failure on: %c %c %c %c | %c %c\n",
SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
@@ -5035,7 +4493,6 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
return (PF_DROP);
}
- /* Any packets which have gotten here are to be passed */
return (PF_PASS);
}
@@ -5110,32 +4567,36 @@ pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
return (PF_PASS);
}
-
int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
u_short *reason)
{
- struct pf_state_cmp key;
+ struct pf_state_key_cmp key;
struct tcphdr *th = pd->hdr.tcp;
int copyback = 0;
struct pf_state_peer *src, *dst;
+ struct pf_state_key *sk;
key.af = pd->af;
key.proto = IPPROTO_TCP;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd->src, key.af);
- PF_ACPY(&key.gwy.addr, pd->dst, key.af);
- key.ext.port = th->th_sport;
- key.gwy.port = th->th_dport;
- } else {
- PF_ACPY(&key.lan.addr, pd->src, key.af);
- PF_ACPY(&key.ext.addr, pd->dst, key.af);
- key.lan.port = th->th_sport;
- key.ext.port = th->th_dport;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ key.port[0] = th->th_sport;
+ key.port[1] = th->th_dport;
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ key.port[1] = th->th_sport;
+ key.port[0] = th->th_dport;
}
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
if (direction == (*state)->direction) {
src = &(*state)->src;
@@ -5145,6 +4606,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
dst = &(*state)->src;
}
+ sk = (*state)->key[pd->didx];
+
if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
if (direction != (*state)->direction) {
REASON_SET(reason, PFRES_SYNPROXY);
@@ -5179,15 +4642,6 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
(*state)->src.state = PF_TCPS_PROXY_DST;
}
if ((*state)->src.state == PF_TCPS_PROXY_DST) {
- struct pf_state_host *src, *dst;
-
- if (direction == PF_OUT) {
- src = &(*state)->gwy;
- dst = &(*state)->ext;
- } else {
- src = &(*state)->ext;
- dst = &(*state)->lan;
- }
if (direction == (*state)->direction) {
if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
(ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
@@ -5200,11 +4654,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
(*state)->dst.seqhi = htonl(arc4random());
#ifdef __FreeBSD__
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
- &src->addr,
#else
- pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
+ pf_send_tcp((*state)->rule.ptr, pd->af,
#endif
- &dst->addr, src->port, dst->port,
+ &sk->addr[pd->sidx], &sk->addr[pd->didx],
+ sk->port[pd->sidx], sk->port[pd->didx],
(*state)->dst.seqhi, 0, TH_SYN, 0,
(*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
REASON_SET(reason, PFRES_SYNPROXY);
@@ -5228,11 +4682,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
(*state)->tag, NULL, NULL);
#ifdef __FreeBSD__
pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
- &src->addr,
#else
- pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
+ pf_send_tcp((*state)->rule.ptr, pd->af,
#endif
- &dst->addr, src->port, dst->port,
+ &sk->addr[pd->sidx], &sk->addr[pd->didx],
+ sk->port[pd->sidx], sk->port[pd->didx],
(*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
TH_ACK, (*state)->dst.max_win, 0, 0, 1,
0, NULL, NULL);
@@ -5255,7 +4709,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
dst->state >= TCPS_FIN_WAIT_2 &&
src->state >= TCPS_FIN_WAIT_2) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: state reuse ");
pf_print_state(*state);
pf_print_flags(th->th_flags);
@@ -5278,21 +4736,31 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
}
/* translate source/destination address, if necessary */
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_OUT)
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+ nk->port[pd->sidx] != th->th_sport)
pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
- &th->th_sum, &(*state)->gwy.addr,
- (*state)->gwy.port, 0, pd->af);
- else
+ &th->th_sum, &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 0, pd->af);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+ nk->port[pd->didx] != th->th_dport)
pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
- &th->th_sum, &(*state)->lan.addr,
- (*state)->lan.port, 0, pd->af);
- m_copyback(m, off, sizeof(*th), (caddr_t)th);
- } else if (copyback) {
- /* Copyback sequence modulation or stateful scrub changes */
- m_copyback(m, off, sizeof(*th), (caddr_t)th);
+ &th->th_sum, &nk->addr[pd->didx],
+ nk->port[pd->didx], 0, pd->af);
+ copyback = 1;
}
+ /* Copyback sequence modulation or stateful scrub changes if needed */
+ if (copyback)
+#ifdef __FreeBSD__
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+#else
+ m_copyback(m, off, sizeof(*th), th);
+#endif
+
return (PF_PASS);
}
@@ -5301,24 +4769,28 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
{
struct pf_state_peer *src, *dst;
- struct pf_state_cmp key;
+ struct pf_state_key_cmp key;
struct udphdr *uh = pd->hdr.udp;
key.af = pd->af;
key.proto = IPPROTO_UDP;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd->src, key.af);
- PF_ACPY(&key.gwy.addr, pd->dst, key.af);
- key.ext.port = uh->uh_sport;
- key.gwy.port = uh->uh_dport;
- } else {
- PF_ACPY(&key.lan.addr, pd->src, key.af);
- PF_ACPY(&key.ext.addr, pd->dst, key.af);
- key.lan.port = uh->uh_sport;
- key.ext.port = uh->uh_dport;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ key.port[0] = uh->uh_sport;
+ key.port[1] = uh->uh_dport;
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ key.port[1] = uh->uh_sport;
+ key.port[0] = uh->uh_dport;
}
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
if (direction == (*state)->direction) {
src = &(*state)->src;
@@ -5342,16 +4814,25 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
(*state)->timeout = PFTM_UDP_SINGLE;
/* translate source/destination address, if necessary */
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_OUT)
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+ nk->port[pd->sidx] != uh->uh_sport)
pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
- &uh->uh_sum, &(*state)->gwy.addr,
- (*state)->gwy.port, 1, pd->af);
- else
+ &uh->uh_sum, &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 1, pd->af);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+ nk->port[pd->didx] != uh->uh_dport)
pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
- &uh->uh_sum, &(*state)->lan.addr,
- (*state)->lan.port, 1, pd->af);
+ &uh->uh_sum, &nk->addr[pd->didx],
+ nk->port[pd->didx], 1, pd->af);
+#ifdef __FreeBSD__
m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+#else
+ m_copyback(m, off, sizeof(*uh), uh);
+#endif
}
return (PF_PASS);
@@ -5361,12 +4842,15 @@ int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
- struct pf_addr *saddr = pd->src, *daddr = pd->dst;
- u_int16_t icmpid = 0; /* make the compiler happy */
- u_int16_t *icmpsum = NULL; /* make the compiler happy */
- u_int8_t icmptype = 0; /* make the compiler happy */
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+#ifdef __FreeBSD__
+ u_int16_t icmpid = 0, *icmpsum;
+#else
+ u_int16_t icmpid, *icmpsum;
+#endif
+ u_int8_t icmptype;
int state_icmp = 0;
- struct pf_state_cmp key;
+ struct pf_state_key_cmp key;
switch (pd->proto) {
#ifdef INET
@@ -5406,84 +4890,84 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
*/
key.af = pd->af;
key.proto = pd->proto;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd->src, key.af);
- PF_ACPY(&key.gwy.addr, pd->dst, key.af);
- key.ext.port = 0;
- key.gwy.port = icmpid;
- } else {
- PF_ACPY(&key.lan.addr, pd->src, key.af);
- PF_ACPY(&key.ext.addr, pd->dst, key.af);
- key.lan.port = icmpid;
- key.ext.port = 0;
+ key.port[0] = key.port[1] = icmpid;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
}
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
(*state)->expire = time_second;
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
/* translate source/destination address, if necessary */
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_OUT) {
- switch (pd->af) {
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ switch (pd->af) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+ if (PF_ANEQ(pd->src,
+ &nk->addr[pd->sidx], AF_INET))
pf_change_a(&saddr->v4.s_addr,
pd->ip_sum,
- (*state)->gwy.addr.v4.s_addr, 0);
- pd->hdr.icmp->icmp_cksum =
- pf_cksum_fixup(
- pd->hdr.icmp->icmp_cksum, icmpid,
- (*state)->gwy.port, 0);
- pd->hdr.icmp->icmp_id =
- (*state)->gwy.port;
- m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- pf_change_a6(saddr,
- &pd->hdr.icmp6->icmp6_cksum,
- &(*state)->gwy.addr, 0);
- m_copyback(m, off,
- sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
- break;
-#endif /* INET6 */
- }
- } else {
- switch (pd->af) {
-#ifdef INET
- case AF_INET:
+ nk->addr[pd->sidx].v4.s_addr, 0);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
+ AF_INET))
pf_change_a(&daddr->v4.s_addr,
pd->ip_sum,
- (*state)->lan.addr.v4.s_addr, 0);
+ nk->addr[pd->didx].v4.s_addr, 0);
+
+ if (nk->port[0] !=
+ pd->hdr.icmp->icmp_id) {
pd->hdr.icmp->icmp_cksum =
pf_cksum_fixup(
pd->hdr.icmp->icmp_cksum, icmpid,
- (*state)->lan.port, 0);
+ nk->port[pd->sidx], 0);
pd->hdr.icmp->icmp_id =
- (*state)->lan.port;
- m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
- break;
+ nk->port[pd->sidx];
+ }
+
+ m_copyback(m, off, ICMP_MINLEN,
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp);
+ break;
#endif /* INET */
#ifdef INET6
- case AF_INET6:
+ case AF_INET6:
+ if (PF_ANEQ(pd->src,
+ &nk->addr[pd->sidx], AF_INET6))
+ pf_change_a6(saddr,
+ &pd->hdr.icmp6->icmp6_cksum,
+ &nk->addr[pd->sidx], 0);
+
+ if (PF_ANEQ(pd->dst,
+ &nk->addr[pd->didx], AF_INET6))
pf_change_a6(daddr,
&pd->hdr.icmp6->icmp6_cksum,
- &(*state)->lan.addr, 0);
- m_copyback(m, off,
- sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
- break;
+ &nk->addr[pd->didx], 0);
+
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp6);
+ break;
#endif /* INET6 */
- }
}
}
-
return (PF_PASS);
} else {
@@ -5493,6 +4977,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
*/
struct pf_pdesc pd2;
+#ifdef __FreeBSD__
+ bzero(&pd2, sizeof pd2);
+#endif
#ifdef INET
struct ip h2;
#endif /* INET */
@@ -5500,10 +4987,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
struct ip6_hdr h2_6;
int terminal = 0;
#endif /* INET6 */
- int ipoff2 = 0; /* make the compiler happy */
- int off2 = 0; /* make the compiler happy */
+#ifdef __FreeBSD__
+ int ipoff2 = 0;
+ int off2 = 0;
+#else
+ int ipoff2;
+ int off2;
+#endif
pd2.af = pd->af;
+ /* Payload packet is from the opposite direction. */
+ pd2.sidx = (direction == PF_IN) ? 1 : 0;
+ pd2.didx = (direction == PF_IN) ? 0 : 1;
switch (pd->af) {
#ifdef INET
case AF_INET:
@@ -5589,10 +5084,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
} while (!terminal);
break;
#endif /* INET6 */
-#ifdef __FreeBSD__
- default:
- panic("AF not supported: %d", pd->af);
-#endif
}
switch (pd2.proto) {
@@ -5618,19 +5109,16 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
key.af = pd2.af;
key.proto = IPPROTO_TCP;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd2.dst, key.af);
- PF_ACPY(&key.gwy.addr, pd2.src, key.af);
- key.ext.port = th.th_dport;
- key.gwy.port = th.th_sport;
- } else {
- PF_ACPY(&key.lan.addr, pd2.dst, key.af);
- PF_ACPY(&key.ext.addr, pd2.src, key.af);
- key.lan.port = th.th_dport;
- key.ext.port = th.th_sport;
- }
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[pd2.sidx] = th.th_sport;
+ key.port[pd2.didx] = th.th_dport;
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
if (direction == (*state)->direction) {
src = &(*state)->dst;
@@ -5656,7 +5144,11 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
(!SEQ_GEQ(src->seqhi, seq) ||
!SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
printf("pf: BAD ICMP %d:%d ",
icmptype, pd->hdr.icmp->icmp_code);
pf_print_host(pd->src, 0, pd->af);
@@ -5668,22 +5160,47 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
}
REASON_SET(reason, PFRES_BADSTATE);
return (PF_DROP);
+ } else {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+ printf("pf: OK ICMP %d:%d ",
+ icmptype, pd->hdr.icmp->icmp_code);
+ pf_print_host(pd->src, 0, pd->af);
+ printf(" -> ");
+ pf_print_host(pd->dst, 0, pd->af);
+ printf(" state: ");
+ pf_print_state(*state);
+ printf(" seq=%u\n", seq);
+ }
}
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_IN) {
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != th.th_sport)
pf_change_icmp(pd2.src, &th.th_sport,
- daddr, &(*state)->lan.addr,
- (*state)->lan.port, NULL,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
- } else {
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != th.th_dport)
pf_change_icmp(pd2.dst, &th.th_dport,
- saddr, &(*state)->gwy.addr,
- (*state)->gwy.port, NULL,
+ NULL, /* XXX Inbound NAT? */
+ &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
- }
copyback = 1;
}
@@ -5692,22 +5209,38 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
#ifdef INET
case AF_INET:
m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp);
m_copyback(m, ipoff2, sizeof(h2),
- (caddr_t)&h2);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ &h2);
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
m_copyback(m, off,
sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp6);
m_copyback(m, ipoff2, sizeof(h2_6),
- (caddr_t)&h2_6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ &h2_6);
break;
#endif /* INET6 */
}
+#ifdef __FreeBSD__
m_copyback(m, off2, 8, (caddr_t)&th);
+#else
+ m_copyback(m, off2, 8, &th);
+#endif
}
return (PF_PASS);
@@ -5726,57 +5259,79 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
key.af = pd2.af;
key.proto = IPPROTO_UDP;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd2.dst, key.af);
- PF_ACPY(&key.gwy.addr, pd2.src, key.af);
- key.ext.port = uh.uh_dport;
- key.gwy.port = uh.uh_sport;
- } else {
- PF_ACPY(&key.lan.addr, pd2.dst, key.af);
- PF_ACPY(&key.ext.addr, pd2.src, key.af);
- key.lan.port = uh.uh_dport;
- key.ext.port = uh.uh_sport;
- }
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[pd2.sidx] = uh.uh_sport;
+ key.port[pd2.didx] = uh.uh_dport;
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_IN) {
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != uh.uh_sport)
pf_change_icmp(pd2.src, &uh.uh_sport,
- daddr, &(*state)->lan.addr,
- (*state)->lan.port, &uh.uh_sum,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], &uh.uh_sum,
pd2.ip_sum, icmpsum,
pd->ip_sum, 1, pd2.af);
- } else {
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != uh.uh_dport)
pf_change_icmp(pd2.dst, &uh.uh_dport,
- saddr, &(*state)->gwy.addr,
- (*state)->gwy.port, &uh.uh_sum,
+ NULL, /* XXX Inbound NAT? */
+ &nk->addr[pd2.didx],
+ nk->port[pd2.didx], &uh.uh_sum,
pd2.ip_sum, icmpsum,
pd->ip_sum, 1, pd2.af);
- }
+
switch (pd2.af) {
#ifdef INET
case AF_INET:
m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
- m_copyback(m, ipoff2, sizeof(h2),
- (caddr_t)&h2);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp);
+#ifdef __FreeBSD__
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+#else
+ m_copyback(m, ipoff2, sizeof(h2), &h2);
+#endif
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
m_copyback(m, off,
sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp6);
m_copyback(m, ipoff2, sizeof(h2_6),
- (caddr_t)&h2_6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ &h2_6);
break;
#endif /* INET6 */
}
- m_copyback(m, off2, sizeof(uh),
- (caddr_t)&uh);
+#ifdef __FreeBSD__
+ m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
+#else
+ m_copyback(m, off2, sizeof(uh), &uh);
+#endif
}
-
return (PF_PASS);
break;
}
@@ -5794,42 +5349,51 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
key.af = pd2.af;
key.proto = IPPROTO_ICMP;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd2.dst, key.af);
- PF_ACPY(&key.gwy.addr, pd2.src, key.af);
- key.ext.port = 0;
- key.gwy.port = iih.icmp_id;
- } else {
- PF_ACPY(&key.lan.addr, pd2.dst, key.af);
- PF_ACPY(&key.ext.addr, pd2.src, key.af);
- key.lan.port = iih.icmp_id;
- key.ext.port = 0;
- }
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp_id;
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_IN) {
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp_id)
pf_change_icmp(pd2.src, &iih.icmp_id,
- daddr, &(*state)->lan.addr,
- (*state)->lan.port, NULL,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET);
- } else {
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp_id)
pf_change_icmp(pd2.dst, &iih.icmp_id,
- saddr, &(*state)->gwy.addr,
- (*state)->gwy.port, NULL,
+ NULL, /* XXX Inbound NAT? */
+ &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET);
- }
- m_copyback(m, off, ICMP_MINLEN,
- (caddr_t)pd->hdr.icmp);
- m_copyback(m, ipoff2, sizeof(h2),
- (caddr_t)&h2);
- m_copyback(m, off2, ICMP_MINLEN,
- (caddr_t)&iih);
- }
+#ifdef __FreeBSD__
+ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
+#else
+ m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), &h2);
+ m_copyback(m, off2, ICMP_MINLEN, &iih);
+#endif
+ }
return (PF_PASS);
break;
}
@@ -5848,42 +5412,55 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
key.af = pd2.af;
key.proto = IPPROTO_ICMPV6;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd2.dst, key.af);
- PF_ACPY(&key.gwy.addr, pd2.src, key.af);
- key.ext.port = 0;
- key.gwy.port = iih.icmp6_id;
- } else {
- PF_ACPY(&key.lan.addr, pd2.dst, key.af);
- PF_ACPY(&key.ext.addr, pd2.src, key.af);
- key.lan.port = iih.icmp6_id;
- key.ext.port = 0;
- }
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp6_id;
+
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
- STATE_LOOKUP();
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_IN) {
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp6_id)
pf_change_icmp(pd2.src, &iih.icmp6_id,
- daddr, &(*state)->lan.addr,
- (*state)->lan.port, NULL,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET6);
- } else {
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp6_id)
pf_change_icmp(pd2.dst, &iih.icmp6_id,
- saddr, &(*state)->gwy.addr,
- (*state)->gwy.port, NULL,
+ NULL, /* XXX Inbound NAT? */
+ &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, AF_INET6);
- }
+
+#ifdef __FreeBSD__
m_copyback(m, off, sizeof(struct icmp6_hdr),
(caddr_t)pd->hdr.icmp6);
- m_copyback(m, ipoff2, sizeof(h2_6),
- (caddr_t)&h2_6);
+ m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
m_copyback(m, off2, sizeof(struct icmp6_hdr),
(caddr_t)&iih);
+#else
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6), &h2_6);
+ m_copyback(m, off2, sizeof(struct icmp6_hdr),
+ &iih);
+#endif
}
-
return (PF_PASS);
break;
}
@@ -5891,55 +5468,68 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
default: {
key.af = pd2.af;
key.proto = pd2.proto;
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd2.dst, key.af);
- PF_ACPY(&key.gwy.addr, pd2.src, key.af);
- key.ext.port = 0;
- key.gwy.port = 0;
- } else {
- PF_ACPY(&key.lan.addr, pd2.dst, key.af);
- PF_ACPY(&key.ext.addr, pd2.src, key.af);
- key.lan.port = 0;
- key.ext.port = 0;
- }
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = 0;
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_IN) {
- pf_change_icmp(pd2.src, NULL,
- daddr, &(*state)->lan.addr,
- 0, NULL,
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af))
+ pf_change_icmp(pd2.src, NULL, daddr,
+ &nk->addr[pd2.sidx], 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
- } else {
- pf_change_icmp(pd2.dst, NULL,
- saddr, &(*state)->gwy.addr,
- 0, NULL,
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af))
+ pf_change_icmp(pd2.src, NULL,
+ NULL, /* XXX Inbound NAT? */
+ &nk->addr[pd2.didx], 0, NULL,
pd2.ip_sum, icmpsum,
pd->ip_sum, 0, pd2.af);
- }
+
switch (pd2.af) {
#ifdef INET
case AF_INET:
+#ifdef __FreeBSD__
m_copyback(m, off, ICMP_MINLEN,
(caddr_t)pd->hdr.icmp);
- m_copyback(m, ipoff2, sizeof(h2),
- (caddr_t)&h2);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+#else
+ m_copyback(m, off, ICMP_MINLEN,
+ pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), &h2);
+#endif
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
m_copyback(m, off,
sizeof(struct icmp6_hdr),
- (caddr_t)pd->hdr.icmp6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ pd->hdr.icmp6);
m_copyback(m, ipoff2, sizeof(h2_6),
- (caddr_t)&h2_6);
+#ifdef __FreeBSD__
+ (caddr_t)
+#endif
+ &h2_6);
break;
#endif /* INET6 */
}
}
-
return (PF_PASS);
break;
}
@@ -5949,26 +5539,28 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
int
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
- struct pf_pdesc *pd)
+ struct mbuf *m, struct pf_pdesc *pd)
{
struct pf_state_peer *src, *dst;
- struct pf_state_cmp key;
+ struct pf_state_key_cmp key;
key.af = pd->af;
key.proto = pd->proto;
if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, pd->src, key.af);
- PF_ACPY(&key.gwy.addr, pd->dst, key.af);
- key.ext.port = 0;
- key.gwy.port = 0;
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ key.port[0] = key.port[1] = 0;
} else {
- PF_ACPY(&key.lan.addr, pd->src, key.af);
- PF_ACPY(&key.ext.addr, pd->dst, key.af);
- key.lan.port = 0;
- key.ext.port = 0;
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ key.port[1] = key.port[0] = 0;
}
- STATE_LOOKUP();
+#ifdef __FreeBSD__
+ STATE_LOOKUP(kif, &key, direction, *state, m, pd->pf_mtag);
+#else
+ STATE_LOOKUP(kif, &key, direction, *state, m);
+#endif
if (direction == (*state)->direction) {
src = &(*state)->src;
@@ -5992,39 +5584,48 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
(*state)->timeout = PFTM_OTHER_SINGLE;
/* translate source/destination address, if necessary */
- if (STATE_TRANSLATE(*state)) {
- if (direction == PF_OUT)
- switch (pd->af) {
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+#ifdef __FreeBSD__
+ KASSERT(nk, ("%s: nk is null", __FUNCTION__));
+ KASSERT(pd, ("%s: pd is null", __FUNCTION__));
+ KASSERT(pd->src, ("%s: pd->src is null", __FUNCTION__));
+ KASSERT(pd->dst, ("%s: pd->dst is null", __FUNCTION__));
+#else
+ KASSERT(nk);
+ KASSERT(pd);
+ KASSERT(pd->src);
+ KASSERT(pd->dst);
+#endif
+ switch (pd->af) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
pf_change_a(&pd->src->v4.s_addr,
- pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->sidx].v4.s_addr,
0);
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
- break;
-#endif /* INET6 */
- }
- else
- switch (pd->af) {
-#ifdef INET
- case AF_INET:
+
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
pf_change_a(&pd->dst->v4.s_addr,
- pd->ip_sum, (*state)->lan.addr.v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->didx].v4.s_addr,
0);
+
break;
#endif /* INET */
#ifdef INET6
- case AF_INET6:
- PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
- break;
+ case AF_INET6:
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
+ PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
+ PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
#endif /* INET6 */
- }
+ }
}
-
return (PF_PASS);
}
@@ -6080,8 +5681,14 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
}
int
-pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
+pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
+ int rtableid)
{
+#ifdef __FreeBSD__
+#ifdef RADIX_MPATH
+ struct radix_node_head *rnh;
+#endif
+#endif
struct sockaddr_in *dst;
int ret = 1;
int check_mpath;
@@ -6102,6 +5709,14 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
struct ifnet *ifp;
check_mpath = 0;
+#ifdef __FreeBSD__
+#ifdef RADIX_MPATH
+ /* XXX: stick to table 0 for now */
+ rnh = rt_tables_get_rnh(0, af);
+ if (rnh != NULL && rn_mpath_capable(rnh))
+ check_mpath = 1;
+#endif
+#endif
bzero(&ro, sizeof(ro));
switch (af) {
case AF_INET:
@@ -6109,18 +5724,24 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = addr->v4;
-#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */
+#ifndef __FreeBSD__
if (ipmultipath)
check_mpath = 1;
#endif
break;
#ifdef INET6
case AF_INET6:
+ /*
+ * Skip check for addresses with embedded interface scope,
+ * as they would always match anyway.
+ */
+ if (IN6_IS_SCOPE_EMBED(&addr->v6))
+ goto out;
dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
dst6->sin6_family = AF_INET6;
dst6->sin6_len = sizeof(*dst6);
dst6->sin6_addr = addr->v6;
-#ifndef __FreeBSD__ /* MULTIPATH_ROUTING */
+#ifndef __FreeBSD__
if (ip6_multipath)
check_mpath = 1;
#endif
@@ -6135,13 +5756,21 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
goto out;
#ifdef __FreeBSD__
-/* XXX MRT not always INET */ /* stick with table 0 though */
- if (af == AF_INET)
- in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB);
+ switch (af) {
#ifdef INET6
- else
- in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB);
+ case AF_INET6:
+ in6_rtalloc_ign(&ro, 0, rtableid);
+ break;
#endif
+#ifdef INET
+ case AF_INET:
+ in_rtalloc_ign((struct route *)&ro, 0, rtableid);
+ break;
+#endif
+ default:
+ rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. */
+ break;
+ }
#else /* ! __FreeBSD__ */
rtalloc_noclone((struct route *)&ro, NO_CLONING);
#endif
@@ -6170,11 +5799,13 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
if (kif->pfik_ifp == ifp)
ret = 1;
-#ifdef __FreeBSD__ /* MULTIPATH_ROUTING */
- rn = NULL;
-#else
+#ifdef __FreeBSD__
+#ifdef RADIX_MPATH
rn = rn_mpath_next(rn);
#endif
+#else
+ rn = rn_mpath_next(rn, 0);
+#endif
} while (check_mpath == 1 && rn != NULL && ret == 0);
} else
ret = 0;
@@ -6185,7 +5816,8 @@ out:
}
int
-pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
+pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
+ int rtableid)
{
struct sockaddr_in *dst;
#ifdef INET6
@@ -6217,12 +5849,21 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
}
#ifdef __FreeBSD__
- if (af == AF_INET)
- in_rtalloc_ign((struct route *)&ro, 0, RT_DEFAULT_FIB);
+ switch (af) {
#ifdef INET6
- else
- in6_rtalloc_ign(&ro, 0, RT_DEFAULT_FIB);
+ case AF_INET6:
+ in6_rtalloc_ign(&ro, 0, rtableid);
+ break;
#endif
+#ifdef INET
+ case AF_INET:
+ in_rtalloc_ign((struct route *)&ro, 0, rtableid);
+ break;
+#endif
+ default:
+ rtalloc_ign((struct route *)&ro, 0);
+ break;
+ }
#else /* ! __FreeBSD__ */
rtalloc_noclone((struct route *)&ro, NO_CLONING);
#endif
@@ -6241,7 +5882,6 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
}
#ifdef INET
-
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
struct pf_state *s, struct pf_pdesc *pd)
@@ -6266,7 +5906,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
(dir != PF_IN && dir != PF_OUT) || oifp == NULL)
panic("pf_route: invalid parameters");
+#ifdef __FreeBSD__
if (pd->pf_mtag->routed++ > 3) {
+#else
+ if ((*m)->m_pkthdr.pf.routed++ > 3) {
+#endif
m0 = *m;
*m = NULL;
goto bad;
@@ -6301,9 +5945,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
dst->sin_addr = ip->ip_dst;
if (r->rt == PF_FASTROUTE) {
- in_rtalloc(ro, 0);
+#ifdef __FreeBSD__
+ in_rtalloc_ign(ro, 0, M_GETFIB(m0));
+#else
+ rtalloc(ro);
+#endif
if (ro->ro_rt == 0) {
+#ifdef __FreeBSD__
KMOD_IPSTAT_INC(ips_noroute);
+#else
+ ipstat.ips_noroute++;
+#endif
goto bad;
}
@@ -6369,7 +6021,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
* XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
*/
NTOHS(ip->ip_len);
- NTOHS(ip->ip_off); /* XXX: needed? */
+ NTOHS(ip->ip_off); /* XXX: needed? */
in_delayed_cksum(m0);
HTONS(ip->ip_len);
HTONS(ip->ip_off);
@@ -6378,9 +6030,8 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
if (ntohs(ip->ip_len) <= ifp->if_mtu ||
- (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
(ifp->if_hwassist & CSUM_FRAGMENT &&
- ((ip->ip_off & htons(IP_DF)) == 0))) {
+ ((ip->ip_off & htons(IP_DF)) == 0))) {
/*
* ip->ip_len = htons(ip->ip_len);
* ip->ip_off = htons(ip->ip_off);
@@ -6400,7 +6051,6 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
PF_LOCK();
goto done;
}
-
#else
/* Copied from ip_output. */
#ifdef IPSEC
@@ -6421,25 +6071,28 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
ifp->if_bridge != NULL) {
in_delayed_cksum(m0);
- m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
+ m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */
}
} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
ifp->if_bridge != NULL) {
in_delayed_cksum(m0);
- m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
+ m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */
}
}
if (ntohs(ip->ip_len) <= ifp->if_mtu) {
+ ip->ip_sum = 0;
if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
ifp->if_bridge == NULL) {
m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+#ifdef __FreeBSD__
KMOD_IPSTAT_INC(ips_outhwcsum);
- } else {
- ip->ip_sum = 0;
+#else
+ ipstat.ips_outhwcsum++;
+#endif
+ } else
ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
- }
/* Update relevant hardware checksum stats for TCP/UDP */
if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
KMOD_TCPSTAT_INC(tcps_outhwcsum);
@@ -6449,12 +6102,17 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
goto done;
}
#endif
+
/*
* Too large for interface; fragment if possible.
* Must be able to put at least 8 bytes per fragment.
*/
- if (ip->ip_off & htons(IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
+ if (ip->ip_off & htons(IP_DF)) {
+#ifdef __FreeBSD__
KMOD_IPSTAT_INC(ips_cantfrag);
+#else
+ ipstat.ips_cantfrag++;
+#endif
if (r->rt != PF_DUPTO) {
#ifdef __FreeBSD__
/* icmp_error() expects host byte ordering */
@@ -6485,7 +6143,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
error = ip_fragment(m0, ifp, ifp->if_mtu);
#endif
if (error) {
-#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */
+#ifndef __FreeBSD__ /* ip_fragment does not do m_freem() on FreeBSD */
m0 = NULL;
#endif
goto bad;
@@ -6511,7 +6169,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
}
if (error == 0)
+#ifdef __FreeBSD__
KMOD_IPSTAT_INC(ips_fragmented);
+#else
+ ipstat.ips_fragmented++;
+#endif
done:
if (r->rt != PF_DUPTO)
@@ -6539,13 +6201,16 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
struct ifnet *ifp = NULL;
struct pf_addr naddr;
struct pf_src_node *sn = NULL;
- int error = 0;
if (m == NULL || *m == NULL || r == NULL ||
(dir != PF_IN && dir != PF_OUT) || oifp == NULL)
panic("pf_route6: invalid parameters");
+#ifdef __FreeBSD__
if (pd->pf_mtag->routed++ > 3) {
+#else
+ if ((*m)->m_pkthdr.pf.routed++ > 3) {
+#endif
m0 = *m;
*m = NULL;
goto bad;
@@ -6584,14 +6249,9 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
m0->m_flags |= M_SKIP_FIREWALL;
PF_UNLOCK();
ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
- PF_LOCK();
#else
- mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
- if (mtag == NULL)
- goto bad;
- m_tag_prepend(m0, mtag);
- pd->pf_mtag->flags |= PF_TAG_GENERATED;
- ip6_output(m0, NULL, NULL, 0, NULL, NULL);
+ m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+ ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
#endif
return;
}
@@ -6652,7 +6312,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
- error = nd6_output(ifp, ifp, m0, dst, NULL);
+ nd6_output(ifp, ifp, m0, dst, NULL);
#ifdef __FreeBSD__
PF_LOCK();
#endif
@@ -6663,7 +6323,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
PF_UNLOCK();
icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
PF_LOCK();
- } else
+ } else
#else
if (r->rt != PF_DUPTO)
icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
@@ -6683,7 +6343,6 @@ bad:
}
#endif /* INET6 */
-
#ifdef __FreeBSD__
/*
* FreeBSD supports cksum offloads for the following drivers.
@@ -6721,10 +6380,10 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
sum = m->m_pkthdr.csum_data;
} else {
- ip = mtod(m, struct ip *);
+ ip = mtod(m, struct ip *);
sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htonl((u_short)len +
- m->m_pkthdr.csum_data + IPPROTO_TCP));
+ ip->ip_dst.s_addr, htonl((u_short)len +
+ m->m_pkthdr.csum_data + IPPROTO_TCP));
}
sum ^= 0xffff;
++hw_assist;
@@ -6735,14 +6394,14 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
sum = m->m_pkthdr.csum_data;
} else {
- ip = mtod(m, struct ip *);
+ ip = mtod(m, struct ip *);
sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htonl((u_short)len +
- m->m_pkthdr.csum_data + IPPROTO_UDP));
+ ip->ip_dst.s_addr, htonl((u_short)len +
+ m->m_pkthdr.csum_data + IPPROTO_UDP));
}
sum ^= 0xffff;
++hw_assist;
- }
+ }
break;
case IPPROTO_ICMP:
#ifdef INET6
@@ -6793,11 +6452,13 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a
KMOD_UDPSTAT_INC(udps_badsum);
break;
}
+#ifdef INET
case IPPROTO_ICMP:
{
KMOD_ICMPSTAT_INC(icps_checksum);
break;
}
+#endif
#ifdef INET6
case IPPROTO_ICMPV6:
{
@@ -6817,6 +6478,7 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a
return (0);
}
#else /* !__FreeBSD__ */
+
/*
* check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
* off is the offset where the protocol header starts
@@ -6893,9 +6555,11 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
case IPPROTO_UDP:
KMOD_UDPSTAT_INC(udps_badsum);
break;
+#ifdef INET
case IPPROTO_ICMP:
KMOD_ICMPSTAT_INC(icps_checksum);
break;
+#endif
#ifdef INET6
case IPPROTO_ICMPV6:
KMOD_ICMP6STAT_INC(icp6s_checksum);
@@ -6907,7 +6571,37 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
m->m_pkthdr.csum_flags |= flag_ok;
return (0);
}
-#endif /* __FreeBSD__ */
+#endif
+
+#ifndef __FreeBSD__
+struct pf_divert *
+pf_find_divert(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
+ return (NULL);
+
+ return ((struct pf_divert *)(mtag + 1));
+}
+
+struct pf_divert *
+pf_get_divert(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
+ mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
+ M_NOWAIT);
+ if (mtag == NULL)
+ return (NULL);
+ bzero(mtag + 1, sizeof(struct pf_divert));
+ m_tag_prepend(m, mtag);
+ }
+
+ return ((struct pf_divert *)(mtag + 1));
+}
+#endif
#ifdef INET
int
@@ -6922,8 +6616,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
struct pfi_kif *kif;
u_short action, reason = 0, log = 0;
struct mbuf *m = *m0;
- struct ip *h = NULL; /* make the compiler happy */
+#ifdef __FreeBSD__
+ struct ip *h = NULL;
+ struct m_tag *ipfwtag;
+ struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
+#else
+ struct ip *h;
struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
+#endif
struct pf_state *s = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_pdesc pd;
@@ -6931,44 +6631,32 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
#ifdef __FreeBSD__
PF_LOCK();
-#endif
- if (!pf_status.running)
-#ifdef __FreeBSD__
+ if (!V_pf_status.running)
{
PF_UNLOCK();
-#endif
return (PF_PASS);
-#ifdef __FreeBSD__
}
+#else
+ if (!pf_status.running)
+ return (PF_PASS);
#endif
memset(&pd, 0, sizeof(pd));
- if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
#ifdef __FreeBSD__
+ if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
PF_UNLOCK();
-#endif
DPFPRINTF(PF_DEBUG_URGENT,
("pf_test: pf_get_mtag returned NULL\n"));
return (PF_DROP);
}
-#ifdef __FreeBSD__
- if (m->m_flags & M_SKIP_FIREWALL) {
- PF_UNLOCK();
- return (PF_PASS);
- }
-#else
- if (pd.pf_mtag->flags & PF_TAG_GENERATED)
- return (PF_PASS);
#endif
-
-#ifdef __FreeBSD__
- /* XXX_IMPORT: later */
-#else
+#ifndef __FreeBSD__
if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
- ifp = ifp->if_carpdev;
+ kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
+ else
#endif
+ kif = (struct pfi_kif *)ifp->if_pf_kif;
- kif = (struct pfi_kif *)ifp->if_pf_kif;
if (kif == NULL) {
#ifdef __FreeBSD__
PF_UNLOCK();
@@ -6977,12 +6665,15 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
return (PF_DROP);
}
- if (kif->pfik_flags & PFI_IFLAG_SKIP) {
+ if (kif->pfik_flags & PFI_IFLAG_SKIP)
#ifdef __FreeBSD__
+ {
PF_UNLOCK();
#endif
return (PF_PASS);
+#ifdef __FreeBSD__
}
+#endif
#ifdef __FreeBSD__
M_ASSERTPKTHDR(m);
@@ -6991,7 +6682,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
if ((m->m_flags & M_PKTHDR) == 0)
panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */
-#endif /* __FreeBSD__ */
+#endif
if (m->m_pkthdr.len < (int)sizeof(*h)) {
action = PF_DROP;
@@ -7000,12 +6691,36 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
goto done;
}
+#ifdef __FreeBSD__
+ if (m->m_flags & M_SKIP_FIREWALL) {
+ PF_UNLOCK();
+ return (PF_PASS);
+ }
+#else
+ if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
+ return (PF_PASS);
+#endif
+
+#ifdef __FreeBSD__
+ if (ip_divert_ptr != NULL &&
+ ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
+ struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
+ if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
+ pd.pf_mtag->flags |= PF_PACKET_LOOPED;
+ m_tag_delete(m, ipfwtag);
+ }
+ if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
+ m->m_flags |= M_FASTFWD_OURS;
+ pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
+ }
+ } else
+#endif
/* We do IP header normalization and packet reassembly here */
if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
action = PF_DROP;
goto done;
}
- m = *m0;
+ m = *m0; /* pf_normalize messes with m0 */
h = mtod(m, struct ip *);
off = h->ip_hl << 2;
@@ -7018,9 +6733,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
pd.src = (struct pf_addr *)&h->ip_src;
pd.dst = (struct pf_addr *)&h->ip_dst;
- PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
+ pd.sport = pd.dport = NULL;
pd.ip_sum = &h->ip_sum;
+ pd.proto_sum = NULL;
pd.proto = h->ip_p;
+ pd.dir = dir;
+ pd.sidx = (dir == PF_IN) ? 0 : 1;
+ pd.didx = (dir == PF_IN) ? 1 : 0;
pd.af = AF_INET;
pd.tos = h->ip_tos;
pd.tot_len = ntohs(h->ip_len);
@@ -7044,12 +6763,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && pf_check_proto_cksum(m, off,
- ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
- REASON_SET(&reason, PFRES_PROTCKSUM);
- action = PF_DROP;
- goto done;
- }
pd.p_len = pd.tot_len - off - (th.th_off << 2);
if ((th.th_flags & TH_ACK) && pd.p_len == 0)
pqid = 1;
@@ -7059,18 +6772,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
&reason);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_tcp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_tcp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
break;
@@ -7085,12 +6803,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
- off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
- action = PF_DROP;
- REASON_SET(&reason, PFRES_PROTCKSUM);
- goto done;
- }
if (uh.uh_dport == 0 ||
ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
@@ -7100,18 +6812,23 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
}
action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_udp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_udp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
break;
@@ -7126,47 +6843,60 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && pf_check_proto_cksum(m, off,
- ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
- action = PF_DROP;
- REASON_SET(&reason, PFRES_PROTCKSUM);
- goto done;
- }
action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
&reason);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_icmp(&r, &s, dir, kif,
- m, off, h, &pd, &a, &ruleset, NULL);
+ action = pf_test_rule(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_icmp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ipintrq);
#endif
break;
}
+#ifdef INET6
+ case IPPROTO_ICMPV6: {
+ action = PF_DROP;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
+ goto done;
+ }
+#endif
+
default:
- action = pf_test_state_other(&s, dir, kif, &pd);
+ action = pf_test_state_other(&s, dir, kif, m, &pd);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_other(&r, &s, dir, kif, m, off, h,
- &pd, &a, &ruleset, NULL);
+ action = pf_test_rule(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ action = pf_test_rule(&r, &s, dir, kif, m, off, h,
&pd, &a, &ruleset, &ipintrq);
#endif
break;
@@ -7182,18 +6912,38 @@ done:
("pf: dropping packet with ip options\n"));
}
- if ((s && s->tag) || r->rtableid)
- pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
+ if ((s && s->tag) || r->rtableid >= 0)
+#ifdef __FreeBSD__
+ pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
+#else
+ pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
+#endif
+
+ if (dir == PF_IN && s && s->key[PF_SK_STACK])
+#ifdef __FreeBSD__
+ pd.pf_mtag->statekey = s->key[PF_SK_STACK];
+#else
+ m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
+#endif
#ifdef ALTQ
if (action == PF_PASS && r->qid) {
+#ifdef __FreeBSD__
if (pqid || (pd.tos & IPTOS_LOWDELAY))
pd.pf_mtag->qid = r->pqid;
else
pd.pf_mtag->qid = r->qid;
/* add hints for ecn */
- pd.pf_mtag->af = AF_INET;
pd.pf_mtag->hdr = h;
+
+#else
+ if (pqid || (pd.tos & IPTOS_LOWDELAY))
+ m->m_pkthdr.pf.qid = r->pqid;
+ else
+ m->m_pkthdr.pf.qid = r->qid;
+ /* add hints for ecn */
+ m->m_pkthdr.pf.hdr = h;
+#endif
}
#endif /* ALTQ */
@@ -7207,7 +6957,56 @@ done:
(s->nat_rule.ptr->action == PF_RDR ||
s->nat_rule.ptr->action == PF_BINAT) &&
(ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
- pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+#ifdef __FreeBSD__
+ m->m_flags |= M_SKIP_FIREWALL;
+#else
+ m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
+#endif
+
+#ifdef __FreeBSD__
+ if (action == PF_PASS && r->divert.port &&
+ ip_divert_ptr != NULL && !PACKET_LOOPED()) {
+
+ ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
+ sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
+ if (ipfwtag != NULL) {
+ ((struct ipfw_rule_ref *)(ipfwtag+1))->info =
+ ntohs(r->divert.port);
+ ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
+
+ m_tag_prepend(m, ipfwtag);
+
+ PF_UNLOCK();
+
+ if (m->m_flags & M_FASTFWD_OURS) {
+ pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT;
+ m->m_flags &= ~M_FASTFWD_OURS;
+ }
+
+ ip_divert_ptr(*m0,
+ dir == PF_IN ? DIR_IN : DIR_OUT);
+ *m0 = NULL;
+ return (action);
+ } else {
+ /* XXX: ipfw has the same behaviour! */
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: failed to allocate divert tag\n"));
+ }
+ }
+#else
+ if (dir == PF_IN && action == PF_PASS && r->divert.port) {
+ struct pf_divert *divert;
+
+ if ((divert = pf_get_divert(m))) {
+ m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
+ divert->port = r->divert.port;
+ divert->addr.ipv4 = r->divert.addr.v4;
+ }
+ }
+#endif
if (log) {
struct pf_rule *lr;
@@ -7251,53 +7050,44 @@ done:
}
tr = r;
nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
- if (nr != NULL) {
- struct pf_addr *x;
- /*
- * XXX: we need to make sure that the addresses
- * passed to pfr_update_stats() are the same than
- * the addresses used during matching (pfr_match)
- */
- if (r == &pf_default_rule) {
- tr = nr;
- x = (s == NULL || s->direction == dir) ?
- &pd.baddr : &pd.naddr;
- } else
- x = (s == NULL || s->direction == dir) ?
- &pd.naddr : &pd.baddr;
- if (x == &pd.baddr || s == NULL) {
- /* we need to change the address */
- if (dir == PF_OUT)
- pd.src = x;
- else
- pd.dst = x;
- }
- }
+#ifdef __FreeBSD__
+ if (nr != NULL && r == &V_pf_default_rule)
+#else
+ if (nr != NULL && r == &pf_default_rule)
+#endif
+ tr = nr;
if (tr->src.addr.type == PF_ADDR_TABLE)
- pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
- s->direction == dir) ? pd.src : pd.dst, pd.af,
- pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
- tr->src.neg);
+ pfr_update_stats(tr->src.addr.p.tbl,
+ (s == NULL) ? pd.src :
+ &s->key[(s->direction == PF_IN)]->
+ addr[(s->direction == PF_OUT)],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->src.neg);
if (tr->dst.addr.type == PF_ADDR_TABLE)
- pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
- s->direction == dir) ? pd.dst : pd.src, pd.af,
- pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
- tr->dst.neg);
+ pfr_update_stats(tr->dst.addr.p.tbl,
+ (s == NULL) ? pd.dst :
+ &s->key[(s->direction == PF_IN)]->
+ addr[(s->direction == PF_IN)],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->dst.neg);
}
-
- if (action == PF_SYNPROXY_DROP) {
+ switch (action) {
+ case PF_SYNPROXY_DROP:
m_freem(*m0);
+ case PF_DEFER:
*m0 = NULL;
action = PF_PASS;
- } else if (r->rt)
+ break;
+ default:
/* pf_route can free the mbuf causing *m0 to become NULL */
- pf_route(m0, r, dir, ifp, s, &pd);
-
+ if (r->rt)
+ pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
+ break;
+ }
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
-
return (action);
}
#endif /* INET */
@@ -7315,8 +7105,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
struct pfi_kif *kif;
u_short action, reason = 0, log = 0;
struct mbuf *m = *m0, *n = NULL;
+#ifdef __FreeBSD__
+ struct ip6_hdr *h = NULL;
+ struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
+#else
struct ip6_hdr *h;
struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
+#endif
struct pf_state *s = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_pdesc pd;
@@ -7324,38 +7119,31 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
#ifdef __FreeBSD__
PF_LOCK();
-#endif
-
- if (!pf_status.running)
-#ifdef __FreeBSD__
- {
+ if (!V_pf_status.running) {
PF_UNLOCK();
-#endif
return (PF_PASS);
-#ifdef __FreeBSD__
}
+#else
+ if (!pf_status.running)
+ return (PF_PASS);
#endif
memset(&pd, 0, sizeof(pd));
- if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
#ifdef __FreeBSD__
+ if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
PF_UNLOCK();
-#endif
DPFPRINTF(PF_DEBUG_URGENT,
- ("pf_test6: pf_get_mtag returned NULL\n"));
+ ("pf_test: pf_get_mtag returned NULL\n"));
return (PF_DROP);
}
- if (pd.pf_mtag->flags & PF_TAG_GENERATED)
- return (PF_PASS);
-
-#ifdef __FreeBSD__
- /* XXX_IMPORT: later */
-#else
+#endif
+#ifndef __FreeBSD__
if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
- ifp = ifp->if_carpdev;
+ kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
+ else
#endif
+ kif = (struct pfi_kif *)ifp->if_pf_kif;
- kif = (struct pfi_kif *)ifp->if_pf_kif;
if (kif == NULL) {
#ifdef __FreeBSD__
PF_UNLOCK();
@@ -7364,12 +7152,15 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
return (PF_DROP);
}
- if (kif->pfik_flags & PFI_IFLAG_SKIP) {
+ if (kif->pfik_flags & PFI_IFLAG_SKIP)
#ifdef __FreeBSD__
+ {
PF_UNLOCK();
#endif
return (PF_PASS);
+#ifdef __FreeBSD__
}
+#endif
#ifdef __FreeBSD__
M_ASSERTPKTHDR(m);
@@ -7380,10 +7171,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
#endif /* DIAGNOSTIC */
#endif
-#ifdef __FreeBSD__
- h = NULL; /* make the compiler happy */
-#endif
-
if (m->m_pkthdr.len < (int)sizeof(*h)) {
action = PF_DROP;
REASON_SET(&reason, PFRES_SHORT);
@@ -7391,12 +7178,23 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
goto done;
}
+#ifdef __FreeBSD__
+ if (pd.pf_mtag->flags & PF_TAG_GENERATED) {
+ PF_UNLOCK();
+#else
+ if (m->m_pkthdr.pf.flags & PF_TAG_GENERATED)
+#endif
+ return (PF_PASS);
+#ifdef __FreeBSD__
+ }
+#endif
+
/* We do IP header normalization and packet reassembly here */
if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
action = PF_DROP;
goto done;
}
- m = *m0;
+ m = *m0; /* pf_normalize messes with m0 */
h = mtod(m, struct ip6_hdr *);
#if 1
@@ -7413,8 +7211,12 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
pd.src = (struct pf_addr *)&h->ip6_src;
pd.dst = (struct pf_addr *)&h->ip6_dst;
- PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
+ pd.sport = pd.dport = NULL;
pd.ip_sum = NULL;
+ pd.proto_sum = NULL;
+ pd.dir = dir;
+ pd.sidx = (dir == PF_IN) ? 0 : 1;
+ pd.didx = (dir == PF_IN) ? 1 : 0;
pd.af = AF_INET6;
pd.tos = 0;
pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
@@ -7458,7 +7260,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
log = 1;
goto done;
}
- /* fallthrough */
+ /* FALLTHROUGH */
}
case IPPROTO_AH:
case IPPROTO_HOPOPTS:
@@ -7503,13 +7305,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && pf_check_proto_cksum(n, off,
- ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
- IPPROTO_TCP, AF_INET6)) {
- action = PF_DROP;
- REASON_SET(&reason, PFRES_PROTCKSUM);
- goto done;
- }
pd.p_len = pd.tot_len - off - (th.th_off << 2);
action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
if (action == PF_DROP)
@@ -7517,18 +7312,23 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
&reason);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_tcp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_tcp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
break;
@@ -7543,13 +7343,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n,
- off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
- IPPROTO_UDP, AF_INET6)) {
- action = PF_DROP;
- REASON_SET(&reason, PFRES_PROTCKSUM);
- goto done;
- }
if (uh.uh_dport == 0 ||
ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
@@ -7559,23 +7352,35 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
}
action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_udp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_udp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
break;
}
+ case IPPROTO_ICMP: {
+ action = PF_DROP;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
+ goto done;
+ }
+
case IPPROTO_ICMPV6: {
struct icmp6_hdr ih;
@@ -7585,54 +7390,62 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
log = action != PF_PASS;
goto done;
}
- if (dir == PF_IN && pf_check_proto_cksum(n, off,
- ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
- IPPROTO_ICMPV6, AF_INET6)) {
- action = PF_DROP;
- REASON_SET(&reason, PFRES_PROTCKSUM);
- goto done;
- }
action = pf_test_state_icmp(&s, dir, kif,
m, off, h, &pd, &reason);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_icmp(&r, &s, dir, kif,
- m, off, h, &pd, &a, &ruleset, NULL);
+ action = pf_test_rule(&r, &s, dir, kif,
+ m, off, h, &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_icmp(&r, &s, dir, kif,
+ action = pf_test_rule(&r, &s, dir, kif,
m, off, h, &pd, &a, &ruleset, &ip6intrq);
#endif
break;
}
default:
- action = pf_test_state_other(&s, dir, kif, &pd);
+ action = pf_test_state_other(&s, dir, kif, m, &pd);
if (action == PF_PASS) {
-#if NPFSYNC
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+#else
pfsync_update_state(s);
+#endif
#endif /* NPFSYNC */
r = s->rule.ptr;
a = s->anchor.ptr;
log = s->log;
} else if (s == NULL)
#ifdef __FreeBSD__
- action = pf_test_other(&r, &s, dir, kif, m, off, h,
- &pd, &a, &ruleset, NULL);
+ action = pf_test_rule(&r, &s, dir, kif, m, off, h,
+ &pd, &a, &ruleset, NULL, inp);
#else
- action = pf_test_other(&r, &s, dir, kif, m, off, h,
+ action = pf_test_rule(&r, &s, dir, kif, m, off, h,
&pd, &a, &ruleset, &ip6intrq);
#endif
break;
}
done:
+ if (n != m) {
+ m_freem(n);
+ n = NULL;
+ }
+
/* handle dangerous IPv6 extension headers. */
if (action == PF_PASS && rh_cnt &&
!((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
@@ -7643,18 +7456,37 @@ done:
("pf: dropping packet with dangerous v6 headers\n"));
}
- if ((s && s->tag) || r->rtableid)
- pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
+ if ((s && s->tag) || r->rtableid >= 0)
+#ifdef __FreeBSD__
+ pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag);
+#else
+ pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
+#endif
+
+ if (dir == PF_IN && s && s->key[PF_SK_STACK])
+#ifdef __FreeBSD__
+ pd.pf_mtag->statekey = s->key[PF_SK_STACK];
+#else
+ m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
+#endif
#ifdef ALTQ
if (action == PF_PASS && r->qid) {
+#ifdef __FreeBSD__
if (pd.tos & IPTOS_LOWDELAY)
pd.pf_mtag->qid = r->pqid;
else
pd.pf_mtag->qid = r->qid;
/* add hints for ecn */
- pd.pf_mtag->af = AF_INET6;
pd.pf_mtag->hdr = h;
+#else
+ if (pd.tos & IPTOS_LOWDELAY)
+ m->m_pkthdr.pf.qid = r->pqid;
+ else
+ m->m_pkthdr.pf.qid = r->qid;
+ /* add hints for ecn */
+ m->m_pkthdr.pf.hdr = h;
+#endif
}
#endif /* ALTQ */
@@ -7663,7 +7495,27 @@ done:
(s->nat_rule.ptr->action == PF_RDR ||
s->nat_rule.ptr->action == PF_BINAT) &&
IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
- pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
+#ifdef __FreeBSD__
+ m->m_flags |= M_SKIP_FIREWALL;
+#else
+ m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
+#endif
+
+#ifdef __FreeBSD__
+ /* XXX: Anybody working on it?! */
+ if (r->divert.port)
+ printf("pf: divert(9) is not supported for IPv6\n");
+#else
+ if (dir == PF_IN && action == PF_PASS && r->divert.port) {
+ struct pf_divert *divert;
+
+ if ((divert = pf_get_divert(m))) {
+ m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
+ divert->port = r->divert.port;
+ divert->addr.ipv6 = r->divert.addr.v6;
+ }
+ }
+#endif
if (log) {
struct pf_rule *lr;
@@ -7707,48 +7559,39 @@ done:
}
tr = r;
nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
- if (nr != NULL) {
- struct pf_addr *x;
- /*
- * XXX: we need to make sure that the addresses
- * passed to pfr_update_stats() are the same than
- * the addresses used during matching (pfr_match)
- */
- if (r == &pf_default_rule) {
- tr = nr;
- x = (s == NULL || s->direction == dir) ?
- &pd.baddr : &pd.naddr;
- } else {
- x = (s == NULL || s->direction == dir) ?
- &pd.naddr : &pd.baddr;
- }
- if (x == &pd.baddr || s == NULL) {
- if (dir == PF_OUT)
- pd.src = x;
- else
- pd.dst = x;
- }
- }
+#ifdef __FreeBSD__
+ if (nr != NULL && r == &V_pf_default_rule)
+#else
+ if (nr != NULL && r == &pf_default_rule)
+#endif
+ tr = nr;
if (tr->src.addr.type == PF_ADDR_TABLE)
- pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
- s->direction == dir) ? pd.src : pd.dst, pd.af,
- pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
- tr->src.neg);
+ pfr_update_stats(tr->src.addr.p.tbl,
+ (s == NULL) ? pd.src :
+ &s->key[(s->direction == PF_IN)]->addr[0],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->src.neg);
if (tr->dst.addr.type == PF_ADDR_TABLE)
- pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
- s->direction == dir) ? pd.dst : pd.src, pd.af,
- pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
- tr->dst.neg);
+ pfr_update_stats(tr->dst.addr.p.tbl,
+ (s == NULL) ? pd.dst :
+ &s->key[(s->direction == PF_IN)]->addr[1],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->dst.neg);
}
-
- if (action == PF_SYNPROXY_DROP) {
+ switch (action) {
+ case PF_SYNPROXY_DROP:
m_freem(*m0);
+ case PF_DEFER:
*m0 = NULL;
action = PF_PASS;
- } else if (r->rt)
+ break;
+ default:
/* pf_route6 can free the mbuf causing *m0 to become NULL */
- pf_route6(m0, r, dir, ifp, s, &pd);
+ if (r->rt)
+ pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
+ break;
+ }
#ifdef __FreeBSD__
PF_UNLOCK();
@@ -7770,3 +7613,20 @@ pf_check_congestion(struct ifqueue *ifq)
return (0);
#endif
}
+
+/*
+ * must be called whenever any addressing information such as
+ * address, port, protocol has changed
+ */
+void
+pf_pkt_addr_changed(struct mbuf *m)
+{
+#ifdef __FreeBSD__
+ struct pf_mtag *pf_tag;
+
+ if ((pf_tag = pf_find_mtag(m)) != NULL)
+ pf_tag->statekey = NULL;
+#else
+ m->m_pkthdr.pf.statekey = NULL;
+#endif
+}
diff --git a/freebsd/sys/contrib/pf/net/pf_if.c b/freebsd/sys/contrib/pf/net/pf_if.c
index 8ff3c614..3ac645f9 100644
--- a/freebsd/sys/contrib/pf/net/pf_if.c
+++ b/freebsd/sys/contrib/pf/net/pf_if.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */
+/* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */
/*
* Copyright 2005 Henning Brauer <henning@openbsd.org>
@@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$");
#include <sys/device.h>
#endif
#include <rtems/bsd/sys/time.h>
+#ifndef __FreeBSD__
+#include <sys/pool.h>
+#endif
#include <net/if.h>
#include <net/if_types.h>
@@ -75,25 +78,35 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#endif /* INET6 */
-struct pfi_kif *pfi_all = NULL;
-struct pfi_statehead pfi_statehead;
#ifdef __FreeBSD__
-uma_zone_t pfi_addr_pl;
+VNET_DEFINE(struct pfi_kif *, pfi_all);
+VNET_DEFINE(uma_zone_t, pfi_addr_pl);
+VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
+#define V_pfi_ifs VNET(pfi_ifs)
+VNET_DEFINE(long, pfi_update);
+#define V_pfi_update VNET(pfi_update)
+VNET_DEFINE(struct pfr_addr *, pfi_buffer);
+#define V_pfi_buffer VNET(pfi_buffer)
+VNET_DEFINE(int, pfi_buffer_cnt);
+#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt)
+VNET_DEFINE(int, pfi_buffer_max);
+#define V_pfi_buffer_max VNET(pfi_buffer_max)
#else
+struct pfi_kif *pfi_all = NULL;
struct pool pfi_addr_pl;
-#endif
struct pfi_ifhead pfi_ifs;
long pfi_update = 1;
struct pfr_addr *pfi_buffer;
int pfi_buffer_cnt;
int pfi_buffer_max;
+#endif
#ifdef __FreeBSD__
-eventhandler_tag pfi_attach_cookie = NULL;
-eventhandler_tag pfi_detach_cookie = NULL;
-eventhandler_tag pfi_attach_group_cookie = NULL;
-eventhandler_tag pfi_change_group_cookie = NULL;
-eventhandler_tag pfi_detach_group_cookie = NULL;
-eventhandler_tag pfi_ifaddr_event_cookie = NULL;
+eventhandler_tag pfi_attach_cookie;
+eventhandler_tag pfi_detach_cookie;
+eventhandler_tag pfi_attach_group_cookie;
+eventhandler_tag pfi_change_group_cookie;
+eventhandler_tag pfi_detach_group_cookie;
+eventhandler_tag pfi_ifaddr_event_cookie;
#endif
void pfi_kif_update(struct pfi_kif *);
@@ -109,11 +122,10 @@ int pfi_unmask(void *);
#ifdef __FreeBSD__
void pfi_attach_ifnet_event(void * __unused, struct ifnet *);
void pfi_detach_ifnet_event(void * __unused, struct ifnet *);
-void pfi_attach_group_event(void * __unused, struct ifg_group *);
-void pfi_change_group_event(void * __unused, char *);
-void pfi_detach_group_event(void * __unused, struct ifg_group *);
+void pfi_attach_group_event(void *, struct ifg_group *);
+void pfi_change_group_event(void *, char *);
+void pfi_detach_group_event(void *, struct ifg_group *);
void pfi_ifaddr_event(void * __unused, struct ifnet *);
-
#endif
RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
@@ -125,22 +137,31 @@ RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
void
pfi_initialize(void)
{
-
+#ifdef __FreeBSD__
+ if (V_pfi_all != NULL) /* already initialized */
+#else
if (pfi_all != NULL) /* already initialized */
+#endif
return;
- TAILQ_INIT(&pfi_statehead);
#ifndef __FreeBSD__
- pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0,
+ pool_init(&V_pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0,
"pfiaddrpl", &pool_allocator_nointr);
#endif
+#ifdef __FreeBSD__
+ V_pfi_buffer_max = 64;
+ V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
+ PFI_MTYPE, M_WAITOK);
+
+ if ((V_pfi_all = pfi_kif_get(IFG_ALL)) == NULL)
+#else
pfi_buffer_max = 64;
pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer),
PFI_MTYPE, M_WAITOK);
if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL)
+#endif
panic("pfi_kif_get for pfi_all failed");
-
#ifdef __FreeBSD__
struct ifg_group *ifg;
struct ifnet *ifp;
@@ -157,11 +178,11 @@ pfi_initialize(void)
pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
- pfi_attach_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
- pfi_change_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY);
pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
- pfi_detach_group_event, NULL, EVENTHANDLER_PRI_ANY);
+ pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
#endif
@@ -182,18 +203,18 @@ pfi_cleanup(void)
EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
PF_LOCK();
- pfi_all = NULL;
- while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) {
+ V_pfi_all = NULL;
+ while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
if (p->pfik_rules || p->pfik_states) {
printf("pfi_cleanup: dangling refs for %s\n",
p->pfik_name);
}
- RB_REMOVE(pfi_ifhead, &pfi_ifs, p);
+ RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p);
free(p, PFI_MTYPE);
}
- free(pfi_buffer, PFI_MTYPE);
+ free(V_pfi_buffer, PFI_MTYPE);
}
#endif
@@ -205,18 +226,21 @@ pfi_kif_get(const char *kif_name)
bzero(&s, sizeof(s));
strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
+#ifdef __FreeBSD__
+ if ((kif = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s)) != NULL)
+#else
if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL)
+#endif
return (kif);
/* create new one */
#ifdef __FreeBSD__
- if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL)
+ if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
#else
- if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL)
+ if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT|M_ZERO)) == NULL)
#endif
return (NULL);
- bzero(kif, sizeof(*kif));
strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
#ifdef __FreeBSD__
/*
@@ -232,7 +256,12 @@ pfi_kif_get(const char *kif_name)
#endif
TAILQ_INIT(&kif->pfik_dynaddrs);
+#ifdef __FreeBSD__
+ RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif);
+#else
RB_INSERT(pfi_ifhead, &pfi_ifs, kif);
+#endif
+
return (kif);
}
@@ -244,8 +273,7 @@ pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what)
kif->pfik_rules++;
break;
case PFI_KIF_REF_STATE:
- if (!kif->pfik_states++)
- TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states);
+ kif->pfik_states++;
break;
default:
panic("pfi_kif_ref with unknown type");
@@ -273,20 +301,27 @@ pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what)
printf("pfi_kif_unref: state refcount <= 0\n");
return;
}
- if (!--kif->pfik_states)
- TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states);
+ kif->pfik_states--;
break;
default:
panic("pfi_kif_unref with unknown type");
}
+#ifdef __FreeBSD__
+ if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
+#else
if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all)
+#endif
return;
if (kif->pfik_rules || kif->pfik_states)
return;
+#ifdef __FreeBSD__
+ RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
+#else
RB_REMOVE(pfi_ifhead, &pfi_ifs, kif);
+#endif
free(kif, PFI_MTYPE);
}
@@ -314,7 +349,11 @@ pfi_attach_ifnet(struct ifnet *ifp)
pfi_initialize();
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
if ((kif = pfi_kif_get(ifp->if_xname)) == NULL)
panic("pfi_kif_get failed");
@@ -343,7 +382,11 @@ pfi_detach_ifnet(struct ifnet *ifp)
return;
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
#ifndef __FreeBSD__
hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie);
#endif
@@ -363,7 +406,11 @@ pfi_attach_ifgroup(struct ifg_group *ifg)
pfi_initialize();
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL)
panic("pfi_kif_get failed");
@@ -383,7 +430,11 @@ pfi_detach_ifgroup(struct ifg_group *ifg)
return;
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
kif->pfik_group = NULL;
ifg->ifg_pf_kif = NULL;
@@ -398,7 +449,11 @@ pfi_group_change(const char *group)
int s;
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
if ((kif = pfi_kif_get(group)) == NULL)
panic("pfi_kif_get failed");
@@ -452,9 +507,13 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
if (aw->type != PF_ADDR_DYNIFTL)
return (0);
- if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL)
+#ifdef __FreeBSD__
+ if ((dyn = pool_get(&V_pfi_addr_pl, PR_NOWAIT | PR_ZERO))
+#else
+ if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO))
+#endif
+ == NULL)
return (1);
- bzero(dyn, sizeof(*dyn));
s = splsoftnet();
if (!strcmp(aw->v.ifname, "self"))
@@ -487,7 +546,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
goto _bad;
}
- if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) {
+ if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname, 1)) == NULL) {
rv = 1;
goto _bad;
}
@@ -509,7 +568,11 @@ _bad:
pf_remove_if_empty_ruleset(ruleset);
if (dyn->pfid_kif != NULL)
pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
+#ifdef __FreeBSD__
+ pool_put(&V_pfi_addr_pl, dyn);
+#else
pool_put(&pfi_addr_pl, dyn);
+#endif
splx(s);
return (rv);
}
@@ -543,10 +606,18 @@ pfi_dynaddr_update(struct pfi_dynaddr *dyn)
kif = dyn->pfid_kif;
kt = dyn->pfid_kt;
+#ifdef __FreeBSD__
+ if (kt->pfrkt_larg != V_pfi_update) {
+#else
if (kt->pfrkt_larg != pfi_update) {
+#endif
/* this table needs to be brought up-to-date */
pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags);
+#ifdef __FreeBSD__
+ kt->pfrkt_larg = V_pfi_update;
+#else
kt->pfrkt_larg = pfi_update;
+#endif
}
pfr_dynaddr_update(kt, dyn);
}
@@ -557,7 +628,11 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
int e, size2 = 0;
struct ifg_member *ifgm;
+#ifdef __FreeBSD__
+ V_pfi_buffer_cnt = 0;
+#else
pfi_buffer_cnt = 0;
+#endif
if (kif->pfik_ifp != NULL)
pfi_instance_add(kif->pfik_ifp, net, flags);
@@ -565,10 +640,17 @@ pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
pfi_instance_add(ifgm->ifgm_ifp, net, flags);
+#ifdef __FreeBSD__
+ if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
+ NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
+ printf("pfi_table_update: cannot set %d new addresses "
+ "into table %s: %d\n", V_pfi_buffer_cnt, kt->pfrkt_name, e);
+#else
if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2,
NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
printf("pfi_table_update: cannot set %d new addresses "
"into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e);
+#endif
}
void
@@ -589,9 +671,9 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
#ifdef __FreeBSD__
/*
* XXX: For point-to-point interfaces, (ifname:0) and IPv4,
- * jump over addresses without a proper route to work
- * around a problem with ppp not fully removing the
- * address used during IPCP.
+ * jump over addresses without a proper route to work
+ * around a problem with ppp not fully removing the
+ * address used during IPCP.
*/
if ((ifp->if_flags & IFF_POINTOPOINT) &&
!(ia->ifa_flags & IFA_ROUTE) &&
@@ -646,15 +728,24 @@ pfi_address_add(struct sockaddr *sa, int af, int net)
struct pfr_addr *p;
int i;
+#ifdef __FreeBSD__
+ if (V_pfi_buffer_cnt >= V_pfi_buffer_max) {
+ int new_max = V_pfi_buffer_max * 2;
+#else
if (pfi_buffer_cnt >= pfi_buffer_max) {
int new_max = pfi_buffer_max * 2;
+#endif
if (new_max > PFI_BUFFER_MAX) {
printf("pfi_address_add: address buffer full (%d/%d)\n",
+#ifdef __FreeBSD__
+ V_pfi_buffer_cnt, PFI_BUFFER_MAX);
+#else
pfi_buffer_cnt, PFI_BUFFER_MAX);
+#endif
return;
}
- p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE,
+ p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE,
#ifdef __FreeBSD__
M_NOWAIT);
#else
@@ -662,18 +753,34 @@ pfi_address_add(struct sockaddr *sa, int af, int net)
#endif
if (p == NULL) {
printf("pfi_address_add: no memory to grow buffer "
+#ifdef __FreeBSD__
+ "(%d/%d)\n", V_pfi_buffer_cnt, PFI_BUFFER_MAX);
+#else
"(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX);
+#endif
return;
}
- memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer));
+#ifdef __FreeBSD__
+ memcpy(V_pfi_buffer, p, V_pfi_buffer_cnt * sizeof(*V_pfi_buffer));
+ /* no need to zero buffer */
+ free(V_pfi_buffer, PFI_MTYPE);
+ V_pfi_buffer = p;
+ V_pfi_buffer_max = new_max;
+#else
+ memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer));
/* no need to zero buffer */
free(pfi_buffer, PFI_MTYPE);
pfi_buffer = p;
pfi_buffer_max = new_max;
+#endif
}
if (af == AF_INET && net > 32)
net = 128;
+#ifdef __FreeBSD__
+ p = V_pfi_buffer + V_pfi_buffer_cnt++;
+#else
p = pfi_buffer + pfi_buffer_cnt++;
+#endif
bzero(p, sizeof(*p));
p->pfra_af = af;
p->pfra_net = net;
@@ -706,7 +813,11 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw)
aw->p.dyn->pfid_kif = NULL;
pfr_detach_table(aw->p.dyn->pfid_kt);
aw->p.dyn->pfid_kt = NULL;
+#ifdef __FreeBSD__
+ pool_put(&V_pfi_addr_pl, aw->p.dyn);
+#else
pool_put(&pfi_addr_pl, aw->p.dyn);
+#endif
aw->p.dyn = NULL;
splx(s);
}
@@ -727,7 +838,11 @@ pfi_kifaddr_update(void *v)
struct pfi_kif *kif = (struct pfi_kif *)v;
s = splsoftnet();
+#ifdef __FreeBSD__
+ V_pfi_update++;
+#else
pfi_update++;
+#endif
pfi_kif_update(kif);
splx(s);
}
@@ -739,49 +854,61 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
}
void
-pfi_fill_oldstatus(struct pf_status *pfs)
+pfi_update_status(const char *name, struct pf_status *pfs)
{
struct pfi_kif *p;
- struct pfi_kif_cmp key;
+ struct pfi_kif_cmp key;
+ struct ifg_member p_member, *ifgm;
+ TAILQ_HEAD(, ifg_member) ifg_members;
int i, j, k, s;
- strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name));
+ strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
s = splsoftnet();
+#ifdef __FreeBSD__
+ p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key);
+#else
p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key);
+#endif
if (p == NULL) {
splx(s);
return;
}
- bzero(pfs->pcounters, sizeof(pfs->pcounters));
- bzero(pfs->bcounters, sizeof(pfs->bcounters));
- for (i = 0; i < 2; i++)
- for (j = 0; j < 2; j++)
- for (k = 0; k < 2; k++) {
- pfs->pcounters[i][j][k] =
- p->pfik_packets[i][j][k];
- pfs->bcounters[i][j] +=
- p->pfik_bytes[i][j][k];
- }
- splx(s);
-}
-
-int
-pfi_clr_istats(const char *name)
-{
- struct pfi_kif *p;
- int s;
+ if (p->pfik_group != NULL) {
+ bcopy(&p->pfik_group->ifg_members, &ifg_members,
+ sizeof(ifg_members));
+ } else {
+ /* build a temporary list for p only */
+ bzero(&p_member, sizeof(p_member));
+ p_member.ifgm_ifp = p->pfik_ifp;
+ TAILQ_INIT(&ifg_members);
+ TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next);
+ }
+ if (pfs) {
+ bzero(pfs->pcounters, sizeof(pfs->pcounters));
+ bzero(pfs->bcounters, sizeof(pfs->bcounters));
+ }
+ TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) {
+ if (ifgm->ifgm_ifp == NULL)
+ continue;
+ p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif;
- s = splsoftnet();
- RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
- if (pfi_skip_if(name, p))
+ /* just clear statistics */
+ if (pfs == NULL) {
+ bzero(p->pfik_packets, sizeof(p->pfik_packets));
+ bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
+ p->pfik_tzero = time_second;
continue;
- bzero(p->pfik_packets, sizeof(p->pfik_packets));
- bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
- p->pfik_tzero = time_second;
+ }
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++) {
+ pfs->pcounters[i][j][k] +=
+ p->pfik_packets[i][j][k];
+ pfs->bcounters[i][j] +=
+ p->pfik_bytes[i][j][k];
+ }
}
splx(s);
-
- return (0);
}
int
@@ -794,8 +921,13 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
#endif
s = splsoftnet();
+#ifdef __FreeBSD__
+ for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) {
+ nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
+#else
for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) {
nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
+#endif
if (pfi_skip_if(name, p))
continue;
if (*size > n++) {
@@ -812,7 +944,11 @@ pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
splx(s);
return (EFAULT);
}
+#ifdef __FreeBSD__
+ nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
+#else
nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
+#endif
pfi_kif_unref(p, PFI_KIF_REF_RULE);
}
}
@@ -847,7 +983,11 @@ pfi_set_flags(const char *name, int flags)
int s;
s = splsoftnet();
+#ifdef __FreeBSD__
+ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
+#else
RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
+#endif
if (pfi_skip_if(name, p))
continue;
p->pfik_flags |= flags;
@@ -863,7 +1003,11 @@ pfi_clear_flags(const char *name, int flags)
int s;
s = splsoftnet();
+#ifdef __FreeBSD__
+ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
+#else
RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
+#endif
if (pfi_skip_if(name, p))
continue;
p->pfik_flags &= ~flags;
@@ -896,55 +1040,73 @@ pfi_unmask(void *addr)
void
pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
+
+ CURVNET_SET(ifp->if_vnet);
PF_LOCK();
pfi_attach_ifnet(ifp);
#ifdef ALTQ
pf_altq_ifnet_event(ifp, 0);
#endif
PF_UNLOCK();
+ CURVNET_RESTORE();
}
void
pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
+
+ CURVNET_SET(ifp->if_vnet);
PF_LOCK();
pfi_detach_ifnet(ifp);
#ifdef ALTQ
pf_altq_ifnet_event(ifp, 1);
#endif
PF_UNLOCK();
+ CURVNET_RESTORE();
}
void
-pfi_attach_group_event(void *arg __unused, struct ifg_group *ifg)
+pfi_attach_group_event(void *arg , struct ifg_group *ifg)
{
+
+ CURVNET_SET((struct vnet *)arg);
PF_LOCK();
pfi_attach_ifgroup(ifg);
PF_UNLOCK();
+ CURVNET_RESTORE();
}
void
-pfi_change_group_event(void *arg __unused, char *gname)
+pfi_change_group_event(void *arg, char *gname)
{
+
+ CURVNET_SET((struct vnet *)arg);
PF_LOCK();
pfi_group_change(gname);
PF_UNLOCK();
+ CURVNET_RESTORE();
}
void
-pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg)
+pfi_detach_group_event(void *arg, struct ifg_group *ifg)
{
+
+ CURVNET_SET((struct vnet *)arg);
PF_LOCK();
pfi_detach_ifgroup(ifg);
PF_UNLOCK();
+ CURVNET_RESTORE();
}
void
pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{
+
+ CURVNET_SET(ifp->if_vnet);
PF_LOCK();
if (ifp && ifp->if_pf_kif)
pfi_kifaddr_update(ifp->if_pf_kif);
PF_UNLOCK();
+ CURVNET_RESTORE();
}
#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/pf_ioctl.c b/freebsd/sys/contrib/pf/net/pf_ioctl.c
index 7479b510..9cfa9b32 100644
--- a/freebsd/sys/contrib/pf/net/pf_ioctl.c
+++ b/freebsd/sys/contrib/pf/net/pf_ioctl.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */
+/* $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -46,29 +46,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_pf.h>
-#ifdef DEV_BPF
-#define NBPFILTER DEV_BPF
-#else
-#define NBPFILTER 0
-#endif
+#define NPFSYNC 1
#ifdef DEV_PFLOG
-#define NPFLOG DEV_PFLOG
-#else
-#define NPFLOG 0
-#endif
-
-#ifdef DEV_PFSYNC
-#define NPFSYNC DEV_PFSYNC
+#define NPFLOG DEV_PFLOG
#else
-#define NPFSYNC 0
+#define NPFLOG 0
#endif
-#else
-#include "bpfilter.h"
-#include "pflog.h"
+#else /* !__FreeBSD__ */
#include "pfsync.h"
-#endif
+#include "pflog.h"
+#endif /* __FreeBSD__ */
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -79,8 +68,9 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/time.h>
-#include <sys/malloc.h>
#ifdef __FreeBSD__
+#include <sys/ucred.h>
+#include <sys/jail.h>
#include <sys/module.h>
#include <sys/conf.h>
#include <sys/proc.h>
@@ -102,6 +92,7 @@ __FBSDID("$FreeBSD$");
#ifdef __FreeBSD__
#include <net/vnet.h>
#endif
+#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@@ -118,11 +109,11 @@ __FBSDID("$FreeBSD$");
#endif
#include <net/pfvar.h>
-#if NPFSYNC > 0
#include <net/if_pfsync.h>
-#endif /* NPFSYNC > 0 */
+#if NPFLOG > 0
#include <net/if_pflog.h>
+#endif /* NPFLOG > 0 */
#ifdef INET6
#include <netinet/ip6.h>
@@ -158,7 +149,7 @@ void pf_empty_pool(struct pf_palist *);
#ifdef __FreeBSD__
int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
#else
-int pfioctl(struct cdev *, u_long, caddr_t, int, struct proc *);
+int pfioctl(dev_t, u_long, caddr_t, int, struct proc *);
#endif
#ifdef ALTQ
int pf_begin_altq(u_int32_t *);
@@ -173,25 +164,43 @@ int pf_setup_pfsync_matching(struct pf_ruleset *);
void pf_hash_rule(MD5_CTX *, struct pf_rule *);
void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
int pf_commit_rules(u_int32_t, int, char *);
+int pf_addr_setup(struct pf_ruleset *,
+ struct pf_addr_wrap *, sa_family_t);
+void pf_addr_copyout(struct pf_addr_wrap *);
+
+#define TAGID_MAX 50000
-struct pf_rule pf_default_rule;
#ifdef __FreeBSD__
-struct sx pf_consistency_lock;
-SX_SYSINIT(pf_consistency_lock, &pf_consistency_lock, "pf_statetbl_lock");
-#else
-struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER;
+VNET_DEFINE(struct pf_rule, pf_default_rule);
+VNET_DEFINE(struct sx, pf_consistency_lock);
+
+#ifdef ALTQ
+static VNET_DEFINE(int, pf_altq_running);
+#define V_pf_altq_running VNET(pf_altq_running)
#endif
+
+TAILQ_HEAD(pf_tags, pf_tagname);
+
+#define V_pf_tags VNET(pf_tags)
+VNET_DEFINE(struct pf_tags, pf_tags);
+#define V_pf_qids VNET(pf_qids)
+VNET_DEFINE(struct pf_tags, pf_qids);
+
+#else /* !__FreeBSD__ */
+struct pf_rule pf_default_rule;
+struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk");
#ifdef ALTQ
static int pf_altq_running;
#endif
-#define TAGID_MAX 50000
TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
+#endif /* __FreeBSD__ */
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif
+
u_int16_t tagname2tag(struct pf_tags *, char *);
void tag2tagname(struct pf_tags *, u_int16_t, char *);
void tag_unref(struct pf_tags *, u_int16_t);
@@ -199,12 +208,15 @@ int pf_rtlabel_add(struct pf_addr_wrap *);
void pf_rtlabel_remove(struct pf_addr_wrap *);
void pf_rtlabel_copyout(struct pf_addr_wrap *);
+#ifdef __FreeBSD__
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+#else
#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
-
+#endif
#ifdef __FreeBSD__
-static struct cdev *pf_dev;
-
+struct cdev *pf_dev;
+
/*
* XXX - These are new and need to be checked when moveing to a new version
*/
@@ -218,22 +230,24 @@ static void pf_clear_srcnodes(void);
/*
* Wrapper functions for pfil(9) hooks
*/
+#ifdef INET
static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, struct inpcb *inp);
+ int dir, struct inpcb *inp);
static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, struct inpcb *inp);
+ int dir, struct inpcb *inp);
+#endif
#ifdef INET6
static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, struct inpcb *inp);
+ int dir, struct inpcb *inp);
static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
- int dir, struct inpcb *inp);
+ int dir, struct inpcb *inp);
#endif
-
-static int hook_pf(void);
-static int dehook_pf(void);
-static int shutdown_pf(void);
-static int pf_load(void);
-static int pf_unload(void);
+
+static int hook_pf(void);
+static int dehook_pf(void);
+static int shutdown_pf(void);
+static int pf_load(void);
+static int pf_unload(void);
static struct cdevsw pf_cdevsw = {
.d_ioctl = pfioctl,
@@ -241,78 +255,99 @@ static struct cdevsw pf_cdevsw = {
.d_version = D_VERSION,
};
-static volatile int pf_pfil_hooked = 0;
-int pf_end_threads = 0;
-struct mtx pf_task_mtx;
-pflog_packet_t *pflog_packet_ptr = NULL;
+static volatile VNET_DEFINE(int, pf_pfil_hooked);
+#define V_pf_pfil_hooked VNET(pf_pfil_hooked)
+VNET_DEFINE(int, pf_end_threads);
+struct mtx pf_task_mtx;
+
+/* pfsync */
+pfsync_state_import_t *pfsync_state_import_ptr = NULL;
+pfsync_insert_state_t *pfsync_insert_state_ptr = NULL;
+pfsync_update_state_t *pfsync_update_state_ptr = NULL;
+pfsync_delete_state_t *pfsync_delete_state_ptr = NULL;
+pfsync_clear_states_t *pfsync_clear_states_ptr = NULL;
+pfsync_state_in_use_t *pfsync_state_in_use_ptr = NULL;
+pfsync_defer_t *pfsync_defer_ptr = NULL;
+pfsync_up_t *pfsync_up_ptr = NULL;
+/* pflow */
+export_pflow_t *export_pflow_ptr = NULL;
+/* pflog */
+pflog_packet_t *pflog_packet_ptr = NULL;
+
+VNET_DEFINE(int, debug_pfugidhack);
+SYSCTL_VNET_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW,
+ &VNET_NAME(debug_pfugidhack), 0,
+ "Enable/disable pf user/group rules mpsafe hack");
-int debug_pfugidhack = 0;
-SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0,
- "Enable/disable pf user/group rules mpsafe hack");
-
-void
+static void
init_pf_mutex(void)
{
+
mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF);
}
-void
+static void
destroy_pf_mutex(void)
{
+
mtx_destroy(&pf_task_mtx);
}
-
void
init_zone_var(void)
{
- pf_src_tree_pl = pf_rule_pl = NULL;
- pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL;
- pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL;
- pf_state_scrub_pl = NULL;
- pfr_ktable_pl = pfr_kentry_pl = NULL;
+ V_pf_src_tree_pl = V_pf_rule_pl = NULL;
+ V_pf_state_pl = V_pf_state_key_pl = V_pf_state_item_pl = NULL;
+ V_pf_altq_pl = V_pf_pooladdr_pl = NULL;
+ V_pf_frent_pl = V_pf_frag_pl = V_pf_cache_pl = V_pf_cent_pl = NULL;
+ V_pf_state_scrub_pl = NULL;
+ V_pfr_ktable_pl = V_pfr_kentry_pl = V_pfr_kcounters_pl = NULL;
}
void
cleanup_pf_zone(void)
{
- UMA_DESTROY(pf_src_tree_pl);
- UMA_DESTROY(pf_rule_pl);
- UMA_DESTROY(pf_state_pl);
- UMA_DESTROY(pf_altq_pl);
- UMA_DESTROY(pf_pooladdr_pl);
- UMA_DESTROY(pf_frent_pl);
- UMA_DESTROY(pf_frag_pl);
- UMA_DESTROY(pf_cache_pl);
- UMA_DESTROY(pf_cent_pl);
- UMA_DESTROY(pfr_ktable_pl);
- UMA_DESTROY(pfr_kentry_pl2);
- UMA_DESTROY(pfr_kentry_pl);
- UMA_DESTROY(pf_state_scrub_pl);
- UMA_DESTROY(pfi_addr_pl);
+ UMA_DESTROY(V_pf_src_tree_pl);
+ UMA_DESTROY(V_pf_rule_pl);
+ UMA_DESTROY(V_pf_state_pl);
+ UMA_DESTROY(V_pf_state_key_pl);
+ UMA_DESTROY(V_pf_state_item_pl);
+ UMA_DESTROY(V_pf_altq_pl);
+ UMA_DESTROY(V_pf_pooladdr_pl);
+ UMA_DESTROY(V_pf_frent_pl);
+ UMA_DESTROY(V_pf_frag_pl);
+ UMA_DESTROY(V_pf_cache_pl);
+ UMA_DESTROY(V_pf_cent_pl);
+ UMA_DESTROY(V_pfr_ktable_pl);
+ UMA_DESTROY(V_pfr_kentry_pl);
+ UMA_DESTROY(V_pfr_kcounters_pl);
+ UMA_DESTROY(V_pf_state_scrub_pl);
+ UMA_DESTROY(V_pfi_addr_pl);
}
int
pfattach(void)
{
- u_int32_t *my_timeout = pf_default_rule.timeout;
+ u_int32_t *my_timeout = V_pf_default_rule.timeout;
int error = 1;
do {
- UMA_CREATE(pf_src_tree_pl,struct pf_src_node, "pfsrctrpl");
- UMA_CREATE(pf_rule_pl, struct pf_rule, "pfrulepl");
- UMA_CREATE(pf_state_pl, struct pf_state, "pfstatepl");
- UMA_CREATE(pf_altq_pl, struct pf_altq, "pfaltqpl");
- UMA_CREATE(pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl");
- UMA_CREATE(pfr_ktable_pl, struct pfr_ktable, "pfrktable");
- UMA_CREATE(pfr_kentry_pl, struct pfr_kentry, "pfrkentry");
- UMA_CREATE(pfr_kentry_pl2, struct pfr_kentry, "pfrkentry2");
- UMA_CREATE(pf_frent_pl, struct pf_frent, "pffrent");
- UMA_CREATE(pf_frag_pl, struct pf_fragment, "pffrag");
- UMA_CREATE(pf_cache_pl, struct pf_fragment, "pffrcache");
- UMA_CREATE(pf_cent_pl, struct pf_frcache, "pffrcent");
- UMA_CREATE(pf_state_scrub_pl, struct pf_state_scrub,
+ UMA_CREATE(V_pf_src_tree_pl, struct pf_src_node, "pfsrctrpl");
+ UMA_CREATE(V_pf_rule_pl, struct pf_rule, "pfrulepl");
+ UMA_CREATE(V_pf_state_pl, struct pf_state, "pfstatepl");
+ UMA_CREATE(V_pf_state_key_pl, struct pf_state, "pfstatekeypl");
+ UMA_CREATE(V_pf_state_item_pl, struct pf_state, "pfstateitempl");
+ UMA_CREATE(V_pf_altq_pl, struct pf_altq, "pfaltqpl");
+ UMA_CREATE(V_pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl");
+ UMA_CREATE(V_pfr_ktable_pl, struct pfr_ktable, "pfrktable");
+ UMA_CREATE(V_pfr_kentry_pl, struct pfr_kentry, "pfrkentry");
+ UMA_CREATE(V_pfr_kcounters_pl, struct pfr_kcounters, "pfrkcounters");
+ UMA_CREATE(V_pf_frent_pl, struct pf_frent, "pffrent");
+ UMA_CREATE(V_pf_frag_pl, struct pf_fragment, "pffrag");
+ UMA_CREATE(V_pf_cache_pl, struct pf_fragment, "pffrcache");
+ UMA_CREATE(V_pf_cent_pl, struct pf_frcache, "pffrcent");
+ UMA_CREATE(V_pf_state_scrub_pl, struct pf_state_scrub,
"pfstatescrub");
- UMA_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl");
+ UMA_CREATE(V_pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl");
error = 0;
} while(0);
if (error) {
@@ -327,34 +362,35 @@ pfattach(void)
return (error);
}
- pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl;
- pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
- pf_pool_limits[PF_LIMIT_SRC_NODES].pp = pf_src_tree_pl;
- pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
- pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl;
- pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
- pf_pool_limits[PF_LIMIT_TABLES].pp = pfr_ktable_pl;
- pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT;
- pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = pfr_kentry_pl;
- pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
- uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp,
- pf_pool_limits[PF_LIMIT_STATES].limit);
-
- RB_INIT(&tree_src_tracking);
- RB_INIT(&pf_anchors);
+ V_pf_pool_limits[PF_LIMIT_STATES].pp = V_pf_state_pl;
+ V_pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
+ V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_pl;
+ V_pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
+ V_pf_pool_limits[PF_LIMIT_FRAGS].pp = V_pf_frent_pl;
+ V_pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
+ V_pf_pool_limits[PF_LIMIT_TABLES].pp = V_pfr_ktable_pl;
+ V_pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT;
+ V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = V_pfr_kentry_pl;
+ V_pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
+ uma_zone_set_max(V_pf_pool_limits[PF_LIMIT_STATES].pp,
+ V_pf_pool_limits[PF_LIMIT_STATES].limit);
+
+ RB_INIT(&V_tree_src_tracking);
+ RB_INIT(&V_pf_anchors);
pf_init_ruleset(&pf_main_ruleset);
- TAILQ_INIT(&pf_altqs[0]);
- TAILQ_INIT(&pf_altqs[1]);
- TAILQ_INIT(&pf_pabuf);
- pf_altqs_active = &pf_altqs[0];
- pf_altqs_inactive = &pf_altqs[1];
- TAILQ_INIT(&state_list);
+
+ TAILQ_INIT(&V_pf_altqs[0]);
+ TAILQ_INIT(&V_pf_altqs[1]);
+ TAILQ_INIT(&V_pf_pabuf);
+ V_pf_altqs_active = &V_pf_altqs[0];
+ V_pf_altqs_inactive = &V_pf_altqs[1];
+ TAILQ_INIT(&V_state_list);
/* default rule should never be garbage collected */
- pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
- pf_default_rule.action = PF_PASS;
- pf_default_rule.nr = -1;
- pf_default_rule.rtableid = -1;
+ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
+ V_pf_default_rule.action = PF_PASS;
+ V_pf_default_rule.nr = -1;
+ V_pf_default_rule.rtableid = -1;
/* initialize default timeouts */
my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
@@ -379,20 +415,24 @@ pfattach(void)
my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
pf_normalize_init();
- bzero(&pf_status, sizeof(pf_status));
- pf_status.debug = PF_DEBUG_URGENT;
- pf_pfil_hooked = 0;
+ bzero(&V_pf_status, sizeof(V_pf_status));
+ V_pf_status.debug = PF_DEBUG_URGENT;
+
+ V_pf_pfil_hooked = 0;
/* XXX do our best to avoid a conflict */
- pf_status.hostid = arc4random();
+ V_pf_status.hostid = arc4random();
- if (kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pfpurge"))
+ if (kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, "pfpurge"))
return (ENXIO);
+ m_addr_chg_pf_p = pf_pkt_addr_changed;
+
return (error);
}
#else /* !__FreeBSD__ */
+
void
pfattach(int num)
{
@@ -404,6 +444,10 @@ pfattach(int num)
"pfsrctrpl", NULL);
pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl",
NULL);
+ pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0,
+ "pfstatekeypl", NULL);
+ pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0,
+ "pfstateitempl", NULL);
pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl",
&pool_allocator_nointr);
pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0,
@@ -415,7 +459,7 @@ pfattach(int num)
pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp,
pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0);
- if (ctob(physmem) <= 100*1024*1024)
+ if (physmem <= atop(100*1024*1024))
pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit =
PFR_KENTRY_HIWAT_SMALL;
@@ -465,32 +509,32 @@ pfattach(int num)
pf_status.hostid = arc4random();
/* require process context to purge states, so perform in a thread */
- kproc_create_deferred(pf_thread_create, NULL);
+ kthread_create_deferred(pf_thread_create, NULL);
}
void
pf_thread_create(void *v)
{
- if (kproc_create(pf_purge_thread, NULL, NULL, "pfpurge"))
+ if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge"))
panic("pfpurge thread");
}
int
-pfopen(struct cdev *dev, int flags, int fmt, struct proc *p)
+pfopen(dev_t dev, int flags, int fmt, struct proc *p)
{
- if (dev2unit(dev) >= 1)
+ if (minor(dev) >= 1)
return (ENXIO);
return (0);
}
int
-pfclose(struct cdev *dev, int flags, int fmt, struct proc *p)
+pfclose(dev_t dev, int flags, int fmt, struct proc *p)
{
- if (dev2unit(dev) >= 1)
+ if (minor(dev) >= 1)
return (ENXIO);
return (0);
}
-#endif /* __FreeBSD__ */
+#endif
struct pf_pool *
pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
@@ -557,7 +601,11 @@ pf_empty_pool(struct pf_palist *poola)
pf_tbladdr_remove(&empty_pool_pa->addr);
pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE);
TAILQ_REMOVE(poola, empty_pool_pa, entries);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, empty_pool_pa);
+#else
pool_put(&pf_pooladdr_pl, empty_pool_pa);
+#endif
}
}
@@ -565,7 +613,7 @@ void
pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
{
if (rulequeue != NULL) {
- if (rule->states <= 0) {
+ if (rule->states_cur <= 0) {
/*
* XXX - we need to remove the table *before* detaching
* the rule to make sure the table code does not delete
@@ -581,7 +629,7 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
rule->nr = -1;
}
- if (rule->states > 0 || rule->src_nodes > 0 ||
+ if (rule->states_cur > 0 || rule->src_nodes > 0 ||
rule->entries.tqe_prev != NULL)
return;
pf_tag_unref(rule->tag);
@@ -604,7 +652,11 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
pf_anchor_remove(rule);
pf_empty_pool(&rule->rpool.list);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, rule);
+#else
pool_put(&pf_rule_pl, rule);
+#endif
}
u_int16_t
@@ -635,11 +687,9 @@ tagname2tag(struct pf_tags *head, char *tagname)
return (0);
/* allocate and fill new struct pf_tagname */
- tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname),
- M_TEMP, M_NOWAIT);
+ tag = malloc(sizeof(*tag), M_TEMP, M_NOWAIT|M_ZERO);
if (tag == NULL)
return (0);
- bzero(tag, sizeof(struct pf_tagname));
strlcpy(tag->name, tagname, sizeof(tag->name));
tag->tag = new_tagid;
tag->ref++;
@@ -687,13 +737,21 @@ tag_unref(struct pf_tags *head, u_int16_t tag)
u_int16_t
pf_tagname2tag(char *tagname)
{
+#ifdef __FreeBSD__
+ return (tagname2tag(&V_pf_tags, tagname));
+#else
return (tagname2tag(&pf_tags, tagname));
+#endif
}
void
pf_tag2tagname(u_int16_t tagid, char *p)
{
+#ifdef __FreeBSD__
+ tag2tagname(&V_pf_tags, tagid, p);
+#else
tag2tagname(&pf_tags, tagid, p);
+#endif
}
void
@@ -701,7 +759,11 @@ pf_tag_ref(u_int16_t tag)
{
struct pf_tagname *t;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(t, &V_pf_tags, entries)
+#else
TAILQ_FOREACH(t, &pf_tags, entries)
+#endif
if (t->tag == tag)
break;
if (t != NULL)
@@ -711,7 +773,11 @@ pf_tag_ref(u_int16_t tag)
void
pf_tag_unref(u_int16_t tag)
{
+#ifdef __FreeBSD__
+ tag_unref(&V_pf_tags, tag);
+#else
tag_unref(&pf_tags, tag);
+#endif
}
int
@@ -764,19 +830,31 @@ pf_rtlabel_copyout(struct pf_addr_wrap *a)
u_int32_t
pf_qname2qid(char *qname)
{
+#ifdef __FreeBSD__
+ return ((u_int32_t)tagname2tag(&V_pf_qids, qname));
+#else
return ((u_int32_t)tagname2tag(&pf_qids, qname));
+#endif
}
void
pf_qid2qname(u_int32_t qid, char *p)
{
+#ifdef __FreeBSD__
+ tag2tagname(&V_pf_qids, (u_int16_t)qid, p);
+#else
tag2tagname(&pf_qids, (u_int16_t)qid, p);
+#endif
}
void
pf_qid_unref(u_int32_t qid)
{
+#ifdef __FreeBSD__
+ tag_unref(&V_pf_qids, (u_int16_t)qid);
+#else
tag_unref(&pf_qids, (u_int16_t)qid);
+#endif
}
int
@@ -786,24 +864,35 @@ pf_begin_altq(u_int32_t *ticket)
int error = 0;
/* Purge the old altq list */
- while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
- TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
#ifdef __FreeBSD__
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
if (altq->qname[0] == 0 &&
(altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
#else
+ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
if (altq->qname[0] == 0) {
#endif
/* detach and destroy the discipline */
error = altq_remove(altq);
} else
pf_qid_unref(altq->qid);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, altq);
+#else
pool_put(&pf_altq_pl, altq);
+#endif
}
if (error)
return (error);
+#ifdef __FreeBSD__
+ *ticket = ++V_ticket_altqs_inactive;
+ V_altqs_inactive_open = 1;
+#else
*ticket = ++ticket_altqs_inactive;
altqs_inactive_open = 1;
+#endif
return (0);
}
@@ -813,24 +902,37 @@ pf_rollback_altq(u_int32_t ticket)
struct pf_altq *altq;
int error = 0;
+#ifdef __FreeBSD__
+ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
+ return (0);
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+#else
if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
return (0);
/* Purge the old altq list */
while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
-#ifdef __FreeBSD__
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
-#else
if (altq->qname[0] == 0) {
#endif
/* detach and destroy the discipline */
error = altq_remove(altq);
} else
pf_qid_unref(altq->qid);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, altq);
+#else
pool_put(&pf_altq_pl, altq);
+#endif
}
+#ifdef __FreeBSD__
+ V_altqs_inactive_open = 0;
+#else
altqs_inactive_open = 0;
+#endif
return (error);
}
@@ -841,27 +943,43 @@ pf_commit_altq(u_int32_t ticket)
struct pf_altq *altq;
int s, err, error = 0;
+#ifdef __FreeBSD__
+ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
+#else
if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
+#endif
return (EBUSY);
/* swap altqs, keep the old. */
s = splsoftnet();
+#ifdef __FreeBSD__
+ old_altqs = V_pf_altqs_active;
+ V_pf_altqs_active = V_pf_altqs_inactive;
+ V_pf_altqs_inactive = old_altqs;
+ V_ticket_altqs_active = V_ticket_altqs_inactive;
+#else
old_altqs = pf_altqs_active;
pf_altqs_active = pf_altqs_inactive;
pf_altqs_inactive = old_altqs;
ticket_altqs_active = ticket_altqs_inactive;
+#endif
/* Attach new disciplines */
- TAILQ_FOREACH(altq, pf_altqs_active, entries) {
#ifdef __FreeBSD__
- if (altq->qname[0] == 0 &&
- (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
#else
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
if (altq->qname[0] == 0) {
#endif
/* attach the discipline */
error = altq_pfattach(altq);
+#ifdef __FreeBSD__
+ if (error == 0 && V_pf_altq_running)
+#else
if (error == 0 && pf_altq_running)
+#endif
error = pf_enable_altq(altq);
if (error != 0) {
splx(s);
@@ -871,16 +989,22 @@ pf_commit_altq(u_int32_t ticket)
}
/* Purge the old altq list */
- while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
- TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
#ifdef __FreeBSD__
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
if (altq->qname[0] == 0 &&
(altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
#else
+ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
if (altq->qname[0] == 0) {
#endif
/* detach and destroy the discipline */
+#ifdef __FreeBSD__
+ if (V_pf_altq_running)
+#else
if (pf_altq_running)
+#endif
error = pf_disable_altq(altq);
err = altq_pfdetach(altq);
if (err != 0 && error == 0)
@@ -890,11 +1014,19 @@ pf_commit_altq(u_int32_t ticket)
error = err;
} else
pf_qid_unref(altq->qid);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, altq);
+#else
pool_put(&pf_altq_pl, altq);
+#endif
}
splx(s);
+#ifdef __FreeBSD__
+ V_altqs_inactive_open = 0;
+#else
altqs_inactive_open = 0;
+#endif
return (error);
}
@@ -969,22 +1101,32 @@ pf_disable_altq(struct pf_altq *altq)
void
pf_altq_ifnet_event(struct ifnet *ifp, int remove)
{
- struct ifnet *ifp1;
- struct pf_altq *a1, *a2, *a3;
- u_int32_t ticket;
- int error = 0;
+ struct ifnet *ifp1;
+ struct pf_altq *a1, *a2, *a3;
+ u_int32_t ticket;
+ int error = 0;
/* Interrupt userland queue modifications */
+#ifdef __FreeBSD__
+ if (V_altqs_inactive_open)
+ pf_rollback_altq(V_ticket_altqs_inactive);
+#else
if (altqs_inactive_open)
pf_rollback_altq(ticket_altqs_inactive);
+#endif
/* Start new altq ruleset */
if (pf_begin_altq(&ticket))
return;
/* Copy the current active set */
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
+ a2 = pool_get(&V_pf_altq_pl, PR_NOWAIT);
+#else
TAILQ_FOREACH(a1, pf_altqs_active, entries) {
a2 = pool_get(&pf_altq_pl, PR_NOWAIT);
+#endif
if (a2 == NULL) {
error = ENOMEM;
break;
@@ -994,11 +1136,19 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove)
if (a2->qname[0] != 0) {
if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
error = EBUSY;
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, a2);
+#else
pool_put(&pf_altq_pl, a2);
+#endif
break;
}
a2->altq_disc = NULL;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) {
+#else
TAILQ_FOREACH(a3, pf_altqs_inactive, entries) {
+#endif
if (strncmp(a3->ifname, a2->ifname,
IFNAMSIZ) == 0 && a3->qname[0] == 0) {
a2->altq_disc = a3->altq_disc;
@@ -1016,23 +1166,35 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove)
error = altq_add(a2);
PF_LOCK();
+#ifdef __FreeBSD__
+ if (ticket != V_ticket_altqs_inactive)
+#else
if (ticket != ticket_altqs_inactive)
+#endif
error = EBUSY;
if (error) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, a2);
+#else
pool_put(&pf_altq_pl, a2);
+#endif
break;
}
}
+#ifdef __FreeBSD__
+ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
+#else
TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries);
+#endif
}
if (error != 0)
pf_rollback_altq(ticket);
else
pf_commit_altq(ticket);
-}
+ }
#endif
#endif /* ALTQ */
@@ -1252,11 +1414,34 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs)
}
MD5Final(digest, &ctx);
+#ifdef __FreeBSD__
+ memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
+#else
memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum));
+#endif
return (0);
}
int
+pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr,
+ sa_family_t af)
+{
+ if (pfi_dynaddr_setup(addr, af) ||
+ pf_tbladdr_setup(ruleset, addr))
+ return (EINVAL);
+
+ return (0);
+}
+
+void
+pf_addr_copyout(struct pf_addr_wrap *addr)
+{
+ pfi_dynaddr_copyout(addr);
+ pf_tbladdr_copyout(addr);
+ pf_rtlabel_copyout(addr);
+}
+
+int
#ifdef __FreeBSD__
pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
#else
@@ -1270,6 +1455,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#endif
int error = 0;
+ CURVNET_SET(TD_TO_VNET(td));
+
/* XXX keep in sync with switch() below */
#ifdef __FreeBSD__
if (securelevel_gt(td->td_ucred, 2))
@@ -1373,7 +1560,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
return (EACCES);
case DIOCGETRULE:
- if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR)
+ if (((struct pfioc_rule *)addr)->action ==
+ PF_GET_CLR_CNTR)
return (EACCES);
break;
default:
@@ -1382,9 +1570,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (flags & FWRITE)
#ifdef __FreeBSD__
- sx_xlock(&pf_consistency_lock);
+ sx_xlock(&V_pf_consistency_lock);
else
- sx_slock(&pf_consistency_lock);
+ sx_slock(&V_pf_consistency_lock);
#else
rw_enter_write(&pf_consistency_lock);
else
@@ -1399,7 +1587,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
switch (cmd) {
case DIOCSTART:
+#ifdef __FreeBSD__
+ if (V_pf_status.running)
+#else
if (pf_status.running)
+#endif
error = EEXIST;
else {
#ifdef __FreeBSD__
@@ -1411,33 +1603,48 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
("pf: pfil registeration fail\n"));
break;
}
-#endif
+ V_pf_status.running = 1;
+ V_pf_status.since = time_second;
+
+ if (V_pf_status.stateid == 0) {
+ V_pf_status.stateid = time_second;
+ V_pf_status.stateid = V_pf_status.stateid << 32;
+ }
+#else
pf_status.running = 1;
pf_status.since = time_second;
+
if (pf_status.stateid == 0) {
pf_status.stateid = time_second;
pf_status.stateid = pf_status.stateid << 32;
}
+#endif
DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
}
break;
case DIOCSTOP:
- if (!pf_status.running)
+#ifdef __FreeBSD__
+ if (!V_pf_status.running)
error = ENOENT;
else {
- pf_status.running = 0;
-#ifdef __FreeBSD__
+ V_pf_status.running = 0;
PF_UNLOCK();
error = dehook_pf();
PF_LOCK();
if (error) {
- pf_status.running = 1;
+ V_pf_status.running = 1;
DPFPRINTF(PF_DEBUG_MISC,
- ("pf: pfil unregisteration failed\n"));
+ ("pf: pfil unregisteration failed\n"));
}
-#endif
+ V_pf_status.since = time_second;
+#else
+ if (!pf_status.running)
+ error = ENOENT;
+ else {
+ pf_status.running = 0;
pf_status.since = time_second;
+#endif
DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
}
break;
@@ -1473,16 +1680,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EBUSY;
break;
}
- if (pr->pool_ticket != ticket_pabuf) {
#ifdef __FreeBSD__
+ if (pr->pool_ticket != V_ticket_pabuf) {
DPFPRINTF(PF_DEBUG_MISC,
("pool_ticket: %d != %d\n", pr->pool_ticket,
- ticket_pabuf));
+ V_ticket_pabuf));
+#else
+ if (pr->pool_ticket != ticket_pabuf) {
#endif
error = EBUSY;
break;
}
- rule = pool_get(&pf_rule_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ rule = pool_get(&V_pf_rule_pl, PR_NOWAIT);
+#else
+ rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL);
+#endif
if (rule == NULL) {
error = ENOMEM;
break;
@@ -1504,19 +1717,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
rule->kif = NULL;
TAILQ_INIT(&rule->rpool.list);
/* initialize refcounting */
- rule->states = 0;
+ rule->states_cur = 0;
rule->src_nodes = 0;
rule->entries.tqe_prev = NULL;
#ifndef INET
if (rule->af == AF_INET) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, rule);
+#else
pool_put(&pf_rule_pl, rule);
+#endif
error = EAFNOSUPPORT;
break;
}
#endif /* INET */
#ifndef INET6
if (rule->af == AF_INET6) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, rule);
+#else
pool_put(&pf_rule_pl, rule);
+#endif
error = EAFNOSUPPORT;
break;
}
@@ -1530,7 +1751,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (rule->ifname[0]) {
rule->kif = pfi_kif_get(rule->ifname);
if (rule->kif == NULL) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, rule);
+#else
pool_put(&pf_rule_pl, rule);
+#endif
error = EINVAL;
break;
}
@@ -1567,40 +1792,42 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (rule->rt && !rule->direction)
error = EINVAL;
#if NPFLOG > 0
-#ifdef __FreeBSD__
if (!rule->log)
rule->logif = 0;
-#endif
if (rule->logif >= PFLOGIFS_MAX)
error = EINVAL;
#endif
if (pf_rtlabel_add(&rule->src.addr) ||
pf_rtlabel_add(&rule->dst.addr))
error = EBUSY;
- if (pfi_dynaddr_setup(&rule->src.addr, rule->af))
+ if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
error = EINVAL;
- if (pfi_dynaddr_setup(&rule->dst.addr, rule->af))
- error = EINVAL;
- if (pf_tbladdr_setup(ruleset, &rule->src.addr))
- error = EINVAL;
- if (pf_tbladdr_setup(ruleset, &rule->dst.addr))
+ if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
error = EINVAL;
if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
error = EINVAL;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
+#else
TAILQ_FOREACH(pa, &pf_pabuf, entries)
+#endif
if (pf_tbladdr_setup(ruleset, &pa->addr))
error = EINVAL;
if (rule->overload_tblname[0]) {
if ((rule->overload_tbl = pfr_attach_table(ruleset,
- rule->overload_tblname)) == NULL)
+ rule->overload_tblname, 0)) == NULL)
error = EINVAL;
else
rule->overload_tbl->pfrkt_flags |=
PFR_TFLAG_ACTIVE;
}
+#ifdef __FreeBSD__
+ pf_mv_pool(&V_pf_pabuf, &rule->rpool.list);
+#else
pf_mv_pool(&pf_pabuf, &rule->rpool.list);
+#endif
if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
(rule->action == PF_BINAT)) && rule->anchor == NULL) ||
(rule->rt > PF_FASTROUTE)) &&
@@ -1613,14 +1840,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
#ifdef __FreeBSD__
- if (!debug_pfugidhack && (rule->uid.op || rule->gid.op ||
+ if (!V_debug_pfugidhack && (rule->uid.op || rule->gid.op ||
rule->log & PF_LOG_SOCKET_LOOKUP)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: debug.pfugidhack enabled\n"));
- debug_pfugidhack = 1;
+ V_debug_pfugidhack = 1;
}
#endif
-
rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
rule->evaluations = rule->packets[0] = rule->packets[1] =
rule->bytes[0] = rule->bytes[1] = 0;
@@ -1690,12 +1916,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EBUSY;
break;
}
- pfi_dynaddr_copyout(&pr->rule.src.addr);
- pfi_dynaddr_copyout(&pr->rule.dst.addr);
- pf_tbladdr_copyout(&pr->rule.src.addr);
- pf_tbladdr_copyout(&pr->rule.dst.addr);
- pf_rtlabel_copyout(&pr->rule.src.addr);
- pf_rtlabel_copyout(&pr->rule.dst.addr);
+ pf_addr_copyout(&pr->rule.src.addr);
+ pf_addr_copyout(&pr->rule.dst.addr);
for (i = 0; i < PF_SKIP_COUNT; ++i)
if (rule->skip[i].ptr == NULL)
pr->rule.skip[i].nr = -1;
@@ -1707,6 +1929,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
rule->evaluations = 0;
rule->packets[0] = rule->packets[1] = 0;
rule->bytes[0] = rule->bytes[1] = 0;
+ rule->states_tot = 0;
}
break;
}
@@ -1720,7 +1943,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (!(pcr->action == PF_CHANGE_REMOVE ||
pcr->action == PF_CHANGE_GET_TICKET) &&
+#ifdef __FreeBSD__
+ pcr->pool_ticket != V_ticket_pabuf) {
+#else
pcr->pool_ticket != ticket_pabuf) {
+#endif
error = EBUSY;
break;
}
@@ -1757,7 +1984,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
if (pcr->action != PF_CHANGE_REMOVE) {
- newrule = pool_get(&pf_rule_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ newrule = pool_get(&V_pf_rule_pl, PR_NOWAIT);
+#else
+ newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL);
+#endif
if (newrule == NULL) {
error = ENOMEM;
break;
@@ -1777,18 +2008,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#endif
TAILQ_INIT(&newrule->rpool.list);
/* initialize refcounting */
- newrule->states = 0;
+ newrule->states_cur = 0;
newrule->entries.tqe_prev = NULL;
#ifndef INET
if (newrule->af == AF_INET) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, newrule);
+#else
pool_put(&pf_rule_pl, newrule);
+#endif
error = EAFNOSUPPORT;
break;
}
#endif /* INET */
#ifndef INET6
if (newrule->af == AF_INET6) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, newrule);
+#else
pool_put(&pf_rule_pl, newrule);
+#endif
error = EAFNOSUPPORT;
break;
}
@@ -1796,7 +2035,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (newrule->ifname[0]) {
newrule->kif = pfi_kif_get(newrule->ifname);
if (newrule->kif == NULL) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_rule_pl, newrule);
+#else
pool_put(&pf_rule_pl, newrule);
+#endif
error = EINVAL;
break;
}
@@ -1836,34 +2079,32 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EBUSY;
if (newrule->rt && !newrule->direction)
error = EINVAL;
-#ifdef __FreeBSD__
#if NPFLOG > 0
if (!newrule->log)
newrule->logif = 0;
if (newrule->logif >= PFLOGIFS_MAX)
error = EINVAL;
#endif
-#endif
if (pf_rtlabel_add(&newrule->src.addr) ||
pf_rtlabel_add(&newrule->dst.addr))
error = EBUSY;
- if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af))
- error = EINVAL;
- if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af))
+ if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
error = EINVAL;
- if (pf_tbladdr_setup(ruleset, &newrule->src.addr))
- error = EINVAL;
- if (pf_tbladdr_setup(ruleset, &newrule->dst.addr))
+ if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
error = EINVAL;
if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
error = EINVAL;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
+#else
TAILQ_FOREACH(pa, &pf_pabuf, entries)
+#endif
if (pf_tbladdr_setup(ruleset, &pa->addr))
error = EINVAL;
if (newrule->overload_tblname[0]) {
if ((newrule->overload_tbl = pfr_attach_table(
- ruleset, newrule->overload_tblname)) ==
+ ruleset, newrule->overload_tblname, 0)) ==
NULL)
error = EINVAL;
else
@@ -1871,7 +2112,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
PFR_TFLAG_ACTIVE;
}
+#ifdef __FreeBSD__
+ pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list);
+#else
pf_mv_pool(&pf_pabuf, &newrule->rpool.list);
+#endif
if (((((newrule->action == PF_NAT) ||
(newrule->action == PF_RDR) ||
(newrule->action == PF_BINAT) ||
@@ -1886,12 +2131,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
#ifdef __FreeBSD__
- if (!debug_pfugidhack && (newrule->uid.op ||
+ if (!V_debug_pfugidhack && (newrule->uid.op ||
newrule->gid.op ||
newrule->log & PF_LOG_SOCKET_LOOKUP)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: debug.pfugidhack enabled\n"));
- debug_pfugidhack = 1;
+ V_debug_pfugidhack = 1;
}
#endif
@@ -1900,7 +2145,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
newrule->packets[0] = newrule->packets[1] = 0;
newrule->bytes[0] = newrule->bytes[1] = 0;
}
+#ifdef __FreeBSD__
+ pf_empty_pool(&V_pf_pabuf);
+#else
pf_empty_pool(&pf_pabuf);
+#endif
if (pcr->action == PF_CHANGE_ADD_HEAD)
oldrule = TAILQ_FIRST(
@@ -1953,166 +2202,164 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
case DIOCCLRSTATES: {
- struct pf_state *state, *nexts;
+ struct pf_state *s, *nexts;
struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
- int killed = 0;
+ u_int killed = 0;
- for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
- state = nexts) {
- nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
+#ifdef __FreeBSD__
+ for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s; s = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s);
+#else
+ for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
+#endif
if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
- state->u.s.kif->pfik_name)) {
-#if NPFSYNC
+ s->kif->pfik_name)) {
+#if NPFSYNC > 0
/* don't send out individual delete messages */
- state->sync_flags = PFSTATE_NOSYNC;
+ SET(s->state_flags, PFSTATE_NOSYNC);
#endif
- pf_unlink_state(state);
+ pf_unlink_state(s);
killed++;
}
}
- psk->psk_af = killed;
-#if NPFSYNC
+ psk->psk_killed = killed;
+#if NPFSYNC > 0
+#ifdef __FreeBSD__
+ if (pfsync_clear_states_ptr != NULL)
+ pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname);
+#else
pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
#endif
+#endif
break;
}
case DIOCKILLSTATES: {
- struct pf_state *state, *nexts;
- struct pf_state_host *src, *dst;
+ struct pf_state *s, *nexts;
+ struct pf_state_key *sk;
+ struct pf_addr *srcaddr, *dstaddr;
+ u_int16_t srcport, dstport;
struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
- int killed = 0;
+ u_int killed = 0;
- for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
- state = nexts) {
- nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
+ if (psk->psk_pfcmp.id) {
+ if (psk->psk_pfcmp.creatorid == 0)
+#ifdef __FreeBSD__
+ psk->psk_pfcmp.creatorid = V_pf_status.hostid;
+#else
+ psk->psk_pfcmp.creatorid = pf_status.hostid;
+#endif
+ if ((s = pf_find_state_byid(&psk->psk_pfcmp))) {
+ pf_unlink_state(s);
+ psk->psk_killed = 1;
+ }
+ break;
+ }
- if (state->direction == PF_OUT) {
- src = &state->lan;
- dst = &state->ext;
+#ifdef __FreeBSD__
+ for (s = RB_MIN(pf_state_tree_id, &V_tree_id); s;
+ s = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, s);
+#else
+ for (s = RB_MIN(pf_state_tree_id, &tree_id); s;
+ s = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
+#endif
+ sk = s->key[PF_SK_WIRE];
+
+ if (s->direction == PF_OUT) {
+ srcaddr = &sk->addr[1];
+ dstaddr = &sk->addr[0];
+ srcport = sk->port[0];
+ dstport = sk->port[0];
} else {
- src = &state->ext;
- dst = &state->lan;
+ srcaddr = &sk->addr[0];
+ dstaddr = &sk->addr[1];
+ srcport = sk->port[0];
+ dstport = sk->port[0];
}
- if ((!psk->psk_af || state->af == psk->psk_af)
+ if ((!psk->psk_af || sk->af == psk->psk_af)
&& (!psk->psk_proto || psk->psk_proto ==
- state->proto) &&
+ sk->proto) &&
PF_MATCHA(psk->psk_src.neg,
&psk->psk_src.addr.v.a.addr,
&psk->psk_src.addr.v.a.mask,
- &src->addr, state->af) &&
+ srcaddr, sk->af) &&
PF_MATCHA(psk->psk_dst.neg,
&psk->psk_dst.addr.v.a.addr,
&psk->psk_dst.addr.v.a.mask,
- &dst->addr, state->af) &&
+ dstaddr, sk->af) &&
(psk->psk_src.port_op == 0 ||
pf_match_port(psk->psk_src.port_op,
psk->psk_src.port[0], psk->psk_src.port[1],
- src->port)) &&
+ srcport)) &&
(psk->psk_dst.port_op == 0 ||
pf_match_port(psk->psk_dst.port_op,
psk->psk_dst.port[0], psk->psk_dst.port[1],
- dst->port)) &&
+ dstport)) &&
+ (!psk->psk_label[0] || (s->rule.ptr->label[0] &&
+ !strcmp(psk->psk_label, s->rule.ptr->label))) &&
(!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
- state->u.s.kif->pfik_name))) {
-#if NPFSYNC > 0
- /* send immediate delete of state */
- pfsync_delete_state(state);
- state->sync_flags |= PFSTATE_NOSYNC;
-#endif
- pf_unlink_state(state);
+ s->kif->pfik_name))) {
+ pf_unlink_state(s);
killed++;
}
}
- psk->psk_af = killed;
+ psk->psk_killed = killed;
break;
}
case DIOCADDSTATE: {
struct pfioc_state *ps = (struct pfioc_state *)addr;
- struct pf_state *state;
- struct pfi_kif *kif;
+ struct pfsync_state *sp = &ps->state;
- if (ps->state.timeout >= PFTM_MAX &&
- ps->state.timeout != PFTM_UNTIL_PACKET) {
+ if (sp->timeout >= PFTM_MAX &&
+ sp->timeout != PFTM_UNTIL_PACKET) {
error = EINVAL;
break;
}
- state = pool_get(&pf_state_pl, PR_NOWAIT);
- if (state == NULL) {
- error = ENOMEM;
- break;
- }
- kif = pfi_kif_get(ps->state.u.ifname);
- if (kif == NULL) {
- pool_put(&pf_state_pl, state);
- error = ENOENT;
- break;
- }
- bcopy(&ps->state, state, sizeof(struct pf_state));
- bzero(&state->u, sizeof(state->u));
- state->rule.ptr = &pf_default_rule;
- state->nat_rule.ptr = NULL;
- state->anchor.ptr = NULL;
- state->rt_kif = NULL;
- state->creation = time_second;
- state->pfsync_time = 0;
- state->packets[0] = state->packets[1] = 0;
- state->bytes[0] = state->bytes[1] = 0;
-
- if (pf_insert_state(kif, state)) {
- pfi_kif_unref(kif, PFI_KIF_REF_NONE);
- pool_put(&pf_state_pl, state);
- error = ENOMEM;
- }
+#ifdef __FreeBSD__
+ if (pfsync_state_import_ptr != NULL)
+ error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL);
+#else
+ error = pfsync_state_import(sp, PFSYNC_SI_IOCTL);
+#endif
break;
}
case DIOCGETSTATE: {
struct pfioc_state *ps = (struct pfioc_state *)addr;
- struct pf_state *state;
- u_int32_t nr;
- int secs;
+ struct pf_state *s;
+ struct pf_state_cmp id_key;
- nr = 0;
- RB_FOREACH(state, pf_state_tree_id, &tree_id) {
- if (nr >= ps->nr)
- break;
- nr++;
- }
- if (state == NULL) {
- error = EBUSY;
+ bcopy(ps->state.id, &id_key.id, sizeof(id_key.id));
+ id_key.creatorid = ps->state.creatorid;
+
+ s = pf_find_state_byid(&id_key);
+ if (s == NULL) {
+ error = ENOENT;
break;
}
- secs = time_second;
- bcopy(state, &ps->state, sizeof(ps->state));
- strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name,
- sizeof(ps->state.u.ifname));
- ps->state.rule.nr = state->rule.ptr->nr;
- ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ?
- -1 : state->nat_rule.ptr->nr;
- ps->state.anchor.nr = (state->anchor.ptr == NULL) ?
- -1 : state->anchor.ptr->nr;
- ps->state.creation = secs - ps->state.creation;
- ps->state.expire = pf_state_expires(state);
- if (ps->state.expire > secs)
- ps->state.expire -= secs;
- else
- ps->state.expire = 0;
+
+ pfsync_state_export(&ps->state, s);
break;
}
case DIOCGETSTATES: {
struct pfioc_states *ps = (struct pfioc_states *)addr;
struct pf_state *state;
- struct pf_state *p, *pstore;
+ struct pfsync_state *p, *pstore;
u_int32_t nr = 0;
- int space = ps->ps_len;
- if (space == 0) {
+ if (ps->ps_len == 0) {
+#ifdef __FreeBSD__
+ nr = V_pf_status.states;
+#else
nr = pf_status.states;
- ps->ps_len = sizeof(struct pf_state) * nr;
+#endif
+ ps->ps_len = sizeof(struct pfsync_state) * nr;
break;
}
@@ -2126,29 +2373,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
p = ps->ps_states;
+#ifdef __FreeBSD__
+ state = TAILQ_FIRST(&V_state_list);
+#else
state = TAILQ_FIRST(&state_list);
+#endif
while (state) {
if (state->timeout != PFTM_UNLINKED) {
- int secs = time_second;
-
if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len)
break;
-
- bcopy(state, pstore, sizeof(*pstore));
- strlcpy(pstore->u.ifname,
- state->u.s.kif->pfik_name,
- sizeof(pstore->u.ifname));
- pstore->rule.nr = state->rule.ptr->nr;
- pstore->nat_rule.nr = (state->nat_rule.ptr ==
- NULL) ? -1 : state->nat_rule.ptr->nr;
- pstore->anchor.nr = (state->anchor.ptr ==
- NULL) ? -1 : state->anchor.ptr->nr;
- pstore->creation = secs - pstore->creation;
- pstore->expire = pf_state_expires(state);
- if (pstore->expire > secs)
- pstore->expire -= secs;
- else
- pstore->expire = 0;
+ pfsync_state_export(pstore, state);
#ifdef __FreeBSD__
PF_COPYOUT(pstore, p, sizeof(*p), error);
#else
@@ -2161,10 +2395,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
p++;
nr++;
}
- state = TAILQ_NEXT(state, u.s.entry_list);
+ state = TAILQ_NEXT(state, entry_list);
}
- ps->ps_len = sizeof(struct pf_state) * nr;
+ ps->ps_len = sizeof(struct pfsync_state) * nr;
free(pstore, M_TEMP);
break;
@@ -2172,8 +2406,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
case DIOCGETSTATUS: {
struct pf_status *s = (struct pf_status *)addr;
+#ifdef __FreeBSD__
+ bcopy(&V_pf_status, s, sizeof(struct pf_status));
+#else
bcopy(&pf_status, s, sizeof(struct pf_status));
- pfi_fill_oldstatus(s);
+#endif
+ pfi_update_status(s->ifname, s);
break;
}
@@ -2181,35 +2419,51 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pfioc_if *pi = (struct pfioc_if *)addr;
if (pi->ifname[0] == 0) {
+#ifdef __FreeBSD__
+ bzero(V_pf_status.ifname, IFNAMSIZ);
+#else
bzero(pf_status.ifname, IFNAMSIZ);
+#endif
break;
}
- if (ifunit(pi->ifname) == NULL) {
- error = EINVAL;
- break;
- }
+#ifdef __FreeBSD__
+ strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
+#else
strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ);
+#endif
break;
}
case DIOCCLRSTATUS: {
+#ifdef __FreeBSD__
+ bzero(V_pf_status.counters, sizeof(V_pf_status.counters));
+ bzero(V_pf_status.fcounters, sizeof(V_pf_status.fcounters));
+ bzero(V_pf_status.scounters, sizeof(V_pf_status.scounters));
+ V_pf_status.since = time_second;
+ if (*V_pf_status.ifname)
+ pfi_update_status(V_pf_status.ifname, NULL);
+#else
bzero(pf_status.counters, sizeof(pf_status.counters));
bzero(pf_status.fcounters, sizeof(pf_status.fcounters));
bzero(pf_status.scounters, sizeof(pf_status.scounters));
pf_status.since = time_second;
if (*pf_status.ifname)
- pfi_clr_istats(pf_status.ifname);
+ pfi_update_status(pf_status.ifname, NULL);
+#endif
break;
}
case DIOCNATLOOK: {
struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr;
+ struct pf_state_key *sk;
struct pf_state *state;
- struct pf_state_cmp key;
+ struct pf_state_key_cmp key;
int m = 0, direction = pnl->direction;
+ int sidx, didx;
- key.af = pnl->af;
- key.proto = pnl->proto;
+ /* NATLOOK src and dst are reversed, so reverse sidx/didx */
+ sidx = (direction == PF_IN) ? 1 : 0;
+ didx = (direction == PF_IN) ? 0 : 1;
if (!pnl->proto ||
PF_AZERO(&pnl->saddr, pnl->af) ||
@@ -2219,43 +2473,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
(!pnl->dport || !pnl->sport)))
error = EINVAL;
else {
- /*
- * userland gives us source and dest of connection,
- * reverse the lookup so we ask for what happens with
- * the return traffic, enabling us to find it in the
- * state tree.
- */
- if (direction == PF_IN) {
- PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af);
- key.ext.port = pnl->dport;
- PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af);
- key.gwy.port = pnl->sport;
- state = pf_find_state_all(&key, PF_EXT_GWY, &m);
- } else {
- PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af);
- key.lan.port = pnl->dport;
- PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af);
- key.ext.port = pnl->sport;
- state = pf_find_state_all(&key, PF_LAN_EXT, &m);
- }
+ key.af = pnl->af;
+ key.proto = pnl->proto;
+ PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
+ key.port[sidx] = pnl->sport;
+ PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
+ key.port[didx] = pnl->dport;
+
+ state = pf_find_state_all(&key, direction, &m);
+
if (m > 1)
error = E2BIG; /* more than one state */
else if (state != NULL) {
- if (direction == PF_IN) {
- PF_ACPY(&pnl->rsaddr, &state->lan.addr,
- state->af);
- pnl->rsport = state->lan.port;
- PF_ACPY(&pnl->rdaddr, &pnl->daddr,
- pnl->af);
- pnl->rdport = pnl->dport;
- } else {
- PF_ACPY(&pnl->rdaddr, &state->gwy.addr,
- state->af);
- pnl->rdport = state->gwy.port;
- PF_ACPY(&pnl->rsaddr, &pnl->saddr,
- pnl->af);
- pnl->rsport = pnl->sport;
- }
+ sk = state->key[sidx];
+ PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
+ pnl->rsport = sk->port[sidx];
+ PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
+ pnl->rdport = sk->port[didx];
} else
error = ENOENT;
}
@@ -2271,10 +2505,18 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EINVAL;
goto fail;
}
+#ifdef __FreeBSD__
+ old = V_pf_default_rule.timeout[pt->timeout];
+#else
old = pf_default_rule.timeout[pt->timeout];
+#endif
if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
pt->seconds = 1;
+#ifdef __FreeBSD__
+ V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
+#else
pf_default_rule.timeout[pt->timeout] = pt->seconds;
+#endif
if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
wakeup(pf_purge_thread);
pt->seconds = old;
@@ -2288,7 +2530,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EINVAL;
goto fail;
}
+#ifdef __FreeBSD__
+ pt->seconds = V_pf_default_rule.timeout[pt->timeout];
+#else
pt->seconds = pf_default_rule.timeout[pt->timeout];
+#endif
break;
}
@@ -2299,7 +2545,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EINVAL;
goto fail;
}
+#ifdef __FreeBSD__
+ pl->limit = V_pf_pool_limits[pl->index].limit;
+#else
pl->limit = pf_pool_limits[pl->index].limit;
+#endif
break;
}
@@ -2308,29 +2558,40 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
int old_limit;
if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
+#ifdef __FreeBSD__
+ V_pf_pool_limits[pl->index].pp == NULL) {
+#else
pf_pool_limits[pl->index].pp == NULL) {
+#endif
error = EINVAL;
goto fail;
}
#ifdef __FreeBSD__
- uma_zone_set_max(pf_pool_limits[pl->index].pp, pl->limit);
+ uma_zone_set_max(V_pf_pool_limits[pl->index].pp, pl->limit);
+ old_limit = V_pf_pool_limits[pl->index].limit;
+ V_pf_pool_limits[pl->index].limit = pl->limit;
+ pl->limit = old_limit;
#else
if (pool_sethardlimit(pf_pool_limits[pl->index].pp,
pl->limit, NULL, 0) != 0) {
error = EBUSY;
goto fail;
}
-#endif
old_limit = pf_pool_limits[pl->index].limit;
pf_pool_limits[pl->index].limit = pl->limit;
pl->limit = old_limit;
+#endif
break;
}
case DIOCSETDEBUG: {
u_int32_t *level = (u_int32_t *)addr;
+#ifdef __FreeBSD__
+ V_pf_status.debug = *level;
+#else
pf_status.debug = *level;
+#endif
break;
}
@@ -2373,11 +2634,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_altq *altq;
/* enable all altq interfaces on active list */
- TAILQ_FOREACH(altq, pf_altqs_active, entries) {
#ifdef __FreeBSD__
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
if (altq->qname[0] == 0 && (altq->local_flags &
PFALTQ_FLAG_IF_REMOVED) == 0) {
#else
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
if (altq->qname[0] == 0) {
#endif
error = pf_enable_altq(altq);
@@ -2386,7 +2648,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
}
if (error == 0)
+#ifdef __FreeBSD__
+ V_pf_altq_running = 1;
+#else
pf_altq_running = 1;
+#endif
DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
break;
}
@@ -2395,11 +2661,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_altq *altq;
/* disable all altq interfaces on active list */
- TAILQ_FOREACH(altq, pf_altqs_active, entries) {
#ifdef __FreeBSD__
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
if (altq->qname[0] == 0 && (altq->local_flags &
PFALTQ_FLAG_IF_REMOVED) == 0) {
#else
+ TAILQ_FOREACH(altq, pf_altqs_active, entries) {
if (altq->qname[0] == 0) {
#endif
error = pf_disable_altq(altq);
@@ -2408,7 +2675,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
}
if (error == 0)
+#ifdef __FreeBSD__
+ V_pf_altq_running = 0;
+#else
pf_altq_running = 0;
+#endif
DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
break;
}
@@ -2417,11 +2688,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pfioc_altq *pa = (struct pfioc_altq *)addr;
struct pf_altq *altq, *a;
+#ifdef __FreeBSD__
+ if (pa->ticket != V_ticket_altqs_inactive) {
+#else
if (pa->ticket != ticket_altqs_inactive) {
+#endif
error = EBUSY;
break;
}
- altq = pool_get(&pf_altq_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ altq = pool_get(&V_pf_altq_pl, PR_NOWAIT);
+#else
+ altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL);
+#endif
if (altq == NULL) {
error = ENOMEM;
break;
@@ -2438,11 +2717,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (altq->qname[0] != 0) {
if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
error = EBUSY;
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, altq);
+#else
pool_put(&pf_altq_pl, altq);
+#endif
break;
}
altq->altq_disc = NULL;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) {
+#else
TAILQ_FOREACH(a, pf_altqs_inactive, entries) {
+#endif
if (strncmp(a->ifname, altq->ifname,
IFNAMSIZ) == 0 && a->qname[0] == 0) {
altq->altq_disc = a->altq_disc;
@@ -2458,18 +2745,26 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
} else {
PF_UNLOCK();
-#endif
+#endif
error = altq_add(altq);
#ifdef __FreeBSD__
PF_LOCK();
}
#endif
if (error) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_altq_pl, altq);
+#else
pool_put(&pf_altq_pl, altq);
+#endif
break;
}
+#ifdef __FreeBSD__
+ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
+#else
TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
+#endif
bcopy(altq, &pa->altq, sizeof(struct pf_altq));
break;
}
@@ -2479,9 +2774,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_altq *altq;
pa->nr = 0;
+#ifdef __FreeBSD__
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
+ pa->nr++;
+ pa->ticket = V_ticket_altqs_active;
+#else
TAILQ_FOREACH(altq, pf_altqs_active, entries)
pa->nr++;
pa->ticket = ticket_altqs_active;
+#endif
break;
}
@@ -2490,12 +2791,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_altq *altq;
u_int32_t nr;
+#ifdef __FreeBSD__
+ if (pa->ticket != V_ticket_altqs_active) {
+#else
if (pa->ticket != ticket_altqs_active) {
+#endif
error = EBUSY;
break;
}
nr = 0;
+#ifdef __FreeBSD__
+ altq = TAILQ_FIRST(V_pf_altqs_active);
+#else
altq = TAILQ_FIRST(pf_altqs_active);
+#endif
while ((altq != NULL) && (nr < pa->nr)) {
altq = TAILQ_NEXT(altq, entries);
nr++;
@@ -2519,13 +2828,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
u_int32_t nr;
int nbytes;
+#ifdef __FreeBSD__
+ if (pq->ticket != V_ticket_altqs_active) {
+#else
if (pq->ticket != ticket_altqs_active) {
+#endif
error = EBUSY;
break;
}
nbytes = pq->nbytes;
nr = 0;
+#ifdef __FreeBSD__
+ altq = TAILQ_FIRST(V_pf_altqs_active);
+#else
altq = TAILQ_FIRST(pf_altqs_active);
+#endif
while ((altq != NULL) && (nr < pq->nr)) {
altq = TAILQ_NEXT(altq, entries);
nr++;
@@ -2534,6 +2851,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EBUSY;
break;
}
+
#ifdef __FreeBSD__
if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
error = ENXIO;
@@ -2556,15 +2874,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
case DIOCBEGINADDRS: {
struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+#ifdef __FreeBSD__
+ pf_empty_pool(&V_pf_pabuf);
+ pp->ticket = ++V_ticket_pabuf;
+#else
pf_empty_pool(&pf_pabuf);
pp->ticket = ++ticket_pabuf;
+#endif
break;
}
case DIOCADDADDR: {
struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+#ifdef __FreeBSD__
+ if (pp->ticket != V_ticket_pabuf) {
+#else
if (pp->ticket != ticket_pabuf) {
+#endif
error = EBUSY;
break;
}
@@ -2586,7 +2913,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EINVAL;
break;
}
- pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ pa = pool_get(&V_pf_pooladdr_pl, PR_NOWAIT);
+#else
+ pa = pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL);
+#endif
if (pa == NULL) {
error = ENOMEM;
break;
@@ -2595,7 +2926,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (pa->ifname[0]) {
pa->kif = pfi_kif_get(pa->ifname);
if (pa->kif == NULL) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, pa);
+#else
pool_put(&pf_pooladdr_pl, pa);
+#endif
error = EINVAL;
break;
}
@@ -2604,11 +2939,19 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (pfi_dynaddr_setup(&pa->addr, pp->af)) {
pfi_dynaddr_remove(&pa->addr);
pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, pa);
+#else
pool_put(&pf_pooladdr_pl, pa);
+#endif
error = EINVAL;
break;
}
+#ifdef __FreeBSD__
+ TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
+#else
TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries);
+#endif
break;
}
@@ -2647,9 +2990,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
break;
}
bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr));
- pfi_dynaddr_copyout(&pp->addr.addr);
- pf_tbladdr_copyout(&pp->addr.addr);
- pf_rtlabel_copyout(&pp->addr.addr);
+ pf_addr_copyout(&pp->addr.addr);
break;
}
@@ -2682,7 +3023,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
break;
}
if (pca->action != PF_CHANGE_REMOVE) {
- newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ newpa = pool_get(&V_pf_pooladdr_pl,
+ PR_NOWAIT);
+#else
+ newpa = pool_get(&pf_pooladdr_pl,
+ PR_WAITOK|PR_LIMITFAIL);
+#endif
if (newpa == NULL) {
error = ENOMEM;
break;
@@ -2690,14 +3037,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
#ifndef INET
if (pca->af == AF_INET) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, newpa);
+#else
pool_put(&pf_pooladdr_pl, newpa);
+#endif
error = EAFNOSUPPORT;
break;
}
#endif /* INET */
#ifndef INET6
if (pca->af == AF_INET6) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, newpa);
+#else
pool_put(&pf_pooladdr_pl, newpa);
+#endif
error = EAFNOSUPPORT;
break;
}
@@ -2705,7 +3060,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (newpa->ifname[0]) {
newpa->kif = pfi_kif_get(newpa->ifname);
if (newpa->kif == NULL) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, newpa);
+#else
pool_put(&pf_pooladdr_pl, newpa);
+#endif
error = EINVAL;
break;
}
@@ -2716,7 +3075,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
pf_tbladdr_setup(ruleset, &newpa->addr)) {
pfi_dynaddr_remove(&newpa->addr);
pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, newpa);
+#else
pool_put(&pf_pooladdr_pl, newpa);
+#endif
error = EINVAL;
break;
}
@@ -2745,7 +3108,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
pfi_dynaddr_remove(&oldpa->addr);
pf_tbladdr_remove(&oldpa->addr);
pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_pooladdr_pl, oldpa);
+#else
pool_put(&pf_pooladdr_pl, oldpa);
+#endif
} else {
if (oldpa == NULL)
TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
@@ -2776,7 +3143,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
pr->nr = 0;
if (ruleset->anchor == NULL) {
/* XXX kludge for pf_main_ruleset */
+#ifdef __FreeBSD__
+ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors)
+#else
RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
+#endif
if (anchor->parent == NULL)
pr->nr++;
} else {
@@ -2801,7 +3172,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
pr->name[0] = 0;
if (ruleset->anchor == NULL) {
/* XXX kludge for pf_main_ruleset */
+#ifdef __FreeBSD__
+ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors)
+#else
RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
+#endif
if (anchor->parent == NULL && nr++ == pr->nr) {
strlcpy(pr->name, anchor->name,
sizeof(pr->name));
@@ -3046,17 +3421,15 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
- ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
- M_TEMP, M_WAITOK);
- table = (struct pfr_table *)malloc(sizeof(*table),
- M_TEMP, M_WAITOK);
+ ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
+ table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
#ifdef __FreeBSD__
PF_LOCK();
#endif
for (i = 0; i < io->size; i++) {
#ifdef __FreeBSD__
- PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
- if (error) {
+ PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
+ if (error) {
#else
if (copyin(io->array+i, ioe, sizeof(*ioe))) {
#endif
@@ -3132,10 +3505,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
- ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
- M_TEMP, M_WAITOK);
- table = (struct pfr_table *)malloc(sizeof(*table),
- M_TEMP, M_WAITOK);
+ ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
+ table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
#ifdef __FreeBSD__
PF_LOCK();
#endif
@@ -3207,10 +3578,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
- ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
- M_TEMP, M_WAITOK);
- table = (struct pfr_table *)malloc(sizeof(*table),
- M_TEMP, M_WAITOK);
+ ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
+ table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
#ifdef __FreeBSD__
PF_LOCK();
#endif
@@ -3236,8 +3605,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = EINVAL;
goto fail;
}
+#ifdef __FreeBSD__
+ if (!V_altqs_inactive_open || ioe->ticket !=
+ V_ticket_altqs_inactive) {
+#else
if (!altqs_inactive_open || ioe->ticket !=
ticket_altqs_inactive) {
+#endif
free(table, M_TEMP);
free(ioe, M_TEMP);
error = EBUSY;
@@ -3248,7 +3622,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
case PF_RULESET_TABLE:
rs = pf_find_ruleset(ioe->anchor);
if (rs == NULL || !rs->topen || ioe->ticket !=
- rs->tticket) {
+ rs->tticket) {
free(table, M_TEMP);
free(ioe, M_TEMP);
error = EBUSY;
@@ -3332,7 +3706,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
int space = psn->psn_len;
if (space == 0) {
+#ifdef __FreeBSD__
+ RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking)
+#else
RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
+#endif
nr++;
psn->psn_len = sizeof(struct pf_src_node) * nr;
break;
@@ -3345,9 +3723,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
#ifdef __FreeBSD__
PF_LOCK();
#endif
-
p = psn->psn_src_nodes;
+#ifdef __FreeBSD__
+ RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) {
+#else
RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+#endif
int secs = time_second, diff;
if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
@@ -3393,39 +3774,59 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_src_node *n;
struct pf_state *state;
+#ifdef __FreeBSD__
+ RB_FOREACH(state, pf_state_tree_id, &V_tree_id) {
+#else
RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+#endif
state->src_node = NULL;
state->nat_src_node = NULL;
}
+#ifdef __FreeBSD__
+ RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) {
+#else
RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+#endif
n->expire = 1;
n->states = 0;
}
pf_purge_expired_src_nodes(1);
+#ifdef __FreeBSD__
+ V_pf_status.src_nodes = 0;
+#else
pf_status.src_nodes = 0;
+#endif
break;
}
case DIOCKILLSRCNODES: {
struct pf_src_node *sn;
struct pf_state *s;
- struct pfioc_src_node_kill *psnk = \
- (struct pfioc_src_node_kill *) addr;
- int killed = 0;
+ struct pfioc_src_node_kill *psnk =
+ (struct pfioc_src_node_kill *)addr;
+ u_int killed = 0;
+#ifdef __FreeBSD__
+ RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) {
+#else
RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
- if (PF_MATCHA(psnk->psnk_src.neg, \
- &psnk->psnk_src.addr.v.a.addr, \
- &psnk->psnk_src.addr.v.a.mask, \
- &sn->addr, sn->af) &&
- PF_MATCHA(psnk->psnk_dst.neg, \
- &psnk->psnk_dst.addr.v.a.addr, \
- &psnk->psnk_dst.addr.v.a.mask, \
- &sn->raddr, sn->af)) {
+#endif
+ if (PF_MATCHA(psnk->psnk_src.neg,
+ &psnk->psnk_src.addr.v.a.addr,
+ &psnk->psnk_src.addr.v.a.mask,
+ &sn->addr, sn->af) &&
+ PF_MATCHA(psnk->psnk_dst.neg,
+ &psnk->psnk_dst.addr.v.a.addr,
+ &psnk->psnk_dst.addr.v.a.mask,
+ &sn->raddr, sn->af)) {
/* Handle state to src_node linkage */
if (sn->states != 0) {
- RB_FOREACH(s, pf_state_tree_id,
+ RB_FOREACH(s, pf_state_tree_id,
+#ifdef __FreeBSD__
+ &V_tree_id) {
+#else
&tree_id) {
+#endif
if (s->src_node == sn)
s->src_node = NULL;
if (s->nat_src_node == sn)
@@ -3441,17 +3842,24 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (killed > 0)
pf_purge_expired_src_nodes(1);
- psnk->psnk_af = killed;
+ psnk->psnk_killed = killed;
break;
}
case DIOCSETHOSTID: {
u_int32_t *hostid = (u_int32_t *)addr;
+#ifdef __FreeBSD__
+ if (*hostid == 0)
+ V_pf_status.hostid = arc4random();
+ else
+ V_pf_status.hostid = *hostid;
+#else
if (*hostid == 0)
pf_status.hostid = arc4random();
else
pf_status.hostid = *hostid;
+#endif
break;
}
@@ -3494,43 +3902,110 @@ fail:
PF_UNLOCK();
if (flags & FWRITE)
- sx_xunlock(&pf_consistency_lock);
+ sx_xunlock(&V_pf_consistency_lock);
else
- sx_sunlock(&pf_consistency_lock);
+ sx_sunlock(&V_pf_consistency_lock);
#else
splx(s);
- /* XXX: Lock order? */
if (flags & FWRITE)
rw_exit_write(&pf_consistency_lock);
else
rw_exit_read(&pf_consistency_lock);
#endif
+
+ CURVNET_RESTORE();
+
return (error);
}
#ifdef __FreeBSD__
+void
+pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
+{
+ bzero(sp, sizeof(struct pfsync_state));
+
+ /* copy from state key */
+ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
+ sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
+ sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
+ sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
+ sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
+ sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
+ sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
+ sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
+ sp->proto = st->key[PF_SK_WIRE]->proto;
+ sp->af = st->key[PF_SK_WIRE]->af;
+
+ /* copy from state */
+ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+ bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
+ sp->creation = htonl(time_second - st->creation);
+ sp->expire = pf_state_expires(st);
+ if (sp->expire <= time_second)
+ sp->expire = htonl(0);
+ else
+ sp->expire = htonl(sp->expire - time_second);
+
+ sp->direction = st->direction;
+ sp->log = st->log;
+ sp->timeout = st->timeout;
+ sp->state_flags = st->state_flags;
+ if (st->src_node)
+ sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
+ if (st->nat_src_node)
+ sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
+
+ bcopy(&st->id, &sp->id, sizeof(sp->id));
+ sp->creatorid = st->creatorid;
+ pf_state_peer_hton(&st->src, &sp->src);
+ pf_state_peer_hton(&st->dst, &sp->dst);
+
+ if (st->rule.ptr == NULL)
+ sp->rule = htonl(-1);
+ else
+ sp->rule = htonl(st->rule.ptr->nr);
+ if (st->anchor.ptr == NULL)
+ sp->anchor = htonl(-1);
+ else
+ sp->anchor = htonl(st->anchor.ptr->nr);
+ if (st->nat_rule.ptr == NULL)
+ sp->nat_rule = htonl(-1);
+ else
+ sp->nat_rule = htonl(st->nat_rule.ptr->nr);
+
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+
+}
+
/*
* XXX - Check for version missmatch!!!
*/
static void
pf_clear_states(void)
{
- struct pf_state *state;
-
+ struct pf_state *state;
+
+#ifdef __FreeBSD__
+ RB_FOREACH(state, pf_state_tree_id, &V_tree_id) {
+#else
RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+#endif
state->timeout = PFTM_PURGE;
#if NPFSYNC
/* don't send out individual delete messages */
- state->sync_flags = PFSTATE_NOSYNC;
+ state->sync_state = PFSTATE_NOSYNC;
#endif
pf_unlink_state(state);
}
-
+
#if 0 /* NPFSYNC */
/*
* XXX This is called on module unload, we do not want to sync that over? */
*/
- pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
+ pfsync_clear_states(V_pf_status.hostid, psk->psk_ifname);
#endif
}
@@ -3554,11 +4029,19 @@ pf_clear_srcnodes(void)
struct pf_src_node *n;
struct pf_state *state;
+#ifdef __FreeBSD__
+ RB_FOREACH(state, pf_state_tree_id, &V_tree_id) {
+#else
RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+#endif
state->src_node = NULL;
state->nat_src_node = NULL;
}
+#ifdef __FreeBSD__
+ RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) {
+#else
RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+#endif
n->expire = 1;
n->states = 0;
}
@@ -3576,8 +4059,8 @@ shutdown_pf(void)
int error = 0;
u_int32_t t[5];
char nn = '\0';
-
- pf_status.running = 0;
+
+ V_pf_status.running = 0;
do {
if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
!= 0) {
@@ -3587,22 +4070,22 @@ shutdown_pf(void)
if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
!= 0) {
DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
- break; /* XXX: rollback? */
+ break; /* XXX: rollback? */
}
if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
!= 0) {
DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
- break; /* XXX: rollback? */
+ break; /* XXX: rollback? */
}
if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
!= 0) {
DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
- break; /* XXX: rollback? */
+ break; /* XXX: rollback? */
}
if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
!= 0) {
DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
- break; /* XXX: rollback? */
+ break; /* XXX: rollback? */
}
/* XXX: these should always succeed here */
@@ -3615,13 +4098,13 @@ shutdown_pf(void)
if ((error = pf_clear_tables()) != 0)
break;
-#ifdef ALTQ
+ #ifdef ALTQ
if ((error = pf_begin_altq(&t[0])) != 0) {
DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
break;
}
pf_commit_altq(t[0]);
-#endif
+ #endif
pf_clear_states();
@@ -3631,9 +4114,10 @@ shutdown_pf(void)
/* fingerprints and interfaces have thier own cleanup code */
} while(0);
- return (error);
+ return (error);
}
+#ifdef INET
static int
pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
struct inpcb *inp)
@@ -3652,10 +4136,12 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) {
/* if m_pkthdr.len is less than ip header, pf will handle. */
h = mtod(*m, struct ip *);
- HTONS(h->ip_len);
- HTONS(h->ip_off);
+ HTONS(h->ip_len);
+ HTONS(h->ip_off);
}
+ CURVNET_SET(ifp->if_vnet);
chk = pf_test(PF_IN, ifp, m, NULL, inp);
+ CURVNET_RESTORE();
if (chk && *m) {
m_freem(*m);
*m = NULL;
@@ -3692,10 +4178,12 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) {
/* if m_pkthdr.len is less than ip header, pf will handle. */
h = mtod(*m, struct ip *);
- HTONS(h->ip_len);
- HTONS(h->ip_off);
+ HTONS(h->ip_len);
+ HTONS(h->ip_off);
}
+ CURVNET_SET(ifp->if_vnet);
chk = pf_test(PF_OUT, ifp, m, NULL, inp);
+ CURVNET_RESTORE();
if (chk && *m) {
m_freem(*m);
*m = NULL;
@@ -3708,6 +4196,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
}
return chk;
}
+#endif
#ifdef INET6
static int
@@ -3725,8 +4214,10 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
* order to support scoped addresses. In order to support stateful
* filtering we have change this to lo0 as it is the case in IPv4.
*/
+ CURVNET_SET(ifp->if_vnet);
chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m,
NULL, inp);
+ CURVNET_RESTORE();
if (chk && *m) {
m_freem(*m);
*m = NULL;
@@ -3743,12 +4234,17 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
*/
int chk;
- /* We need a proper CSUM befor we start (s. OpenBSD ip_output) */
+ /* We need a proper CSUM before we start (s. OpenBSD ip_output) */
if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+#ifdef INET
+ /* XXX-BZ copy&paste error from r126261? */
in_delayed_cksum(*m);
+#endif
(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
+ CURVNET_SET(ifp->if_vnet);
chk = pf_test6(PF_OUT, ifp, m, NULL, inp);
+ CURVNET_RESTORE();
if (chk && *m) {
m_freem(*m);
*m = NULL;
@@ -3760,51 +4256,60 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
static int
hook_pf(void)
{
+#ifdef INET
struct pfil_head *pfh_inet;
+#endif
#ifdef INET6
struct pfil_head *pfh_inet6;
#endif
-
- PF_ASSERT(MA_NOTOWNED);
- if (pf_pfil_hooked)
+ PF_UNLOCK_ASSERT();
+
+ if (V_pf_pfil_hooked)
return (0);
-
+
+#ifdef INET
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return (ESRCH); /* XXX */
pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+#endif
#ifdef INET6
pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
if (pfh_inet6 == NULL) {
+#ifdef INET
pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
pfh_inet);
pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
pfh_inet);
+#endif
return (ESRCH); /* XXX */
}
pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
#endif
- pf_pfil_hooked = 1;
+ V_pf_pfil_hooked = 1;
return (0);
}
static int
dehook_pf(void)
{
+#ifdef INET
struct pfil_head *pfh_inet;
+#endif
#ifdef INET6
struct pfil_head *pfh_inet6;
#endif
- PF_ASSERT(MA_NOTOWNED);
+ PF_UNLOCK_ASSERT();
- if (pf_pfil_hooked == 0)
+ if (V_pf_pfil_hooked == 0)
return (0);
+#ifdef INET
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return (ESRCH); /* XXX */
@@ -3812,6 +4317,7 @@ dehook_pf(void)
pfh_inet);
pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
pfh_inet);
+#endif
#ifdef INET6
pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
if (pfh_inet6 == NULL)
@@ -3822,21 +4328,34 @@ dehook_pf(void)
pfh_inet6);
#endif
- pf_pfil_hooked = 0;
+ V_pf_pfil_hooked = 0;
return (0);
}
static int
pf_load(void)
{
- init_zone_var();
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_LIST_RLOCK();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_pf_pfil_hooked = 0;
+ V_pf_end_threads = 0;
+ V_debug_pfugidhack = 0;
+ TAILQ_INIT(&V_pf_tags);
+ TAILQ_INIT(&V_pf_qids);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK();
+
init_pf_mutex();
pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
- if (pfattach() < 0) {
- destroy_dev(pf_dev);
- destroy_pf_mutex();
+ init_zone_var();
+ sx_init(&V_pf_consistency_lock, "pf_statetbl_lock");
+ if (pfattach() < 0)
return (ENOMEM);
- }
+
return (0);
}
@@ -3846,8 +4365,9 @@ pf_unload(void)
int error = 0;
PF_LOCK();
- pf_status.running = 0;
+ V_pf_status.running = 0;
PF_UNLOCK();
+ m_addr_chg_pf_p = NULL;
error = dehook_pf();
if (error) {
/*
@@ -3860,8 +4380,8 @@ pf_unload(void)
}
PF_LOCK();
shutdown_pf();
- pf_end_threads = 1;
- while (pf_end_threads < 2) {
+ V_pf_end_threads = 1;
+ while (V_pf_end_threads < 2) {
wakeup_one(pf_purge_thread);
msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz);
}
@@ -3872,6 +4392,7 @@ pf_unload(void)
PF_UNLOCK();
destroy_dev(pf_dev);
destroy_pf_mutex();
+ sx_destroy(&V_pf_consistency_lock);
return error;
}
@@ -3884,7 +4405,12 @@ pf_modevent(module_t mod, int type, void *data)
case MOD_LOAD:
error = pf_load();
break;
-
+ case MOD_QUIESCE:
+ /*
+ * Module should not be unloaded due to race conditions.
+ */
+ error = EPERM;
+ break;
case MOD_UNLOAD:
error = pf_unload();
break;
@@ -3894,13 +4420,13 @@ pf_modevent(module_t mod, int type, void *data)
}
return error;
}
-
+
static moduledata_t pf_mod = {
"pf",
pf_modevent,
0
};
-DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST);
+DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST);
MODULE_VERSION(pf, PF_MODVER);
-#endif /* __FreeBSD__ */
+#endif /* __FreeBSD__ */
diff --git a/freebsd/sys/contrib/pf/net/pf_lb.c b/freebsd/sys/contrib/pf/net/pf_lb.c
new file mode 100644
index 00000000..0c2046c2
--- /dev/null
+++ b/freebsd/sys/contrib/pf/net/pf_lb.c
@@ -0,0 +1,795 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ */
+
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002 - 2008 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ */
+
+#ifdef __FreeBSD__
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#endif
+
+#ifdef __FreeBSD__
+#include <rtems/bsd/local/opt_bpf.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#ifdef DEV_BPF
+#define NBPFILTER DEV_BPF
+#else
+#define NBPFILTER 0
+#endif
+
+#ifdef DEV_PFLOG
+#define NPFLOG DEV_PFLOG
+#else
+#define NPFLOG 0
+#endif
+
+#ifdef DEV_PFSYNC
+#define NPFSYNC DEV_PFSYNC
+#else
+#define NPFSYNC 0
+#endif
+
+#ifdef DEV_PFLOW
+#define NPFLOW DEV_PFLOW
+#else
+#define NPFLOW 0
+#endif
+
+#else
+#include "bpfilter.h"
+#include "pflog.h"
+#include "pfsync.h"
+#include "pflow.h"
+#endif
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/filio.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/time.h>
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#endif
+#ifndef __FreeBSD__
+#include <sys/pool.h>
+#endif
+#include <sys/proc.h>
+#ifdef __FreeBSD__
+#include <sys/kthread.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/sx.h>
+#else
+#include <sys/rwlock.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/md5.h>
+#else
+#include <crypto/md5.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
+#include <net/route.h>
+#include <net/radix_mpath.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp_var.h>
+#include <netinet/icmp_var.h>
+#include <netinet/if_ether.h>
+
+#ifndef __FreeBSD__
+#include <dev/rndvar.h>
+#endif
+#include <net/pfvar.h>
+#include <net/if_pflog.h>
+#include <net/if_pflow.h>
+
+#if NPFSYNC > 0
+#include <net/if_pfsync.h>
+#endif /* NPFSYNC > 0 */
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet/in_pcb.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+
+#ifdef __FreeBSD__
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+#else
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+#endif
+
+/*
+ * Global variables
+ */
+
+void pf_hash(struct pf_addr *, struct pf_addr *,
+ struct pf_poolhashkey *, sa_family_t);
+struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *,
+ struct pf_addr *, u_int16_t, struct pf_addr *,
+ u_int16_t, int);
+int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *, u_int16_t,
+ struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
+ struct pf_src_node **);
+
+#define mix(a,b,c) \
+ do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+ } while (0)
+
+/*
+ * hash function based on bridge_hash in if_bridge.c
+ */
+void
+pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
+ struct pf_poolhashkey *key, sa_family_t af)
+{
+ u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ a += inaddr->addr32[0];
+ b += key->key32[1];
+ mix(a, b, c);
+ hash->addr32[0] = c + key->key32[2];
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ a += inaddr->addr32[0];
+ b += inaddr->addr32[2];
+ mix(a, b, c);
+ hash->addr32[0] = c;
+ a += inaddr->addr32[1];
+ b += inaddr->addr32[3];
+ c += key->key32[1];
+ mix(a, b, c);
+ hash->addr32[1] = c;
+ a += inaddr->addr32[2];
+ b += inaddr->addr32[1];
+ c += key->key32[2];
+ mix(a, b, c);
+ hash->addr32[2] = c;
+ a += inaddr->addr32[3];
+ b += inaddr->addr32[0];
+ c += key->key32[3];
+ mix(a, b, c);
+ hash->addr32[3] = c;
+ break;
+#endif /* INET6 */
+ }
+}
+
+struct pf_rule *
+pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
+ int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
+ struct pf_addr *daddr, u_int16_t dport, int rs_num)
+{
+ struct pf_rule *r, *rm = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ int tag = -1;
+ int rtableid = -1;
+ int asd = 0;
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+ while (r && rm == NULL) {
+ struct pf_rule_addr *src = NULL, *dst = NULL;
+ struct pf_addr_wrap *xdst = NULL;
+
+ if (r->action == PF_BINAT && direction == PF_IN) {
+ src = &r->dst;
+ if (r->rpool.cur != NULL)
+ xdst = &r->rpool.cur->addr;
+ } else {
+ src = &r->src;
+ dst = &r->dst;
+ }
+
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != pd->af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
+ src->neg, kif, M_GETFIB(m)))
+ r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
+ PF_SKIP_DST_ADDR].ptr;
+ else if (src->port_op && !pf_match_port(src->port_op,
+ src->port[0], src->port[1], sport))
+ r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
+ PF_SKIP_DST_PORT].ptr;
+ else if (dst != NULL &&
+ PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
+ M_GETFIB(m)))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
+ 0, NULL, M_GETFIB(m)))
+ r = TAILQ_NEXT(r, entries);
+ else if (dst != NULL && dst->port_op &&
+ !pf_match_port(dst->port_op, dst->port[0],
+ dst->port[1], dport))
+ r = r->skip[PF_SKIP_DST_PORT].ptr;
+#ifdef __FreeBSD__
+ else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
+#else
+ else if (r->match_tag && !pf_match_tag(m, r, &tag))
+#endif
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
+ IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
+ off, pd->hdr.tcp), r->os_fingerprint)))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
+ rm = r;
+ } else
+ pf_step_into_anchor(&asd, &ruleset, rs_num,
+ &r, NULL, NULL);
+ }
+ if (r == NULL)
+ pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
+ NULL, NULL);
+ }
+#ifdef __FreeBSD__
+ if (pf_tag_packet(m, tag, rtableid, pd->pf_mtag))
+#else
+ if (pf_tag_packet(m, tag, rtableid))
+#endif
+ return (NULL);
+ if (rm != NULL && (rm->action == PF_NONAT ||
+ rm->action == PF_NORDR || rm->action == PF_NOBINAT))
+ return (NULL);
+ return (rm);
+}
+
+int
+pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
+ struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
+ struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
+ struct pf_src_node **sn)
+{
+ struct pf_state_key_cmp key;
+ struct pf_addr init_addr;
+ u_int16_t cut;
+
+ bzero(&init_addr, sizeof(init_addr));
+ if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+ return (1);
+
+ if (proto == IPPROTO_ICMP) {
+ low = 1;
+ high = 65535;
+ }
+
+ do {
+ key.af = af;
+ key.proto = proto;
+ PF_ACPY(&key.addr[1], daddr, key.af);
+ PF_ACPY(&key.addr[0], naddr, key.af);
+ key.port[1] = dport;
+
+ /*
+ * port search; start random, step;
+ * similar 2 portloop in in_pcbbind
+ */
+ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+ proto == IPPROTO_ICMP)) {
+ key.port[0] = dport;
+ if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
+ return (0);
+ } else if (low == 0 && high == 0) {
+ key.port[0] = *nport;
+ if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
+ return (0);
+ } else if (low == high) {
+ key.port[0] = htons(low);
+ if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
+ *nport = htons(low);
+ return (0);
+ }
+ } else {
+ u_int16_t tmp;
+
+ if (low > high) {
+ tmp = low;
+ low = high;
+ high = tmp;
+ }
+ /* low < high */
+#ifdef __FreeBSD__
+ cut = htonl(arc4random()) % (1 + high - low) + low;
+#else
+ cut = arc4random_uniform(1 + high - low) + low;
+#endif
+ /* low <= cut <= high */
+ for (tmp = cut; tmp <= high; ++(tmp)) {
+ key.port[0] = htons(tmp);
+ if (pf_find_state_all(&key, PF_IN, NULL) ==
+#ifdef __FreeBSD__
+ NULL) {
+#else
+ NULL && !in_baddynamic(tmp, proto)) {
+#endif
+ *nport = htons(tmp);
+ return (0);
+ }
+ }
+ for (tmp = cut - 1; tmp >= low; --(tmp)) {
+ key.port[0] = htons(tmp);
+ if (pf_find_state_all(&key, PF_IN, NULL) ==
+#ifdef __FreeBSD__
+ NULL) {
+#else
+ NULL && !in_baddynamic(tmp, proto)) {
+#endif
+ *nport = htons(tmp);
+ return (0);
+ }
+ }
+ }
+
+ switch (r->rpool.opts & PF_POOL_TYPEMASK) {
+ case PF_POOL_RANDOM:
+ case PF_POOL_ROUNDROBIN:
+ if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+ return (1);
+ break;
+ case PF_POOL_NONE:
+ case PF_POOL_SRCHASH:
+ case PF_POOL_BITMASK:
+ default:
+ return (1);
+ }
+ } while (! PF_AEQ(&init_addr, naddr, af) );
+ return (1); /* none available */
+}
+
+int
+pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
+ struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
+{
+ unsigned char hash[16];
+ struct pf_pool *rpool = &r->rpool;
+ struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
+ struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
+ struct pf_pooladdr *acur = rpool->cur;
+ struct pf_src_node k;
+
+ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+ (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+ k.af = af;
+ PF_ACPY(&k.addr, saddr, af);
+ if (r->rule_flag & PFRULE_RULESRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR)
+ k.rule.ptr = r;
+ else
+ k.rule.ptr = NULL;
+#ifdef __FreeBSD__
+ V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+ *sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
+#else
+ pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+ *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
+#endif
+ if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
+ PF_ACPY(naddr, &(*sn)->raddr, af);
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
+ printf("pf_map_addr: src tracking maps ");
+ pf_print_host(&k.addr, 0, af);
+ printf(" to ");
+ pf_print_host(naddr, 0, af);
+ printf("\n");
+ }
+ return (0);
+ }
+ }
+
+ if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
+ return (1);
+ if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
+ (rpool->opts & PF_POOL_TYPEMASK) !=
+ PF_POOL_ROUNDROBIN)
+ return (1);
+ raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
+ rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
+ (rpool->opts & PF_POOL_TYPEMASK) !=
+ PF_POOL_ROUNDROBIN)
+ return (1);
+ raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
+ rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
+ break;
+#endif /* INET6 */
+ }
+ } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
+ return (1); /* unsupported */
+ } else {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ }
+
+ switch (rpool->opts & PF_POOL_TYPEMASK) {
+ case PF_POOL_NONE:
+ PF_ACPY(naddr, raddr, af);
+ break;
+ case PF_POOL_BITMASK:
+ PF_POOLMASK(naddr, raddr, rmask, saddr, af);
+ break;
+ case PF_POOL_RANDOM:
+ if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ rpool->counter.addr32[0] = htonl(arc4random());
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (rmask->addr32[3] != 0xffffffff)
+ rpool->counter.addr32[3] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[2] != 0xffffffff)
+ rpool->counter.addr32[2] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[1] != 0xffffffff)
+ rpool->counter.addr32[1] =
+ htonl(arc4random());
+ else
+ break;
+ if (rmask->addr32[0] != 0xffffffff)
+ rpool->counter.addr32[0] =
+ htonl(arc4random());
+ break;
+#endif /* INET6 */
+ }
+ PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+ PF_ACPY(init_addr, naddr, af);
+
+ } else {
+ PF_AINC(&rpool->counter, af);
+ PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+ }
+ break;
+ case PF_POOL_SRCHASH:
+ pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+ PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+ break;
+ case PF_POOL_ROUNDROBIN:
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af))
+ goto get_addr;
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af))
+ goto get_addr;
+ } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
+ goto get_addr;
+
+ try_next:
+ if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
+ rpool->cur = TAILQ_FIRST(&rpool->list);
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ rpool->tblidx = -1;
+ if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af)) {
+ /* table contains no address of type 'af' */
+ if (rpool->cur != acur)
+ goto try_next;
+ return (1);
+ }
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ rpool->tblidx = -1;
+ if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter,
+ &raddr, &rmask, af)) {
+ /* table contains no address of type 'af' */
+ if (rpool->cur != acur)
+ goto try_next;
+ return (1);
+ }
+ } else {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ PF_ACPY(&rpool->counter, raddr, af);
+ }
+
+ get_addr:
+ PF_ACPY(naddr, &rpool->counter, af);
+ if (init_addr != NULL && PF_AZERO(init_addr, af))
+ PF_ACPY(init_addr, naddr, af);
+ PF_AINC(&rpool->counter, af);
+ break;
+ }
+ if (*sn != NULL)
+ PF_ACPY(&(*sn)->raddr, naddr, af);
+
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC &&
+#else
+ if (pf_status.debug >= PF_DEBUG_MISC &&
+#endif
+ (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+ printf("pf_map_addr: selected address ");
+ pf_print_host(naddr, 0, af);
+ printf("\n");
+ }
+
+ return (0);
+}
+
+struct pf_rule *
+pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
+ struct pfi_kif *kif, struct pf_src_node **sn,
+ struct pf_state_key **skw, struct pf_state_key **sks,
+ struct pf_state_key **skp, struct pf_state_key **nkp,
+ struct pf_addr *saddr, struct pf_addr *daddr,
+ u_int16_t sport, u_int16_t dport)
+{
+ struct pf_rule *r = NULL;
+
+
+ if (direction == PF_OUT) {
+ r = pf_match_translation(pd, m, off, direction, kif, saddr,
+ sport, daddr, dport, PF_RULESET_BINAT);
+ if (r == NULL)
+ r = pf_match_translation(pd, m, off, direction, kif,
+ saddr, sport, daddr, dport, PF_RULESET_NAT);
+ } else {
+ r = pf_match_translation(pd, m, off, direction, kif, saddr,
+ sport, daddr, dport, PF_RULESET_RDR);
+ if (r == NULL)
+ r = pf_match_translation(pd, m, off, direction, kif,
+ saddr, sport, daddr, dport, PF_RULESET_BINAT);
+ }
+
+ if (r != NULL) {
+ struct pf_addr *naddr;
+ u_int16_t *nport;
+
+ if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
+ saddr, daddr, sport, dport))
+ return r;
+
+ /* XXX We only modify one side for now. */
+ naddr = &(*nkp)->addr[1];
+ nport = &(*nkp)->port[1];
+
+ switch (r->action) {
+ case PF_NONAT:
+ case PF_NOBINAT:
+ case PF_NORDR:
+ return (NULL);
+ case PF_NAT:
+ if (pf_get_sport(pd->af, pd->proto, r, saddr,
+ daddr, dport, naddr, nport, r->rpool.proxy_port[0],
+ r->rpool.proxy_port[1], sn)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: NAT proxy port allocation "
+ "(%u-%u) failed\n",
+ r->rpool.proxy_port[0],
+ r->rpool.proxy_port[1]));
+ return (NULL);
+ }
+ break;
+ case PF_BINAT:
+ switch (direction) {
+ case PF_OUT:
+ if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ if (r->rpool.cur->addr.p.dyn->
+ pfid_acnt4 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_addr4,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_mask4,
+ saddr, AF_INET);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (r->rpool.cur->addr.p.dyn->
+ pfid_acnt6 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_addr6,
+ &r->rpool.cur->addr.p.dyn->
+ pfid_mask6,
+ saddr, AF_INET6);
+ break;
+#endif /* INET6 */
+ }
+ } else
+ PF_POOLMASK(naddr,
+ &r->rpool.cur->addr.v.a.addr,
+ &r->rpool.cur->addr.v.a.mask,
+ saddr, pd->af);
+ break;
+ case PF_IN:
+ if (r->src.addr.type == PF_ADDR_DYNIFTL) {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ if (r->src.addr.p.dyn->
+ pfid_acnt4 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->src.addr.p.dyn->
+ pfid_addr4,
+ &r->src.addr.p.dyn->
+ pfid_mask4,
+ daddr, AF_INET);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (r->src.addr.p.dyn->
+ pfid_acnt6 < 1)
+ return (NULL);
+ PF_POOLMASK(naddr,
+ &r->src.addr.p.dyn->
+ pfid_addr6,
+ &r->src.addr.p.dyn->
+ pfid_mask6,
+ daddr, AF_INET6);
+ break;
+#endif /* INET6 */
+ }
+ } else
+ PF_POOLMASK(naddr,
+ &r->src.addr.v.a.addr,
+ &r->src.addr.v.a.mask, daddr,
+ pd->af);
+ break;
+ }
+ break;
+ case PF_RDR: {
+ if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
+ return (NULL);
+ if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
+ PF_POOL_BITMASK)
+ PF_POOLMASK(naddr, naddr,
+ &r->rpool.cur->addr.v.a.mask, daddr,
+ pd->af);
+
+ if (r->rpool.proxy_port[1]) {
+ u_int32_t tmp_nport;
+
+ tmp_nport = ((ntohs(dport) -
+ ntohs(r->dst.port[0])) %
+ (r->rpool.proxy_port[1] -
+ r->rpool.proxy_port[0] + 1)) +
+ r->rpool.proxy_port[0];
+
+ /* wrap around if necessary */
+ if (tmp_nport > 65535)
+ tmp_nport -= 65535;
+ *nport = htons((u_int16_t)tmp_nport);
+ } else if (r->rpool.proxy_port[0])
+ *nport = htons(r->rpool.proxy_port[0]);
+ break;
+ }
+ default:
+ return (NULL);
+ }
+ /*
+ * Translation was a NOP.
+ * Pretend there was no match.
+ */
+ if (!bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
+#ifdef __FreeBSD__
+ pool_put(&V_pf_state_key_pl, *nkp);
+ pool_put(&V_pf_state_key_pl, *skp);
+#else
+ pool_put(&pf_state_key_pl, *nkp);
+ pool_put(&pf_state_key_pl, *skp);
+#endif
+ *skw = *sks = *nkp = *skp = NULL;
+ return (NULL);
+ }
+ }
+
+ return (r);
+}
+
diff --git a/freebsd/sys/contrib/pf/net/pf_mtag.h b/freebsd/sys/contrib/pf/net/pf_mtag.h
index a0ebf7ef..141a8679 100644
--- a/freebsd/sys/contrib/pf/net/pf_mtag.h
+++ b/freebsd/sys/contrib/pf/net/pf_mtag.h
@@ -37,15 +37,17 @@
#define PF_TAG_GENERATED 0x01
#define PF_TAG_FRAGCACHE 0x02
#define PF_TAG_TRANSLATE_LOCALHOST 0x04
+#define PF_PACKET_LOOPED 0x08
+#define PF_FASTFWD_OURS_PRESENT 0x10
struct pf_mtag {
void *hdr; /* saved hdr pos in mbuf, for ECN */
- u_int rtableid; /* alternate routing table id */
+ void *statekey; /* pf stackside statekey */
u_int32_t qid; /* queue id */
+ u_int rtableid; /* alternate routing table id */
u_int16_t tag; /* tag id */
u_int8_t flags;
u_int8_t routed;
- sa_family_t af; /* for ECN */
};
static __inline struct pf_mtag *pf_find_mtag(struct mbuf *);
diff --git a/freebsd/sys/contrib/pf/net/pf_norm.c b/freebsd/sys/contrib/pf/net/pf_norm.c
index a59c3fd8..3780fa82 100644
--- a/freebsd/sys/contrib/pf/net/pf_norm.c
+++ b/freebsd/sys/contrib/pf/net/pf_norm.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
+/* $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */
/*
* Copyright 2001 Niels Provos <provos@citi.umich.edu>
@@ -36,9 +36,9 @@
__FBSDID("$FreeBSD$");
#ifdef DEV_PFLOG
-#define NPFLOG DEV_PFLOG
+#define NPFLOG DEV_PFLOG
#else
-#define NPFLOG 0
+#define NPFLOG 0
#endif
#else
#include "pflog.h"
@@ -80,8 +80,6 @@ __FBSDID("$FreeBSD$");
#include <net/pfvar.h>
#ifndef __FreeBSD__
-#include <inttypes.h>
-
struct pf_frent {
LIST_ENTRY(pf_frent) fr_next;
struct ip *fr_ip;
@@ -120,17 +118,35 @@ struct pf_fragment {
};
#endif
+#ifdef __FreeBSD__
+TAILQ_HEAD(pf_fragqueue, pf_fragment);
+TAILQ_HEAD(pf_cachequeue, pf_fragment);
+VNET_DEFINE(struct pf_fragqueue, pf_fragqueue);
+#define V_pf_fragqueue VNET(pf_fragqueue)
+VNET_DEFINE(struct pf_cachequeue, pf_cachequeue);
+#define V_pf_cachequeue VNET(pf_cachequeue)
+#else
TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
+#endif
#ifndef __FreeBSD__
static __inline int pf_frag_compare(struct pf_fragment *,
struct pf_fragment *);
#else
-static int pf_frag_compare(struct pf_fragment *,
+static int pf_frag_compare(struct pf_fragment *,
struct pf_fragment *);
#endif
+
+#ifdef __FreeBSD__
+RB_HEAD(pf_frag_tree, pf_fragment);
+VNET_DEFINE(struct pf_frag_tree, pf_frag_tree);
+#define V_pf_frag_tree VNET(pf_frag_tree)
+VNET_DEFINE(struct pf_frag_tree, pf_cache_tree);
+#define V_pf_cache_tree VNET(pf_cache_tree)
+#else
RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
+#endif
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
@@ -145,24 +161,45 @@ struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
struct pf_fragment **, int, int, int *);
int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
- struct tcphdr *, int);
-
+ struct tcphdr *, int, sa_family_t);
+void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
+ u_int8_t);
+#ifdef INET6
+void pf_scrub_ip6(struct mbuf **, u_int8_t);
+#endif
+#ifdef __FreeBSD__
+#define DPFPRINTF(x) do { \
+ if (V_pf_status.debug >= PF_DEBUG_MISC) { \
+ printf("%s: ", __func__); \
+ printf x ; \
+ } \
+} while(0)
+#else
#define DPFPRINTF(x) do { \
if (pf_status.debug >= PF_DEBUG_MISC) { \
printf("%s: ", __func__); \
printf x ; \
} \
} while(0)
+#endif
/* Globals */
#ifdef __FreeBSD__
-uma_zone_t pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
-uma_zone_t pf_state_scrub_pl;
+VNET_DEFINE(uma_zone_t, pf_frent_pl);
+VNET_DEFINE(uma_zone_t, pf_frag_pl);
+VNET_DEFINE(uma_zone_t, pf_cache_pl);
+VNET_DEFINE(uma_zone_t, pf_cent_pl);
+VNET_DEFINE(uma_zone_t, pf_state_scrub_pl);
+
+VNET_DEFINE(int, pf_nfrents);
+#define V_pf_nfrents VNET(pf_nfrents)
+VNET_DEFINE(int, pf_ncache);
+#define V_pf_ncache VNET(pf_ncache)
#else
struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool pf_state_scrub_pl;
-#endif
int pf_nfrents, pf_ncache;
+#endif
void
pf_normalize_init(void)
@@ -173,9 +210,9 @@ pf_normalize_init(void)
* No high water mark support(It's hint not hard limit).
* uma_zone_set_max(pf_frag_pl, PFFRAG_FRAG_HIWAT);
*/
- uma_zone_set_max(pf_frent_pl, PFFRAG_FRENT_HIWAT);
- uma_zone_set_max(pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
- uma_zone_set_max(pf_cent_pl, PFFRAG_FRCENT_HIWAT);
+ uma_zone_set_max(V_pf_frent_pl, PFFRAG_FRENT_HIWAT);
+ uma_zone_set_max(V_pf_cache_pl, PFFRAG_FRCACHE_HIWAT);
+ uma_zone_set_max(V_pf_cent_pl, PFFRAG_FRCENT_HIWAT);
#else
pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
NULL);
@@ -194,8 +231,13 @@ pf_normalize_init(void)
pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
#endif
+#ifdef __FreeBSD__
+ TAILQ_INIT(&V_pf_fragqueue);
+ TAILQ_INIT(&V_pf_cachequeue);
+#else
TAILQ_INIT(&pf_fragqueue);
TAILQ_INIT(&pf_cachequeue);
+#endif
}
#ifdef __FreeBSD__
@@ -226,14 +268,20 @@ void
pf_purge_expired_fragments(void)
{
struct pf_fragment *frag;
+#ifdef __FreeBSD__
+ u_int32_t expire = time_second -
+ V_pf_default_rule.timeout[PFTM_FRAG];
+#else
u_int32_t expire = time_second -
pf_default_rule.timeout[PFTM_FRAG];
+#endif
- while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
#ifdef __FreeBSD__
+ while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
KASSERT((BUFFER_FRAGMENTS(frag)),
- ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
+ ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
#else
+ while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
KASSERT(BUFFER_FRAGMENTS(frag));
#endif
if (frag->fr_timeout > expire)
@@ -243,11 +291,12 @@ pf_purge_expired_fragments(void)
pf_free_fragment(frag);
}
- while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
#ifdef __FreeBSD__
+ while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
KASSERT((!BUFFER_FRAGMENTS(frag)),
- ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
+ ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
#else
+ while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
KASSERT(!BUFFER_FRAGMENTS(frag));
#endif
if (frag->fr_timeout > expire)
@@ -256,8 +305,8 @@ pf_purge_expired_fragments(void)
DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
pf_free_fragment(frag);
#ifdef __FreeBSD__
- KASSERT((TAILQ_EMPTY(&pf_cachequeue) ||
- TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag),
+ KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
+ TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
__FUNCTION__));
#else
@@ -277,22 +326,44 @@ pf_flush_fragments(void)
struct pf_fragment *frag;
int goal;
+#ifdef __FreeBSD__
+ goal = V_pf_nfrents * 9 / 10;
+ DPFPRINTF(("trying to free > %d frents\n",
+ V_pf_nfrents - goal));
+ while (goal < V_pf_nfrents) {
+#else
goal = pf_nfrents * 9 / 10;
DPFPRINTF(("trying to free > %d frents\n",
pf_nfrents - goal));
while (goal < pf_nfrents) {
+#endif
+#ifdef __FreeBSD__
+ frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
+#else
frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
+#endif
if (frag == NULL)
break;
pf_free_fragment(frag);
}
+#ifdef __FreeBSD__
+ goal = V_pf_ncache * 9 / 10;
+ DPFPRINTF(("trying to free > %d cache entries\n",
+ V_pf_ncache - goal));
+ while (goal < V_pf_ncache) {
+#else
goal = pf_ncache * 9 / 10;
DPFPRINTF(("trying to free > %d cache entries\n",
pf_ncache - goal));
while (goal < pf_ncache) {
+#endif
+#ifdef __FreeBSD__
+ frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
+#else
frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
+#endif
if (frag == NULL)
break;
pf_free_fragment(frag);
@@ -314,8 +385,13 @@ pf_free_fragment(struct pf_fragment *frag)
LIST_REMOVE(frent, fr_next);
m_freem(frent->fr_m);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_frent_pl, frent);
+ V_pf_nfrents--;
+#else
pool_put(&pf_frent_pl, frent);
pf_nfrents--;
+#endif
}
} else {
for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
@@ -327,15 +403,18 @@ pf_free_fragment(struct pf_fragment *frag)
LIST_FIRST(&frag->fr_cache)->fr_off >
frcache->fr_end),
("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
- " frcache->fr_end): %s", __FUNCTION__));
+ " frcache->fr_end): %s", __FUNCTION__));
+
+ pool_put(&V_pf_cent_pl, frcache);
+ V_pf_ncache--;
#else
KASSERT(LIST_EMPTY(&frag->fr_cache) ||
LIST_FIRST(&frag->fr_cache)->fr_off >
frcache->fr_end);
-#endif
pool_put(&pf_cent_pl, frcache);
pf_ncache--;
+#endif
}
}
@@ -364,11 +443,21 @@ pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
/* XXX Are we sure we want to update the timeout? */
frag->fr_timeout = time_second;
if (BUFFER_FRAGMENTS(frag)) {
+#ifdef __FreeBSD__
+ TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
+ TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
+#else
TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
+#endif
} else {
+#ifdef __FreeBSD__
+ TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
+ TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
+#else
TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
+#endif
}
}
@@ -381,13 +470,25 @@ void
pf_remove_fragment(struct pf_fragment *frag)
{
if (BUFFER_FRAGMENTS(frag)) {
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
+ TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
+ pool_put(&V_pf_frag_pl, frag);
+#else
RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
pool_put(&pf_frag_pl, frag);
+#endif
} else {
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
+ TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
+ pool_put(&V_pf_cache_pl, frag);
+#else
RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
pool_put(&pf_cache_pl, frag);
+#endif
}
}
@@ -418,10 +519,18 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
/* Create a new reassembly queue for this packet */
if (*frag == NULL) {
+#ifdef __FreeBSD__
+ *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
+#else
*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+#endif
if (*frag == NULL) {
pf_flush_fragments();
+#ifdef __FreeBSD__
+ *frag = pool_get(&V_pf_frag_pl, PR_NOWAIT);
+#else
*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
+#endif
if (*frag == NULL)
goto drop_fragment;
}
@@ -435,8 +544,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
(*frag)->fr_timeout = time_second;
LIST_INIT(&(*frag)->fr_queue);
+#ifdef __FreeBSD__
+ RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
+ TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
+#else
RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
+#endif
/* We do not have a previous fragment */
frep = NULL;
@@ -501,8 +615,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
next = LIST_NEXT(frea, fr_next);
m_freem(frea->fr_m);
LIST_REMOVE(frea, fr_next);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_frent_pl, frea);
+ V_pf_nfrents--;
+#else
pool_put(&pf_frent_pl, frea);
pf_nfrents--;
+#endif
}
insert:
@@ -562,26 +681,36 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
m2 = m->m_next;
m->m_next = NULL;
m_cat(m, m2);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_frent_pl, frent);
+ V_pf_nfrents--;
+#else
pool_put(&pf_frent_pl, frent);
pf_nfrents--;
+#endif
for (frent = next; frent != NULL; frent = next) {
next = LIST_NEXT(frent, fr_next);
m2 = frent->fr_m;
+#ifdef __FreeBSD__
+ pool_put(&V_pf_frent_pl, frent);
+ V_pf_nfrents--;
+#else
pool_put(&pf_frent_pl, frent);
pf_nfrents--;
+#endif
#ifdef __FreeBSD__
m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
#endif
m_cat(m, m2);
}
+
#ifdef __FreeBSD__
while (m->m_pkthdr.csum_data & 0xffff0000)
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
(m->m_pkthdr.csum_data >> 16);
#endif
-
ip->ip_src = (*frag)->fr_src;
ip->ip_dst = (*frag)->fr_dst;
@@ -608,8 +737,13 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
drop_fragment:
/* Oops - fail safe - drop packet */
+#ifdef __FreeBSD__
+ pool_put(&V_pf_frent_pl, frent);
+ V_pf_nfrents--;
+#else
pool_put(&pf_frent_pl, frent);
pf_nfrents--;
+#endif
m_freem(m);
return (NULL);
}
@@ -634,22 +768,40 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
/* Create a new range queue for this packet */
if (*frag == NULL) {
+#ifdef __FreeBSD__
+ *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
+#else
*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+#endif
if (*frag == NULL) {
pf_flush_fragments();
+#ifdef __FreeBSD__
+ *frag = pool_get(&V_pf_cache_pl, PR_NOWAIT);
+#else
*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
+#endif
if (*frag == NULL)
goto no_mem;
}
/* Get an entry for the queue */
+#ifdef __FreeBSD__
+ cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
+ if (cur == NULL) {
+ pool_put(&V_pf_cache_pl, *frag);
+#else
cur = pool_get(&pf_cent_pl, PR_NOWAIT);
if (cur == NULL) {
pool_put(&pf_cache_pl, *frag);
+#endif
*frag = NULL;
goto no_mem;
}
+#ifdef __FreeBSD__
+ V_pf_ncache++;
+#else
pf_ncache++;
+#endif
(*frag)->fr_flags = PFFRAG_NOBUFFER;
(*frag)->fr_max = 0;
@@ -664,8 +816,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
LIST_INIT(&(*frag)->fr_cache);
LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
+#ifdef __FreeBSD__
+ RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
+ TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
+#else
RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
+#endif
DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
@@ -784,10 +941,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
max));
+#ifdef __FreeBSD__
+ cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
+#else
cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+#endif
if (cur == NULL)
goto no_mem;
+#ifdef __FreeBSD__
+ V_pf_ncache++;
+#else
pf_ncache++;
+#endif
cur->fr_off = off;
cur->fr_end = max;
@@ -844,10 +1009,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
h->ip_id, -aftercut, off, max, fra->fr_off,
fra->fr_end));
+#ifdef __FreeBSD__
+ cur = pool_get(&V_pf_cent_pl, PR_NOWAIT);
+#else
cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+#endif
if (cur == NULL)
goto no_mem;
+#ifdef __FreeBSD__
+ V_pf_ncache++;
+#else
pf_ncache++;
+#endif
cur->fr_off = off;
cur->fr_end = max;
@@ -865,8 +1038,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
max, fra->fr_off, fra->fr_end));
fra->fr_off = cur->fr_off;
LIST_REMOVE(cur, fr_next);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_cent_pl, cur);
+ V_pf_ncache--;
+#else
pool_put(&pf_cent_pl, cur);
pf_ncache--;
+#endif
cur = NULL;
} else if (frp && fra->fr_off <= frp->fr_end) {
@@ -883,8 +1061,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
max, fra->fr_off, fra->fr_end));
fra->fr_off = frp->fr_off;
LIST_REMOVE(frp, fr_next);
+#ifdef __FreeBSD__
+ pool_put(&V_pf_cent_pl, frp);
+ V_pf_ncache--;
+#else
pool_put(&pf_cent_pl, frp);
pf_ncache--;
+#endif
frp = NULL;
}
@@ -951,6 +1134,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
return (NULL);
}
+#ifdef INET
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
struct pf_pdesc *pd)
@@ -966,6 +1150,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
u_int16_t max;
int ip_len;
int ip_off;
+ int tag = -1;
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
while (r != NULL) {
@@ -980,12 +1165,18 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
r = r->skip[PF_SKIP_PROTO].ptr;
else if (PF_MISMATCHAW(&r->src.addr,
(struct pf_addr *)&h->ip_src.s_addr, AF_INET,
- r->src.neg, kif))
+ r->src.neg, kif, M_GETFIB(m)))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (PF_MISMATCHAW(&r->dst.addr,
(struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
- r->dst.neg, NULL))
+ r->dst.neg, NULL, M_GETFIB(m)))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
+#ifdef __FreeBSD__
+ else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag))
+#else
+ else if (r->match_tag && !pf_match_tag(m, r, &tag))
+#endif
+ r = TAILQ_NEXT(r, entries);
else
break;
}
@@ -1044,7 +1235,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
/* Fully buffer all of the fragments */
+#ifdef __FreeBSD__
+ frag = pf_find_fragment(h, &V_pf_frag_tree);
+#else
frag = pf_find_fragment(h, &pf_frag_tree);
+#endif
/* Check if we saw the last fragment already */
if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
@@ -1052,12 +1247,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
goto bad;
/* Get an entry for the fragment queue */
+#ifdef __FreeBSD__
+ frent = pool_get(&V_pf_frent_pl, PR_NOWAIT);
+#else
frent = pool_get(&pf_frent_pl, PR_NOWAIT);
+#endif
if (frent == NULL) {
REASON_SET(reason, PFRES_MEMORY);
return (PF_DROP);
}
+#ifdef __FreeBSD__
+ V_pf_nfrents++;
+#else
pf_nfrents++;
+#endif
frent->fr_ip = h;
frent->fr_m = m;
@@ -1088,7 +1291,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
/* non-buffering fragment cache (drops or masks overlaps) */
int nomem = 0;
+#ifdef __FreeBSD__
if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
+#else
+ if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
+#endif
/*
* Already passed the fragment cache in the
* input direction. If we continued, it would
@@ -1097,7 +1304,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
goto fragment_pass;
}
+#ifdef __FreeBSD__
+ frag = pf_find_fragment(h, &V_pf_cache_tree);
+#else
frag = pf_find_fragment(h, &pf_cache_tree);
+#endif
/* Check if we saw the last fragment already */
if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
@@ -1128,7 +1339,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
}
#endif
if (dir == PF_IN)
+#ifdef __FreeBSD__
pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
+#else
+ m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
+#endif
if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
goto drop;
@@ -1144,33 +1359,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
}
- /* Enforce a minimum ttl, may cause endless packet loops */
- if (r->min_ttl && h->ip_ttl < r->min_ttl) {
- u_int16_t ip_ttl = h->ip_ttl;
-
- h->ip_ttl = r->min_ttl;
- h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
- }
-
- if (r->rule_flag & PFRULE_RANDOMID) {
- u_int16_t ip_id = h->ip_id;
-
- h->ip_id = ip_randomid();
- h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
- }
- if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
- pd->flags |= PFDESC_IP_REAS;
-
- return (PF_PASS);
+ /* not missing a return here */
fragment_pass:
- /* Enforce a minimum ttl, may cause endless packet loops */
- if (r->min_ttl && h->ip_ttl < r->min_ttl) {
- u_int16_t ip_ttl = h->ip_ttl;
+ pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
- h->ip_ttl = r->min_ttl;
- h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
- }
if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
pd->flags |= PFDESC_IP_REAS;
return (PF_PASS);
@@ -1200,6 +1393,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
return (PF_DROP);
}
+#endif
#ifdef INET6
int
@@ -1236,11 +1430,11 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
#endif
else if (PF_MISMATCHAW(&r->src.addr,
(struct pf_addr *)&h->ip6_src, AF_INET6,
- r->src.neg, kif))
+ r->src.neg, kif, M_GETFIB(m)))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (PF_MISMATCHAW(&r->dst.addr,
(struct pf_addr *)&h->ip6_dst, AF_INET6,
- r->dst.neg, NULL))
+ r->dst.neg, NULL, M_GETFIB(m)))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else
break;
@@ -1339,9 +1533,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
goto shortpkt;
- /* Enforce a minimum ttl, may cause endless packet loops */
- if (r->min_ttl && h->ip6_hlim < r->min_ttl)
- h->ip6_hlim = r->min_ttl;
+ pf_scrub_ip6(&m, r->min_ttl);
return (PF_PASS);
@@ -1403,13 +1595,13 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
- r->src.neg, kif))
+ r->src.neg, kif, M_GETFIB(m)))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (r->src.port_op && !pf_match_port(r->src.port_op,
r->src.port[0], r->src.port[1], th->th_sport))
r = r->skip[PF_SKIP_SRC_PORT].ptr;
else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
- r->dst.neg, NULL))
+ r->dst.neg, NULL, M_GETFIB(m)))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
r->dst.port[0], r->dst.port[1], th->th_dport))
@@ -1479,12 +1671,16 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
}
/* Process options */
- if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
+ if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
rewrite = 1;
/* copy back packet headers if we sanitized */
if (rewrite)
+#ifdef __FreeBSD__
m_copyback(m, off, sizeof(*th), (caddr_t)th);
+#else
+ m_copyback(m, off, sizeof(*th), th);
+#endif
return (PF_PASS);
@@ -1506,11 +1702,13 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
#ifdef __FreeBSD__
KASSERT((src->scrub == NULL),
("pf_normalize_tcp_init: src->scrub != NULL"));
+
+ src->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT);
#else
KASSERT(src->scrub == NULL);
-#endif
src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+#endif
if (src->scrub == NULL)
return (1);
bzero(src->scrub, sizeof(*src->scrub));
@@ -1586,10 +1784,17 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
+#ifdef __FreeBSD__
+ if (state->src.scrub)
+ pool_put(&V_pf_state_scrub_pl, state->src.scrub);
+ if (state->dst.scrub)
+ pool_put(&V_pf_state_scrub_pl, state->dst.scrub);
+#else
if (state->src.scrub)
pool_put(&pf_state_scrub_pl, state->src.scrub);
if (state->dst.scrub)
pool_put(&pf_state_scrub_pl, state->dst.scrub);
+#endif
/* Someday... flush the TCP segment reassembly descriptors. */
}
@@ -1667,7 +1872,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
if (got_ts) {
/* Huh? Multiple timestamps!? */
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
DPFPRINTF(("multiple TS??"));
pf_print_state(state);
printf("\n");
@@ -1736,7 +1945,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
(uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
time_second - state->creation > TS_MAX_CONN)) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
DPFPRINTF(("src idled out of PAWS\n"));
pf_print_state(state);
printf("\n");
@@ -1746,7 +1959,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
}
if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
DPFPRINTF(("dst idled out of PAWS\n"));
pf_print_state(state);
printf("\n");
@@ -1807,7 +2024,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
* network conditions that re-order packets and
* cause our view of them to decrease. For now the
* only lowerbound we can safely determine is that
- * the TS echo will never be less than the orginal
+ * the TS echo will never be less than the original
* TS. XXX There is probably a better lowerbound.
* Remove TS_MAX_CONN with better lowerbound check.
* tescr >= other original TS
@@ -1830,7 +2047,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
* this packet.
*/
if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
+#ifdef __FreeBSD__
+ ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
+#else
ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
+#endif
/* Calculate max ticks since the last timestamp */
@@ -1838,7 +2059,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
#define TS_MICROSECS 1000000 /* microseconds per second */
#ifdef __FreeBSD__
#ifndef timersub
-#define timersub(tvp, uvp, vvp) \
+#define timersub(tvp, uvp, vvp) \
do { \
(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
@@ -1895,7 +2116,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
"\n", dst->scrub->pfss_tsval,
dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
#endif
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
pf_print_state(state);
pf_print_flags(th->th_flags);
printf("\n");
@@ -1943,7 +2168,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
* Hey! Someone tried to sneak a packet in. Or the
* stack changed its RFC1323 behavior?!?!
*/
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+#else
if (pf_status.debug >= PF_DEBUG_MISC) {
+#endif
DPFPRINTF(("Did not receive expected RFC1323 "
"timestamp\n"));
pf_print_state(state);
@@ -1970,7 +2199,11 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
src->scrub->pfss_flags |= PFSS_DATA_TS;
else {
src->scrub->pfss_flags |= PFSS_DATA_NOTS;
+#ifdef __FreeBSD__
+ if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
+#else
if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
+#endif
(dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
/* Don't warn if other host rejected RFC1323 */
DPFPRINTF(("Broken RFC1323 stack did not "
@@ -2018,17 +2251,25 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
int
pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
- int off)
+ int off, sa_family_t af)
{
u_int16_t *mss;
int thoff;
int opt, cnt, optlen = 0;
int rewrite = 0;
- u_char *optp;
+#ifdef __FreeBSD__
+ u_char opts[TCP_MAXOLEN];
+#else
+ u_char opts[MAX_TCPOPTLEN];
+#endif
+ u_char *optp = opts;
thoff = th->th_off << 2;
cnt = thoff - sizeof(struct tcphdr);
- optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
+
+ if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
+ NULL, NULL, af))
+ return (rewrite);
for (; cnt > 0; cnt -= optlen, optp += optlen) {
opt = optp[0];
@@ -2058,5 +2299,63 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
}
}
+ if (rewrite)
+ m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
+
return (rewrite);
}
+
+void
+pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
+{
+ struct mbuf *m = *m0;
+ struct ip *h = mtod(m, struct ip *);
+
+ /* Clear IP_DF if no-df was requested */
+ if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
+ u_int16_t ip_off = h->ip_off;
+
+ h->ip_off &= htons(~IP_DF);
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+ }
+
+ /* Enforce a minimum ttl, may cause endless packet loops */
+ if (min_ttl && h->ip_ttl < min_ttl) {
+ u_int16_t ip_ttl = h->ip_ttl;
+
+ h->ip_ttl = min_ttl;
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
+ }
+
+ /* Enforce tos */
+ if (flags & PFRULE_SET_TOS) {
+ u_int16_t ov, nv;
+
+ ov = *(u_int16_t *)h;
+ h->ip_tos = tos;
+ nv = *(u_int16_t *)h;
+
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
+ }
+
+ /* random-id, but not for fragments */
+ if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
+ u_int16_t ip_id = h->ip_id;
+
+ h->ip_id = ip_randomid();
+ h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
+ }
+}
+
+#ifdef INET6
+void
+pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
+{
+ struct mbuf *m = *m0;
+ struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+
+ /* Enforce a minimum ttl, may cause endless packet loops */
+ if (min_ttl && h->ip6_hlim < min_ttl)
+ h->ip6_hlim = min_ttl;
+}
+#endif
diff --git a/freebsd/sys/contrib/pf/net/pf_osfp.c b/freebsd/sys/contrib/pf/net/pf_osfp.c
index 7ff79c00..9ff90ad0 100644
--- a/freebsd/sys/contrib/pf/net/pf_osfp.c
+++ b/freebsd/sys/contrib/pf/net/pf_osfp.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */
+/* $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $ */
/*
* Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org>
@@ -27,7 +27,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#ifdef _KERNEL
-# include <sys/systm.h>
+#include <sys/systm.h>
+#ifndef __FreeBSD__
+#include <sys/pool.h>
+#endif
#endif /* _KERNEL */
#include <sys/mbuf.h>
@@ -44,10 +47,17 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_var.h>
#endif
+
#ifdef _KERNEL
-# define DPFPRINTF(format, x...) \
+#ifdef __FreeBSD__
+#define DPFPRINTF(format, x...) \
+ if (V_pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+#else
+#define DPFPRINTF(format, x...) \
if (pf_status.debug >= PF_DEBUG_NOISY) \
printf(format , ##x)
+#endif
#ifdef __FreeBSD__
typedef uma_zone_t pool_t;
#else
@@ -57,33 +67,43 @@ typedef struct pool pool_t;
#else
/* Userland equivalents so we can lend code to tcpdump et al. */
-# include <arpa/inet.h>
-# include <errno.h>
-# include <stdio.h>
-# include <stdlib.h>
-# include <string.h>
-# include <netdb.h>
-# define pool_t int
-# define pool_get(pool, flags) malloc(*(pool))
-# define pool_put(pool, item) free(item)
-# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size)
-
-# ifdef __FreeBSD__
-# define NTOHS(x) (x) = ntohs((u_int16_t)(x))
-# endif
-
-# ifdef PFDEBUG
-# include <sys/stdarg.h>
-# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
-# else
-# define DPFPRINTF(format, x...) ((void)0)
-# endif /* PFDEBUG */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#define pool_t int
+#define pool_get(pool, flags) malloc(*(pool))
+#define pool_put(pool, item) free(item)
+#define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size)
+
+#ifdef __FreeBSD__
+#define NTOHS(x) (x) = ntohs((u_int16_t)(x))
+#endif
+
+#ifdef PFDEBUG
+#include <sys/stdarg.h>
+#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
+#else
+#define DPFPRINTF(format, x...) ((void)0)
+#endif /* PFDEBUG */
#endif /* _KERNEL */
+#ifdef __FreeBSD__
+SLIST_HEAD(pf_osfp_list, pf_os_fingerprint);
+VNET_DEFINE(struct pf_osfp_list, pf_osfp_list);
+#define V_pf_osfp_list VNET(pf_osfp_list)
+VNET_DEFINE(pool_t, pf_osfp_entry_pl);
+#define pf_osfp_entry_pl VNET(pf_osfp_entry_pl)
+VNET_DEFINE(pool_t, pf_osfp_pl);
+#define pf_osfp_pl VNET(pf_osfp_pl)
+#else
SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list;
pool_t pf_osfp_entry_pl;
pool_t pf_osfp_pl;
+#endif
struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *,
struct pf_os_fingerprint *, u_int8_t);
@@ -274,7 +294,11 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st
(fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
fp.fp_wscale);
+#ifdef __FreeBSD__
+ if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp,
+#else
if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp,
+#endif
PF_OSFP_MAXTTL_OFFSET)))
return (&fpresult->fp_oses);
return (NULL);
@@ -320,20 +344,23 @@ pf_osfp_initialize(void)
{
#if defined(__FreeBSD__) && defined(_KERNEL)
int error = ENOMEM;
-
+
do {
pf_osfp_entry_pl = pf_osfp_pl = NULL;
UMA_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen");
UMA_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp");
error = 0;
} while(0);
+
+ SLIST_INIT(&V_pf_osfp_list);
#else
pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0,
"pfosfpen", &pool_allocator_nointr);
pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0,
"pfosfp", &pool_allocator_nointr);
-#endif
SLIST_INIT(&pf_osfp_list);
+#endif
+
#ifdef __FreeBSD__
#ifdef _KERNEL
return (error);
@@ -347,6 +374,7 @@ pf_osfp_initialize(void)
void
pf_osfp_cleanup(void)
{
+
UMA_DESTROY(pf_osfp_entry_pl);
UMA_DESTROY(pf_osfp_pl);
}
@@ -359,8 +387,13 @@ pf_osfp_flush(void)
struct pf_os_fingerprint *fp;
struct pf_osfp_entry *entry;
+#ifdef __FreeBSD__
+ while ((fp = SLIST_FIRST(&V_pf_osfp_list))) {
+ SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next);
+#else
while ((fp = SLIST_FIRST(&pf_osfp_list))) {
SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next);
+#endif
while ((entry = SLIST_FIRST(&fp->fp_oses))) {
SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry);
pool_put(&pf_osfp_entry_pl, entry);
@@ -387,6 +420,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc)
fpadd.fp_wscale = fpioc->fp_wscale;
fpadd.fp_ttl = fpioc->fp_ttl;
+#if 0 /* XXX RYAN wants to fix logging */
DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d "
"(TS=%s,M=%s%d,W=%s%d) %x\n",
fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm,
@@ -410,17 +444,31 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc)
(fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
fpadd.fp_wscale,
fpioc->fp_os.fp_os);
+#endif
-
+#ifdef __FreeBSD__
+ if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) {
+#else
if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) {
+#endif
SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os))
return (EEXIST);
}
- if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL)
+ if ((entry = pool_get(&pf_osfp_entry_pl,
+#ifdef __FreeBSD__
+ PR_NOWAIT)) == NULL)
+#else
+ PR_WAITOK|PR_LIMITFAIL)) == NULL)
+#endif
return (ENOMEM);
} else {
- if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL)
+ if ((fp = pool_get(&pf_osfp_pl,
+#ifdef __FreeBSD__
+ PR_NOWAIT)) == NULL)
+#else
+ PR_WAITOK|PR_LIMITFAIL)) == NULL)
+#endif
return (ENOMEM);
memset(fp, 0, sizeof(*fp));
fp->fp_tcpopts = fpioc->fp_tcpopts;
@@ -432,11 +480,20 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc)
fp->fp_wscale = fpioc->fp_wscale;
fp->fp_ttl = fpioc->fp_ttl;
SLIST_INIT(&fp->fp_oses);
- if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) {
+ if ((entry = pool_get(&pf_osfp_entry_pl,
+#ifdef __FreeBSD__
+ PR_NOWAIT)) == NULL) {
+#else
+ PR_WAITOK|PR_LIMITFAIL)) == NULL) {
+#endif
pool_put(&pf_osfp_pl, fp);
return (ENOMEM);
}
+#ifdef __FreeBSD__
+ pf_osfp_insert(&V_pf_osfp_list, fp);
+#else
pf_osfp_insert(&pf_osfp_list, fp);
+#endif
}
memcpy(entry, &fpioc->fp_os, sizeof(*entry));
@@ -462,7 +519,7 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find,
{
struct pf_os_fingerprint *f;
-#define MATCH_INT(_MOD, _DC, _field) \
+#define MATCH_INT(_MOD, _DC, _field) \
if ((f->fp_flags & _DC) == 0) { \
if ((f->fp_flags & _MOD) == 0) { \
if (f->_field != find->_field) \
@@ -490,10 +547,11 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find,
if (find->fp_mss == 0)
continue;
-/* Some "smart" NAT devices and DSL routers will tweak the MSS size and
+/*
+ * Some "smart" NAT devices and DSL routers will tweak the MSS size and
* will set it to whatever is suitable for the link type.
*/
-#define SMART_MSS 1460
+#define SMART_MSS 1460
if ((find->fp_wsize % find->fp_mss ||
find->fp_wsize / find->fp_mss !=
f->fp_wsize) &&
@@ -505,8 +563,8 @@ pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find,
if (find->fp_mss == 0)
continue;
-#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr))
-#define SMART_MTU (SMART_MSS + MTUOFF)
+#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr))
+#define SMART_MTU (SMART_MSS + MTUOFF)
if ((find->fp_wsize % (find->fp_mss + MTUOFF) ||
find->fp_wsize / (find->fp_mss + MTUOFF) !=
f->fp_wsize) &&
@@ -577,7 +635,11 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc)
memset(fpioc, 0, sizeof(*fpioc));
+#ifdef __FreeBSD__
+ SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) {
+#else
SLIST_FOREACH(fp, &pf_osfp_list, fp_next) {
+#endif
SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
if (i++ == num) {
fpioc->fp_mss = fp->fp_mss;
@@ -604,19 +666,27 @@ pf_osfp_validate(void)
{
struct pf_os_fingerprint *f, *f2, find;
+#ifdef __FreeBSD__
+ SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) {
+#else
SLIST_FOREACH(f, &pf_osfp_list, fp_next) {
+#endif
memcpy(&find, f, sizeof(find));
/* We do a few MSS/th_win percolations to make things unique */
if (find.fp_mss == 0)
find.fp_mss = 128;
if (f->fp_flags & PF_OSFP_WSIZE_MSS)
- find.fp_wsize *= find.fp_mss, 1;
+ find.fp_wsize *= find.fp_mss;
else if (f->fp_flags & PF_OSFP_WSIZE_MTU)
find.fp_wsize *= (find.fp_mss + 40);
else if (f->fp_flags & PF_OSFP_WSIZE_MOD)
find.fp_wsize *= 2;
+#ifdef __FreeBSD__
+ if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) {
+#else
if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) {
+#endif
if (f2)
printf("Found \"%s %s %s\" instead of "
"\"%s %s %s\"\n",
diff --git a/freebsd/sys/contrib/pf/net/pf_ruleset.c b/freebsd/sys/contrib/pf/net/pf_ruleset.c
index 5e018b48..c7ab6178 100644
--- a/freebsd/sys/contrib/pf/net/pf_ruleset.c
+++ b/freebsd/sys/contrib/pf/net/pf_ruleset.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */
+/* $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -63,48 +63,55 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
-# define DPFPRINTF(format, x...) \
- if (pf_status.debug >= PF_DEBUG_NOISY) \
+#ifdef __FreeBSD__
+#define DPFPRINTF(format, x...) \
+ if (V_pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+#else
+#define DPFPRINTF(format, x...) \
+ if (pf_status.debug >= PF_DEBUG_NOISY) \
printf(format , ##x)
+#endif
#ifdef __FreeBSD__
-#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT)
+#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO)
#else
-#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK)
+#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO)
#endif
#define rs_free(x) free(x, M_TEMP)
#else
/* Userland equivalents so we can lend code to pfctl et al. */
-# include <arpa/inet.h>
-# include <errno.h>
-# include <stdio.h>
-# include <stdlib.h>
-# include <string.h>
-# define rs_malloc(x) malloc(x)
-# define rs_free(x) free(x)
-
-# ifdef PFDEBUG
-# include <sys/stdarg.h>
-# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
-# else
-# define DPFPRINTF(format, x...) ((void)0)
-# endif /* PFDEBUG */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define rs_malloc(x) calloc(1, x)
+#define rs_free(x) free(x)
+
+#ifdef PFDEBUG
+#include <sys/stdarg.h>
+#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
+#else
+#define DPFPRINTF(format, x...) ((void)0)
+#endif /* PFDEBUG */
#endif /* _KERNEL */
+#if defined(__FreeBSD__) && !defined(_KERNEL)
+#undef V_pf_anchors
+#define V_pf_anchors pf_anchors
+
+#undef pf_main_ruleset
+#define pf_main_ruleset pf_main_anchor.ruleset
+#endif
+#if defined(__FreeBSD__) && defined(_KERNEL)
+VNET_DEFINE(struct pf_anchor_global, pf_anchors);
+VNET_DEFINE(struct pf_anchor, pf_main_anchor);
+#else
struct pf_anchor_global pf_anchors;
struct pf_anchor pf_main_anchor;
-
-#ifndef __FreeBSD__
-/* XXX: hum? */
-int pf_get_ruleset_number(u_int8_t);
-void pf_init_ruleset(struct pf_ruleset *);
-int pf_anchor_setup(struct pf_rule *,
- const struct pf_ruleset *, const char *);
-int pf_anchor_copyout(const struct pf_ruleset *,
- const struct pf_rule *, struct pfioc_rule *);
-void pf_anchor_remove(struct pf_rule *);
#endif
static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
@@ -170,9 +177,14 @@ pf_find_anchor(const char *path)
struct pf_anchor *key, *found;
key = (struct pf_anchor *)rs_malloc(sizeof(*key));
- memset(key, 0, sizeof(*key));
+ if (key == NULL)
+ return (NULL);
strlcpy(key->path, path, sizeof(key->path));
+#ifdef __FreeBSD__
+ found = RB_FIND(pf_anchor_global, &V_pf_anchors, key);
+#else
found = RB_FIND(pf_anchor_global, &pf_anchors, key);
+#endif
rs_free(key);
return (found);
}
@@ -212,7 +224,8 @@ pf_find_or_create_ruleset(const char *path)
if (ruleset != NULL)
return (ruleset);
p = (char *)rs_malloc(MAXPATHLEN);
- bzero(p, MAXPATHLEN);
+ if (p == NULL)
+ return (NULL);
strlcpy(p, path, MAXPATHLEN);
while (parent == NULL && (q = strrchr(p, '/')) != NULL) {
*q = 0;
@@ -244,7 +257,6 @@ pf_find_or_create_ruleset(const char *path)
rs_free(p);
return (NULL);
}
- memset(anchor, 0, sizeof(*anchor));
RB_INIT(&anchor->children);
strlcpy(anchor->name, q, sizeof(anchor->name));
if (parent != NULL) {
@@ -253,7 +265,11 @@ pf_find_or_create_ruleset(const char *path)
strlcat(anchor->path, "/", sizeof(anchor->path));
}
strlcat(anchor->path, anchor->name, sizeof(anchor->path));
+#ifdef __FreeBSD__
+ if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) !=
+#else
if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) !=
+#endif
NULL) {
printf("pf_find_or_create_ruleset: RB_INSERT1 "
"'%s' '%s' collides with '%s' '%s'\n",
@@ -270,7 +286,11 @@ pf_find_or_create_ruleset(const char *path)
"RB_INSERT2 '%s' '%s' collides with "
"'%s' '%s'\n", anchor->path, anchor->name,
dup->path, dup->name);
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_anchor_global, &V_pf_anchors,
+#else
RB_REMOVE(pf_anchor_global, &pf_anchors,
+#endif
anchor);
rs_free(anchor);
rs_free(p);
@@ -306,7 +326,11 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset)
!TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) ||
ruleset->rules[i].inactive.open)
return;
+#ifdef __FreeBSD__
+ RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor);
+#else
RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor);
+#endif
if ((parent = ruleset->anchor->parent) != NULL)
RB_REMOVE(pf_anchor_node, &parent->children,
ruleset->anchor);
@@ -330,7 +354,8 @@ pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s,
if (!name[0])
return (0);
path = (char *)rs_malloc(MAXPATHLEN);
- bzero(path, MAXPATHLEN);
+ if (path == NULL)
+ return (1);
if (name[0] == '/')
strlcpy(path, name + 1, MAXPATHLEN);
else {
@@ -388,7 +413,8 @@ pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r,
int i;
a = (char *)rs_malloc(MAXPATHLEN);
- bzero(a, MAXPATHLEN);
+ if (a == NULL)
+ return (1);
if (rs->anchor == NULL)
a[0] = 0;
else
diff --git a/freebsd/sys/contrib/pf/net/pf_table.c b/freebsd/sys/contrib/pf/net/pf_table.c
index ee13e851..14e75dda 100644
--- a/freebsd/sys/contrib/pf/net/pf_table.c
+++ b/freebsd/sys/contrib/pf/net/pf_table.c
@@ -1,6 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */
+/* $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $ */
/*
* Copyright (c) 2002 Cedric Berger
@@ -45,10 +45,10 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/rwlock.h>
#ifdef __FreeBSD__
#include <sys/malloc.h>
+#else
+#include <sys/pool.h>
#endif
#include <net/if.h>
@@ -57,10 +57,9 @@ __FBSDID("$FreeBSD$");
#ifndef __FreeBSD__
#include <netinet/ip_ipsp.h>
#endif
-
#include <net/pfvar.h>
-#define ACCEPT_FLAGS(oklist) \
+#define ACCEPT_FLAGS(flags, oklist) \
do { \
if ((flags & ~(oklist)) & \
PFR_FLAG_ALLMASK) \
@@ -92,28 +91,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len)
return (r);
}
-#define COPYIN(from, to, size) \
+#define COPYIN(from, to, size, flags) \
((flags & PFR_FLAG_USERIOCTL) ? \
_copyin((from), (to), (size)) : \
(bcopy((from), (to), (size)), 0))
-#define COPYOUT(from, to, size) \
+#define COPYOUT(from, to, size, flags) \
((flags & PFR_FLAG_USERIOCTL) ? \
_copyout((from), (to), (size)) : \
(bcopy((from), (to), (size)), 0))
#else
-
-#define COPYIN(from, to, size) \
+#define COPYIN(from, to, size, flags) \
((flags & PFR_FLAG_USERIOCTL) ? \
copyin((from), (to), (size)) : \
(bcopy((from), (to), (size)), 0))
-#define COPYOUT(from, to, size) \
+#define COPYOUT(from, to, size, flags) \
((flags & PFR_FLAG_USERIOCTL) ? \
copyout((from), (to), (size)) : \
(bcopy((from), (to), (size)), 0))
-
#endif
#define FILLIN_SIN(sin, addr) \
@@ -130,26 +127,26 @@ _copyout(const void *uaddr, void *kaddr, size_t len)
(sin6).sin6_addr = (addr); \
} while (0)
-#define SWAP(type, a1, a2) \
+#define SWAP(type, a1, a2) \
do { \
type tmp = a1; \
a1 = a2; \
a2 = tmp; \
} while (0)
-#define SUNION2PF(su, af) (((af)==AF_INET) ? \
+#define SUNION2PF(su, af) (((af)==AF_INET) ? \
(struct pf_addr *)&(su)->sin.sin_addr : \
(struct pf_addr *)&(su)->sin6.sin6_addr)
#define AF_BITS(af) (((af)==AF_INET)?32:128)
#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af))
#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))
-#define KENTRY_RNF_ROOT(ke) \
+#define KENTRY_RNF_ROOT(ke) \
((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0)
-#define NO_ADDRESSES (-1)
-#define ENQUEUE_UNMARKED_ONLY (1)
-#define INVERT_NEG_FLAG (1)
+#define NO_ADDRESSES (-1)
+#define ENQUEUE_UNMARKED_ONLY (1)
+#define INVERT_NEG_FLAG (1)
struct pfr_walktree {
enum pfrw_op {
@@ -171,28 +168,36 @@ struct pfr_walktree {
int pfrw_free;
int pfrw_flags;
};
-#define pfrw_addr pfrw_1.pfrw1_addr
-#define pfrw_astats pfrw_1.pfrw1_astats
-#define pfrw_workq pfrw_1.pfrw1_workq
-#define pfrw_kentry pfrw_1.pfrw1_kentry
-#define pfrw_dyn pfrw_1.pfrw1_dyn
-#define pfrw_cnt pfrw_free
+#define pfrw_addr pfrw_1.pfrw1_addr
+#define pfrw_astats pfrw_1.pfrw1_astats
+#define pfrw_workq pfrw_1.pfrw1_workq
+#define pfrw_kentry pfrw_1.pfrw1_kentry
+#define pfrw_dyn pfrw_1.pfrw1_dyn
+#define pfrw_cnt pfrw_free
-#define senderr(e) do { rv = (e); goto _bad; } while (0)
+#define senderr(e) do { rv = (e); goto _bad; } while (0)
#ifdef __FreeBSD__
-uma_zone_t pfr_ktable_pl;
-uma_zone_t pfr_kentry_pl;
-uma_zone_t pfr_kentry_pl2;
+VNET_DEFINE(uma_zone_t, pfr_ktable_pl);
+VNET_DEFINE(uma_zone_t, pfr_kentry_pl);
+VNET_DEFINE(uma_zone_t, pfr_kcounters_pl);
+VNET_DEFINE(struct sockaddr_in, pfr_sin);
+#define V_pfr_sin VNET(pfr_sin)
+VNET_DEFINE(struct sockaddr_in6, pfr_sin6);
+#define V_pfr_sin6 VNET(pfr_sin6)
+VNET_DEFINE(union sockaddr_union, pfr_mask);
+#define V_pfr_mask VNET(pfr_mask)
+VNET_DEFINE(struct pf_addr, pfr_ffaddr);
+#define V_pfr_ffaddr VNET(pfr_ffaddr)
#else
struct pool pfr_ktable_pl;
struct pool pfr_kentry_pl;
-struct pool pfr_kentry_pl2;
-#endif
+struct pool pfr_kcounters_pl;
struct sockaddr_in pfr_sin;
struct sockaddr_in6 pfr_sin6;
union sockaddr_union pfr_mask;
struct pf_addr pfr_ffaddr;
+#endif
void pfr_copyout_addr(struct pfr_addr *,
struct pfr_kentry *ke);
@@ -228,7 +233,7 @@ void pfr_setflags_ktable(struct pfr_ktable *, int);
void pfr_clstats_ktables(struct pfr_ktableworkq *, long,
int);
void pfr_clstats_ktable(struct pfr_ktable *, long, int);
-struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int);
+struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int, int);
void pfr_destroy_ktables(struct pfr_ktableworkq *, int);
void pfr_destroy_ktable(struct pfr_ktable *, int);
int pfr_ktable_compare(struct pfr_ktable *,
@@ -253,12 +258,11 @@ pfr_initialize(void)
{
#ifndef __FreeBSD__
pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0,
- "pfrktable", &pool_allocator_oldnointr);
+ "pfrktable", NULL);
pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0,
- "pfrkentry", &pool_allocator_oldnointr);
- pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0,
- "pfrkentry2", NULL);
-#endif
+ "pfrkentry", NULL);
+ pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0,
+ "pfrkcounters", NULL);
pfr_sin.sin_len = sizeof(pfr_sin);
pfr_sin.sin_family = AF_INET;
@@ -266,6 +270,14 @@ pfr_initialize(void)
pfr_sin6.sin6_family = AF_INET6;
memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr));
+#else
+ V_pfr_sin.sin_len = sizeof(V_pfr_sin);
+ V_pfr_sin.sin_family = AF_INET;
+ V_pfr_sin6.sin6_len = sizeof(V_pfr_sin6);
+ V_pfr_sin6.sin6_family = AF_INET6;
+
+ memset(&V_pfr_ffaddr, 0xff, sizeof(V_pfr_ffaddr));
+#endif
}
int
@@ -275,7 +287,7 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
struct pfr_kentryworkq workq;
int s;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY);
if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -286,7 +298,6 @@ pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
pfr_enqueue_addrs(kt, &workq, ndel, 0);
if (!(flags & PFR_FLAG_DUMMY)) {
- s = 0;
if (flags & PFR_FLAG_ATOMIC)
s = splsoftnet();
pfr_remove_kentries(kt, &workq);
@@ -309,10 +320,11 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pfr_kentryworkq workq;
struct pfr_kentry *p, *q;
struct pfr_addr ad;
- int i, rv, s = 0, xadd = 0;
+ int i, rv, s, xadd = 0;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_FEEDBACK);
if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -320,12 +332,13 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
return (ESRCH);
if (kt->pfrkt_flags & PFR_TFLAG_CONST)
return (EPERM);
- tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
+ !(flags & PFR_FLAG_USERIOCTL));
if (tmpkt == NULL)
return (ENOMEM);
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
senderr(EFAULT);
if (pfr_validate_addr(&ad))
senderr(EINVAL);
@@ -342,7 +355,8 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
ad.pfra_fback = PFR_FB_NONE;
}
if (p == NULL && q == NULL) {
- p = pfr_create_kentry(&ad, 0);
+ p = pfr_create_kentry(&ad,
+ !(flags & PFR_FLAG_USERIOCTL));
if (p == NULL)
senderr(ENOMEM);
if (pfr_route_kentry(tmpkt, p)) {
@@ -353,10 +367,9 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
xadd++;
}
}
- if (flags & PFR_FLAG_FEEDBACK) {
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (flags & PFR_FLAG_FEEDBACK)
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
senderr(EFAULT);
- }
}
pfr_clean_node_mask(tmpkt, &workq);
if (!(flags & PFR_FLAG_DUMMY)) {
@@ -388,9 +401,10 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pfr_kentryworkq workq;
struct pfr_kentry *p;
struct pfr_addr ad;
- int i, rv, s = 0, xdel = 0, log = 1;
+ int i, rv, s, xdel = 0, log = 1;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_FEEDBACK);
if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -417,7 +431,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
} else {
/* iterate over addresses to delete */
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
return (EFAULT);
if (pfr_validate_addr(&ad))
return (EINVAL);
@@ -428,7 +442,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
}
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
senderr(EFAULT);
if (pfr_validate_addr(&ad))
senderr(EINVAL);
@@ -450,7 +464,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
xdel++;
}
if (flags & PFR_FLAG_FEEDBACK)
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
senderr(EFAULT);
}
if (!(flags & PFR_FLAG_DUMMY)) {
@@ -478,10 +492,11 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pfr_kentryworkq addq, delq, changeq;
struct pfr_kentry *p, *q;
struct pfr_addr ad;
- int i, rv, s = 0, xadd = 0, xdel = 0, xchange = 0;
+ int i, rv, s, xadd = 0, xdel = 0, xchange = 0;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_FEEDBACK);
if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
PFR_FLAG_USERIOCTL))
return (EINVAL);
@@ -490,7 +505,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
return (ESRCH);
if (kt->pfrkt_flags & PFR_TFLAG_CONST)
return (EPERM);
- tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
+ !(flags & PFR_FLAG_USERIOCTL));
if (tmpkt == NULL)
return (ENOMEM);
pfr_mark_addrs(kt);
@@ -498,7 +514,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
SLIST_INIT(&delq);
SLIST_INIT(&changeq);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
senderr(EFAULT);
if (pfr_validate_addr(&ad))
senderr(EINVAL);
@@ -521,7 +537,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
ad.pfra_fback = PFR_FB_DUPLICATE;
goto _skip;
}
- p = pfr_create_kentry(&ad, 0);
+ p = pfr_create_kentry(&ad,
+ !(flags & PFR_FLAG_USERIOCTL));
if (p == NULL)
senderr(ENOMEM);
if (pfr_route_kentry(tmpkt, p)) {
@@ -535,7 +552,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
}
_skip:
if (flags & PFR_FLAG_FEEDBACK)
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
senderr(EFAULT);
}
pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
@@ -548,7 +565,7 @@ _skip:
SLIST_FOREACH(p, &delq, pfrke_workq) {
pfr_copyout_addr(&ad, p);
ad.pfra_fback = PFR_FB_DELETED;
- if (COPYOUT(&ad, addr+size+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags))
senderr(EFAULT);
i++;
}
@@ -592,7 +609,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pfr_addr ad;
int i, xmatch = 0;
- ACCEPT_FLAGS(PFR_FLAG_REPLACE);
+ ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE);
if (pfr_validate_table(tbl, 0, 0))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -600,7 +617,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
return (ESRCH);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
return (EFAULT);
if (pfr_validate_addr(&ad))
return (EINVAL);
@@ -613,7 +630,7 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
(p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH);
if (p != NULL && !p->pfrke_not)
xmatch++;
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
return (EFAULT);
}
if (nmatch != NULL)
@@ -629,7 +646,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
struct pfr_walktree w;
int rv;
- ACCEPT_FLAGS(0);
+ ACCEPT_FLAGS(flags, 0);
if (pfr_validate_table(tbl, 0, 0))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -652,7 +669,7 @@ pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
#endif
if (!rv)
#ifdef __FreeBSD__
- rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
+ rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
&w);
#else
rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
@@ -676,10 +693,11 @@ pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
struct pfr_ktable *kt;
struct pfr_walktree w;
struct pfr_kentryworkq workq;
- int rv, s = 0;
+ int rv, s;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */
+ /* XXX PFR_FLAG_CLSTATS disabled */
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC);
if (pfr_validate_table(tbl, 0, 0))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -735,9 +753,10 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pfr_kentryworkq workq;
struct pfr_kentry *p;
struct pfr_addr ad;
- int i, rv, s = 0, xzero = 0;
+ int i, rv, s, xzero = 0;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_FEEDBACK);
if (pfr_validate_table(tbl, 0, 0))
return (EINVAL);
kt = pfr_lookup_table(tbl);
@@ -745,7 +764,7 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
return (ESRCH);
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
senderr(EFAULT);
if (pfr_validate_addr(&ad))
senderr(EINVAL);
@@ -753,7 +772,7 @@ pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
if (flags & PFR_FLAG_FEEDBACK) {
ad.pfra_fback = (p != NULL) ?
PFR_FB_CLEARED : PFR_FB_NONE;
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
senderr(EFAULT);
}
if (p != NULL) {
@@ -868,7 +887,11 @@ struct pfr_kentry *
pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
{
union sockaddr_union sa, mask;
- struct radix_node_head *head = NULL; /* make the compiler happy */
+#ifdef __FreeBSD__
+ struct radix_node_head *head = NULL;
+#else
+ struct radix_node_head *head;
+#endif
struct pfr_kentry *ke;
int s;
@@ -884,7 +907,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
s = splsoftnet(); /* rn_lookup makes use of globals */
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ PF_LOCK_ASSERT();
#endif
ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
splx(s);
@@ -905,13 +928,16 @@ pfr_create_kentry(struct pfr_addr *ad, int intr)
{
struct pfr_kentry *ke;
+#ifdef __FreeBSD__
+ ke = pool_get(&V_pfr_kentry_pl, PR_NOWAIT | PR_ZERO);
+#else
if (intr)
- ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT);
+ ke = pool_get(&pfr_kentry_pl, PR_NOWAIT | PR_ZERO);
else
- ke = pool_get(&pfr_kentry_pl, PR_NOWAIT);
+ ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL);
+#endif
if (ke == NULL)
return (NULL);
- bzero(ke, sizeof(*ke));
if (ad->pfra_af == AF_INET)
FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
@@ -920,7 +946,6 @@ pfr_create_kentry(struct pfr_addr *ad, int intr)
ke->pfrke_af = ad->pfra_af;
ke->pfrke_net = ad->pfra_net;
ke->pfrke_not = ad->pfra_not;
- ke->pfrke_intrpool = intr;
return (ke);
}
@@ -938,10 +963,14 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq)
void
pfr_destroy_kentry(struct pfr_kentry *ke)
{
- if (ke->pfrke_intrpool)
- pool_put(&pfr_kentry_pl2, ke);
- else
- pool_put(&pfr_kentry_pl, ke);
+ if (ke->pfrke_counters)
+#ifdef __FreeBSD__
+ pool_put(&V_pfr_kcounters_pl, ke->pfrke_counters);
+ pool_put(&V_pfr_kentry_pl, ke);
+#else
+ pool_put(&pfr_kcounters_pl, ke->pfrke_counters);
+ pool_put(&pfr_kentry_pl, ke);
+#endif
}
void
@@ -1022,8 +1051,14 @@ pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
s = splsoftnet();
if (negchange)
p->pfrke_not = !p->pfrke_not;
- bzero(p->pfrke_packets, sizeof(p->pfrke_packets));
- bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes));
+ if (p->pfrke_counters) {
+#ifdef __FreeBSD__
+ pool_put(&V_pfr_kcounters_pl, p->pfrke_counters);
+#else
+ pool_put(&pfr_kcounters_pl, p->pfrke_counters);
+#endif
+ p->pfrke_counters = NULL;
+ }
splx(s);
p->pfrke_tzero = tzero;
}
@@ -1036,10 +1071,10 @@ pfr_reset_feedback(struct pfr_addr *addr, int size, int flags)
int i;
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
break;
ad.pfra_fback = PFR_FB_NONE;
- if (COPYOUT(&ad, addr+i, sizeof(ad)))
+ if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
break;
}
}
@@ -1074,7 +1109,11 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
union sockaddr_union mask;
struct radix_node *rn;
- struct radix_node_head *head = NULL; /* make the compiler happy */
+#ifdef __FreeBSD__
+ struct radix_node_head *head = NULL;
+#else
+ struct radix_node_head *head;
+#endif
int s;
bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
@@ -1085,13 +1124,21 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
s = splsoftnet();
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ PF_LOCK_ASSERT();
#endif
if (KENTRY_NETWORK(ke)) {
pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+#ifdef __FreeBSD__
rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node);
+#else
+ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0);
+#endif
} else
+#ifdef __FreeBSD__
rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node);
+#else
+ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0);
+#endif
splx(s);
return (rn == NULL ? -1 : 0);
@@ -1102,7 +1149,11 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
union sockaddr_union mask;
struct radix_node *rn;
- struct radix_node_head *head = NULL; /* make the compiler happy */
+#ifdef __FreeBSD__
+ struct radix_node_head *head = NULL;
+#else
+ struct radix_node_head *head;
+#endif
int s;
if (ke->pfrke_af == AF_INET)
@@ -1112,7 +1163,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
s = splsoftnet();
#ifdef __FreeBSD__
- PF_ASSERT(MA_OWNED);
+ PF_LOCK_ASSERT();
#endif
if (KENTRY_NETWORK(ke)) {
pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
@@ -1175,7 +1226,7 @@ pfr_walktree(struct radix_node *rn, void *arg)
struct pfr_addr ad;
pfr_copyout_addr(&ad, ke);
- if (COPYOUT(&ad, w->pfrw_addr, sizeof(ad)))
+ if (copyout(&ad, w->pfrw_addr, sizeof(ad)))
return (EFAULT);
w->pfrw_addr++;
}
@@ -1187,14 +1238,20 @@ pfr_walktree(struct radix_node *rn, void *arg)
pfr_copyout_addr(&as.pfras_a, ke);
s = splsoftnet();
- bcopy(ke->pfrke_packets, as.pfras_packets,
- sizeof(as.pfras_packets));
- bcopy(ke->pfrke_bytes, as.pfras_bytes,
- sizeof(as.pfras_bytes));
+ if (ke->pfrke_counters) {
+ bcopy(ke->pfrke_counters->pfrkc_packets,
+ as.pfras_packets, sizeof(as.pfras_packets));
+ bcopy(ke->pfrke_counters->pfrkc_bytes,
+ as.pfras_bytes, sizeof(as.pfras_bytes));
+ } else {
+ bzero(as.pfras_packets, sizeof(as.pfras_packets));
+ bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
+ as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
+ }
splx(s);
as.pfras_tzero = ke->pfrke_tzero;
- if (COPYOUT(&as, w->pfrw_astats, sizeof(as)))
+ if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags))
return (EFAULT);
w->pfrw_astats++;
}
@@ -1211,19 +1268,35 @@ pfr_walktree(struct radix_node *rn, void *arg)
if (ke->pfrke_af == AF_INET) {
if (w->pfrw_dyn->pfid_acnt4++ > 0)
break;
+#ifdef __FreeBSD__
+ pfr_prepare_network(&V_pfr_mask, AF_INET, ke->pfrke_net);
+#else
pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net);
+#endif
w->pfrw_dyn->pfid_addr4 = *SUNION2PF(
&ke->pfrke_sa, AF_INET);
w->pfrw_dyn->pfid_mask4 = *SUNION2PF(
+#ifdef __FreeBSD__
+ &V_pfr_mask, AF_INET);
+#else
&pfr_mask, AF_INET);
+#endif
} else if (ke->pfrke_af == AF_INET6){
if (w->pfrw_dyn->pfid_acnt6++ > 0)
break;
+#ifdef __FreeBSD__
+ pfr_prepare_network(&V_pfr_mask, AF_INET6, ke->pfrke_net);
+#else
pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net);
+#endif
w->pfrw_dyn->pfid_addr6 = *SUNION2PF(
&ke->pfrke_sa, AF_INET6);
w->pfrw_dyn->pfid_mask6 = *SUNION2PF(
+#ifdef __FreeBSD__
+ &V_pfr_mask, AF_INET6);
+#else
&pfr_mask, AF_INET6);
+#endif
}
break;
}
@@ -1235,9 +1308,10 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
{
struct pfr_ktableworkq workq;
struct pfr_ktable *p;
- int s = 0, xdel = 0;
+ int s, xdel = 0;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_ALLRSETS);
if (pfr_fix_anchor(filter->pfrt_anchor))
return (EINVAL);
if (pfr_table_count(filter, flags) < 0)
@@ -1272,14 +1346,14 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
{
struct pfr_ktableworkq addq, changeq;
struct pfr_ktable *p, *q, *r, key;
- int i, rv, s = 0, xadd = 0;
+ int i, rv, s, xadd = 0;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY);
SLIST_INIT(&addq);
SLIST_INIT(&changeq);
for (i = 0; i < size; i++) {
- if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
senderr(EFAULT);
if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
flags & PFR_FLAG_USERIOCTL))
@@ -1287,7 +1361,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
if (p == NULL) {
- p = pfr_create_ktable(&key.pfrkt_t, tzero, 1);
+ p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
+ !(flags & PFR_FLAG_USERIOCTL));
if (p == NULL)
senderr(ENOMEM);
SLIST_FOREACH(q, &addq, pfrkt_workq) {
@@ -1313,7 +1388,8 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
}
}
key.pfrkt_flags = 0;
- r = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ r = pfr_create_ktable(&key.pfrkt_t, 0, 1,
+ !(flags & PFR_FLAG_USERIOCTL));
if (r == NULL)
senderr(ENOMEM);
SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
@@ -1352,12 +1428,12 @@ pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
{
struct pfr_ktableworkq workq;
struct pfr_ktable *p, *q, key;
- int i, s = 0, xdel = 0;
+ int i, s, xdel = 0;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY);
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
return (EFAULT);
if (pfr_validate_table(&key.pfrkt_t, 0,
flags & PFR_FLAG_USERIOCTL))
@@ -1394,7 +1470,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
struct pfr_ktable *p;
int n, nn;
- ACCEPT_FLAGS(PFR_FLAG_ALLRSETS);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
if (pfr_fix_anchor(filter->pfrt_anchor))
return (EINVAL);
n = nn = pfr_table_count(filter, flags);
@@ -1409,7 +1485,7 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
continue;
if (n-- <= 0)
continue;
- if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl)))
+ if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags))
return (EFAULT);
}
if (n) {
@@ -1426,11 +1502,11 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
{
struct pfr_ktable *p;
struct pfr_ktableworkq workq;
- int s = 0, n, nn;
+ int s, n, nn;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS);
- /* XXX PFR_FLAG_CLSTATS disabled */
+ /* XXX PFR_FLAG_CLSTATS disabled */
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS);
if (pfr_fix_anchor(filter->pfrt_anchor))
return (EINVAL);
n = nn = pfr_table_count(filter, flags);
@@ -1450,9 +1526,8 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
continue;
if (!(flags & PFR_FLAG_ATOMIC))
s = splsoftnet();
- if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) {
- if (!(flags & PFR_FLAG_ATOMIC))
- splx(s);
+ if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) {
+ splx(s);
return (EFAULT);
}
if (!(flags & PFR_FLAG_ATOMIC))
@@ -1477,13 +1552,14 @@ pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
{
struct pfr_ktableworkq workq;
struct pfr_ktable *p, key;
- int i, s = 0, xzero = 0;
+ int i, s, xzero = 0;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
+ PFR_FLAG_ADDRSTOO);
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
return (EFAULT);
if (pfr_validate_table(&key.pfrkt_t, 0, 0))
return (EINVAL);
@@ -1511,16 +1587,16 @@ pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
{
struct pfr_ktableworkq workq;
struct pfr_ktable *p, *q, key;
- int i, s = 0, xchange = 0, xdel = 0;
+ int i, s, xchange = 0, xdel = 0;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY);
if ((setflag & ~PFR_TFLAG_USRMASK) ||
(clrflag & ~PFR_TFLAG_USRMASK) ||
(setflag & clrflag))
return (EINVAL);
SLIST_INIT(&workq);
for (i = 0; i < size; i++) {
- if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t)))
+ if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
return (EFAULT);
if (pfr_validate_table(&key.pfrkt_t, 0,
flags & PFR_FLAG_USERIOCTL))
@@ -1567,7 +1643,7 @@ pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
struct pf_ruleset *rs;
int xdel = 0;
- ACCEPT_FLAGS(PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
if (rs == NULL)
return (ENOMEM);
@@ -1604,7 +1680,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
struct pf_ruleset *rs;
int i, rv, xadd = 0, xaddr = 0;
- ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO);
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
if (size && !(flags & PFR_FLAG_ADDRSTOO))
return (EINVAL);
if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
@@ -1617,7 +1693,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
SLIST_INIT(&tableq);
kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
if (kt == NULL) {
- kt = pfr_create_ktable(tbl, 0, 1);
+ kt = pfr_create_ktable(tbl, 0, 1,
+ !(flags & PFR_FLAG_USERIOCTL));
if (kt == NULL)
return (ENOMEM);
SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
@@ -1633,7 +1710,8 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
kt->pfrkt_root = rt;
goto _skip;
}
- rt = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ rt = pfr_create_ktable(&key.pfrkt_t, 0, 1,
+ !(flags & PFR_FLAG_USERIOCTL));
if (rt == NULL) {
pfr_destroy_ktables(&tableq, 0);
return (ENOMEM);
@@ -1643,14 +1721,14 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
} else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
xadd++;
_skip:
- shadow = pfr_create_ktable(tbl, 0, 0);
+ shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL));
if (shadow == NULL) {
pfr_destroy_ktables(&tableq, 0);
return (ENOMEM);
}
SLIST_INIT(&addrq);
for (i = 0; i < size; i++) {
- if (COPYIN(addr+i, &ad, sizeof(ad)))
+ if (COPYIN(addr+i, &ad, sizeof(ad), flags))
senderr(EFAULT);
if (pfr_validate_addr(&ad))
senderr(EINVAL);
@@ -1700,7 +1778,7 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
struct pf_ruleset *rs;
int xdel = 0;
- ACCEPT_FLAGS(PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
rs = pf_find_ruleset(trs->pfrt_anchor);
if (rs == NULL || !rs->topen || ticket != rs->tticket)
return (0);
@@ -1730,10 +1808,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
struct pfr_ktable *p, *q;
struct pfr_ktableworkq workq;
struct pf_ruleset *rs;
- int s = 0, xadd = 0, xchange = 0;
+ int s, xadd = 0, xchange = 0;
long tzero = time_second;
- ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY);
+ ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY);
rs = pf_find_ruleset(trs->pfrt_anchor);
if (rs == NULL || !rs->topen || ticket != rs->tticket)
return (EBUSY);
@@ -1994,15 +2072,22 @@ pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
}
struct pfr_ktable *
-pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
+pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset,
+ int intr)
{
struct pfr_ktable *kt;
struct pf_ruleset *rs;
- kt = pool_get(&pfr_ktable_pl, PR_NOWAIT);
+#ifdef __FreeBSD__
+ kt = pool_get(&V_pfr_ktable_pl, PR_NOWAIT|PR_ZERO);
+#else
+ if (intr)
+ kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL);
+ else
+ kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL);
+#endif
if (kt == NULL)
return (NULL);
- bzero(kt, sizeof(*kt));
kt->pfrkt_t = *tbl;
if (attachruleset) {
@@ -2069,7 +2154,11 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
kt->pfrkt_rs->tables--;
pf_remove_if_empty_ruleset(kt->pfrkt_rs);
}
+#ifdef __FreeBSD__
+ pool_put(&V_pfr_ktable_pl, kt);
+#else
pool_put(&pfr_ktable_pl, kt);
+#endif
}
int
@@ -2104,16 +2193,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
switch (af) {
#ifdef INET
case AF_INET:
+#ifdef __FreeBSD__
+ V_pfr_sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4);
+#else
pfr_sin.sin_addr.s_addr = a->addr32[0];
ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4);
+#endif
if (ke && KENTRY_RNF_ROOT(ke))
ke = NULL;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
+#ifdef __FreeBSD__
+ bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6);
+#else
bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr));
ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6);
+#endif
if (ke && KENTRY_RNF_ROOT(ke))
ke = NULL;
break;
@@ -2141,16 +2240,26 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
switch (af) {
#ifdef INET
case AF_INET:
+#ifdef __FreeBSD__
+ V_pfr_sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&V_pfr_sin, kt->pfrkt_ip4);
+#else
pfr_sin.sin_addr.s_addr = a->addr32[0];
ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4);
+#endif
if (ke && KENTRY_RNF_ROOT(ke))
ke = NULL;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
+#ifdef __FreeBSD__
+ bcopy(a, &V_pfr_sin6.sin6_addr, sizeof(V_pfr_sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&V_pfr_sin6, kt->pfrkt_ip6);
+#else
bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr));
ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6);
+#endif
if (ke && KENTRY_RNF_ROOT(ke))
ke = NULL;
break;
@@ -2165,14 +2274,24 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
}
kt->pfrkt_packets[dir_out][op_pass]++;
kt->pfrkt_bytes[dir_out][op_pass] += len;
- if (ke != NULL && op_pass != PFR_OP_XPASS) {
- ke->pfrke_packets[dir_out][op_pass]++;
- ke->pfrke_bytes[dir_out][op_pass] += len;
+ if (ke != NULL && op_pass != PFR_OP_XPASS &&
+ (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
+ if (ke->pfrke_counters == NULL)
+#ifdef __FreeBSD__
+ ke->pfrke_counters = pool_get(&V_pfr_kcounters_pl,
+#else
+ ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
+#endif
+ PR_NOWAIT | PR_ZERO);
+ if (ke->pfrke_counters != NULL) {
+ ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++;
+ ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len;
+ }
}
}
struct pfr_ktable *
-pfr_attach_table(struct pf_ruleset *rs, char *name)
+pfr_attach_table(struct pf_ruleset *rs, char *name, int intr)
{
struct pfr_ktable *kt, *rt;
struct pfr_table tbl;
@@ -2184,14 +2303,14 @@ pfr_attach_table(struct pf_ruleset *rs, char *name)
strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
kt = pfr_lookup_table(&tbl);
if (kt == NULL) {
- kt = pfr_create_ktable(&tbl, time_second, 1);
+ kt = pfr_create_ktable(&tbl, time_second, 1, intr);
if (kt == NULL)
return (NULL);
if (ac != NULL) {
bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
rt = pfr_lookup_table(&tbl);
if (rt == NULL) {
- rt = pfr_create_ktable(&tbl, 0, 1);
+ rt = pfr_create_ktable(&tbl, 0, 1, intr);
if (rt == NULL) {
pfr_destroy_ktable(kt, 0);
return (NULL);
@@ -2217,20 +2336,31 @@ pfr_detach_table(struct pfr_ktable *kt)
pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
}
-
int
pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af)
{
+#ifdef __FreeBSD__
struct pfr_kentry *ke, *ke2 = NULL;
struct pf_addr *addr = NULL;
+#else
+ struct pfr_kentry *ke, *ke2;
+ struct pf_addr *addr;
+#endif
union sockaddr_union mask;
int idx = -1, use_counter = 0;
+#ifdef __FreeBSD__
+ if (af == AF_INET)
+ addr = (struct pf_addr *)&V_pfr_sin.sin_addr;
+ else if (af == AF_INET6)
+ addr = (struct pf_addr *)&V_pfr_sin6.sin6_addr;
+#else
if (af == AF_INET)
addr = (struct pf_addr *)&pfr_sin.sin_addr;
else if (af == AF_INET6)
addr = (struct pf_addr *)&pfr_sin6.sin6_addr;
+#endif
if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
kt = kt->pfrkt_root;
if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
@@ -2245,11 +2375,21 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
_next_block:
ke = pfr_kentry_byidx(kt, idx, af);
- if (ke == NULL)
+ if (ke == NULL) {
+ kt->pfrkt_nomatch++;
return (1);
+ }
+#ifdef __FreeBSD__
+ pfr_prepare_network(&V_pfr_mask, af, ke->pfrke_net);
+#else
pfr_prepare_network(&pfr_mask, af, ke->pfrke_net);
+#endif
*raddr = SUNION2PF(&ke->pfrke_sa, af);
+#ifdef __FreeBSD__
+ *rmask = SUNION2PF(&V_pfr_mask, af);
+#else
*rmask = SUNION2PF(&pfr_mask, af);
+#endif
if (use_counter) {
/* is supplied address within block? */
@@ -2269,27 +2409,42 @@ _next_block:
/* this is a single IP address - no possible nested block */
PF_ACPY(counter, addr, af);
*pidx = idx;
+ kt->pfrkt_match++;
return (0);
}
for (;;) {
/* we don't want to use a nested block */
+#ifdef __FreeBSD__
+ if (af == AF_INET)
+ ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin,
+ kt->pfrkt_ip4);
+ else if (af == AF_INET6)
+ ke2 = (struct pfr_kentry *)rn_match(&V_pfr_sin6,
+ kt->pfrkt_ip6);
+#else
if (af == AF_INET)
ke2 = (struct pfr_kentry *)rn_match(&pfr_sin,
kt->pfrkt_ip4);
else if (af == AF_INET6)
ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6,
kt->pfrkt_ip6);
+#endif
/* no need to check KENTRY_RNF_ROOT() here */
if (ke2 == ke) {
/* lookup return the same block - perfect */
PF_ACPY(counter, addr, af);
*pidx = idx;
+ kt->pfrkt_match++;
return (0);
}
/* we need to increase the counter past the nested block */
pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net);
+#ifdef __FreeBSD__
+ PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &V_pfr_ffaddr, af);
+#else
PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af);
+#endif
PF_AINC(addr, af);
if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) {
/* ok, we reached the end of our main block */
diff --git a/freebsd/sys/contrib/pf/net/pfvar.h b/freebsd/sys/contrib/pf/net/pfvar.h
index 0d711ffc..59177b5b 100644
--- a/freebsd/sys/contrib/pf/net/pfvar.h
+++ b/freebsd/sys/contrib/pf/net/pfvar.h
@@ -1,5 +1,4 @@
-/* $FreeBSD$ */
-/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */
+/* $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -78,9 +77,8 @@ struct inpcb;
#endif
enum { PF_INOUT, PF_IN, PF_OUT };
-enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID };
enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
- PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP };
+ PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER };
enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
@@ -90,6 +88,7 @@ enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
enum { PF_GET_NONE, PF_GET_CLR_CNTR };
+enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH };
/*
* Note about PFTM_*: real indices into pf_rule.timeout[] come before
@@ -132,7 +131,8 @@ enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
- PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED };
+ PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED,
+ PF_ADDR_RANGE };
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
#define PF_WSCALE_FLAG 0x80
@@ -212,87 +212,88 @@ struct pfi_dynaddr {
*/
#ifdef __FreeBSD__
-#define splsoftnet() splnet()
+#define splsoftnet() splnet()
#define HTONL(x) (x) = htonl((__uint32_t)(x))
#define HTONS(x) (x) = htons((__uint16_t)(x))
#define NTOHL(x) (x) = ntohl((__uint32_t)(x))
#define NTOHS(x) (x) = ntohs((__uint16_t)(x))
-#define PF_NAME "pf"
+#define PF_NAME "pf"
-#define PR_NOWAIT M_NOWAIT
-#define pool_get(p, f) uma_zalloc(*(p), (f))
-#define pool_put(p, o) uma_zfree(*(p), (o))
+#define PR_NOWAIT M_NOWAIT
+#define PR_WAITOK M_WAITOK
+#define PR_ZERO M_ZERO
+#define pool_get(p, f) uma_zalloc(*(p), (f))
+#define pool_put(p, o) uma_zfree(*(p), (o))
-#define UMA_CREATE(var, type, desc) \
- var = uma_zcreate(desc, sizeof(type), \
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \
- if (var == NULL) break
-#define UMA_DESTROY(var) \
- if(var) uma_zdestroy(var)
+#define UMA_CREATE(var, type, desc) \
+ var = uma_zcreate(desc, sizeof(type), \
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); \
+ if (var == NULL) \
+ break
+#define UMA_DESTROY(var) \
+ if (var) \
+ uma_zdestroy(var)
+#ifdef __FreeBSD__
extern struct mtx pf_task_mtx;
-#define PF_ASSERT(h) mtx_assert(&pf_task_mtx, (h))
-
-#define PF_LOCK() do { \
- PF_ASSERT(MA_NOTOWNED); \
- mtx_lock(&pf_task_mtx); \
-} while(0)
-#define PF_UNLOCK() do { \
- PF_ASSERT(MA_OWNED); \
- mtx_unlock(&pf_task_mtx); \
-} while(0)
-
-#define PF_COPYIN(uaddr, kaddr, len, r) do { \
- PF_UNLOCK(); \
- r = copyin((uaddr), (kaddr), (len)); \
- PF_LOCK(); \
+#define PF_LOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_OWNED)
+#define PF_UNLOCK_ASSERT() mtx_assert(&pf_task_mtx, MA_NOTOWNED)
+#define PF_LOCK() mtx_lock(&pf_task_mtx)
+#define PF_UNLOCK() mtx_unlock(&pf_task_mtx)
+#else
+#define PF_LOCK_ASSERT()
+#define PF_UNLOCK_ASSERT()
+#define PF_LOCK()
+#define PF_UNLOCK()
+#endif /* __FreeBSD__ */
+
+#define PF_COPYIN(uaddr, kaddr, len, r) do { \
+ PF_UNLOCK(); \
+ r = copyin((uaddr), (kaddr), (len)); \
+ PF_LOCK(); \
} while(0)
-#define PF_COPYOUT(kaddr, uaddr, len, r) do { \
- PF_UNLOCK(); \
- r = copyout((kaddr), (uaddr), (len)); \
- PF_LOCK(); \
+#define PF_COPYOUT(kaddr, uaddr, len, r) do { \
+ PF_UNLOCK(); \
+ r = copyout((kaddr), (uaddr), (len)); \
+ PF_LOCK(); \
} while(0)
-extern void init_pf_mutex(void);
-extern void destroy_pf_mutex(void);
-
-#define PF_MODVER 1
-#define PFLOG_MODVER 1
-#define PFSYNC_MODVER 1
-
-#define PFLOG_MINVER 1
-#define PFLOG_PREFVER PFLOG_MODVER
-#define PFLOG_MAXVER 1
-#define PFSYNC_MINVER 1
-#define PFSYNC_PREFVER PFSYNC_MODVER
-#define PFSYNC_MAXVER 1
-#endif /* __FreeBSD__ */
-
+#define PF_MODVER 1
+#define PFLOG_MODVER 1
+#define PFSYNC_MODVER 1
+
+#define PFLOG_MINVER 1
+#define PFLOG_PREFVER PFLOG_MODVER
+#define PFLOG_MAXVER 1
+#define PFSYNC_MINVER 1
+#define PFSYNC_PREFVER PFSYNC_MODVER
+#define PFSYNC_MAXVER 1
+#endif /* __FreeBSD__ */
#ifdef INET
#ifndef INET6
-#define PF_INET_ONLY
+#define PF_INET_ONLY
#endif /* ! INET6 */
#endif /* INET */
#ifdef INET6
#ifndef INET
-#define PF_INET6_ONLY
+#define PF_INET6_ONLY
#endif /* ! INET */
#endif /* INET6 */
#ifdef INET
#ifdef INET6
-#define PF_INET_INET6
+#define PF_INET_INET6
#endif /* INET6 */
#endif /* INET */
#else
-#define PF_INET_INET6
+#define PF_INET_INET6
#endif /* _KERNEL */
@@ -401,18 +402,25 @@ extern void destroy_pf_mutex(void);
#endif /* PF_INET6_ONLY */
#endif /* PF_INET_INET6 */
-#define PF_MISMATCHAW(aw, x, af, neg, ifp) \
+/*
+ * XXX callers not FIB-aware in our version of pf yet.
+ * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio.
+ */
+#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \
( \
(((aw)->type == PF_ADDR_NOROUTE && \
- pf_routable((x), (af), NULL)) || \
+ pf_routable((x), (af), NULL, (rtid))) || \
(((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \
- pf_routable((x), (af), (ifp))) || \
+ pf_routable((x), (af), (ifp), (rtid))) || \
((aw)->type == PF_ADDR_RTLABEL && \
- !pf_rtlabel_match((x), (af), (aw))) || \
+ !pf_rtlabel_match((x), (af), (aw), (rtid))) || \
((aw)->type == PF_ADDR_TABLE && \
!pfr_match_addr((aw)->p.tbl, (x), (af))) || \
((aw)->type == PF_ADDR_DYNIFTL && \
- !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ ((aw)->type == PF_ADDR_RANGE && \
+ !pf_match_addr_range(&(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))) || \
((aw)->type == PF_ADDR_ADDRMASK && \
!PF_AZERO(&(aw)->v.a.mask, (af)) && \
!PF_MATCHA(0, &(aw)->v.a.addr, \
@@ -619,12 +627,13 @@ struct pf_rule {
int rtableid;
u_int32_t timeout[PFTM_MAX];
- u_int32_t states;
+ u_int32_t states_cur;
+ u_int32_t states_tot;
u_int32_t max_states;
u_int32_t src_nodes;
u_int32_t max_src_nodes;
u_int32_t max_src_states;
- u_int32_t spare1; /* netgraph */
+ u_int32_t spare1; /* netgraph */
u_int32_t max_src_conn;
struct {
u_int32_t limit;
@@ -643,7 +652,7 @@ struct pf_rule {
u_int16_t max_mss;
u_int16_t tag;
u_int16_t match_tag;
- u_int16_t spare2; /* netgraph */
+ u_int16_t spare2; /* netgraph */
struct pf_rule_uid uid;
struct pf_rule_gid gid;
@@ -673,12 +682,18 @@ struct pf_rule {
u_int8_t rt;
u_int8_t return_ttl;
u_int8_t tos;
+ u_int8_t set_tos;
u_int8_t anchor_relative;
u_int8_t anchor_wildcard;
#define PF_FLUSH 0x01
#define PF_FLUSH_GLOBAL 0x02
u_int8_t flush;
+
+ struct {
+ struct pf_addr addr;
+ u_int16_t port;
+ } divert;
};
/* rule flags */
@@ -697,10 +712,12 @@ struct pf_rule {
#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */
#define PFRULE_RANDOMID 0x0800
#define PFRULE_REASSEMBLE_TCP 0x1000
+#define PFRULE_SET_TOS 0x2000
/* rule flags again */
#define PFRULE_IFBOUND 0x00010000 /* if-bound */
#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
+#define PFRULE_PFLOW 0x00040000
#define PFSTATE_HIWAT 10000 /* default state table size */
#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
@@ -758,83 +775,268 @@ struct pf_state_host {
};
struct pf_state_peer {
+ struct pf_state_scrub *scrub; /* state is scrubbed */
u_int32_t seqlo; /* Max sequence number sent */
u_int32_t seqhi; /* Max the other end ACKd + win */
u_int32_t seqdiff; /* Sequence number modulator */
u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
u_int8_t state; /* active state level */
u_int8_t wscale; /* window scaling factor */
- u_int16_t mss; /* Maximum segment size option */
u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */
- struct pf_state_scrub *scrub; /* state is scrubbed */
- u_int8_t pad[3];
+ u_int8_t pad[1];
};
TAILQ_HEAD(pf_state_queue, pf_state);
-/* keep synced with struct pf_state, used in RB_FIND */
-struct pf_state_cmp {
- u_int64_t id;
- u_int32_t creatorid;
- struct pf_state_host lan;
- struct pf_state_host gwy;
- struct pf_state_host ext;
+/* keep synced with struct pf_state_key, used in RB_FIND */
+struct pf_state_key_cmp {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
sa_family_t af;
u_int8_t proto;
- u_int8_t direction;
- u_int8_t pad;
+ u_int8_t pad[2];
+};
+
+struct pf_state_item {
+ TAILQ_ENTRY(pf_state_item) entry;
+ struct pf_state *s;
+};
+
+TAILQ_HEAD(pf_statelisthead, pf_state_item);
+
+struct pf_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+
+ RB_ENTRY(pf_state_key) entry;
+ struct pf_statelisthead states;
+ struct pf_state_key *reverse;
+ struct inpcb *inp;
+};
+
+/* keep synced with struct pf_state, used in RB_FIND */
+struct pf_state_cmp {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
};
struct pf_state {
- u_int64_t id;
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+#ifdef __FreeBSD__
+ u_int8_t pad[2];
+ u_int8_t local_flags;
+#define PFSTATE_EXPIRING 0x01
+#else
+ u_int8_t pad[3];
+#endif
+
+ TAILQ_ENTRY(pf_state) sync_list;
+ TAILQ_ENTRY(pf_state) entry_list;
+ RB_ENTRY(pf_state) entry_id;
+ struct pf_state_peer src;
+ struct pf_state_peer dst;
+ union pf_rule_ptr rule;
+ union pf_rule_ptr anchor;
+ union pf_rule_ptr nat_rule;
+ struct pf_addr rt_addr;
+ struct pf_state_key *key[2]; /* addresses stack and wire */
+ struct pfi_kif *kif;
+ struct pfi_kif *rt_kif;
+ struct pf_src_node *src_node;
+ struct pf_src_node *nat_src_node;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t pfsync_time;
+ u_int16_t tag;
+ u_int8_t log;
+ u_int8_t state_flags;
+#define PFSTATE_ALLOWOPTS 0x01
+#define PFSTATE_SLOPPY 0x02
+#define PFSTATE_PFLOW 0x04
+#define PFSTATE_NOSYNC 0x08
+#define PFSTATE_ACK 0x10
+ u_int8_t timeout;
+ u_int8_t sync_state; /* PFSYNC_S_x */
+
+ /* XXX */
+ u_int8_t sync_updates;
+ u_int8_t _tail[3];
+};
+
+/*
+ * Unified state structures for pulling states out of the kernel
+ * used by pfsync(4) and the pf(4) ioctl.
+ */
+struct pfsync_state_scrub {
+ u_int16_t pfss_flags;
+ u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
+ u_int8_t scrub_flag;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+} __packed;
+
+struct pfsync_state_peer {
+ struct pfsync_state_scrub scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+};
+
+struct pfsync_state {
+ u_int32_t id[2];
+ char ifname[IFNAMSIZ];
+ struct pfsync_state_key key[2];
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ struct pf_addr rt_addr;
+ u_int32_t rule;
+ u_int32_t anchor;
+ u_int32_t nat_rule;
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
u_int32_t creatorid;
- struct pf_state_host lan;
- struct pf_state_host gwy;
- struct pf_state_host ext;
sa_family_t af;
u_int8_t proto;
u_int8_t direction;
#ifdef __FreeBSD__
u_int8_t local_flags;
-#define PFSTATE_EXPIRING 0x01
-#else
+#define PFSTATE_EXPIRING 0x01
u_int8_t pad;
#endif
u_int8_t log;
u_int8_t state_flags;
-#define PFSTATE_ALLOWOPTS 0x01
-#define PFSTATE_SLOPPY 0x02
u_int8_t timeout;
u_int8_t sync_flags;
-#define PFSTATE_NOSYNC 0x01
-#define PFSTATE_FROMSYNC 0x02
-#define PFSTATE_STALE 0x04
- union {
- struct {
- RB_ENTRY(pf_state) entry_lan_ext;
- RB_ENTRY(pf_state) entry_ext_gwy;
- RB_ENTRY(pf_state) entry_id;
- TAILQ_ENTRY(pf_state) entry_list;
- struct pfi_kif *kif;
- } s;
- char ifname[IFNAMSIZ];
- } u;
- struct pf_state_peer src;
- struct pf_state_peer dst;
- union pf_rule_ptr rule;
- union pf_rule_ptr anchor;
- union pf_rule_ptr nat_rule;
- struct pf_addr rt_addr;
- struct pfi_kif *rt_kif;
- struct pf_src_node *src_node;
- struct pf_src_node *nat_src_node;
- u_int64_t packets[2];
- u_int64_t bytes[2];
- u_int32_t creation;
- u_int32_t expire;
- u_int32_t pfsync_time;
- u_int16_t tag;
-};
+ u_int8_t updates;
+} __packed;
+
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+/* pfsync */
+typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t);
+typedef void pfsync_insert_state_t(struct pf_state *);
+typedef void pfsync_update_state_t(struct pf_state *);
+typedef void pfsync_delete_state_t(struct pf_state *);
+typedef void pfsync_clear_states_t(u_int32_t, const char *);
+typedef int pfsync_state_in_use_t(struct pf_state *);
+typedef int pfsync_defer_t(struct pf_state *, struct mbuf *);
+typedef int pfsync_up_t(void);
+
+extern pfsync_state_import_t *pfsync_state_import_ptr;
+extern pfsync_insert_state_t *pfsync_insert_state_ptr;
+extern pfsync_update_state_t *pfsync_update_state_ptr;
+extern pfsync_delete_state_t *pfsync_delete_state_ptr;
+extern pfsync_clear_states_t *pfsync_clear_states_ptr;
+extern pfsync_state_in_use_t *pfsync_state_in_use_ptr;
+extern pfsync_defer_t *pfsync_defer_ptr;
+extern pfsync_up_t *pfsync_up_ptr;
+
+void pfsync_state_export(struct pfsync_state *,
+ struct pf_state *);
+
+/* pflow */
+typedef int export_pflow_t(struct pf_state *);
+
+extern export_pflow_t *export_pflow_ptr;
+
+/* pflog */
+struct pf_ruleset;
+struct pf_pdesc;
+typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
+ u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
+ struct pf_ruleset *, struct pf_pdesc *);
+
+extern pflog_packet_t *pflog_packet_ptr;
+
+/* pf uid hack */
+VNET_DECLARE(int, debug_pfugidhack);
+#define V_debug_pfugidhack VNET(debug_pfugidhack)
+
+#define V_pf_end_threads VNET(pf_end_threads)
+#endif
+
+/* Macros to set/clear/test flags. */
+#ifdef _KERNEL
+#define SET(t, f) ((t) |= (f))
+#define CLR(t, f) ((t) &= ~(f))
+#define ISSET(t, f) ((t) & (f))
+#endif
+#endif
+
+#define PFSYNC_FLAG_SRCNODE 0x04
+#define PFSYNC_FLAG_NATSRCNODE 0x08
+
+/* for copies to/from network byte order */
+/* ioctl interface also uses network byte order */
+#define pf_state_peer_hton(s,d) do { \
+ (d)->seqlo = htonl((s)->seqlo); \
+ (d)->seqhi = htonl((s)->seqhi); \
+ (d)->seqdiff = htonl((s)->seqdiff); \
+ (d)->max_win = htons((s)->max_win); \
+ (d)->mss = htons((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do { \
+ (d)->seqlo = ntohl((s)->seqlo); \
+ (d)->seqhi = ntohl((s)->seqhi); \
+ (d)->seqdiff = ntohl((s)->seqdiff); \
+ (d)->max_win = ntohs((s)->max_win); \
+ (d)->mss = ntohs((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
+} while (0)
+
+#define pf_state_counter_hton(s,d) do { \
+ d[0] = htonl((s>>32)&0xffffffff); \
+ d[1] = htonl(s&0xffffffff); \
+} while (0)
+
+#define pf_state_counter_from_pfsync(s) \
+ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1]))
+
+#define pf_state_counter_ntoh(s,d) do { \
+ d = ntohl(s[0]); \
+ d = d<<32; \
+ d += ntohl(s[1]); \
+} while (0)
TAILQ_HEAD(pf_rulequeue, pf_rule);
@@ -881,9 +1083,11 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
#define PFR_TFLAG_INACTIVE 0x00000008
#define PFR_TFLAG_REFERENCED 0x00000010
#define PFR_TFLAG_REFDANCHOR 0x00000020
-#define PFR_TFLAG_USRMASK 0x00000003
+#define PFR_TFLAG_COUNTERS 0x00000040
+/* Adjust masks below when adding flags. */
+#define PFR_TFLAG_USRMASK 0x00000043
#define PFR_TFLAG_SETMASK 0x0000003C
-#define PFR_TFLAG_ALLMASK 0x0000003F
+#define PFR_TFLAG_ALLMASK 0x0000007F
struct pfr_table {
char pfrt_anchor[MAXPATHLEN];
@@ -894,7 +1098,7 @@ struct pfr_table {
enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
- PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX };
+ PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
struct pfr_addr {
union {
@@ -944,20 +1148,32 @@ union sockaddr_union {
};
#endif /* _SOCKADDR_UNION_DEFINED */
+struct pfr_kcounters {
+ u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+};
+
SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
struct pfr_kentry {
struct radix_node pfrke_node[2];
union sockaddr_union pfrke_sa;
- u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
- u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
SLIST_ENTRY(pfr_kentry) pfrke_workq;
+ union {
+
+ struct pfr_kcounters *pfrke_counters;
+#if 0
+ struct pfr_kroute *pfrke_route;
+#endif
+ } u;
long pfrke_tzero;
u_int8_t pfrke_af;
u_int8_t pfrke_net;
u_int8_t pfrke_not;
u_int8_t pfrke_mark;
- u_int8_t pfrke_intrpool;
};
+#define pfrke_counters u.pfrke_counters
+#define pfrke_route u.pfrke_route
+
SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
RB_HEAD(pfr_ktablehead, pfr_ktable);
@@ -986,17 +1202,25 @@ struct pfr_ktable {
#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
#define pfrkt_tzero pfrkt_ts.pfrts_tzero
-RB_HEAD(pf_state_tree_lan_ext, pf_state);
-RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state,
- u.s.entry_lan_ext, pf_state_compare_lan_ext);
+RB_HEAD(pf_state_tree, pf_state_key);
+RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
-RB_HEAD(pf_state_tree_ext_gwy, pf_state);
-RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state,
- u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
+RB_HEAD(pf_state_tree_ext_gwy, pf_state_key);
+RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key,
+ entry_ext_gwy, pf_state_compare_ext_gwy);
-TAILQ_HEAD(pfi_statehead, pfi_kif);
RB_HEAD(pfi_ifhead, pfi_kif);
+/* state tables */
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_state_tree, pf_statetbl);
+#define V_pf_statetbl VNET(pf_statetbl)
+#endif
+#else
+extern struct pf_state_tree pf_statetbl;
+#endif
+
/* keep synced with pfi_kif, used in RB_FIND */
struct pfi_kif_cmp {
char pfik_name[IFNAMSIZ];
@@ -1009,12 +1233,7 @@ struct pfi_kif {
u_int64_t pfik_bytes[2][2][2];
u_int32_t pfik_tzero;
int pfik_flags;
- struct pf_state_tree_lan_ext pfik_lan_ext;
- struct pf_state_tree_ext_gwy pfik_ext_gwy;
- TAILQ_ENTRY(pfi_kif) pfik_w_states;
-#ifndef __FreeBSD__
void *pfik_ah_cookie;
-#endif
struct ifnet *pfik_ifp;
struct ifg_group *pfik_group;
int pfik_states;
@@ -1029,9 +1248,6 @@ enum pfi_kif_refs {
};
#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
-/* XXX: revisist */
-#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */
-#define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */
struct pf_pdesc {
struct {
@@ -1050,16 +1266,22 @@ struct pf_pdesc {
#endif /* INET6 */
void *any;
} hdr;
- struct pf_addr baddr; /* address before translation */
- struct pf_addr naddr; /* address after translation */
+
struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */
- struct pf_addr *src;
- struct pf_addr *dst;
struct ether_header
*eh;
+ struct pf_addr *src; /* src address */
+ struct pf_addr *dst; /* dst address */
+ u_int16_t *sport;
+ u_int16_t *dport;
+#ifdef __FreeBSD__
struct pf_mtag *pf_mtag;
- u_int16_t *ip_sum;
+#endif
+
u_int32_t p_len; /* total length of payload */
+
+ u_int16_t *ip_sum;
+ u_int16_t *proto_sum;
u_int16_t flags; /* Let SCRUB trigger behavior in
* state code. Easier than tags */
#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */
@@ -1067,6 +1289,9 @@ struct pf_pdesc {
sa_family_t af;
u_int8_t proto;
u_int8_t tos;
+ u_int8_t dir; /* direction */
+ u_int8_t sidx; /* key index for source */
+ u_int8_t didx; /* key index for destination */
};
/* flags for RDR options */
@@ -1175,6 +1400,15 @@ struct pf_pdesc {
*(a) = (x); \
} while (0)
+#ifdef __FreeBSD__
+#define REASON_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ if (x < PFRES_MAX) \
+ V_pf_status.counters[x]++; \
+ } while (0)
+#else
#define REASON_SET(a, x) \
do { \
if ((a) != NULL) \
@@ -1182,6 +1416,7 @@ struct pf_pdesc {
if (x < PFRES_MAX) \
pf_status.counters[x]++; \
} while (0)
+#endif
struct pf_status {
u_int64_t counters[PFRES_MAX];
@@ -1265,27 +1500,6 @@ struct pf_altq {
u_int32_t qid; /* return value */
};
-#ifndef __FreeBSD__
-
-#define PF_TAG_GENERATED 0x01
-#define PF_TAG_FRAGCACHE 0x02
-#define PF_TAG_TRANSLATE_LOCALHOST 0x04
-
-struct pf_mtag {
- void *hdr; /* saved hdr pos in mbuf, for ECN */
- u_int rtableid; /* alternate routing table id */
- u_int32_t qid; /* queue id */
- u_int16_t tag; /* tag id */
- u_int8_t flags;
- u_int8_t routed;
- sa_family_t af; /* for ECN */
-};
-#endif
-
-struct pf_tag {
- u_int16_t tag; /* tag id */
-};
-
struct pf_tagname {
TAILQ_ENTRY(pf_tagname) entries;
char name[PF_TAG_NAME_SIZE];
@@ -1293,6 +1507,14 @@ struct pf_tagname {
int ref;
};
+struct pf_divert {
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } addr;
+ u_int16_t port;
+};
+
#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */
#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */
#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */
@@ -1343,31 +1565,32 @@ struct pfioc_natlook {
};
struct pfioc_state {
- u_int32_t nr;
- struct pf_state state;
+ struct pfsync_state state;
};
struct pfioc_src_node_kill {
- /* XXX returns the number of src nodes killed in psnk_af */
sa_family_t psnk_af;
struct pf_rule_addr psnk_src;
struct pf_rule_addr psnk_dst;
+ u_int psnk_killed;
};
struct pfioc_state_kill {
- /* XXX returns the number of states killed in psk_af */
+ struct pf_state_cmp psk_pfcmp;
sa_family_t psk_af;
int psk_proto;
struct pf_rule_addr psk_src;
struct pf_rule_addr psk_dst;
char psk_ifname[IFNAMSIZ];
+ char psk_label[PF_RULE_LABEL_SIZE];
+ u_int psk_killed;
};
struct pfioc_states {
int ps_len;
union {
- caddr_t psu_buf;
- struct pf_state *psu_states;
+ caddr_t psu_buf;
+ struct pfsync_state *psu_states;
} ps_u;
#define ps_buf ps_u.psu_buf
#define ps_states ps_u.psu_states
@@ -1518,55 +1741,97 @@ struct pfioc_iface {
#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table)
#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table)
#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table)
-#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table)
#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table)
#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table)
#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table)
#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table)
#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table)
-#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
+#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table)
#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table)
-#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
-#define DIOCOSFPFLUSH _IO('D', 78)
-#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
-#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
-#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
-#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
-#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
-#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
-#define DIOCCLRSRCNODES _IO('D', 85)
-#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
-#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
-#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
-#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
-#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
+#define DIOCOSFPFLUSH _IO('D', 78)
+#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
+#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
+#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
+#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
+#define DIOCCLRSRCNODES _IO('D', 85)
+#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
+#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
+#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
+#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
#ifdef __FreeBSD__
struct pf_ifspeed {
char ifname[IFNAMSIZ];
u_int32_t baudrate;
};
-#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
+#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
#endif
#ifdef _KERNEL
RB_HEAD(pf_src_tree, pf_src_node);
RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare);
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_src_tree, tree_src_tracking);
+#define V_tree_src_tracking VNET(tree_src_tracking)
+#else
extern struct pf_src_tree tree_src_tracking;
+#endif
RB_HEAD(pf_state_tree_id, pf_state);
RB_PROTOTYPE(pf_state_tree_id, pf_state,
entry_id, pf_state_compare_id);
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_state_tree_id, tree_id);
+#define V_tree_id VNET(tree_id)
+VNET_DECLARE(struct pf_state_queue, state_list);
+#define V_state_list VNET(state_list)
+#else
extern struct pf_state_tree_id tree_id;
extern struct pf_state_queue state_list;
+#endif
TAILQ_HEAD(pf_poolqueue, pf_pool);
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_poolqueue, pf_pools[2]);
+#define V_pf_pools VNET(pf_pools)
+#else
extern struct pf_poolqueue pf_pools[2];
+#endif
TAILQ_HEAD(pf_altqqueue, pf_altq);
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+#define V_pf_altqs VNET(pf_altqs)
+VNET_DECLARE(struct pf_palist, pf_pabuf);
+#define V_pf_pabuf VNET(pf_pabuf)
+#else
extern struct pf_altqqueue pf_altqs[2];
extern struct pf_palist pf_pabuf;
+#endif
+#ifdef __FreeBSD__
+VNET_DECLARE(u_int32_t, ticket_altqs_active);
+#define V_ticket_altqs_active VNET(ticket_altqs_active)
+VNET_DECLARE(u_int32_t, ticket_altqs_inactive);
+#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive)
+VNET_DECLARE(int, altqs_inactive_open);
+#define V_altqs_inactive_open VNET(altqs_inactive_open)
+VNET_DECLARE(u_int32_t, ticket_pabuf);
+#define V_ticket_pabuf VNET(ticket_pabuf)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
+#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
+#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+VNET_DECLARE(struct pf_poolqueue *, pf_pools_active);
+#define V_pf_pools_active VNET(pf_pools_active)
+VNET_DECLARE(struct pf_poolqueue *, pf_pools_inactive);
+#define V_pf_pools_inactive VNET(pf_pools_inactive)
+#else
extern u_int32_t ticket_altqs_active;
extern u_int32_t ticket_altqs_inactive;
extern int altqs_inactive_open;
@@ -1575,6 +1840,7 @@ extern struct pf_altqqueue *pf_altqs_active;
extern struct pf_altqqueue *pf_altqs_inactive;
extern struct pf_poolqueue *pf_pools_active;
extern struct pf_poolqueue *pf_pools_inactive;
+#endif
extern int pf_tbladdr_setup(struct pf_ruleset *,
struct pf_addr_wrap *);
extern void pf_tbladdr_remove(struct pf_addr_wrap *);
@@ -1582,49 +1848,84 @@ extern void pf_tbladdr_copyout(struct pf_addr_wrap *);
extern void pf_calc_skip_steps(struct pf_rulequeue *);
#ifdef __FreeBSD__
#ifdef ALTQ
-extern void pf_altq_ifnet_event(struct ifnet *, int);
+extern void pf_altq_ifnet_event(struct ifnet *, int);
#endif
-extern uma_zone_t pf_src_tree_pl, pf_rule_pl;
-extern uma_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
-extern uma_zone_t pfr_ktable_pl, pfr_kentry_pl, pfr_kentry_pl2;
-extern uma_zone_t pf_cache_pl, pf_cent_pl;
-extern uma_zone_t pf_state_scrub_pl;
-extern uma_zone_t pfi_addr_pl;
+VNET_DECLARE(uma_zone_t, pf_src_tree_pl);
+#define V_pf_src_tree_pl VNET(pf_src_tree_pl)
+VNET_DECLARE(uma_zone_t, pf_rule_pl);
+#define V_pf_rule_pl VNET(pf_rule_pl)
+VNET_DECLARE(uma_zone_t, pf_state_pl);
+#define V_pf_state_pl VNET(pf_state_pl)
+VNET_DECLARE(uma_zone_t, pf_state_key_pl);
+#define V_pf_state_key_pl VNET(pf_state_key_pl)
+VNET_DECLARE(uma_zone_t, pf_state_item_pl);
+#define V_pf_state_item_pl VNET(pf_state_item_pl)
+VNET_DECLARE(uma_zone_t, pf_altq_pl);
+#define V_pf_altq_pl VNET(pf_altq_pl)
+VNET_DECLARE(uma_zone_t, pf_pooladdr_pl);
+#define V_pf_pooladdr_pl VNET(pf_pooladdr_pl)
+VNET_DECLARE(uma_zone_t, pfr_ktable_pl);
+#define V_pfr_ktable_pl VNET(pfr_ktable_pl)
+VNET_DECLARE(uma_zone_t, pfr_kentry_pl);
+#define V_pfr_kentry_pl VNET(pfr_kentry_pl)
+VNET_DECLARE(uma_zone_t, pfr_kcounters_pl);
+#define V_pfr_kcounters_pl VNET(pfr_kcounters_pl)
+VNET_DECLARE(uma_zone_t, pf_cache_pl);
+#define V_pf_cache_pl VNET(pf_cache_pl)
+VNET_DECLARE(uma_zone_t, pf_cent_pl);
+#define V_pf_cent_pl VNET(pf_cent_pl)
+VNET_DECLARE(uma_zone_t, pf_state_scrub_pl);
+#define V_pf_state_scrub_pl VNET(pf_state_scrub_pl)
+VNET_DECLARE(uma_zone_t, pfi_addr_pl);
+#define V_pfi_addr_pl VNET(pfi_addr_pl)
#else
extern struct pool pf_src_tree_pl, pf_rule_pl;
-extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
+extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl,
+ pf_altq_pl, pf_pooladdr_pl;
extern struct pool pf_state_scrub_pl;
#endif
extern void pf_purge_thread(void *);
#ifdef __FreeBSD__
extern int pf_purge_expired_src_nodes(int);
-extern int pf_purge_expired_states(u_int32_t, int);
+extern int pf_purge_expired_states(u_int32_t , int);
#else
extern void pf_purge_expired_src_nodes(int);
extern void pf_purge_expired_states(u_int32_t);
#endif
extern void pf_unlink_state(struct pf_state *);
extern void pf_free_state(struct pf_state *);
-extern int pf_insert_state(struct pfi_kif *,
+extern int pf_state_insert(struct pfi_kif *,
+ struct pf_state_key *,
+ struct pf_state_key *,
struct pf_state *);
extern int pf_insert_src_node(struct pf_src_node **,
struct pf_rule *, struct pf_addr *,
sa_family_t);
void pf_src_tree_remove_state(struct pf_state *);
extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *);
-extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key,
- u_int8_t tree, int *more);
+extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *,
+ u_int, int *);
extern void pf_print_state(struct pf_state *);
extern void pf_print_flags(u_int8_t);
extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
u_int8_t);
+#ifdef __FreeBSD__
+VNET_DECLARE(struct ifnet *, sync_ifp);
+#define V_sync_ifp VNET(sync_ifp);
+VNET_DECLARE(struct pf_rule, pf_default_rule);
+#define V_pf_default_rule VNET(pf_default_rule)
+#else
extern struct ifnet *sync_ifp;
extern struct pf_rule pf_default_rule;
+#endif
extern void pf_addrcpy(struct pf_addr *, struct pf_addr *,
u_int8_t);
void pf_rm_rule(struct pf_rulequeue *,
struct pf_rule *);
+#ifndef __FreeBSD__
+struct pf_divert *pf_find_divert(struct mbuf *);
+#endif
#ifdef INET
#ifdef __FreeBSD__
@@ -1656,8 +1957,11 @@ void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t,
u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *,
struct pf_pdesc *);
+void pf_send_deferred_syn(struct pf_state *);
int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
struct pf_addr *, sa_family_t);
+int pf_match_addr_range(struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t);
int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t);
@@ -1679,13 +1983,18 @@ int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
u_int32_t
pf_state_expires(const struct pf_state *);
void pf_purge_expired_fragments(void);
-int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *);
-int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *);
+int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
+ int);
+int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *,
+ int);
#ifdef __FreeBSD__
-int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *);
+int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *);
#else
int pf_socket_lookup(int, struct pf_pdesc *);
#endif
+struct pf_state_key *pf_alloc_state_key(int);
+void pf_pkt_addr_changed(struct mbuf *);
+int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int);
void pfr_initialize(void);
int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
@@ -1694,7 +2003,7 @@ int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *,
struct pf_addr **, struct pf_addr **, sa_family_t);
void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
struct pfr_ktable *
- pfr_attach_table(struct pf_ruleset *, char *);
+ pfr_attach_table(struct pf_ruleset *, char *, int);
void pfr_detach_table(struct pfr_ktable *);
int pfr_clr_tables(struct pfr_table *, int *, int);
int pfr_add_tables(struct pfr_table *, int, int *, int);
@@ -1723,8 +2032,12 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
int *, u_int32_t, int);
-extern struct pfi_statehead pfi_statehead;
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pfi_kif *, pfi_all);
+#define V_pfi_all VNET(pfi_all)
+#else
extern struct pfi_kif *pfi_all;
+#endif
void pfi_initialize(void);
#ifdef __FreeBSD__
@@ -1744,30 +2057,44 @@ int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
void pfi_dynaddr_remove(struct pf_addr_wrap *);
void pfi_dynaddr_copyout(struct pf_addr_wrap *);
-void pfi_fill_oldstatus(struct pf_status *);
-int pfi_clr_istats(const char *);
+void pfi_update_status(const char *, struct pf_status *);
int pfi_get_ifaces(const char *, struct pfi_kif *, int *);
int pfi_set_flags(const char *, int);
int pfi_clear_flags(const char *, int);
+#ifdef __FreeBSD__
+int pf_match_tag(struct mbuf *, struct pf_rule *, int *,
+ struct pf_mtag *);
+#else
+int pf_match_tag(struct mbuf *, struct pf_rule *, int *);
+#endif
u_int16_t pf_tagname2tag(char *);
void pf_tag2tagname(u_int16_t, char *);
void pf_tag_ref(u_int16_t);
void pf_tag_unref(u_int16_t);
-int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int);
+#ifdef __FreeBSD__
+int pf_tag_packet(struct mbuf *, int, int, struct pf_mtag *);
+#else
+int pf_tag_packet(struct mbuf *, int, int);
+#endif
u_int32_t pf_qname2qid(char *);
void pf_qid2qname(u_int32_t, char *);
void pf_qid_unref(u_int32_t);
-#ifndef __FreeBSD__
-struct pf_mtag *pf_find_mtag(struct mbuf *);
-struct pf_mtag *pf_get_mtag(struct mbuf *);
-#endif
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_status, pf_status);
+#define V_pf_status VNET(pf_status)
+#else
extern struct pf_status pf_status;
+#endif
#ifdef __FreeBSD__
-extern uma_zone_t pf_frent_pl, pf_frag_pl;
-extern struct sx pf_consistency_lock;
+VNET_DECLARE(uma_zone_t, pf_frent_pl);
+#define V_pf_frent_pl VNET(pf_frent_pl)
+VNET_DECLARE(uma_zone_t, pf_frag_pl);
+#define V_pf_frag_pl VNET(pf_frag_pl)
+VNET_DECLARE(struct sx, pf_consistency_lock);
+#define V_pf_consistency_lock VNET(pf_consistency_lock)
#else
extern struct pool pf_frent_pl, pf_frag_pl;
extern struct rwlock pf_consistency_lock;
@@ -1777,7 +2104,12 @@ struct pf_pool_limit {
void *pp;
unsigned limit;
};
+#ifdef __FreeBSD__
+VNET_DECLARE(struct pf_pool_limit, pf_pool_limits[PF_LIMIT_MAX]);
+#define V_pf_pool_limits VNET(pf_pool_limits)
+#else
extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
+#endif
#ifdef __FreeBSD__
struct pf_frent {
@@ -1788,34 +2120,44 @@ struct pf_frent {
struct pf_frcache {
LIST_ENTRY(pf_frcache) fr_next;
- uint16_t fr_off;
- uint16_t fr_end;
+ uint16_t fr_off;
+ uint16_t fr_end;
};
struct pf_fragment {
RB_ENTRY(pf_fragment) fr_entry;
TAILQ_ENTRY(pf_fragment) frag_next;
- struct in_addr fr_src;
- struct in_addr fr_dst;
- u_int8_t fr_p; /* protocol of this fragment */
- u_int8_t fr_flags; /* status flags */
- u_int16_t fr_id; /* fragment id for reassemble */
- u_int16_t fr_max; /* fragment data max */
- u_int32_t fr_timeout;
-#define fr_queue fr_u.fru_queue
-#define fr_cache fr_u.fru_cache
+ struct in_addr fr_src;
+ struct in_addr fr_dst;
+ u_int8_t fr_p; /* protocol of this fragment */
+ u_int8_t fr_flags; /* status flags */
+ u_int16_t fr_id; /* fragment id for reassemble */
+ u_int16_t fr_max; /* fragment data max */
+ u_int32_t fr_timeout;
+#define fr_queue fr_u.fru_queue
+#define fr_cache fr_u.fru_cache
union {
- LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
- LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
+ LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
+ LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
} fr_u;
};
#endif /* (__FreeBSD__) */
#endif /* _KERNEL */
-extern struct pf_anchor_global pf_anchors;
-extern struct pf_anchor pf_main_anchor;
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_anchor_global, pf_anchors);
+#define V_pf_anchors VNET(pf_anchors)
+VNET_DECLARE(struct pf_anchor, pf_main_anchor);
+#define V_pf_main_anchor VNET(pf_main_anchor)
+#define pf_main_ruleset V_pf_main_anchor.ruleset
+#endif
+#else
+extern struct pf_anchor_global pf_anchors;
+extern struct pf_anchor pf_main_anchor;
#define pf_main_ruleset pf_main_anchor.ruleset
+#endif
/* these ruleset functions can be linked into userland programs (pfctl) */
int pf_get_ruleset_number(u_int8_t);
@@ -1832,7 +2174,6 @@ struct pf_ruleset *pf_find_or_create_ruleset(const char *);
void pf_rs_initialize(void);
#ifndef __FreeBSD__
-/* ?!? */
#ifdef _KERNEL
int pf_anchor_copyout(const struct pf_ruleset *,
const struct pf_rule *, struct pfioc_rule *);
@@ -1863,4 +2204,31 @@ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
struct pf_os_fingerprint *
pf_osfp_validate(void);
+#ifdef _KERNEL
+void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void pf_step_into_anchor(int *, struct pf_ruleset **, int,
+ struct pf_rule **, struct pf_rule **, int *);
+int pf_step_out_of_anchor(int *, struct pf_ruleset **,
+ int, struct pf_rule **, struct pf_rule **,
+ int *);
+
+int pf_map_addr(u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, struct pf_src_node **);
+struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *, struct pf_src_node **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_addr *, struct pf_addr *,
+ u_int16_t, u_int16_t);
+
+int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_addr *, struct pf_addr *,
+ u_int16_t, u_int16_t);
+#endif /* _KERNEL */
+
+
#endif /* _NET_PFVAR_H_ */
diff --git a/freebsd/sys/contrib/pf/netinet/in4_cksum.c b/freebsd/sys/contrib/pf/netinet/in4_cksum.c
index 9b4a5360..19cc8ac4 100644
--- a/freebsd/sys/contrib/pf/netinet/in4_cksum.c
+++ b/freebsd/sys/contrib/pf/netinet/in4_cksum.c
@@ -77,7 +77,7 @@
#include <machine/in_cksum.h>
#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
-#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);}
int in4_cksum(struct mbuf *, u_int8_t, int, int);
diff --git a/freebsd/sys/dev/bce/if_bce.c b/freebsd/sys/dev/bce/if_bce.c
index 21ef5dc7..dfae5dcc 100644
--- a/freebsd/sys/dev/bce/if_bce.c
+++ b/freebsd/sys/dev/bce/if_bce.c
@@ -377,7 +377,8 @@ static void bce_release_resources (struct bce_softc *);
/****************************************************************************/
static void bce_fw_cap_init (struct bce_softc *);
static int bce_fw_sync (struct bce_softc *, u32);
-static void bce_load_rv2p_fw (struct bce_softc *, u32 *, u32, u32);
+static void bce_load_rv2p_fw (struct bce_softc *, const u32 *, u32,
+ u32);
static void bce_load_cpu_fw (struct bce_softc *,
struct cpu_reg *, struct fw_info *);
static void bce_start_cpu (struct bce_softc *, struct cpu_reg *);
@@ -400,14 +401,12 @@ static int bce_blockinit (struct bce_softc *);
static int bce_init_tx_chain (struct bce_softc *);
static void bce_free_tx_chain (struct bce_softc *);
-static int bce_get_rx_buf (struct bce_softc *,
- struct mbuf *, u16 *, u16 *, u32 *);
+static int bce_get_rx_buf (struct bce_softc *, u16, u16, u32 *);
static int bce_init_rx_chain (struct bce_softc *);
static void bce_fill_rx_chain (struct bce_softc *);
static void bce_free_rx_chain (struct bce_softc *);
-static int bce_get_pg_buf (struct bce_softc *,
- struct mbuf *, u16 *, u16 *);
+static int bce_get_pg_buf (struct bce_softc *, u16, u16);
static int bce_init_pg_chain (struct bce_softc *);
static void bce_fill_pg_chain (struct bce_softc *);
static void bce_free_pg_chain (struct bce_softc *);
@@ -490,7 +489,7 @@ DRIVER_MODULE(miibus, bce, miibus_driver, miibus_devclass, NULL, NULL);
/****************************************************************************/
/* Tunable device values */
/****************************************************************************/
-SYSCTL_NODE(_hw, OID_AUTO, bce, CTLFLAG_RD, 0, "bce driver parameters");
+static SYSCTL_NODE(_hw, OID_AUTO, bce, CTLFLAG_RD, 0, "bce driver parameters");
/* Allowable values are TRUE or FALSE */
static int bce_verbose = TRUE;
@@ -501,14 +500,14 @@ SYSCTL_INT(_hw_bce, OID_AUTO, verbose, CTLFLAG_RDTUN, &bce_verbose, 0,
/* Allowable values are TRUE or FALSE */
static int bce_tso_enable = TRUE;
TUNABLE_INT("hw.bce.tso_enable", &bce_tso_enable);
-SYSCTL_UINT(_hw_bce, OID_AUTO, tso_enable, CTLFLAG_RDTUN, &bce_tso_enable, 0,
+SYSCTL_INT(_hw_bce, OID_AUTO, tso_enable, CTLFLAG_RDTUN, &bce_tso_enable, 0,
"TSO Enable/Disable");
/* Allowable values are 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */
/* ToDo: Add MSI-X support. */
static int bce_msi_enable = 1;
TUNABLE_INT("hw.bce.msi_enable", &bce_msi_enable);
-SYSCTL_UINT(_hw_bce, OID_AUTO, msi_enable, CTLFLAG_RDTUN, &bce_msi_enable, 0,
+SYSCTL_INT(_hw_bce, OID_AUTO, msi_enable, CTLFLAG_RDTUN, &bce_msi_enable, 0,
"MSI-X|MSI|INTx selector");
/* Allowable values are 1, 2, 4, 8. */
@@ -808,13 +807,13 @@ bce_probe_pci_caps(device_t dev, struct bce_softc *sc)
DBENTER(BCE_VERBOSE_LOAD);
/* Check if PCI-X capability is enabled. */
- if (pci_find_extcap(dev, PCIY_PCIX, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_PCIX, &reg) == 0) {
if (reg != 0)
sc->bce_cap_flags |= BCE_PCIX_CAPABLE_FLAG;
}
/* Check if PCIe capability is enabled. */
- if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
if (reg != 0) {
u16 link_status = pci_read_config(dev, reg + 0x12, 2);
DBPRINT(sc, BCE_INFO_LOAD, "PCIe link_status = "
@@ -827,13 +826,13 @@ bce_probe_pci_caps(device_t dev, struct bce_softc *sc)
}
/* Check if MSI capability is enabled. */
- if (pci_find_extcap(dev, PCIY_MSI, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_MSI, &reg) == 0) {
if (reg != 0)
sc->bce_cap_flags |= BCE_MSI_CAPABLE_FLAG;
}
/* Check if MSI-X capability is enabled. */
- if (pci_find_extcap(dev, PCIY_MSIX, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_MSIX, &reg) == 0) {
if (reg != 0)
sc->bce_cap_flags |= BCE_MSIX_CAPABLE_FLAG;
}
@@ -1021,7 +1020,6 @@ bce_set_tunables(struct bce_softc *sc)
sc->bce_tx_ticks = DEFAULT_TX_TICKS;
sc->bce_tx_quick_cons_trip = DEFAULT_TX_QUICK_CONS_TRIP;
}
-
}
@@ -1334,23 +1332,6 @@ bce_attach(device_t dev)
/* Fetch the permanent Ethernet MAC address. */
bce_get_mac_addr(sc);
- /*
- * Trip points control how many BDs
- * should be ready before generating an
- * interrupt while ticks control how long
- * a BD can sit in the chain before
- * generating an interrupt. Set the default
- * values for the RX and TX chains.
- */
-
- /* Not used for L2. */
- sc->bce_comp_prod_trip_int = 0;
- sc->bce_comp_prod_trip = 0;
- sc->bce_com_ticks_int = 0;
- sc->bce_com_ticks = 0;
- sc->bce_cmd_ticks_int = 0;
- sc->bce_cmd_ticks = 0;
-
/* Update statistics once every second. */
sc->bce_stats_ticks = 1000000 & 0xffff00;
@@ -1465,7 +1446,7 @@ bce_attach(device_t dev)
/* MII child bus by attaching the PHY. */
rc = mii_attach(dev, &sc->bce_miibus, ifp, bce_ifmedia_upd,
bce_ifmedia_sts, BMSR_DEFCAPMASK, sc->bce_phy_addr,
- MII_OFFSET_ANY, MIIF_DOPAUSE | MIIF_FORCEPAUSE);
+ MII_OFFSET_ANY, MIIF_DOPAUSE);
if (rc != 0) {
BCE_PRINTF("%s(%d): attaching PHYs failed\n", __FILE__,
__LINE__);
@@ -1937,7 +1918,6 @@ bce_miibus_read_reg(device_t dev, int phy, int reg)
DB_PRINT_PHY_REG(reg, val);
return (val & 0xffff);
-
}
@@ -2099,10 +2079,12 @@ bce_miibus_statchg(device_t dev)
DBPRINT(sc, BCE_INFO_PHY,
"%s(): Enabling RX flow control.\n", __FUNCTION__);
BCE_SETBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN);
+ sc->bce_flags |= BCE_USING_RX_FLOW_CONTROL;
} else {
DBPRINT(sc, BCE_INFO_PHY,
"%s(): Disabling RX flow control.\n", __FUNCTION__);
BCE_CLRBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN);
+ sc->bce_flags &= ~BCE_USING_RX_FLOW_CONTROL;
}
if ((IFM_OPTIONS(media_active) & IFM_ETH_TXPAUSE) != 0) {
@@ -3039,7 +3021,6 @@ bce_get_rx_buffer_sizes(struct bce_softc *sc, int mtu)
roundup2((MSIZE - MHLEN), 16) - (MSIZE - MHLEN);
sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size -
sc->rx_bd_mbuf_align_pad;
- sc->pg_bd_mbuf_alloc_size = MCLBYTES;
} else {
if ((mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
ETHER_CRC_LEN) > MCLBYTES) {
@@ -3069,7 +3050,6 @@ bce_get_rx_buffer_sizes(struct bce_softc *sc, int mtu)
sc->rx_bd_mbuf_align_pad);
DBEXIT(BCE_VERBOSE_LOAD);
-
}
/****************************************************************************/
@@ -3486,8 +3466,6 @@ bce_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
} else {
*busaddr = segs->ds_addr;
}
-
- return;
}
@@ -3568,7 +3546,7 @@ bce_dma_alloc(device_t dev)
sc->status_block, BCE_STATUS_BLK_SZ, bce_dma_map_addr,
&sc->status_block_paddr, BUS_DMA_NOWAIT);
- if (error) {
+ if (error || sc->status_block_paddr == 0) {
BCE_PRINTF("%s(%d): Could not map status block "
"DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3605,7 +3583,7 @@ bce_dma_alloc(device_t dev)
sc->stats_block, BCE_STATS_BLK_SZ, bce_dma_map_addr,
&sc->stats_block_paddr, BUS_DMA_NOWAIT);
- if(error) {
+ if (error || sc->stats_block_paddr == 0) {
BCE_PRINTF("%s(%d): Could not map statistics block "
"DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3657,7 +3635,7 @@ bce_dma_alloc(device_t dev)
sc->ctx_block[i], BCM_PAGE_SIZE, bce_dma_map_addr,
&sc->ctx_paddr[i], BUS_DMA_NOWAIT);
- if (error) {
+ if (error || sc->ctx_paddr[i] == 0) {
BCE_PRINTF("%s(%d): Could not map CTX "
"DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3702,7 +3680,7 @@ bce_dma_alloc(device_t dev)
BCE_TX_CHAIN_PAGE_SZ, bce_dma_map_addr,
&sc->tx_bd_chain_paddr[i], BUS_DMA_NOWAIT);
- if (error) {
+ if (error || sc->tx_bd_chain_paddr[i] == 0) {
BCE_PRINTF("%s(%d): Could not map TX descriptor "
"chain DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3779,7 +3757,7 @@ bce_dma_alloc(device_t dev)
BCE_RX_CHAIN_PAGE_SZ, bce_dma_map_addr,
&sc->rx_bd_chain_paddr[i], BUS_DMA_NOWAIT);
- if (error) {
+ if (error || sc->rx_bd_chain_paddr[i] == 0) {
BCE_PRINTF("%s(%d): Could not map RX descriptor "
"chain DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3795,21 +3773,17 @@ bce_dma_alloc(device_t dev)
* Create a DMA tag for RX mbufs.
*/
if (bce_hdr_split == TRUE)
- max_size = max_seg_size = ((sc->rx_bd_mbuf_alloc_size < MCLBYTES) ?
+ max_size = ((sc->rx_bd_mbuf_alloc_size < MCLBYTES) ?
MCLBYTES : sc->rx_bd_mbuf_alloc_size);
else
- max_size = max_seg_size = MJUM9BYTES;
- max_segments = 1;
+ max_size = MJUM9BYTES;
DBPRINT(sc, BCE_INFO_LOAD, "%s(): Creating rx_mbuf_tag "
- "(max size = 0x%jX max segments = %d, max segment "
- "size = 0x%jX)\n", __FUNCTION__, (uintmax_t) max_size,
- max_segments, (uintmax_t) max_seg_size);
+ "(max size = 0x%jX)\n", __FUNCTION__, (uintmax_t)max_size);
if (bus_dma_tag_create(sc->parent_tag, BCE_RX_BUF_ALIGN,
BCE_DMA_BOUNDARY, sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL,
- max_size, max_segments, max_seg_size, 0, NULL, NULL,
- &sc->rx_mbuf_tag)) {
+ max_size, 1, max_size, 0, NULL, NULL, &sc->rx_mbuf_tag)) {
BCE_PRINTF("%s(%d): Could not allocate RX mbuf DMA tag!\n",
__FILE__, __LINE__);
rc = ENOMEM;
@@ -3860,7 +3834,7 @@ bce_dma_alloc(device_t dev)
BCE_PG_CHAIN_PAGE_SZ, bce_dma_map_addr,
&sc->pg_bd_chain_paddr[i], BUS_DMA_NOWAIT);
- if (error) {
+ if (error || sc->pg_bd_chain_paddr[i] == 0) {
BCE_PRINTF("%s(%d): Could not map page descriptor "
"chain DMA memory!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -3875,12 +3849,9 @@ bce_dma_alloc(device_t dev)
/*
* Create a DMA tag for page mbufs.
*/
- max_size = max_seg_size = ((sc->pg_bd_mbuf_alloc_size < MCLBYTES) ?
- MCLBYTES : sc->pg_bd_mbuf_alloc_size);
-
if (bus_dma_tag_create(sc->parent_tag, 1, BCE_DMA_BOUNDARY,
- sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL,
- max_size, 1, max_seg_size, 0, NULL, NULL, &sc->pg_mbuf_tag)) {
+ sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
+ 1, MCLBYTES, 0, NULL, NULL, &sc->pg_mbuf_tag)) {
BCE_PRINTF("%s(%d): Could not allocate page mbuf "
"DMA tag!\n", __FILE__, __LINE__);
rc = ENOMEM;
@@ -4030,7 +4001,7 @@ bce_fw_sync_exit:
/* Nothing. */
/****************************************************************************/
static void
-bce_load_rv2p_fw(struct bce_softc *sc, u32 *rv2p_code,
+bce_load_rv2p_fw(struct bce_softc *sc, const u32 *rv2p_code,
u32 rv2p_code_len, u32 rv2p_proc)
{
int i;
@@ -5246,24 +5217,28 @@ bce_blockinit(struct bce_softc *sc)
REG_WR(sc, BCE_HC_STATISTICS_ADDR_H,
BCE_ADDR_HI(sc->stats_block_paddr));
- /* Program various host coalescing parameters. */
+ /*
+ * Program various host coalescing parameters.
+ * Trip points control how many BDs should be ready before generating
+ * an interrupt while ticks control how long a BD can sit in the chain
+ * before generating an interrupt.
+ */
REG_WR(sc, BCE_HC_TX_QUICK_CONS_TRIP,
- (sc->bce_tx_quick_cons_trip_int << 16) | sc->bce_tx_quick_cons_trip);
+ (sc->bce_tx_quick_cons_trip_int << 16) |
+ sc->bce_tx_quick_cons_trip);
REG_WR(sc, BCE_HC_RX_QUICK_CONS_TRIP,
- (sc->bce_rx_quick_cons_trip_int << 16) | sc->bce_rx_quick_cons_trip);
- REG_WR(sc, BCE_HC_COMP_PROD_TRIP,
- (sc->bce_comp_prod_trip_int << 16) | sc->bce_comp_prod_trip);
+ (sc->bce_rx_quick_cons_trip_int << 16) |
+ sc->bce_rx_quick_cons_trip);
REG_WR(sc, BCE_HC_TX_TICKS,
(sc->bce_tx_ticks_int << 16) | sc->bce_tx_ticks);
REG_WR(sc, BCE_HC_RX_TICKS,
(sc->bce_rx_ticks_int << 16) | sc->bce_rx_ticks);
- REG_WR(sc, BCE_HC_COM_TICKS,
- (sc->bce_com_ticks_int << 16) | sc->bce_com_ticks);
- REG_WR(sc, BCE_HC_CMD_TICKS,
- (sc->bce_cmd_ticks_int << 16) | sc->bce_cmd_ticks);
- REG_WR(sc, BCE_HC_STATS_TICKS,
- (sc->bce_stats_ticks & 0xffff00));
+ REG_WR(sc, BCE_HC_STATS_TICKS, sc->bce_stats_ticks & 0xffff00);
REG_WR(sc, BCE_HC_STAT_COLLECT_TICKS, 0xbb8); /* 3ms */
+ /* Not used for L2. */
+ REG_WR(sc, BCE_HC_COMP_PROD_TRIP, 0);
+ REG_WR(sc, BCE_HC_COM_TICKS, 0);
+ REG_WR(sc, BCE_HC_CMD_TICKS, 0);
/* Configure the Host Coalescing block. */
val = BCE_HC_CONFIG_RX_TMR_MODE | BCE_HC_CONFIG_TX_TMR_MODE |
@@ -5378,29 +5353,27 @@ bce_blockinit_exit:
/* 0 for success, positive value for failure. */
/****************************************************************************/
static int
-bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
- u16 *chain_prod, u32 *prod_bseq)
+bce_get_rx_buf(struct bce_softc *sc, u16 prod, u16 chain_prod, u32 *prod_bseq)
{
- bus_dmamap_t map;
- bus_dma_segment_t segs[BCE_MAX_SEGMENTS];
+ bus_dma_segment_t segs[1];
struct mbuf *m_new = NULL;
struct rx_bd *rxbd;
int nsegs, error, rc = 0;
#ifdef BCE_DEBUG
- u16 debug_chain_prod = *chain_prod;
+ u16 debug_chain_prod = chain_prod;
#endif
DBENTER(BCE_EXTREME_RESET | BCE_EXTREME_RECV | BCE_EXTREME_LOAD);
/* Make sure the inputs are valid. */
- DBRUNIF((*chain_prod > MAX_RX_BD_ALLOC),
+ DBRUNIF((chain_prod > MAX_RX_BD_ALLOC),
BCE_PRINTF("%s(%d): RX producer out of range: "
"0x%04X > 0x%04X\n", __FILE__, __LINE__,
- *chain_prod, (u16) MAX_RX_BD_ALLOC));
+ chain_prod, (u16)MAX_RX_BD_ALLOC));
DBPRINT(sc, BCE_EXTREME_RECV, "%s(enter): prod = 0x%04X, "
"chain_prod = 0x%04X, prod_bseq = 0x%08X\n", __FUNCTION__,
- *prod, *chain_prod, *prod_bseq);
+ prod, chain_prod, *prod_bseq);
/* Update some debug statistic counters */
DBRUNIF((sc->free_rx_bd < sc->rx_low_watermark),
@@ -5408,35 +5381,28 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
DBRUNIF((sc->free_rx_bd == sc->max_rx_bd),
sc->rx_empty_count++);
- /* Check whether this is a new mbuf allocation. */
- if (m == NULL) {
-
- /* Simulate an mbuf allocation failure. */
- DBRUNIF(DB_RANDOMTRUE(mbuf_alloc_failed_sim_control),
- sc->mbuf_alloc_failed_count++;
- sc->mbuf_alloc_failed_sim_count++;
- rc = ENOBUFS;
- goto bce_get_rx_buf_exit);
+ /* Simulate an mbuf allocation failure. */
+ DBRUNIF(DB_RANDOMTRUE(mbuf_alloc_failed_sim_control),
+ sc->mbuf_alloc_failed_count++;
+ sc->mbuf_alloc_failed_sim_count++;
+ rc = ENOBUFS;
+ goto bce_get_rx_buf_exit);
- /* This is a new mbuf allocation. */
- if (bce_hdr_split == TRUE)
- MGETHDR(m_new, M_DONTWAIT, MT_DATA);
- else
- m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR,
- sc->rx_bd_mbuf_alloc_size);
-
- if (m_new == NULL) {
- sc->mbuf_alloc_failed_count++;
- rc = ENOBUFS;
- goto bce_get_rx_buf_exit;
- }
+ /* This is a new mbuf allocation. */
+ if (bce_hdr_split == TRUE)
+ MGETHDR(m_new, M_NOWAIT, MT_DATA);
+ else
+ m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
+ sc->rx_bd_mbuf_alloc_size);
- DBRUN(sc->debug_rx_mbuf_alloc++);
- } else {
- /* Reuse an existing mbuf. */
- m_new = m;
+ if (m_new == NULL) {
+ sc->mbuf_alloc_failed_count++;
+ rc = ENOBUFS;
+ goto bce_get_rx_buf_exit;
}
+ DBRUN(sc->debug_rx_mbuf_alloc++);
+
/* Make sure we have a valid packet header. */
M_ASSERTPKTHDR(m_new);
@@ -5447,9 +5413,8 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
/* ToDo: Consider calling m_fragment() to test error handling. */
/* Map the mbuf cluster into device memory. */
- map = sc->rx_mbuf_map[*chain_prod];
- error = bus_dmamap_load_mbuf_sg(sc->rx_mbuf_tag, map, m_new,
- segs, &nsegs, BUS_DMA_NOWAIT);
+ error = bus_dmamap_load_mbuf_sg(sc->rx_mbuf_tag,
+ sc->rx_mbuf_map[chain_prod], m_new, segs, &nsegs, BUS_DMA_NOWAIT);
/* Handle any mapping errors. */
if (error) {
@@ -5470,7 +5435,7 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
__FUNCTION__, nsegs));
/* Setup the rx_bd for the segment. */
- rxbd = &sc->rx_bd_chain[RX_PAGE(*chain_prod)][RX_IDX(*chain_prod)];
+ rxbd = &sc->rx_bd_chain[RX_PAGE(chain_prod)][RX_IDX(chain_prod)];
rxbd->rx_bd_haddr_lo = htole32(BCE_ADDR_LO(segs[0].ds_addr));
rxbd->rx_bd_haddr_hi = htole32(BCE_ADDR_HI(segs[0].ds_addr));
@@ -5479,15 +5444,15 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
*prod_bseq += segs[0].ds_len;
/* Save the mbuf and update our counter. */
- sc->rx_mbuf_ptr[*chain_prod] = m_new;
+ sc->rx_mbuf_ptr[chain_prod] = m_new;
sc->free_rx_bd -= nsegs;
DBRUNMSG(BCE_INSANE_RECV,
bce_dump_rx_mbuf_chain(sc, debug_chain_prod, nsegs));
DBPRINT(sc, BCE_EXTREME_RECV, "%s(exit): prod = 0x%04X, "
- "chain_prod = 0x%04X, prod_bseq = 0x%08X\n",
- __FUNCTION__, *prod, *chain_prod, *prod_bseq);
+ "chain_prod = 0x%04X, prod_bseq = 0x%08X\n", __FUNCTION__, prod,
+ chain_prod, *prod_bseq);
bce_get_rx_buf_exit:
DBEXIT(BCE_EXTREME_RESET | BCE_EXTREME_RECV | BCE_EXTREME_LOAD);
@@ -5503,68 +5468,56 @@ bce_get_rx_buf_exit:
/* 0 for success, positive value for failure. */
/****************************************************************************/
static int
-bce_get_pg_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
- u16 *prod_idx)
+bce_get_pg_buf(struct bce_softc *sc, u16 prod, u16 prod_idx)
{
- bus_dmamap_t map;
- bus_addr_t busaddr;
+ bus_dma_segment_t segs[1];
struct mbuf *m_new = NULL;
struct rx_bd *pgbd;
- int error, rc = 0;
+ int error, nsegs, rc = 0;
#ifdef BCE_DEBUG
- u16 debug_prod_idx = *prod_idx;
+ u16 debug_prod_idx = prod_idx;
#endif
DBENTER(BCE_EXTREME_RESET | BCE_EXTREME_RECV | BCE_EXTREME_LOAD);
/* Make sure the inputs are valid. */
- DBRUNIF((*prod_idx > MAX_PG_BD_ALLOC),
+ DBRUNIF((prod_idx > MAX_PG_BD_ALLOC),
BCE_PRINTF("%s(%d): page producer out of range: "
"0x%04X > 0x%04X\n", __FILE__, __LINE__,
- *prod_idx, (u16) MAX_PG_BD_ALLOC));
+ prod_idx, (u16)MAX_PG_BD_ALLOC));
DBPRINT(sc, BCE_EXTREME_RECV, "%s(enter): prod = 0x%04X, "
- "chain_prod = 0x%04X\n", __FUNCTION__, *prod, *prod_idx);
+ "chain_prod = 0x%04X\n", __FUNCTION__, prod, prod_idx);
/* Update counters if we've hit a new low or run out of pages. */
DBRUNIF((sc->free_pg_bd < sc->pg_low_watermark),
sc->pg_low_watermark = sc->free_pg_bd);
DBRUNIF((sc->free_pg_bd == sc->max_pg_bd), sc->pg_empty_count++);
- /* Check whether this is a new mbuf allocation. */
- if (m == NULL) {
-
- /* Simulate an mbuf allocation failure. */
- DBRUNIF(DB_RANDOMTRUE(mbuf_alloc_failed_sim_control),
- sc->mbuf_alloc_failed_count++;
- sc->mbuf_alloc_failed_sim_count++;
- rc = ENOBUFS;
- goto bce_get_pg_buf_exit);
-
- /* This is a new mbuf allocation. */
- m_new = m_getcl(M_DONTWAIT, MT_DATA, 0);
- if (m_new == NULL) {
- sc->mbuf_alloc_failed_count++;
- rc = ENOBUFS;
- goto bce_get_pg_buf_exit;
- }
-
- DBRUN(sc->debug_pg_mbuf_alloc++);
- } else {
- /* Reuse an existing mbuf. */
- m_new = m;
- m_new->m_data = m_new->m_ext.ext_buf;
+ /* Simulate an mbuf allocation failure. */
+ DBRUNIF(DB_RANDOMTRUE(mbuf_alloc_failed_sim_control),
+ sc->mbuf_alloc_failed_count++;
+ sc->mbuf_alloc_failed_sim_count++;
+ rc = ENOBUFS;
+ goto bce_get_pg_buf_exit);
+
+ /* This is a new mbuf allocation. */
+ m_new = m_getcl(M_NOWAIT, MT_DATA, 0);
+ if (m_new == NULL) {
+ sc->mbuf_alloc_failed_count++;
+ rc = ENOBUFS;
+ goto bce_get_pg_buf_exit;
}
- m_new->m_len = sc->pg_bd_mbuf_alloc_size;
+ DBRUN(sc->debug_pg_mbuf_alloc++);
+
+ m_new->m_len = MCLBYTES;
/* ToDo: Consider calling m_fragment() to test error handling. */
/* Map the mbuf cluster into device memory. */
- map = sc->pg_mbuf_map[*prod_idx];
- error = bus_dmamap_load(sc->pg_mbuf_tag, map, mtod(m_new, void *),
- sc->pg_bd_mbuf_alloc_size, bce_dma_map_addr,
- &busaddr, BUS_DMA_NOWAIT);
+ error = bus_dmamap_load_mbuf_sg(sc->pg_mbuf_tag,
+ sc->pg_mbuf_map[prod_idx], m_new, segs, &nsegs, BUS_DMA_NOWAIT);
/* Handle any mapping errors. */
if (error) {
@@ -5578,28 +5531,32 @@ bce_get_pg_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod,
goto bce_get_pg_buf_exit;
}
+ /* All mbufs must map to a single segment. */
+ KASSERT(nsegs == 1, ("%s(): Too many segments returned (%d)!",
+ __FUNCTION__, nsegs));
+
/* ToDo: Do we need bus_dmamap_sync(,,BUS_DMASYNC_PREREAD) here? */
/*
* The page chain uses the same rx_bd data structure
* as the receive chain but doesn't require a byte sequence (bseq).
*/
- pgbd = &sc->pg_bd_chain[PG_PAGE(*prod_idx)][PG_IDX(*prod_idx)];
+ pgbd = &sc->pg_bd_chain[PG_PAGE(prod_idx)][PG_IDX(prod_idx)];
- pgbd->rx_bd_haddr_lo = htole32(BCE_ADDR_LO(busaddr));
- pgbd->rx_bd_haddr_hi = htole32(BCE_ADDR_HI(busaddr));
- pgbd->rx_bd_len = htole32(sc->pg_bd_mbuf_alloc_size);
+ pgbd->rx_bd_haddr_lo = htole32(BCE_ADDR_LO(segs[0].ds_addr));
+ pgbd->rx_bd_haddr_hi = htole32(BCE_ADDR_HI(segs[0].ds_addr));
+ pgbd->rx_bd_len = htole32(MCLBYTES);
pgbd->rx_bd_flags = htole32(RX_BD_FLAGS_START | RX_BD_FLAGS_END);
/* Save the mbuf and update our counter. */
- sc->pg_mbuf_ptr[*prod_idx] = m_new;
+ sc->pg_mbuf_ptr[prod_idx] = m_new;
sc->free_pg_bd--;
DBRUNMSG(BCE_INSANE_RECV,
bce_dump_pg_mbuf_chain(sc, debug_prod_idx, 1));
DBPRINT(sc, BCE_EXTREME_RECV, "%s(exit): prod = 0x%04X, "
- "prod_idx = 0x%04X\n", __FUNCTION__, *prod, *prod_idx);
+ "prod_idx = 0x%04X\n", __FUNCTION__, prod, prod_idx);
bce_get_pg_buf_exit:
DBEXIT(BCE_EXTREME_RESET | BCE_EXTREME_RECV | BCE_EXTREME_LOAD);
@@ -5921,7 +5878,7 @@ bce_fill_rx_chain(struct bce_softc *sc)
/* Keep filling the RX chain until it's full. */
while (sc->free_rx_bd > 0) {
prod_idx = RX_CHAIN_IDX(prod);
- if (bce_get_rx_buf(sc, NULL, &prod, &prod_idx, &prod_bseq)) {
+ if (bce_get_rx_buf(sc, prod, prod_idx, &prod_bseq)) {
/* Bail out if we can't add an mbuf to the chain. */
break;
}
@@ -5935,13 +5892,11 @@ bce_fill_rx_chain(struct bce_softc *sc)
/* We should never end up pointing to a next page pointer. */
DBRUNIF(((prod & USABLE_RX_BD_PER_PAGE) == USABLE_RX_BD_PER_PAGE),
BCE_PRINTF("%s(): Invalid rx_prod value: 0x%04X\n",
- __FUNCTION__, sc->rx_prod));
+ __FUNCTION__, rx_prod));
/* Write the mailbox and tell the chip about the waiting rx_bd's. */
- REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) +
- BCE_L2MQ_RX_HOST_BDIDX, sc->rx_prod);
- REG_WR(sc, MB_GET_CID_ADDR(RX_CID) +
- BCE_L2MQ_RX_HOST_BSEQ, sc->rx_prod_bseq);
+ REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_BDIDX, prod);
+ REG_WR(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_BSEQ, prod_bseq);
DBEXIT(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD |
BCE_VERBOSE_CTX);
@@ -5976,10 +5931,9 @@ bce_free_rx_chain(struct bce_softc *sc)
/* Clear each RX chain page. */
for (i = 0; i < sc->rx_pages; i++)
- if (sc->rx_bd_chain[i] != NULL) {
+ if (sc->rx_bd_chain[i] != NULL)
bzero((char *)sc->rx_bd_chain[i],
BCE_RX_CHAIN_PAGE_SZ);
- }
sc->free_rx_bd = sc->max_rx_bd;
@@ -6043,7 +5997,7 @@ bce_init_pg_chain(struct bce_softc *sc)
CTX_WR(sc, GET_CID_ADDR(RX_CID), BCE_L2CTX_RX_PG_BUF_SIZE, 0);
/* Configure the rx_bd and page chain mbuf cluster size. */
- val = (sc->rx_bd_mbuf_data_len << 16) | sc->pg_bd_mbuf_alloc_size;
+ val = (sc->rx_bd_mbuf_data_len << 16) | MCLBYTES;
CTX_WR(sc, GET_CID_ADDR(RX_CID), BCE_L2CTX_RX_PG_BUF_SIZE, val);
/* Configure the context reserved for jumbo support. */
@@ -6093,7 +6047,7 @@ bce_fill_pg_chain(struct bce_softc *sc)
/* Keep filling the page chain until it's full. */
while (sc->free_pg_bd > 0) {
prod_idx = PG_CHAIN_IDX(prod);
- if (bce_get_pg_buf(sc, NULL, &prod, &prod_idx)) {
+ if (bce_get_pg_buf(sc, prod, prod_idx)) {
/* Bail out if we can't add an mbuf to the chain. */
break;
}
@@ -6105,14 +6059,14 @@ bce_fill_pg_chain(struct bce_softc *sc)
DBRUNIF(((prod & USABLE_RX_BD_PER_PAGE) == USABLE_RX_BD_PER_PAGE),
BCE_PRINTF("%s(): Invalid pg_prod value: 0x%04X\n",
- __FUNCTION__, sc->pg_prod));
+ __FUNCTION__, pg_prod));
/*
* Write the mailbox and tell the chip about
* the new rx_bd's in the page chain.
*/
- REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) +
- BCE_L2MQ_RX_HOST_PG_BDIDX, sc->pg_prod);
+ REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_PG_BDIDX,
+ prod);
DBEXIT(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD |
BCE_VERBOSE_CTX);
@@ -6345,7 +6299,7 @@ bce_ifmedia_upd_locked(struct ifnet *ifp)
/* Make sure the MII bus has been enumerated. */
if (mii) {
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
- mii_phy_reset(miisc);
+ PHY_RESET(miisc);
error = mii_mediachg(mii);
}
}
@@ -6627,14 +6581,6 @@ bce_rx_intr(struct bce_softc *sc)
DBRUN(sc->debug_rx_mbuf_alloc--);
sc->free_rx_bd++;
- if(m0 == NULL) {
- DBPRINT(sc, BCE_EXTREME_RECV,
- "%s(): Oops! Empty mbuf pointer "
- "found in sc->rx_mbuf_ptr[0x%04X]!\n",
- __FUNCTION__, sw_rx_cons_idx);
- goto bce_rx_int_next_rx;
- }
-
/*
* Frames received on the NetXteme II are prepended
* with an l2_fhdr structure which provides status
@@ -6793,7 +6739,7 @@ bce_rx_intr(struct bce_softc *sc)
m_freem(m0);
m0 = NULL;
- goto bce_rx_int_next_rx;
+ goto bce_rx_intr_next_rx;
}
/* Send the packet to the appropriate interface. */
@@ -6804,7 +6750,6 @@ bce_rx_intr(struct bce_softc *sc)
/* Validate the checksum if offload enabled. */
if (ifp->if_capenable & IFCAP_RXCSUM) {
-
/* Check for an IP datagram. */
if (!(status & L2_FHDR_STATUS_SPLIT) &&
(status & L2_FHDR_STATUS_IP_DATAGRAM)) {
@@ -6834,7 +6779,8 @@ bce_rx_intr(struct bce_softc *sc)
}
/* Attach the VLAN tag. */
- if (status & L2_FHDR_STATUS_L2_VLAN_TAG) {
+ if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) &&
+ !(sc->rx_mode & BCE_EMAC_RX_MODE_KEEP_VLAN_TAG)) {
DBRUN(sc->vlan_tagged_frames_rcvd++);
if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
DBRUN(sc->vlan_tagged_frames_stripped++);
@@ -6873,7 +6819,7 @@ bce_rx_intr(struct bce_softc *sc)
/* Increment received packet statistics. */
ifp->if_ipackets++;
-bce_rx_int_next_rx:
+bce_rx_intr_next_rx:
sw_rx_cons = NEXT_RX_BD(sw_rx_cons);
/* If we have a packet, pass it up the stack */
@@ -7165,10 +7111,9 @@ bce_init_locked(struct bce_softc *sc)
ether_mtu = ifp->if_mtu;
else {
if (bce_hdr_split == TRUE) {
- if (ifp->if_mtu <= (sc->rx_bd_mbuf_data_len +
- sc->pg_bd_mbuf_alloc_size))
- ether_mtu = sc->rx_bd_mbuf_data_len +
- sc->pg_bd_mbuf_alloc_size;
+ if (ifp->if_mtu <= sc->rx_bd_mbuf_data_len + MCLBYTES)
+ ether_mtu = sc->rx_bd_mbuf_data_len +
+ MCLBYTES;
else
ether_mtu = ifp->if_mtu;
} else {
@@ -7196,9 +7141,6 @@ bce_init_locked(struct bce_softc *sc)
bce_set_rx_mode(sc);
if (bce_hdr_split == TRUE) {
- DBPRINT(sc, BCE_INFO_LOAD, "%s(): pg_bd_mbuf_alloc_size = %d\n",
- __FUNCTION__, sc->pg_bd_mbuf_alloc_size);
-
/* Init page buffer descriptor chain. */
bce_init_pg_chain(sc);
}
@@ -7303,7 +7245,7 @@ bce_tso_setup(struct bce_softc *sc, struct mbuf **m_head, u16 *flags)
/* Controller may modify mbuf chains. */
if (M_WRITABLE(*m_head) == 0) {
- m = m_dup(*m_head, M_DONTWAIT);
+ m = m_dup(*m_head, M_NOWAIT);
m_freem(*m_head);
if (m == NULL) {
sc->mbuf_alloc_failed_count++;
@@ -7469,7 +7411,7 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head)
sc->mbuf_frag_count++;
/* Try to defrag the mbuf. */
- m0 = m_collapse(*m_head, M_DONTWAIT, BCE_MAX_SEGMENTS);
+ m0 = m_collapse(*m_head, M_NOWAIT, BCE_MAX_SEGMENTS);
if (m0 == NULL) {
/* Defrag was unsuccessful */
m_freem(*m_head);
@@ -7692,7 +7634,6 @@ bce_start_locked(struct ifnet *ifp)
bce_start_locked_exit:
DBEXIT(BCE_VERBOSE_SEND | BCE_VERBOSE_CTX);
- return;
}
@@ -7891,18 +7832,42 @@ bce_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
bce_watchdog(struct bce_softc *sc)
{
+ uint32_t status;
+
DBENTER(BCE_EXTREME_SEND);
BCE_LOCK_ASSERT(sc);
+ status = 0;
/* If the watchdog timer hasn't expired then just exit. */
if (sc->watchdog_timer == 0 || --sc->watchdog_timer)
goto bce_watchdog_exit;
+ status = REG_RD(sc, BCE_EMAC_RX_STATUS);
/* If pause frames are active then don't reset the hardware. */
- /* ToDo: Should we reset the timer here? */
- if (REG_RD(sc, BCE_EMAC_TX_STATUS) & BCE_EMAC_TX_STATUS_XOFFED)
- goto bce_watchdog_exit;
+ if ((sc->bce_flags & BCE_USING_RX_FLOW_CONTROL) != 0) {
+ if ((status & BCE_EMAC_RX_STATUS_FFED) != 0) {
+ /*
+ * If link partner has us in XOFF state then wait for
+ * the condition to clear.
+ */
+ sc->watchdog_timer = BCE_TX_TIMEOUT;
+ goto bce_watchdog_exit;
+ } else if ((status & BCE_EMAC_RX_STATUS_FF_RECEIVED) != 0 &&
+ (status & BCE_EMAC_RX_STATUS_N_RECEIVED) != 0) {
+ /*
+ * If we're not currently XOFF'ed but have recently
+ * been XOFF'd/XON'd then assume that's delaying TX
+ * this time around.
+ */
+ sc->watchdog_timer = BCE_TX_TIMEOUT;
+ goto bce_watchdog_exit;
+ }
+ /*
+ * Any other condition is unexpected and the controller
+ * should be reset.
+ */
+ }
BCE_PRINTF("%s(%d): Watchdog timeout occurred, resetting!\n",
__FILE__, __LINE__);
@@ -7926,6 +7891,7 @@ bce_watchdog(struct bce_softc *sc)
sc->bce_ifp->if_oerrors++;
bce_watchdog_exit:
+ REG_WR(sc, BCE_EMAC_RX_STATUS, status);
DBEXIT(BCE_EXTREME_SEND);
}
@@ -7939,7 +7905,7 @@ bce_watchdog_exit:
/* interrupt causes (PHY, TX, RX). */
/* */
/* Returns: */
-/* 0 for success, positive value for failure. */
+/* Nothing. */
/****************************************************************************/
static void
bce_intr(void *xsc)
@@ -7961,16 +7927,16 @@ bce_intr(void *xsc)
DBRUN(sc->interrupts_generated++);
/* Synchnorize before we read from interface's status block */
- bus_dmamap_sync(sc->status_tag, sc->status_map,
- BUS_DMASYNC_POSTREAD);
+ bus_dmamap_sync(sc->status_tag, sc->status_map, BUS_DMASYNC_POSTREAD);
/*
- * If the hardware status block index
- * matches the last value read by the
- * driver and we haven't asserted our
- * interrupt then there's nothing to do.
+ * If the hardware status block index matches the last value read
+ * by the driver and we haven't asserted our interrupt then there's
+ * nothing to do. This may only happen in case of INTx due to the
+ * interrupt arriving at the CPU before the status block is updated.
*/
- if ((sc->status_block->status_idx == sc->last_status_idx) &&
+ if ((sc->bce_flags & (BCE_USING_MSI_FLAG | BCE_USING_MSIX_FLAG)) == 0 &&
+ sc->status_block->status_idx == sc->last_status_idx &&
(REG_RD(sc, BCE_PCICFG_MISC_STATUS) &
BCE_PCICFG_MISC_STATUS_INTA_VALUE)) {
DBPRINT(sc, BCE_VERBOSE_INTR, "%s(): Spurious interrupt.\n",
@@ -8058,11 +8024,9 @@ bce_intr(void *xsc)
if ((hw_rx_cons == sc->hw_rx_cons) &&
(hw_tx_cons == sc->hw_tx_cons))
break;
-
}
- bus_dmamap_sync(sc->status_tag, sc->status_map,
- BUS_DMASYNC_PREREAD);
+ bus_dmamap_sync(sc->status_tag, sc->status_map, BUS_DMASYNC_PREREAD);
/* Re-enable interrupts. */
bce_enable_intr(sc, 0);
@@ -8128,8 +8092,9 @@ bce_set_rx_mode(struct bce_softc *sc)
/* Enable all multicast addresses. */
for (i = 0; i < NUM_MC_HASH_REGISTERS; i++) {
- REG_WR(sc, BCE_EMAC_MULTICAST_HASH0 + (i * 4), 0xffffffff);
- }
+ REG_WR(sc, BCE_EMAC_MULTICAST_HASH0 + (i * 4),
+ 0xffffffff);
+ }
sort_mode |= BCE_RPM_SORT_USER0_MC_EN;
} else {
/* Accept one or more multicast(s). */
@@ -8186,6 +8151,8 @@ bce_stats_update(struct bce_softc *sc)
ifp = sc->bce_ifp;
+ bus_dmamap_sync(sc->stats_tag, sc->stats_map, BUS_DMASYNC_POSTREAD);
+
stats = (struct statistics_block *) sc->stats_block;
/*
@@ -8493,11 +8460,7 @@ bce_tick(void *xsc)
/* Update the statistics from the hardware statistics block. */
bce_stats_update(sc);
- /*
- * ToDo: This is a safety measure. Need to re-evaluate
- * high level processing logic and eliminate this code.
- */
- /* Top off the receive and page chains. */
+ /* Ensure page and RX chains get refilled in low-memory situations. */
if (bce_hdr_split == TRUE)
bce_fill_pg_chain(sc);
bce_fill_rx_chain(sc);
@@ -8546,7 +8509,6 @@ bce_tick(void *xsc)
bce_tick_exit:
DBEXIT(BCE_EXTREME_MISC);
- return;
}
static void
@@ -8718,6 +8680,8 @@ bce_sysctl_stats_clear(SYSCTL_HANDLER_ARGS)
stats = (struct statistics_block *) sc->stats_block;
bzero(stats, sizeof(struct statistics_block));
+ bus_dmamap_sync(sc->stats_tag, sc->stats_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* Clear the internal H/W statistics counters. */
REG_WR(sc, BCE_HC_COMMAND, BCE_HC_COMMAND_CLR_STAT_NOW);
@@ -9162,7 +9126,7 @@ bce_add_sysctls(struct bce_softc *sc)
0, "Number of simulated l2_fhdr errors");
#endif
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"l2fhdr_error_count",
CTLFLAG_RD, &sc->l2fhdr_error_count,
0, "Number of l2_fhdr errors");
@@ -9173,18 +9137,18 @@ bce_add_sysctls(struct bce_softc *sc)
CTLFLAG_RW, &mbuf_alloc_failed_sim_control,
0, "Debug control to force mbuf allocation failures");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"mbuf_alloc_failed_sim_count",
CTLFLAG_RD, &sc->mbuf_alloc_failed_sim_count,
0, "Number of simulated mbuf cluster allocation failures");
#endif
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"mbuf_alloc_failed_count",
CTLFLAG_RD, &sc->mbuf_alloc_failed_count,
0, "Number of mbuf allocation failures");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"mbuf_frag_count",
CTLFLAG_RD, &sc->mbuf_frag_count,
0, "Number of fragmented mbufs");
@@ -9196,19 +9160,19 @@ bce_add_sysctls(struct bce_softc *sc)
0, "Debug control to force DMA mapping failures");
/* ToDo: Figure out how to update this value in bce_dma_map_addr(). */
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"dma_map_addr_failed_sim_count",
CTLFLAG_RD, &sc->dma_map_addr_failed_sim_count,
0, "Number of simulated DMA mapping failures");
#endif
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"dma_map_addr_rx_failed_count",
CTLFLAG_RD, &sc->dma_map_addr_rx_failed_count,
0, "Number of RX DMA mapping failures");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"dma_map_addr_tx_failed_count",
CTLFLAG_RD, &sc->dma_map_addr_tx_failed_count,
0, "Number of TX DMA mapping failures");
@@ -9219,13 +9183,13 @@ bce_add_sysctls(struct bce_softc *sc)
CTLFLAG_RW, &unexpected_attention_sim_control,
0, "Debug control to simulate unexpected attentions");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"unexpected_attention_sim_count",
CTLFLAG_RW, &sc->unexpected_attention_sim_count,
0, "Number of simulated unexpected attentions");
#endif
- SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
"unexpected_attention_count",
CTLFLAG_RW, &sc->unexpected_attention_count,
0, "Number of unexpected attentions");
@@ -9863,7 +9827,7 @@ bce_dump_mbuf(struct bce_softc *sc, struct mbuf *m)
"\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG"
"\22M_PROMISC\23M_NOFREE",
mp->m_pkthdr.csum_flags,
- "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS"
+ "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP"
"\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED"
"\12CSUM_IP_VALID\13CSUM_DATA_VALID"
"\14CSUM_PSEUDO_HDR");
@@ -10703,6 +10667,8 @@ bce_dump_status_block(struct bce_softc *sc)
{
struct status_block *sblk;
+ bus_dmamap_sync(sc->status_tag, sc->status_map, BUS_DMASYNC_POSTREAD);
+
sblk = sc->status_block;
BCE_PRINTF(
@@ -10765,6 +10731,8 @@ bce_dump_stats_block(struct bce_softc *sc)
{
struct statistics_block *sblk;
+ bus_dmamap_sync(sc->stats_tag, sc->stats_map, BUS_DMASYNC_POSTREAD);
+
sblk = sc->stats_block;
BCE_PRINTF(
@@ -11629,7 +11597,5 @@ bce_breakpoint(struct bce_softc *sc)
/* Call the debugger. */
breakpoint();
-
- return;
}
#endif
diff --git a/freebsd/sys/dev/bce/if_bcefw.h b/freebsd/sys/dev/bce/if_bcefw.h
index 0b30bb83..8d97b31c 100644
--- a/freebsd/sys/dev/bce/if_bcefw.h
+++ b/freebsd/sys/dev/bce/if_bcefw.h
@@ -57,7 +57,7 @@ u32 bce_COM_b06FwSbssAddr = 0x08004aa0;
int bce_COM_b06FwSbssLen = 0x38;
u32 bce_COM_b06FwSDataAddr = 0x00000000;
int bce_COM_b06FwSDataLen = 0x0;
-u32 bce_COM_b06FwText[(0x4a68/4) + 1] = {
+const u32 bce_COM_b06FwText[(0x4a68/4) + 1] = {
0xa000046, 0x0, 0x0,
0xd, 0x636f6d36, 0x2e302e31, 0x35000000,
0x6000f02, 0x0, 0x3, 0xc8,
@@ -1249,14 +1249,14 @@ u32 bce_COM_b06FwText[(0x4a68/4) + 1] = {
0x440fffe, 0x24020002, 0xaf5101c0, 0xa34201c4,
0x3c021000, 0xaf4201f8, 0x8fbf0018, 0x8fb10014,
0x8fb00010, 0x3e00008, 0x27bd0020, 0x0 };
-u32 bce_COM_b06FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_COM_b06FwRodata[(0x14/4) + 1] = {
+const u32 bce_COM_b06FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_COM_b06FwRodata[(0x14/4) + 1] = {
0x8000acc,
0x8000b14, 0x8000b98, 0x8000be4, 0x8000c20,
0x0 };
-u32 bce_COM_b06FwBss[(0xc4/4) + 1] = { 0x0 };
-u32 bce_COM_b06FwSbss[(0x38/4) + 1] = { 0x0 };
-u32 bce_COM_b06FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_COM_b06FwBss[(0xc4/4) + 1] = { 0x0 };
+const u32 bce_COM_b06FwSbss[(0x38/4) + 1] = { 0x0 };
+const u32 bce_COM_b06FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_RXP_b06FwReleaseMajor = 0x6;
@@ -1275,7 +1275,7 @@ u32 bce_RXP_b06FwSbssAddr = 0x08007320;
int bce_RXP_b06FwSbssLen = 0x4c;
u32 bce_RXP_b06FwSDataAddr = 0x00000000;
int bce_RXP_b06FwSDataLen = 0x0;
-u32 bce_RXP_b06FwText[(0x72d0/4) + 1] = {
+const u32 bce_RXP_b06FwText[(0x72d0/4) + 1] = {
0xa000c84, 0x0, 0x0,
0xd, 0x72787036, 0x2e302e31, 0x35000000,
0x6000f03, 0x0, 0x1, 0x0,
@@ -3114,15 +3114,15 @@ u32 bce_RXP_b06FwText[(0x72d0/4) + 1] = {
0x8fbf0020, 0x8fb3001c, 0x8fb20018, 0x8fb10014,
0x8fb00010, 0x3c021000, 0x27bd0028, 0x3e00008,
0xaf4201b8, 0x0 };
-u32 bce_RXP_b06FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_RXP_b06FwRodata[(0x24/4) + 1] = {
+const u32 bce_RXP_b06FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_RXP_b06FwRodata[(0x24/4) + 1] = {
0x8003430,
0x8003430, 0x80033a8, 0x80033e0, 0x8003414,
0x8003438, 0x8003438, 0x8003438, 0x8003318,
0x0 };
-u32 bce_RXP_b06FwBss[(0x440/4) + 1] = { 0x0 };
-u32 bce_RXP_b06FwSbss[(0x4c/4) + 1] = { 0x0 };
-u32 bce_RXP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_RXP_b06FwBss[(0x440/4) + 1] = { 0x0 };
+const u32 bce_RXP_b06FwSbss[(0x4c/4) + 1] = { 0x0 };
+const u32 bce_RXP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_TPAT_b06FwReleaseMajor = 0x6;
@@ -3141,7 +3141,7 @@ u32 bce_TPAT_b06FwSbssAddr = 0x08001c00;
int bce_TPAT_b06FwSbssLen = 0x44;
u32 bce_TPAT_b06FwSDataAddr = 0x00000000;
int bce_TPAT_b06FwSDataLen = 0x0;
-u32 bce_TPAT_b06FwText[(0x17d4/4) + 1] = {
+const u32 bce_TPAT_b06FwText[(0x17d4/4) + 1] = {
0xa000124, 0x0, 0x0,
0xd, 0x74706136, 0x2e302e31, 0x35000000,
0x6000f01, 0x0, 0x0, 0x0,
@@ -3524,11 +3524,11 @@ u32 bce_TPAT_b06FwText[(0x17d4/4) + 1] = {
0x14a0fffb, 0x42042, 0xc35021, 0x8fbf0010,
0xa4c02, 0x312200ff, 0x27bd0018, 0xaf8a002c,
0x3e00008, 0xaf890030, 0x0 };
-u32 bce_TPAT_b06FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_TPAT_b06FwRodata[(0x0/4) + 1] = { 0x0 };
-u32 bce_TPAT_b06FwBss[(0x450/4) + 1] = { 0x0 };
-u32 bce_TPAT_b06FwSbss[(0x44/4) + 1] = { 0x0 };
-u32 bce_TPAT_b06FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b06FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b06FwRodata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b06FwBss[(0x450/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b06FwSbss[(0x44/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b06FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_TXP_b06FwReleaseMajor = 0x6;
@@ -3547,7 +3547,7 @@ u32 bce_TXP_b06FwSbssAddr = 0x08003c20;
int bce_TXP_b06FwSbssLen = 0x68;
u32 bce_TXP_b06FwSDataAddr = 0x00000000;
int bce_TXP_b06FwSDataLen = 0x0;
-u32 bce_TXP_b06FwText[(0x3bfc/4) + 1] = {
+const u32 bce_TXP_b06FwText[(0x3bfc/4) + 1] = {
0xa00002a, 0x0, 0x0,
0xd, 0x74787036, 0x2e302e31, 0x35000000,
0x6000f00, 0x0, 0x136, 0xea60,
@@ -4509,11 +4509,11 @@ u32 bce_TXP_b06FwText[(0x3bfc/4) + 1] = {
0x3c010800, 0xac243d58, 0x3c010800, 0xac233d68,
0x3c010800, 0xac223d60, 0x3e00008, 0x0,
0x0 };
-u32 bce_TXP_b06FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_TXP_b06FwRodata[(0x0/4) + 1] = { 0x0 };
-u32 bce_TXP_b06FwBss[(0x14c/4) + 1] = { 0x0 };
-u32 bce_TXP_b06FwSbss[(0x68/4) + 1] = { 0x0 };
-u32 bce_TXP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TXP_b06FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TXP_b06FwRodata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TXP_b06FwBss[(0x14c/4) + 1] = { 0x0 };
+const u32 bce_TXP_b06FwSbss[(0x68/4) + 1] = { 0x0 };
+const u32 bce_TXP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_CP_b06FwReleaseMajor = 0x6;
@@ -4532,7 +4532,7 @@ u32 bce_CP_b06FwSbssAddr = 0x08005884;
int bce_CP_b06FwSbssLen = 0xf1;
u32 bce_CP_b06FwSDataAddr = 0x00000000;
int bce_CP_b06FwSDataLen = 0x0;
-u32 bce_CP_b06FwText[(0x5688/4) + 1] = {
+const u32 bce_CP_b06FwText[(0x5688/4) + 1] = {
0xa000028, 0x0, 0x0,
0xd, 0x6370362e, 0x302e3135, 0x0,
0x6000f04, 0x0, 0x0, 0x0,
@@ -5918,7 +5918,7 @@ u32 bce_CP_b06FwText[(0x5688/4) + 1] = {
0x27bd0030, 0x8f83001c, 0x8c620004, 0x10400003,
0x0, 0x3e00008, 0x0, 0x8c640010,
0x8c650008, 0xa001527, 0x8c66000c, 0x0 };
-u32 bce_CP_b06FwData[(0x84/4) + 1] = {
+const u32 bce_CP_b06FwData[(0x84/4) + 1] = {
0x0, 0x1b, 0xf,
0xa, 0x8, 0x6, 0x5,
0x5, 0x4, 0x4, 0x3,
@@ -5928,7 +5928,7 @@ u32 bce_CP_b06FwData[(0x84/4) + 1] = {
0x2, 0x2, 0x2, 0x2,
0x2, 0x2, 0x2, 0x1,
0x1, 0x1, 0x0 };
-u32 bce_CP_b06FwRodata[(0x158/4) + 1] = {
+const u32 bce_CP_b06FwRodata[(0x158/4) + 1] = {
0x8000f24, 0x8000d6c, 0x8000fb8,
0x8001060, 0x8000f4c, 0x8000f8c, 0x8001194,
0x8000d88, 0x80011b8, 0x8000dd8, 0x8001554,
@@ -5951,12 +5951,12 @@ u32 bce_CP_b06FwRodata[(0x158/4) + 1] = {
0x8002e1c, 0x8002de4, 0x8002df0, 0x8002dfc,
0x8002e08, 0x80052e8, 0x80052a8, 0x8005274,
0x8005248, 0x8005224, 0x80051e0, 0x0 };
-u32 bce_CP_b06FwBss[(0x5d8/4) + 1] = { 0x0 };
-u32 bce_CP_b06FwSbss[(0xf1/4) + 1] = { 0x0 };
-u32 bce_CP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_CP_b06FwBss[(0x5d8/4) + 1] = { 0x0 };
+const u32 bce_CP_b06FwSbss[(0xf1/4) + 1] = { 0x0 };
+const u32 bce_CP_b06FwSdata[(0x0/4) + 1] = { 0x0 };
-u32 bce_rv2p_proc1[] = {
+const u32 bce_rv2p_proc1[] = {
0x00000010, 0xb1800006,
0x0000001f, 0x0106000f,
0x00000008, 0x0500ffff,
@@ -6681,7 +6681,7 @@ u32 bce_TXP_b09FwSbssAddr = 0x08003d88;
int bce_TXP_b09FwSbssLen = 0x64;
u32 bce_TXP_b09FwSDataAddr = 0x00000000;
int bce_TXP_b09FwSDataLen = 0x0;
-u32 bce_TXP_b09FwText[(0x3d28/4) + 1] = {
+const u32 bce_TXP_b09FwText[(0x3d28/4) + 1] = {
0xa00002a, 0x0, 0x0,
0xd, 0x74787036, 0x2e302e31, 0x37000000,
0x6001100, 0x0, 0x136, 0xea60,
@@ -7661,15 +7661,15 @@ u32 bce_TXP_b09FwText[(0x3d28/4) + 1] = {
0xac263fcc, 0x3c010800, 0xac253fc4, 0x3c010800,
0xac243fc0, 0x3c010800, 0xac233fd0, 0x3c010800,
0xac223fc8, 0x3e00008, 0x0, 0x0 };
-u32 bce_TXP_b09FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_TXP_b09FwRodata[(0x30/4) + 1] = {
+const u32 bce_TXP_b09FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TXP_b09FwRodata[(0x30/4) + 1] = {
0x80000940, 0x80000900, 0x80080100,
0x80080080, 0x80080000, 0x800e0000, 0x80080080,
0x80080000, 0x80000a80, 0x80000a00, 0x80000980,
0x80000900, 0x0 };
-u32 bce_TXP_b09FwBss[(0x24c/4) + 1] = { 0x0 };
-u32 bce_TXP_b09FwSbss[(0x64/4) + 1] = { 0x0 };
-u32 bce_TXP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TXP_b09FwBss[(0x24c/4) + 1] = { 0x0 };
+const u32 bce_TXP_b09FwSbss[(0x64/4) + 1] = { 0x0 };
+const u32 bce_TXP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_TPAT_b09FwReleaseMajor = 0x6;
@@ -7688,7 +7688,7 @@ u32 bce_TPAT_b09FwSbssAddr = 0x08001720;
int bce_TPAT_b09FwSbssLen = 0x3c;
u32 bce_TPAT_b09FwSDataAddr = 0x00000000;
int bce_TPAT_b09FwSDataLen = 0x0;
-u32 bce_TPAT_b09FwText[(0x12fc/4) + 1] = {
+const u32 bce_TPAT_b09FwText[(0x12fc/4) + 1] = {
0xa000124, 0x0, 0x0,
0xd, 0x74706136, 0x2e302e31, 0x37000000,
0x6001101, 0x0, 0x0, 0x0,
@@ -7994,12 +7994,12 @@ u32 bce_TPAT_b09FwText[(0x12fc/4) + 1] = {
0x0, 0x0, 0x2402ffff, 0x2463ffff,
0x1462fffa, 0x24840004, 0x3e00008, 0x0,
0x0 };
-u32 bce_TPAT_b09FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_TPAT_b09FwRodata[(0x4/4) + 1] = {
+const u32 bce_TPAT_b09FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b09FwRodata[(0x4/4) + 1] = {
0x1, 0x0 };
-u32 bce_TPAT_b09FwBss[(0x12b4/4) + 1] = { 0x0 };
-u32 bce_TPAT_b09FwSbss[(0x3c/4) + 1] = { 0x0 };
-u32 bce_TPAT_b09FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b09FwBss[(0x12b4/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b09FwSbss[(0x3c/4) + 1] = { 0x0 };
+const u32 bce_TPAT_b09FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_COM_b09FwReleaseMajor = 0x6;
@@ -8018,7 +8018,7 @@ u32 bce_COM_b09FwSbssAddr = 0x08005608;
int bce_COM_b09FwSbssLen = 0x30;
u32 bce_COM_b09FwSDataAddr = 0x00000000;
int bce_COM_b09FwSDataLen = 0x0;
-u32 bce_COM_b09FwText[(0x5594/4) + 1] = {
+const u32 bce_COM_b09FwText[(0x5594/4) + 1] = {
0xa000046, 0x0, 0x0,
0xd, 0x636f6d36, 0x2e302e31, 0x37000000,
0x6001102, 0x0, 0x3, 0xc8,
@@ -9389,15 +9389,15 @@ u32 bce_COM_b09FwText[(0x5594/4) + 1] = {
0x40f809, 0x0, 0xa001560, 0x0,
0xd, 0x3c1c0800, 0x279c5608, 0x8fbf0010,
0x3e00008, 0x27bd0018, 0x0 };
-u32 bce_COM_b09FwData[(0x0/4) + 1] = { 0x0 };
-u32 bce_COM_b09FwRodata[(0x38/4) + 1] = {
+const u32 bce_COM_b09FwData[(0x0/4) + 1] = { 0x0 };
+const u32 bce_COM_b09FwRodata[(0x38/4) + 1] = {
0x80080240, 0x80080100, 0x80080080,
0x80080000, 0xc80, 0x3200, 0x8000e98,
0x8000ef4, 0x8000f88, 0x8001028, 0x8001074,
0x80080100, 0x80080080, 0x80080000, 0x0 };
-u32 bce_COM_b09FwBss[(0x11c/4) + 1] = { 0x0 };
-u32 bce_COM_b09FwSbss[(0x30/4) + 1] = { 0x0 };
-u32 bce_COM_b09FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_COM_b09FwBss[(0x11c/4) + 1] = { 0x0 };
+const u32 bce_COM_b09FwSbss[(0x30/4) + 1] = { 0x0 };
+const u32 bce_COM_b09FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_RXP_b09FwReleaseMajor = 0x6;
@@ -9416,7 +9416,7 @@ u32 bce_RXP_b09FwSbssAddr = 0x08009400;
int bce_RXP_b09FwSbssLen = 0x78;
u32 bce_RXP_b09FwSDataAddr = 0x00000000;
int bce_RXP_b09FwSDataLen = 0x0;
-u32 bce_RXP_b09FwText[(0x9090/4) + 1] = {
+const u32 bce_RXP_b09FwText[(0x9090/4) + 1] = {
0xa000c84, 0x0, 0x0,
0xd, 0x72787036, 0x2e302e31, 0x37000000,
0x6001103, 0x0, 0x1, 0x0,
@@ -11786,9 +11786,9 @@ u32 bce_RXP_b09FwRodata[(0x33c/4) + 1] = {
0x8007fc0, 0x8007fc0, 0x8007fc0, 0x8007fc0,
0x8007fe8, 0x8008b6c, 0x8008cc8, 0x8008ca8,
0x8008710, 0x8008b84, 0x0 };
-u32 bce_RXP_b09FwBss[(0x1bc/4) + 1] = { 0x0 };
-u32 bce_RXP_b09FwSbss[(0x78/4) + 1] = { 0x0 };
-u32 bce_RXP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_RXP_b09FwBss[(0x1bc/4) + 1] = { 0x0 };
+const u32 bce_RXP_b09FwSbss[(0x78/4) + 1] = { 0x0 };
+const u32 bce_RXP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
int bce_CP_b09FwReleaseMajor = 0x6;
@@ -11807,7 +11807,7 @@ u32 bce_CP_b09FwSbssAddr = 0x080059b0;
int bce_CP_b09FwSbssLen = 0xa8;
u32 bce_CP_b09FwSDataAddr = 0x00000000;
int bce_CP_b09FwSDataLen = 0x0;
-u32 bce_CP_b09FwText[(0x5744/4) + 1] = {
+const u32 bce_CP_b09FwText[(0x5744/4) + 1] = {
0xa000028, 0x0, 0x0,
0xd, 0x6370362e, 0x302e3137, 0x0,
0x6001104, 0x0, 0x0, 0x0,
@@ -13205,7 +13205,7 @@ u32 bce_CP_b09FwText[(0x5744/4) + 1] = {
0xa00156a, 0x8fbf001c, 0xe0010d1, 0x0,
0x5040ff9e, 0x8fbf001c, 0x9259007d, 0x3330003f,
0xa0015c6, 0x36020040, 0x0 };
-u32 bce_CP_b09FwData[(0x84/4) + 1] = {
+const u32 bce_CP_b09FwData[(0x84/4) + 1] = {
0x0, 0x1b, 0xf,
0xa, 0x8, 0x6, 0x5,
0x5, 0x4, 0x4, 0x3,
@@ -13215,7 +13215,7 @@ u32 bce_CP_b09FwData[(0x84/4) + 1] = {
0x2, 0x2, 0x2, 0x2,
0x2, 0x2, 0x2, 0x1,
0x1, 0x1, 0x0 };
-u32 bce_CP_b09FwRodata[(0x1c0/4) + 1] = {
+const u32 bce_CP_b09FwRodata[(0x1c0/4) + 1] = {
0x80080100,
0x80080080, 0x80080000, 0xc00, 0x3080,
0x80011d0, 0x800127c, 0x8001294, 0x80012a8,
@@ -13245,12 +13245,12 @@ u32 bce_CP_b09FwRodata[(0x1c0/4) + 1] = {
0x80080080, 0x80080000, 0x80080080, 0x8004c64,
0x8004c9c, 0x8004be4, 0x8004c64, 0x8004c64,
0x80049b8, 0x8004c64, 0x8005050, 0x0 };
-u32 bce_CP_b09FwBss[(0x19c/4) + 1] = { 0x0 };
-u32 bce_CP_b09FwSbss[(0xa8/4) + 1] = { 0x0 };
-u32 bce_CP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
+const u32 bce_CP_b09FwBss[(0x19c/4) + 1] = { 0x0 };
+const u32 bce_CP_b09FwSbss[(0xa8/4) + 1] = { 0x0 };
+const u32 bce_CP_b09FwSdata[(0x0/4) + 1] = { 0x0 };
-u32 bce_xi_rv2p_proc1[] = {
+const u32 bce_xi_rv2p_proc1[] = {
0x00000010, 0xb1800006,
0x0000001f, 0x05060011,
0x00000008, 0x0500ffff,
@@ -13541,7 +13541,7 @@ u32 bce_xi_rv2p_proc1[] = {
};
-u32 bce_xi_rv2p_proc2[] = {
+const u32 bce_xi_rv2p_proc2[] = {
0x00000010, 0xb1800004,
0x0000001f, 0x05060011,
0x00000008, 0x050000ff,
@@ -14008,9 +14008,9 @@ u32 bce_xi_rv2p_proc2[] = {
0x00000010, 0x001f0000,
0x00000018, 0x8000fe35,
};
-
-u32 bce_xi90_rv2p_proc1[] = {
+
+const u32 bce_xi90_rv2p_proc1[] = {
0x00000010, 0xb1800006,
0x0000001f, 0x03060011,
0x00000008, 0x0500ffff,
@@ -14316,7 +14316,7 @@ u32 bce_xi90_rv2p_proc1[] = {
};
-u32 bce_xi90_rv2p_proc2[] = {
+const u32 bce_xi90_rv2p_proc2[] = {
0x00000010, 0xb1800004,
0x0000001f, 0x03060011,
0x00000008, 0x050000ff,
@@ -14849,6 +14849,3 @@ u32 bce_xi90_rv2p_proc2[] = {
bce_rv2p_proc2[BCE_RV2P_PROC2_MAX_BD_PAGE_LOC] = \
(bce_rv2p_proc2[BCE_RV2P_PROC2_MAX_BD_PAGE_LOC] & ~0xFFFF) | (value); \
}
-
-
-
diff --git a/freebsd/sys/dev/bce/if_bcereg.h b/freebsd/sys/dev/bce/if_bcereg.h
index b043df5b..450180bd 100644
--- a/freebsd/sys/dev/bce/if_bcereg.h
+++ b/freebsd/sys/dev/bce/if_bcereg.h
@@ -32,10 +32,6 @@
#ifndef _BCEREG_H_DEFINED
#define _BCEREG_H_DEFINED
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include <rtems/bsd/local/opt_device_polling.h>
-#endif
-
#include <rtems/bsd/sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
@@ -6337,13 +6333,13 @@ struct fw_info {
u32 bss_addr;
u32 bss_len;
u32 bss_index;
- u32 *bss;
+ const u32 *bss;
/* Read-only section. */
u32 rodata_addr;
u32 rodata_len;
u32 rodata_index;
- u32 *rodata;
+ const u32 *rodata;
};
#define RV2P_PROC1 0
@@ -6422,6 +6418,8 @@ struct fw_info {
struct bce_softc
{
+ struct mtx bce_mtx;
+
/* Interface info */
struct ifnet *bce_ifp;
@@ -6449,8 +6447,6 @@ struct bce_softc
/* IRQ Resource Handle */
struct resource *bce_res_irq;
- struct mtx bce_mtx;
-
/* Interrupt handler. */
void *bce_intrhand;
@@ -6470,6 +6466,7 @@ struct bce_softc
#define BCE_USING_MSIX_FLAG 0x00000100
#define BCE_PCIE_FLAG 0x00000200
#define BCE_USING_TX_FLOW_CONTROL 0x00000400
+#define BCE_USING_RX_FLOW_CONTROL 0x00000800
/* Controller capability flags. */
u32 bce_cap_flags;
@@ -6564,14 +6561,6 @@ struct bce_softc
u16 bce_rx_ticks;
u32 bce_stats_ticks;
- /* ToDo: Can these be removed? */
- u16 bce_comp_prod_trip_int;
- u16 bce_comp_prod_trip;
- u16 bce_com_ticks_int;
- u16 bce_com_ticks;
- u16 bce_cmd_ticks_int;
- u16 bce_cmd_ticks;
-
/* The address of the integrated PHY on the MII bus. */
int bce_phy_addr;
@@ -6604,11 +6593,9 @@ struct bce_softc
int watchdog_timer;
/* Frame size and mbuf allocation size for RX frames. */
- u32 max_frame_size;
int rx_bd_mbuf_alloc_size;
int rx_bd_mbuf_data_len;
int rx_bd_mbuf_align_pad;
- int pg_bd_mbuf_alloc_size;
/* Receive mode settings (i.e promiscuous, multicast, etc.). */
u32 rx_mode;
diff --git a/freebsd/sys/dev/bfe/if_bfe.c b/freebsd/sys/dev/bfe/if_bfe.c
index 6d5ce427..e805ad1c 100644
--- a/freebsd/sys/dev/bfe/if_bfe.c
+++ b/freebsd/sys/dev/bfe/if_bfe.c
@@ -794,7 +794,7 @@ bfe_list_newbuf(struct bfe_softc *sc, int c)
u_int32_t ctrl;
int nsegs;
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
m->m_len = m->m_pkthdr.len = MCLBYTES;
if (bus_dmamap_load_mbuf_sg(sc->bfe_rxmbuf_tag, sc->bfe_rx_sparemap,
@@ -1522,7 +1522,7 @@ bfe_encap(struct bfe_softc *sc, struct mbuf **m_head)
error = bus_dmamap_load_mbuf_sg(sc->bfe_txmbuf_tag, r->bfe_map, *m_head,
txsegs, &nsegs, 0);
if (error == EFBIG) {
- m = m_collapse(*m_head, M_DONTWAIT, BFE_MAXTXSEGS);
+ m = m_collapse(*m_head, M_NOWAIT, BFE_MAXTXSEGS);
if (m == NULL) {
m_freem(*m_head);
*m_head = NULL;
@@ -1742,7 +1742,7 @@ bfe_ifmedia_upd(struct ifnet *ifp)
mii = device_get_softc(sc->bfe_miibus);
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
- mii_phy_reset(miisc);
+ PHY_RESET(miisc);
error = mii_mediachg(mii);
BFE_UNLOCK(sc);
diff --git a/freebsd/sys/dev/bge/if_bge.c b/freebsd/sys/dev/bge/if_bge.c
index 75912967..f9010aa1 100644
--- a/freebsd/sys/dev/bge/if_bge.c
+++ b/freebsd/sys/dev/bge/if_bge.c
@@ -37,10 +37,10 @@
__FBSDID("$FreeBSD$");
/*
- * Broadcom BCM570x family gigabit ethernet driver for FreeBSD.
+ * Broadcom BCM57xx(x)/BCM590x NetXtreme and NetLink family Ethernet driver
*
* The Broadcom BCM5700 is based on technology originally developed by
- * Alteon Networks as part of the Tigon I and Tigon II gigabit ethernet
+ * Alteon Networks as part of the Tigon I and Tigon II Gigabit Ethernet
* MAC chips. The BCM5700, sometimes referred to as the Tigon III, has
* two on-board MIPS R4000 CPUs and can have as much as 16MB of external
* SSRAM. The BCM5700 supports TCP, UDP and IP checksum offload, jumbo
@@ -369,8 +369,9 @@ static const struct bge_revision bge_majorrevs[] = {
#define BGE_IS_5717_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_5717_PLUS)
#define BGE_IS_57765_PLUS(sc) ((sc)->bge_flags & BGE_FLAG_57765_PLUS)
-const struct bge_revision * bge_lookup_rev(uint32_t);
-const struct bge_vendor * bge_lookup_vendor(uint16_t);
+static uint32_t bge_chipid(device_t);
+static const struct bge_vendor * bge_lookup_vendor(uint16_t);
+static const struct bge_revision * bge_lookup_rev(uint32_t);
typedef int (*bge_eaddr_fcn_t)(struct bge_softc *, uint8_t[]);
@@ -530,11 +531,11 @@ static devclass_t bge_devclass;
DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0);
DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0);
-static int bge_allow_asf = 0;
+static int bge_allow_asf = 1;
TUNABLE_INT("hw.bge.allow_asf", &bge_allow_asf);
-SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters");
+static SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters");
SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RD, &bge_allow_asf, 0,
"Allow ASF mode if available");
@@ -681,7 +682,7 @@ bge_ape_lock_init(struct bge_softc *sc)
bit = BGE_APE_LOCK_GRANT_DRIVER0;
break;
default:
- if (sc->bge_func_addr != 0)
+ if (sc->bge_func_addr == 0)
bit = BGE_APE_LOCK_GRANT_DRIVER0;
else
bit = (1 << sc->bge_func_addr);
@@ -1285,7 +1286,7 @@ bge_miibus_statchg(device_t dev)
/* Set MAC flow control behavior to match link flow control settings. */
tx_mode &= ~BGE_TXMODE_FLOWCTL_ENABLE;
rx_mode &= ~BGE_RXMODE_FLOWCTL_ENABLE;
- if (IFM_OPTIONS(mii->mii_media_active & IFM_FDX) != 0) {
+ if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) {
if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0)
tx_mode |= BGE_TXMODE_FLOWCTL_ENABLE;
if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0)
@@ -1314,12 +1315,12 @@ bge_newbuf_std(struct bge_softc *sc, int i)
if (sc->bge_flags & BGE_FLAG_JUMBO_STD &&
(sc->bge_ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN +
ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) {
- m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
+ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
if (m == NULL)
return (ENOBUFS);
m->m_len = m->m_pkthdr.len = MJUM9BYTES;
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return (ENOBUFS);
m->m_len = m->m_pkthdr.len = MCLBYTES;
@@ -1370,11 +1371,11 @@ bge_newbuf_jumbo(struct bge_softc *sc, int i)
struct mbuf *m;
int error, nsegs;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ MGETHDR(m, M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
- m_cljget(m, M_DONTWAIT, MJUM9BYTES);
+ m_cljget(m, M_NOWAIT, MJUM9BYTES);
if (!(m->m_flags & M_EXT)) {
m_freem(m);
return (ENOBUFS);
@@ -1918,7 +1919,7 @@ bge_chipinit(struct bge_softc *sc)
PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD,
PCIM_CMD_INTxDIS | PCIM_CMD_MWIEN, 4);
- /* Set the timer prescaler (always 66Mhz) */
+ /* Set the timer prescaler (always 66 MHz). */
CSR_WRITE_4(sc, BGE_MISC_CFG, BGE_32BITTIME_66MHZ);
/* XXX: The Linux tg3 driver does this at the start of brgphy_reset. */
@@ -2281,9 +2282,9 @@ bge_blockinit(struct bge_softc *sc)
/* Set random backoff seed for TX */
CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF,
- IF_LLADDR(sc->bge_ifp)[0] + IF_LLADDR(sc->bge_ifp)[1] +
+ (IF_LLADDR(sc->bge_ifp)[0] + IF_LLADDR(sc->bge_ifp)[1] +
IF_LLADDR(sc->bge_ifp)[2] + IF_LLADDR(sc->bge_ifp)[3] +
- IF_LLADDR(sc->bge_ifp)[4] + IF_LLADDR(sc->bge_ifp)[5] +
+ IF_LLADDR(sc->bge_ifp)[4] + IF_LLADDR(sc->bge_ifp)[5]) &
BGE_TX_BACKOFF_SEED_MASK);
/* Set inter-packet gap */
@@ -2402,7 +2403,7 @@ bge_blockinit(struct bge_softc *sc)
DELAY(40);
/* Set misc. local control, enable interrupts on attentions */
- CSR_WRITE_4(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
+ BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
#ifdef notdef
/* Assert GPIO pins for PHY reset */
@@ -2588,7 +2589,7 @@ bge_blockinit(struct bge_softc *sc)
return (0);
}
-const struct bge_revision *
+static const struct bge_revision *
bge_lookup_rev(uint32_t chipid)
{
const struct bge_revision *br;
@@ -2606,7 +2607,7 @@ bge_lookup_rev(uint32_t chipid)
return (NULL);
}
-const struct bge_vendor *
+static const struct bge_vendor *
bge_lookup_vendor(uint16_t vid)
{
const struct bge_vendor *v;
@@ -2615,10 +2616,47 @@ bge_lookup_vendor(uint16_t vid)
if (v->v_id == vid)
return (v);
- panic("%s: unknown vendor %d", __func__, vid);
return (NULL);
}
+static uint32_t
+bge_chipid(device_t dev)
+{
+ uint32_t id;
+
+ id = pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
+ BGE_PCIMISCCTL_ASICREV_SHIFT;
+ if (BGE_ASICREV(id) == BGE_ASICREV_USE_PRODID_REG) {
+ /*
+ * Find the ASCI revision. Different chips use different
+ * registers.
+ */
+ switch (pci_get_device(dev)) {
+ case BCOM_DEVICEID_BCM5717:
+ case BCOM_DEVICEID_BCM5718:
+ case BCOM_DEVICEID_BCM5719:
+ case BCOM_DEVICEID_BCM5720:
+ id = pci_read_config(dev,
+ BGE_PCI_GEN2_PRODID_ASICREV, 4);
+ break;
+ case BCOM_DEVICEID_BCM57761:
+ case BCOM_DEVICEID_BCM57762:
+ case BCOM_DEVICEID_BCM57765:
+ case BCOM_DEVICEID_BCM57766:
+ case BCOM_DEVICEID_BCM57781:
+ case BCOM_DEVICEID_BCM57785:
+ case BCOM_DEVICEID_BCM57791:
+ case BCOM_DEVICEID_BCM57795:
+ id = pci_read_config(dev,
+ BGE_PCI_GEN15_PRODID_ASICREV, 4);
+ break;
+ default:
+ id = pci_read_config(dev, BGE_PCI_PRODID_ASICREV, 4);
+ }
+ }
+ return (id);
+}
+
/*
* Probe for a Broadcom chip. Check the PCI vendor and device IDs
* against our list and return its name if we find a match.
@@ -2636,61 +2674,34 @@ bge_probe(device_t dev)
char model[64];
const struct bge_revision *br;
const char *pname;
- struct bge_softc *sc = device_get_softc(dev);
+ struct bge_softc *sc;
const struct bge_type *t = bge_devs;
const struct bge_vendor *v;
uint32_t id;
uint16_t did, vid;
+ sc = device_get_softc(dev);
sc->bge_dev = dev;
vid = pci_get_vendor(dev);
did = pci_get_device(dev);
while(t->bge_vid != 0) {
if ((vid == t->bge_vid) && (did == t->bge_did)) {
- id = pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
- BGE_PCIMISCCTL_ASICREV_SHIFT;
- if (BGE_ASICREV(id) == BGE_ASICREV_USE_PRODID_REG) {
- /*
- * Find the ASCI revision. Different chips
- * use different registers.
- */
- switch (pci_get_device(dev)) {
- case BCOM_DEVICEID_BCM5717:
- case BCOM_DEVICEID_BCM5718:
- case BCOM_DEVICEID_BCM5719:
- case BCOM_DEVICEID_BCM5720:
- id = pci_read_config(dev,
- BGE_PCI_GEN2_PRODID_ASICREV, 4);
- break;
- case BCOM_DEVICEID_BCM57761:
- case BCOM_DEVICEID_BCM57762:
- case BCOM_DEVICEID_BCM57765:
- case BCOM_DEVICEID_BCM57766:
- case BCOM_DEVICEID_BCM57781:
- case BCOM_DEVICEID_BCM57785:
- case BCOM_DEVICEID_BCM57791:
- case BCOM_DEVICEID_BCM57795:
- id = pci_read_config(dev,
- BGE_PCI_GEN15_PRODID_ASICREV, 4);
- break;
- default:
- id = pci_read_config(dev,
- BGE_PCI_PRODID_ASICREV, 4);
- }
- }
+ id = bge_chipid(dev);
br = bge_lookup_rev(id);
- v = bge_lookup_vendor(vid);
if (bge_has_eaddr(sc) &&
pci_get_vpd_ident(dev, &pname) == 0)
- snprintf(model, 64, "%s", pname);
- else
- snprintf(model, 64, "%s %s", v->v_name,
+ snprintf(model, sizeof(model), "%s", pname);
+ else {
+ v = bge_lookup_vendor(vid);
+ snprintf(model, sizeof(model), "%s %s",
+ v != NULL ? v->v_name : "Unknown",
br != NULL ? br->br_name :
- "NetXtreme Ethernet Controller");
- snprintf(buf, 96, "%s, %sASIC rev. %#08x", model,
- br != NULL ? "" : "unknown ", id);
+ "NetXtreme/NetLink Ethernet Controller");
+ }
+ snprintf(buf, sizeof(buf), "%s, %sASIC rev. %#08x",
+ model, br != NULL ? "" : "unknown ", id);
device_set_desc_copy(dev, buf);
- return (0);
+ return (BUS_PROBE_DEFAULT);
}
t++;
}
@@ -3274,38 +3285,7 @@ bge_attach(device_t dev)
/* Save various chip information. */
sc->bge_func_addr = pci_get_function(dev);
- sc->bge_chipid =
- pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
- BGE_PCIMISCCTL_ASICREV_SHIFT;
- if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_USE_PRODID_REG) {
- /*
- * Find the ASCI revision. Different chips use different
- * registers.
- */
- switch (pci_get_device(dev)) {
- case BCOM_DEVICEID_BCM5717:
- case BCOM_DEVICEID_BCM5718:
- case BCOM_DEVICEID_BCM5719:
- case BCOM_DEVICEID_BCM5720:
- sc->bge_chipid = pci_read_config(dev,
- BGE_PCI_GEN2_PRODID_ASICREV, 4);
- break;
- case BCOM_DEVICEID_BCM57761:
- case BCOM_DEVICEID_BCM57762:
- case BCOM_DEVICEID_BCM57765:
- case BCOM_DEVICEID_BCM57766:
- case BCOM_DEVICEID_BCM57781:
- case BCOM_DEVICEID_BCM57785:
- case BCOM_DEVICEID_BCM57791:
- case BCOM_DEVICEID_BCM57795:
- sc->bge_chipid = pci_read_config(dev,
- BGE_PCI_GEN15_PRODID_ASICREV, 4);
- break;
- default:
- sc->bge_chipid = pci_read_config(dev,
- BGE_PCI_PRODID_ASICREV, 4);
- }
- }
+ sc->bge_chipid = bge_chipid(dev);
sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid);
sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid);
@@ -3495,6 +3475,8 @@ bge_attach(device_t dev)
pci_get_device(dev) == BCOM_DEVICEID_BCM5753F ||
pci_get_device(dev) == BCOM_DEVICEID_BCM5787F)) ||
pci_get_device(dev) == BCOM_DEVICEID_BCM57790 ||
+ pci_get_device(dev) == BCOM_DEVICEID_BCM57791 ||
+ pci_get_device(dev) == BCOM_DEVICEID_BCM57795 ||
sc->bge_asicrev == BGE_ASICREV_BCM5906) {
/* These chips are 10/100 only. */
capmask &= ~BMSR_EXTSTAT;
@@ -3506,8 +3488,8 @@ bge_attach(device_t dev)
* TSO. But the firmware is not available to FreeBSD and Linux
* claims that the TSO performed by the firmware is slower than
* hardware based TSO. Moreover the firmware based TSO has one
- * known bug which can't handle TSO if ethernet header + IP/TCP
- * header is greater than 80 bytes. The workaround for the TSO
+ * known bug which can't handle TSO if Ethernet header + IP/TCP
+ * header is greater than 80 bytes. A workaround for the TSO
* bug exist but it seems it's too expensive than not using
* TSO at all. Some hardwares also have the TSO bug so limit
* the TSO to the controllers that are not affected TSO issues
@@ -3536,7 +3518,7 @@ bge_attach(device_t dev)
/*
* Check if this is a PCI-X or PCI Express device.
*/
- if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
/*
* Found a PCI Express capabilities register, this
* must be a PCI Express device.
@@ -3558,7 +3540,7 @@ bge_attach(device_t dev)
* Check if the device is in PCI-X Mode.
* (This bit is not valid on PCI Express controllers.)
*/
- if (pci_find_extcap(dev, PCIY_PCIX, &reg) == 0)
+ if (pci_find_cap(dev, PCIY_PCIX, &reg) == 0)
sc->bge_pcixcap = reg;
if ((pci_read_config(dev, BGE_PCI_PCISTATE, 4) &
BGE_PCISTATE_PCI_BUSMODE) == 0)
@@ -3588,7 +3570,7 @@ bge_attach(device_t dev)
* normal operation.
*/
rid = 0;
- if (pci_find_extcap(sc->bge_dev, PCIY_MSI, &reg) == 0) {
+ if (pci_find_cap(sc->bge_dev, PCIY_MSI, &reg) == 0) {
sc->bge_msicap = reg;
if (bge_can_use_msi(sc)) {
msicount = pci_msi_count(dev);
@@ -3826,7 +3808,7 @@ again:
error = mii_attach(dev, &sc->bge_miibus, ifp, bge_ifmedia_upd,
bge_ifmedia_sts, capmask, sc->bge_phy_addr, MII_OFFSET_ANY,
- MIIF_DOPAUSE | MIIF_FORCEPAUSE);
+ MIIF_DOPAUSE);
if (error != 0) {
if (trys++ < 4) {
device_printf(sc->bge_dev, "Try again\n");
@@ -3880,8 +3862,13 @@ again:
error = ENOMEM;
goto fail;
}
- taskqueue_start_threads(&sc->bge_tq, 1, PI_NET, "%s taskq",
- device_get_nameunit(sc->bge_dev));
+ error = taskqueue_start_threads(&sc->bge_tq, 1, PI_NET,
+ "%s taskq", device_get_nameunit(sc->bge_dev));
+ if (error != 0) {
+ device_printf(dev, "could not start threads.\n");
+ ether_ifdetach(ifp);
+ goto fail;
+ }
error = bus_setup_intr(dev, sc->bge_irq,
INTR_TYPE_NET | INTR_MPSAFE, bge_msi_intr, NULL, sc,
&sc->bge_intrhand);
@@ -3926,9 +3913,9 @@ bge_detach(device_t dev)
if (sc->bge_tq)
taskqueue_drain(sc->bge_tq, &sc->bge_intr_task);
- if (sc->bge_flags & BGE_FLAG_TBI) {
+ if (sc->bge_flags & BGE_FLAG_TBI)
ifmedia_removeall(&sc->bge_ifmedia);
- } else {
+ else if (sc->bge_miibus != NULL) {
bus_generic_detach(dev);
device_delete_child(dev, sc->bge_miibus);
}
@@ -4987,7 +4974,7 @@ bge_cksum_pad(struct mbuf *m)
/* Allocate new empty mbuf, pad it. Compact later. */
struct mbuf *n;
- MGET(n, M_DONTWAIT, MT_DATA);
+ MGET(n, M_NOWAIT, MT_DATA);
if (n == NULL)
return (ENOBUFS);
n->m_len = 0;
@@ -5029,7 +5016,7 @@ bge_check_short_dma(struct mbuf *m)
}
if (found > 1) {
- n = m_defrag(m, M_DONTWAIT);
+ n = m_defrag(m, M_NOWAIT);
if (n == NULL)
m_freem(m);
} else
@@ -5049,7 +5036,7 @@ bge_setup_tso(struct bge_softc *sc, struct mbuf *m, uint16_t *mss,
if (M_WRITABLE(m) == 0) {
/* Get a writable copy. */
- n = m_dup(m, M_DONTWAIT);
+ n = m_dup(m, M_NOWAIT);
m_freem(m);
if (n == NULL)
return (NULL);
@@ -5166,9 +5153,9 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx)
* DMA read operation.
*/
if (sc->bge_forced_collapse == 1)
- m = m_defrag(m, M_DONTWAIT);
+ m = m_defrag(m, M_NOWAIT);
else
- m = m_collapse(m, M_DONTWAIT,
+ m = m_collapse(m, M_NOWAIT,
sc->bge_forced_collapse);
if (m == NULL)
m = *m_head;
@@ -5180,7 +5167,7 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx)
error = bus_dmamap_load_mbuf_sg(sc->bge_cdata.bge_tx_mtag, map, m, segs,
&nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
- m = m_collapse(m, M_DONTWAIT, BGE_NSEG_NEW);
+ m = m_collapse(m, M_NOWAIT, BGE_NSEG_NEW);
if (m == NULL) {
m_freem(*m_head);
*m_head = NULL;
@@ -5311,7 +5298,7 @@ bge_start_locked(struct ifnet *ifp)
/*
* Set a timeout in case the chip goes out to lunch.
*/
- sc->bge_timer = 5;
+ sc->bge_timer = BGE_TX_TIMEOUT;
}
}
@@ -5468,7 +5455,7 @@ bge_init_locked(struct bge_softc *sc)
* this number of frames, it will drop subsequent incoming
* frames until the MBUF High Watermark is reached.
*/
- if (sc->bge_asicrev == BGE_ASICREV_BCM57765)
+ if (BGE_IS_57765_PLUS(sc))
CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1);
else
CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
@@ -5588,7 +5575,7 @@ bge_ifmedia_upd_locked(struct ifnet *ifp)
sc->bge_link_evt++;
mii = device_get_softc(sc->bge_miibus);
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
- mii_phy_reset(miisc);
+ PHY_RESET(miisc);
mii_mediachg(mii);
/*
@@ -5623,6 +5610,10 @@ bge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
BGE_LOCK(sc);
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ BGE_UNLOCK(sc);
+ return;
+ }
if (sc->bge_flags & BGE_FLAG_TBI) {
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
@@ -5812,12 +5803,40 @@ static void
bge_watchdog(struct bge_softc *sc)
{
struct ifnet *ifp;
+ uint32_t status;
BGE_LOCK_ASSERT(sc);
if (sc->bge_timer == 0 || --sc->bge_timer)
return;
+ /* If pause frames are active then don't reset the hardware. */
+ if ((CSR_READ_4(sc, BGE_RX_MODE) & BGE_RXMODE_FLOWCTL_ENABLE) != 0) {
+ status = CSR_READ_4(sc, BGE_RX_STS);
+ if ((status & BGE_RXSTAT_REMOTE_XOFFED) != 0) {
+ /*
+ * If link partner has us in XOFF state then wait for
+ * the condition to clear.
+ */
+ CSR_WRITE_4(sc, BGE_RX_STS, status);
+ sc->bge_timer = BGE_TX_TIMEOUT;
+ return;
+ } else if ((status & BGE_RXSTAT_RCVD_XOFF) != 0 &&
+ (status & BGE_RXSTAT_RCVD_XON) != 0) {
+ /*
+ * If link partner has us in XOFF state then wait for
+ * the condition to clear.
+ */
+ CSR_WRITE_4(sc, BGE_RX_STS, status);
+ sc->bge_timer = BGE_TX_TIMEOUT;
+ return;
+ }
+ /*
+ * Any other condition is unexpected and the controller
+ * should be reset.
+ */
+ }
+
ifp = sc->bge_ifp;
if_printf(ifp, "watchdog timeout -- resetting\n");
@@ -6325,7 +6344,7 @@ bge_add_sysctl_stats(struct bge_softc *sc, struct sysctl_ctx_list *ctx,
#undef BGE_SYSCTL_STAT
#define BGE_SYSCTL_STAT_ADD64(c, h, n, p, d) \
- SYSCTL_ADD_QUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d)
+ SYSCTL_ADD_UQUAD(c, h, OID_AUTO, n, CTLFLAG_RD, p, d)
static void
bge_add_sysctl_stats_regs(struct bge_softc *sc, struct sysctl_ctx_list *ctx,
diff --git a/freebsd/sys/dev/bge/if_bgereg.h b/freebsd/sys/dev/bge/if_bgereg.h
index 93617cd1..09ced543 100644
--- a/freebsd/sys/dev/bge/if_bgereg.h
+++ b/freebsd/sys/dev/bge/if_bgereg.h
@@ -797,7 +797,7 @@
#define BGE_LEDCTL_BLINKPERIOD_OVERRIDE 0x80000000
/* TX backoff seed register */
-#define BGE_TX_BACKOFF_SEED_MASK 0x3F
+#define BGE_TX_BACKOFF_SEED_MASK 0x3FF
/* Autopoll status register */
#define BGE_AUTOPOLLSTS_ERROR 0x00000001
@@ -2928,6 +2928,7 @@ struct bge_dmamap_arg {
#define BGE_HWREV_TIGON_II 0x02
#define BGE_TIMEOUT 100000
#define BGE_TXCONS_UNSET 0xFFFF /* impossible value */
+#define BGE_TX_TIMEOUT 5
struct bge_bcom_hack {
int reg;
diff --git a/freebsd/sys/dev/dc/dcphy.c b/freebsd/sys/dev/dc/dcphy.c
index 9df39476..32d11c61 100644
--- a/freebsd/sys/dev/dc/dcphy.c
+++ b/freebsd/sys/dev/dc/dcphy.c
@@ -64,7 +64,6 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <machine/resource.h>
-#include <sys/bus.h>
#include <dev/pci/pcivar.h>
@@ -116,6 +115,12 @@ static void dcphy_status(struct mii_softc *);
static void dcphy_reset(struct mii_softc *);
static int dcphy_auto(struct mii_softc *);
+static const struct mii_phy_funcs dcphy_funcs = {
+ dcphy_service,
+ dcphy_status,
+ dcphy_reset
+};
+
static int
dcphy_probe(device_t dev)
{
@@ -140,30 +145,16 @@ static int
dcphy_attach(device_t dev)
{
struct mii_softc *sc;
- struct mii_attach_args *ma;
- struct mii_data *mii;
struct dc_softc *dc_sc;
device_t brdev;
sc = device_get_softc(dev);
- ma = device_get_ivars(dev);
- sc->mii_dev = device_get_parent(dev);
- mii = ma->mii_data;
- LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
- sc->mii_flags = miibus_get_flags(dev);
- sc->mii_inst = mii->mii_instance++;
- sc->mii_phy = ma->mii_phyno;
- sc->mii_service = dcphy_service;
- sc->mii_pdata = mii;
+ mii_phy_dev_attach(dev, MIIF_NOISOLATE | MIIF_NOMANPAUSE,
+ &dcphy_funcs, 0);
- /*
- * Apparently, we can neither isolate nor do loopback.
- */
- sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOLOOP | MIIF_NOMANPAUSE;
-
- /*dcphy_reset(sc);*/
- dc_sc = mii->mii_ifp->if_softc;
+ /*PHY_RESET(sc);*/
+ dc_sc = sc->mii_pdata->mii_ifp->if_softc;
CSR_WRITE_4(dc_sc, DC_10BTSTAT, 0);
CSR_WRITE_4(dc_sc, DC_10BTCTRL, 0);
@@ -184,7 +175,7 @@ dcphy_attach(device_t dev)
break;
}
- sc->mii_capabilities &= ma->mii_capmask;
+ sc->mii_capabilities &= sc->mii_capmask;
device_printf(dev, " ");
mii_phy_add_media(sc);
printf("\n");
@@ -221,11 +212,11 @@ dcphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
switch (IFM_SUBTYPE(ife->ifm_media)) {
case IFM_AUTO:
- /*dcphy_reset(sc);*/
- (void) dcphy_auto(sc);
+ /*PHY_RESET(sc);*/
+ (void)dcphy_auto(sc);
break;
case IFM_100_TX:
- dcphy_reset(sc);
+ PHY_RESET(sc);
DC_CLRBIT(dc_sc, DC_10BTCTRL, DC_TCTL_AUTONEGENBL);
mode |= DC_NETCFG_PORTSEL | DC_NETCFG_PCS |
DC_NETCFG_SCRAMBLER;
@@ -294,7 +285,7 @@ dcphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
}
/* Update the media status. */
- dcphy_status(sc);
+ PHY_STATUS(sc);
/* Callback if something changed. */
mii_phy_update(sc, cmd);
diff --git a/freebsd/sys/dev/dc/if_dc.c b/freebsd/sys/dev/dc/if_dc.c
index 575721c6..0c7a46cb 100644
--- a/freebsd/sys/dev/dc/if_dc.c
+++ b/freebsd/sys/dev/dc/if_dc.c
@@ -1369,7 +1369,7 @@ dc_netcfg_wait(struct dc_softc *sc)
break;
DELAY(10);
}
- if (i == DC_TIMEOUT) {
+ if (i == DC_TIMEOUT && bus_child_present(sc->dc_dev)) {
if (!(isr & DC_ISR_TX_IDLE) && !DC_IS_ASIX(sc))
device_printf(sc->dc_dev,
"%s: failed to force tx to idle state\n", __func__);
@@ -2652,7 +2652,7 @@ dc_newbuf(struct dc_softc *sc, int i)
bus_dma_segment_t segs[1];
int error, nseg;
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return (ENOBUFS);
m->m_len = m->m_pkthdr.len = MCLBYTES;
@@ -3389,7 +3389,7 @@ dc_encap(struct dc_softc *sc, struct mbuf **m_head)
defragged = 0;
if (sc->dc_flags & DC_TX_COALESCE &&
((*m_head)->m_next != NULL || sc->dc_flags & DC_TX_ALIGN)) {
- m = m_defrag(*m_head, M_DONTWAIT);
+ m = m_defrag(*m_head, M_NOWAIT);
defragged = 1;
} else {
/*
@@ -3404,7 +3404,7 @@ dc_encap(struct dc_softc *sc, struct mbuf **m_head)
if (i > DC_TX_LIST_CNT / 4 ||
DC_TX_LIST_CNT - i + sc->dc_cdata.dc_tx_cnt <=
DC_TX_LIST_RSVD) {
- m = m_collapse(*m_head, M_DONTWAIT, DC_MAXFRAGS);
+ m = m_collapse(*m_head, M_NOWAIT, DC_MAXFRAGS);
defragged = 1;
}
}
@@ -3421,7 +3421,7 @@ dc_encap(struct dc_softc *sc, struct mbuf **m_head)
error = bus_dmamap_load_mbuf_sg(sc->dc_tx_mtag,
sc->dc_cdata.dc_tx_map[idx], *m_head, segs, &nseg, 0);
if (error == EFBIG) {
- if (defragged != 0 || (m = m_collapse(*m_head, M_DONTWAIT,
+ if (defragged != 0 || (m = m_collapse(*m_head, M_NOWAIT,
DC_MAXFRAGS)) == NULL) {
m_freem(*m_head);
*m_head = NULL;
diff --git a/freebsd/sys/dev/dc/if_dcreg.h b/freebsd/sys/dev/dc/if_dcreg.h
index 0a61b7d1..e290881f 100644
--- a/freebsd/sys/dev/dc/if_dcreg.h
+++ b/freebsd/sys/dev/dc/if_dcreg.h
@@ -1018,7 +1018,7 @@ struct dc_softc {
#define DC_VENDORID_MICROSOFT 0x1414
/*
- * Supported Microsoft PCI and cardbus NICs. These are really
+ * Supported Microsoft PCI and CardBus NICs. These are really
* ADMtek parts in disguise.
*/
diff --git a/freebsd/sys/dev/dc/pnphy.c b/freebsd/sys/dev/dc/pnphy.c
index c303f9bf..a9a8ec93 100644
--- a/freebsd/sys/dev/dc/pnphy.c
+++ b/freebsd/sys/dev/dc/pnphy.c
@@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <machine/resource.h>
-#include <sys/bus.h>
#include <dev/dc/if_dcreg.h>
@@ -92,6 +91,13 @@ DRIVER_MODULE(pnphy, miibus, pnphy_driver, pnphy_devclass, 0, 0);
static int pnphy_service(struct mii_softc *, struct mii_data *, int);
static void pnphy_status(struct mii_softc *);
+static void pnphy_reset(struct mii_softc *);
+
+static const struct mii_phy_funcs pnphy_funcs = {
+ pnphy_service,
+ pnphy_status,
+ pnphy_reset
+};
static int
pnphy_probe(device_t dev)
@@ -117,29 +123,15 @@ static int
pnphy_attach(device_t dev)
{
struct mii_softc *sc;
- struct mii_attach_args *ma;
- struct mii_data *mii;
sc = device_get_softc(dev);
- ma = device_get_ivars(dev);
- sc->mii_dev = device_get_parent(dev);
- mii = ma->mii_data;
- LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
- sc->mii_flags = miibus_get_flags(dev);
- sc->mii_inst = mii->mii_instance++;
- sc->mii_phy = ma->mii_phyno;
- sc->mii_service = pnphy_service;
- sc->mii_pdata = mii;
-
- /*
- * Apparently, we can neither isolate nor do loopback.
- */
- sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOLOOP | MIIF_NOMANPAUSE;
+ mii_phy_dev_attach(dev, MIIF_NOISOLATE | MIIF_NOMANPAUSE,
+ &pnphy_funcs, 0);
sc->mii_capabilities =
BMSR_100TXFDX | BMSR_100TXHDX | BMSR_10TFDX | BMSR_10THDX;
- sc->mii_capabilities &= ma->mii_capmask;
+ sc->mii_capabilities &= sc->mii_capmask;
device_printf(dev, " ");
mii_phy_add_media(sc);
printf("\n");
@@ -196,7 +188,7 @@ pnphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
}
/* Update the media status. */
- pnphy_status(sc);
+ PHY_STATUS(sc);
/* Callback if something changed. */
mii_phy_update(sc, cmd);
@@ -228,3 +220,9 @@ pnphy_status(struct mii_softc *sc)
else
mii->mii_media_active |= IFM_HDX;
}
+
+static void
+pnphy_reset(struct mii_softc *sc __unused)
+{
+
+}
diff --git a/freebsd/sys/dev/e1000/e1000_api.c b/freebsd/sys/dev/e1000/e1000_api.c
index 40a1dd3f..3c0eb4f4 100644
--- a/freebsd/sys/dev/e1000/e1000_api.c
+++ b/freebsd/sys/dev/e1000/e1000_api.c
@@ -331,6 +331,9 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_I350_DA4:
mac->type = e1000_i350;
break;
+#if defined(QV_RELEASE) && defined(SPRINGVILLE_FLASHLESS_HW)
+ case E1000_DEV_ID_I210_NVMLESS:
+#endif /* QV_RELEASE && SPRINGVILLE_FLASHLESS_HW */
case E1000_DEV_ID_I210_COPPER:
case E1000_DEV_ID_I210_COPPER_OEM1:
case E1000_DEV_ID_I210_COPPER_IT:
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 948d5f2b..4c1d3469 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -34,10 +34,11 @@
******************************************************************************/
/*$FreeBSD$*/
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef HAVE_KERNEL_OPTION_HEADERS
+#include <rtems/bsd/local/opt_device_polling.h>
#endif
#include <rtems/bsd/sys/param.h>
@@ -96,7 +97,7 @@ int em_display_debug_stats = 0;
/*********************************************************************
* Driver version:
*********************************************************************/
-char em_driver_version[] = "7.3.7";
+char em_driver_version[] = "7.3.8";
/*********************************************************************
* PCI Device ID Table
@@ -337,6 +338,9 @@ MODULE_DEPEND(em, ether, 1, 1, 1);
#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN 66
+#define MAX_INTS_PER_SEC 8000
+#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
+
/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO 0
@@ -572,6 +576,11 @@ em_attach(device_t dev)
&adapter->tx_abs_int_delay,
E1000_REGISTER(hw, E1000_TADV),
em_tx_abs_int_delay_dflt);
+ em_add_int_delay_sysctl(adapter, "itr",
+ "interrupt delay limit in usecs/4",
+ &adapter->tx_itr,
+ E1000_REGISTER(hw, E1000_ITR),
+ DEFAULT_ITR);
/* Sysctl for limiting the amount of work done in the taskqueue */
em_set_sysctl_value(adapter, "rx_processing_limit",
@@ -2135,12 +2144,37 @@ em_set_promisc(struct adapter *adapter)
static void
em_disable_promisc(struct adapter *adapter)
{
- u32 reg_rctl;
+ struct ifnet *ifp = adapter->ifp;
+ u32 reg_rctl;
+ int mcnt = 0;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
-
reg_rctl &= (~E1000_RCTL_UPE);
- reg_rctl &= (~E1000_RCTL_MPE);
+ if (ifp->if_flags & IFF_ALLMULTI)
+ mcnt = MAX_NUM_MULTICAST_ADDRESSES;
+ else {
+ struct ifmultiaddr *ifma;
+#if __FreeBSD_version < 800000
+ IF_ADDR_LOCK(ifp);
+#else
+ if_maddr_rlock(ifp);
+#endif
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
+ break;
+ mcnt++;
+ }
+#if __FreeBSD_version < 800000
+ IF_ADDR_UNLOCK(ifp);
+#else
+ if_maddr_runlock(ifp);
+#endif
+ }
+ /* Don't disable if in MAX groups */
+ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
+ reg_rctl &= (~E1000_RCTL_MPE);
reg_rctl &= (~E1000_RCTL_SBP);
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}
@@ -2245,7 +2279,7 @@ em_local_timer(void *arg)
/* Mask to use in the irq trigger */
if (adapter->msix_mem)
- trigger = rxr->ims; /* RX for 82574 */
+ trigger = rxr->ims;
else
trigger = E1000_ICS_RXDMT0;
@@ -2410,16 +2444,8 @@ em_identify_hardware(struct adapter *adapter)
device_t dev = adapter->dev;
/* Make sure our PCI config space has the necessary stuff set */
+ pci_enable_busmaster(dev);
adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
- if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
- (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
- device_printf(dev, "Memory Access and/or Bus Master bits "
- "were not set!\n");
- adapter->hw.bus.pci_cmd_word |=
- (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
- pci_write_config(dev, PCIR_COMMAND,
- adapter->hw.bus.pci_cmd_word, 2);
- }
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
@@ -2718,7 +2744,7 @@ static int
em_setup_msix(struct adapter *adapter)
{
device_t dev = adapter->dev;
- int val = 0;
+ int val;
/*
** Setup MSI/X for Hartwell: tests have shown
@@ -2732,37 +2758,43 @@ em_setup_msix(struct adapter *adapter)
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
SYS_RES_MEMORY, &rid, RF_ACTIVE);
- if (!adapter->msix_mem) {
+ if (adapter->msix_mem == NULL) {
/* May not be enabled */
device_printf(adapter->dev,
"Unable to map MSIX table \n");
goto msi;
}
val = pci_msix_count(dev);
- /* We only need 3 vectors */
- if (val > 3)
+ /* We only need/want 3 vectors */
+ if (val >= 3)
val = 3;
- if ((val != 3) && (val != 5)) {
- bus_release_resource(dev, SYS_RES_MEMORY,
- PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
- adapter->msix_mem = NULL;
+ else {
device_printf(adapter->dev,
- "MSIX: incorrect vectors, using MSI\n");
+ "MSIX: insufficient vectors, using MSI\n");
goto msi;
}
- if (pci_alloc_msix(dev, &val) == 0) {
+ if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
device_printf(adapter->dev,
"Using MSIX interrupts "
"with %d vectors\n", val);
+ return (val);
}
- return (val);
+ /*
+ ** If MSIX alloc failed or provided us with
+ ** less than needed, free and fall through to MSI
+ */
+ pci_release_msi(dev);
}
msi:
- val = pci_msi_count(dev);
- if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
- adapter->msix = 1;
+ if (adapter->msix_mem != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
+ adapter->msix_mem = NULL;
+ }
+ val = 1;
+ if (pci_alloc_msi(dev, &val) == 0) {
device_printf(adapter->dev,"Using an MSI interrupt\n");
return (val);
}
@@ -3805,17 +3837,9 @@ em_txeof(struct tx_ring *txr)
EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(ifp);
-
- selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
- EM_TX_UNLOCK(txr);
- EM_CORE_LOCK(adapter);
- selwakeuppri(&na->tx_si, PI_NET);
- EM_CORE_UNLOCK(adapter);
- EM_TX_LOCK(txr);
+ if (netmap_tx_irq(ifp, txr->me |
+ (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
return;
- }
#endif /* DEV_NETMAP */
/* No work, make sure watchdog is off */
@@ -4256,8 +4280,6 @@ em_free_receive_buffers(struct rx_ring *rxr)
* Enable receive unit.
*
**********************************************************************/
-#define MAX_INTS_PER_SEC 8000
-#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256)
static void
em_initialize_receive_unit(struct adapter *adapter)
@@ -4299,11 +4321,12 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
}
- if (ifp->if_capenable & IFCAP_RXCSUM) {
- rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
- E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
- }
+ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
+ if (ifp->if_capenable & IFCAP_RXCSUM)
+ rxcsum |= E1000_RXCSUM_TUOFL;
+ else
+ rxcsum &= ~E1000_RXCSUM_TUOFL;
+ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
/*
** XXX TEMPORARY WORKAROUND: on some systems with 82573
@@ -4317,6 +4340,8 @@ em_initialize_receive_unit(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
/* Setup the Base and Length of the Rx Descriptor Ring */
+ u32 rdt = adapter->num_rx_desc - 1; /* default */
+
bus_addr = rxr->rxdma.dma_paddr;
E1000_WRITE_REG(hw, E1000_RDLEN(i),
adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
@@ -4328,18 +4353,11 @@ em_initialize_receive_unit(struct adapter *adapter)
/*
* an init() while a netmap client is active must
* preserve the rx buffers passed to userspace.
- * In this driver it means we adjust RDT to
- * something different from na->num_rx_desc - 1.
*/
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(adapter->ifp);
- struct netmap_kring *kring = &na->rx_rings[i];
- int t = na->num_rx_desc - 1 - kring->nr_hwavail;
-
- E1000_WRITE_REG(hw, E1000_RDT(i), t);
- } else
+ if (ifp->if_capenable & IFCAP_NETMAP)
+ rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
#endif /* DEV_NETMAP */
- E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
+ E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
/* Set PTHRESH for improved jumbo performance */
@@ -4416,17 +4434,8 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
EM_RX_LOCK(rxr);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(ifp);
-
- na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
- selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
- EM_RX_UNLOCK(rxr);
- EM_CORE_LOCK(adapter);
- selwakeuppri(&na->rx_si, PI_NET);
- EM_CORE_UNLOCK(adapter);
- return (0);
- }
+ if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
+ return (FALSE);
#endif /* DEV_NETMAP */
for (i = rxr->next_to_check, processed = 0; count != 0;) {
@@ -4621,31 +4630,23 @@ em_fixup_rx(struct rx_ring *rxr)
static void
em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
{
+ mp->m_pkthdr.csum_flags = 0;
+
/* Ignore Checksum bit is set */
- if (rx_desc->status & E1000_RXD_STAT_IXSM) {
- mp->m_pkthdr.csum_flags = 0;
+ if (rx_desc->status & E1000_RXD_STAT_IXSM)
return;
- }
- if (rx_desc->status & E1000_RXD_STAT_IPCS) {
- /* Did it pass? */
- if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
- /* IP Checksum Good */
- mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
- mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
+ if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
+ return;
- } else {
- mp->m_pkthdr.csum_flags = 0;
- }
- }
+ /* IP Checksum Good? */
+ if (rx_desc->status & E1000_RXD_STAT_IPCS)
+ mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
- if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
- /* Did it pass? */
- if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
- mp->m_pkthdr.csum_flags |=
- (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
- mp->m_pkthdr.csum_data = htons(0xffff);
- }
+ /* TCP or UDP checksum */
+ if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
+ mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ mp->m_pkthdr.csum_data = htons(0xffff);
}
}
@@ -5624,6 +5625,8 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
return (EINVAL);
info->value = usecs;
ticks = EM_USECS_TO_TICKS(usecs);
+ if (info->offset == E1000_ITR) /* units are 256ns here */
+ ticks *= 4;
adapter = info->adapter;
diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h
index f1afbadb..09d81d25 100644
--- a/freebsd/sys/dev/e1000/if_em.h
+++ b/freebsd/sys/dev/e1000/if_em.h
@@ -429,6 +429,7 @@ struct adapter {
struct em_int_delay_info tx_abs_int_delay;
struct em_int_delay_info rx_int_delay;
struct em_int_delay_info rx_abs_int_delay;
+ struct em_int_delay_info tx_itr;
/* Misc stats maintained by the driver */
unsigned long dropped_pkts;
diff --git a/freebsd/sys/dev/e1000/if_igb.c b/freebsd/sys/dev/e1000/if_igb.c
index a3c7fede..e26a867c 100644
--- a/freebsd/sys/dev/e1000/if_igb.c
+++ b/freebsd/sys/dev/e1000/if_igb.c
@@ -35,16 +35,17 @@
/*$FreeBSD$*/
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef HAVE_KERNEL_OPTION_HEADERS
+#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_altq.h>
#endif
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
@@ -102,7 +103,7 @@ int igb_display_debug_stats = 0;
/*********************************************************************
* Driver version:
*********************************************************************/
-char igb_driver_version[] = "version - 2.3.9 - 8";
+char igb_driver_version[] = "version - 2.3.10";
/*********************************************************************
@@ -181,7 +182,7 @@ static int igb_detach(device_t);
static int igb_shutdown(device_t);
static int igb_suspend(device_t);
static int igb_resume(device_t);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
static int igb_mq_start(struct ifnet *, struct mbuf *);
static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void igb_qflush(struct ifnet *);
@@ -376,8 +377,9 @@ SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
"Enable receive mbuf header split");
/*
-** This will autoconfigure based on
-** the number of CPUs if left at 0.
+** This will autoconfigure based on the
+** number of CPUs and max supported
+** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
@@ -852,7 +854,7 @@ igb_resume(device_t dev)
(ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
for (int i = 0; i < adapter->num_queues; i++, txr++) {
IGB_TX_LOCK(txr);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
/* Process the stack queue only if not depleted */
if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
!drbr_empty(ifp, txr->br))
@@ -870,7 +872,7 @@ igb_resume(device_t dev)
}
-#if __FreeBSD_version < 800000
+#ifdef IGB_LEGACY_TX
/*********************************************************************
* Transmit entry point
@@ -948,7 +950,7 @@ igb_start(struct ifnet *ifp)
return;
}
-#else /* __FreeBSD_version >= 800000 */
+#else /* ~IGB_LEGACY_TX */
/*
** Multiqueue Transmit Entry:
@@ -972,7 +974,13 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
que = &adapter->queues[i];
err = drbr_enqueue(ifp, txr->br, m);
- taskqueue_enqueue(que->tq, &txr->txq_task);
+ if (err)
+ return (err);
+ if (IGB_TX_TRYLOCK(txr)) {
+ err = igb_mq_start_locked(ifp, txr);
+ IGB_TX_UNLOCK(txr);
+ } else
+ taskqueue_enqueue(que->tq, &txr->txq_task);
return (err);
}
@@ -981,8 +989,8 @@ static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *buf;
- int err = 0, enq = 0;
+ struct mbuf *next;
+ int err = 0, enq;
IGB_TX_LOCK_ASSERT(txr);
@@ -990,22 +998,32 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
adapter->link_active == 0)
return (ENETDOWN);
+ enq = 0;
+
/* Process the queue */
- buf = drbr_dequeue(ifp, txr->br);
- while (buf != NULL) {
- if ((err = igb_xmit(txr, &buf)) != 0) {
- if (buf != NULL)
- err = drbr_enqueue(ifp, txr->br, buf);
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ if ((err = igb_xmit(txr, &next)) != 0) {
+ if (next == NULL) {
+ /* It was freed, move forward */
+ drbr_advance(ifp, txr->br);
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
+ drbr_putback(ifp, txr->br, next);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enq++;
- ifp->if_obytes += buf->m_pkthdr.len;
- if (buf->m_flags & M_MCAST)
+ ifp->if_obytes += next->m_pkthdr.len;
+ if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
- ETHER_BPF_MTAP(ifp, buf);
+ ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- buf = drbr_dequeue(ifp, txr->br);
}
if (enq > 0) {
/* Set the watchdog */
@@ -1053,7 +1071,7 @@ igb_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#endif /* __FreeBSD_version >= 800000 */
+#endif /* ~IGB_LEGACY_TX */
/*********************************************************************
* Ioctl entry point
@@ -1379,7 +1397,7 @@ igb_handle_que(void *context, int pending)
IGB_TX_LOCK(txr);
igb_txeof(txr);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
/* Process the stack queue only if not depleted */
if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
!drbr_empty(ifp, txr->br))
@@ -1430,7 +1448,7 @@ igb_handle_link_locked(struct adapter *adapter)
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
for (int i = 0; i < adapter->num_queues; i++, txr++) {
IGB_TX_LOCK(txr);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
/* Process the stack queue only if not depleted */
if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
!drbr_empty(ifp, txr->br))
@@ -1532,7 +1550,7 @@ igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
do {
more = igb_txeof(txr);
} while (loop-- && more);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
if (!drbr_empty(ifp, txr->br))
igb_mq_start_locked(ifp, txr);
#else
@@ -1562,12 +1580,16 @@ igb_msix_que(void *arg)
u32 newitr = 0;
bool more_rx;
+ /* Ignore spurious interrupts */
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return;
+
E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
++que->irqs;
IGB_TX_LOCK(txr);
igb_txeof(txr);
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
/* Process the stack queue only if not depleted */
if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
!drbr_empty(ifp, txr->br))
@@ -2087,7 +2109,9 @@ static void
igb_disable_promisc(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
+ struct ifnet *ifp = adapter->ifp;
u32 reg;
+ int mcnt = 0;
if (adapter->vf_ifp) {
e1000_promisc_set_vf(hw, e1000_promisc_disabled);
@@ -2095,7 +2119,31 @@ igb_disable_promisc(struct adapter *adapter)
}
reg = E1000_READ_REG(hw, E1000_RCTL);
reg &= (~E1000_RCTL_UPE);
- reg &= (~E1000_RCTL_MPE);
+ if (ifp->if_flags & IFF_ALLMULTI)
+ mcnt = MAX_NUM_MULTICAST_ADDRESSES;
+ else {
+ struct ifmultiaddr *ifma;
+#if __FreeBSD_version < 800000
+ IF_ADDR_LOCK(ifp);
+#else
+ if_maddr_rlock(ifp);
+#endif
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
+ break;
+ mcnt++;
+ }
+#if __FreeBSD_version < 800000
+ IF_ADDR_UNLOCK(ifp);
+#else
+ if_maddr_runlock(ifp);
+#endif
+ }
+ /* Don't disable if in MAX groups */
+ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
+ reg &= (~E1000_RCTL_MPE);
E1000_WRITE_REG(hw, E1000_RCTL, reg);
}
@@ -2370,16 +2418,8 @@ igb_identify_hardware(struct adapter *adapter)
device_t dev = adapter->dev;
/* Make sure our PCI config space has the necessary stuff set */
+ pci_enable_busmaster(dev);
adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
- if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
- (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
- INIT_DEBUGOUT("Memory Access and/or Bus Master "
- "bits were not set!\n");
- adapter->hw.bus.pci_cmd_word |=
- (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
- pci_write_config(dev, PCIR_COMMAND,
- adapter->hw.bus.pci_cmd_word, 2);
- }
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
@@ -2439,7 +2479,6 @@ igb_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct igb_queue *que = adapter->queues;
- struct tx_ring *txr = adapter->tx_rings;
int error, rid = 0;
/* Turn off all interrupts */
@@ -2458,8 +2497,8 @@ igb_allocate_legacy(struct adapter *adapter)
return (ENXIO);
}
-#if __FreeBSD_version >= 800000
- TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
+#ifndef IGB_LEGACY_TX
+ TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
#endif
/*
@@ -2542,7 +2581,7 @@ igb_allocate_msix(struct adapter *adapter)
i,igb_last_bind_cpu);
igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
}
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
que->txr);
#endif
@@ -2768,7 +2807,7 @@ igb_free_pci_resources(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, que++) {
if (que->tq != NULL) {
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
taskqueue_drain(que->tq, &que->txr->txq_task);
#endif
taskqueue_drain(que->tq, &que->que_task);
@@ -2803,24 +2842,19 @@ igb_setup_msix(struct adapter *adapter)
goto msi;
/* First try MSI/X */
+ msgs = pci_msix_count(dev);
+ if (msgs == 0)
+ goto msi;
rid = PCIR_BAR(IGB_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
SYS_RES_MEMORY, &rid, RF_ACTIVE);
- if (!adapter->msix_mem) {
+ if (adapter->msix_mem == NULL) {
/* May not be enabled */
device_printf(adapter->dev,
"Unable to map MSIX table \n");
goto msi;
}
- msgs = pci_msix_count(dev);
- if (msgs == 0) { /* system has msix disabled */
- bus_release_resource(dev, SYS_RES_MEMORY,
- PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
- adapter->msix_mem = NULL;
- goto msi;
- }
-
/* Figure out a reasonable auto config value */
queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
@@ -2863,20 +2897,32 @@ igb_setup_msix(struct adapter *adapter)
"MSIX Configuration Problem, "
"%d vectors configured, but %d queues wanted!\n",
msgs, want);
- return (0);
+ goto msi;
}
- if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
+ if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
device_printf(adapter->dev,
"Using MSIX interrupts with %d vectors\n", msgs);
adapter->num_queues = queues;
return (msgs);
}
+ /*
+ ** If MSIX alloc failed or provided us with
+ ** less than needed, free and fall through to MSI
+ */
+ pci_release_msi(dev);
+
msi:
- msgs = pci_msi_count(dev);
- if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
- device_printf(adapter->dev," Using MSI interrupt\n");
+ if (adapter->msix_mem != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
+ adapter->msix_mem = NULL;
+ }
+ msgs = 1;
+ if (pci_alloc_msi(dev, &msgs) == 0) {
+ device_printf(adapter->dev," Using an MSI interrupt\n");
return (msgs);
}
+ device_printf(adapter->dev," Using a Legacy interrupt\n");
return (0);
}
@@ -3078,7 +3124,7 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = igb_ioctl;
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
ifp->if_transmit = igb_mq_start;
ifp->if_qflush = igb_qflush;
#else
@@ -3322,7 +3368,7 @@ igb_allocate_queues(struct adapter *adapter)
error = ENOMEM;
goto err_tx_desc;
}
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
/* Allocate a buf ring */
txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
M_WAITOK, &txr->tx_mtx);
@@ -3383,7 +3429,7 @@ err_tx_desc:
igb_dma_free(adapter, &txr->txdma);
free(adapter->rx_rings, M_DEVBUF);
rx_fail:
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
buf_ring_free(txr->br, M_DEVBUF);
#endif
free(adapter->tx_rings, M_DEVBUF);
@@ -3641,7 +3687,7 @@ igb_free_transmit_buffers(struct tx_ring *txr)
tx_buffer->map = NULL;
}
}
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
if (txr->br != NULL)
buf_ring_free(txr->br, M_DEVBUF);
#endif
@@ -3863,17 +3909,9 @@ igb_txeof(struct tx_ring *txr)
IGB_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(ifp);
-
- selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
- IGB_TX_UNLOCK(txr);
- IGB_CORE_LOCK(adapter);
- selwakeuppri(&na->tx_si, PI_NET);
- IGB_CORE_UNLOCK(adapter);
- IGB_TX_LOCK(txr);
- return FALSE;
- }
+ if (netmap_tx_irq(ifp, txr->me |
+ (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
+ return (FALSE);
#endif /* DEV_NETMAP */
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IGB_QUEUE_IDLE;
@@ -4727,17 +4765,8 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(ifp);
-
- na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
- selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
- IGB_RX_UNLOCK(rxr);
- IGB_CORE_LOCK(adapter);
- selwakeuppri(&na->rx_si, PI_NET);
- IGB_CORE_UNLOCK(adapter);
- return (0);
- }
+ if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
+ return (FALSE);
#endif /* DEV_NETMAP */
/* Main clean loop */
@@ -4862,7 +4891,7 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
rxr->fmp->m_pkthdr.ether_vtag = vtag;
rxr->fmp->m_flags |= M_VLANTAG;
}
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
rxr->fmp->m_pkthdr.flowid = que->msix;
rxr->fmp->m_flags |= M_FLOWID;
#endif
diff --git a/freebsd/sys/dev/e1000/if_igb.h b/freebsd/sys/dev/e1000/if_igb.h
index f286e67a..6f3a3a54 100644
--- a/freebsd/sys/dev/e1000/if_igb.h
+++ b/freebsd/sys/dev/e1000/if_igb.h
@@ -297,7 +297,7 @@ struct tx_ring {
u32 next_to_clean;
volatile u16 tx_avail;
struct igb_tx_buffer *tx_buffers;
-#if __FreeBSD_version >= 800000
+#ifndef IGB_LEGACY_TX
struct buf_ring *br;
struct task txq_task;
#endif
diff --git a/freebsd/sys/dev/e1000/if_lem.c b/freebsd/sys/dev/e1000/if_lem.c
index aa64f655..5127445c 100644
--- a/freebsd/sys/dev/e1000/if_lem.c
+++ b/freebsd/sys/dev/e1000/if_lem.c
@@ -34,10 +34,11 @@
******************************************************************************/
/*$FreeBSD$*/
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef HAVE_KERNEL_OPTION_HEADERS
+#include <rtems/bsd/local/opt_device_polling.h>
#endif
#include <rtems/bsd/sys/param.h>
@@ -87,7 +88,7 @@
/*********************************************************************
* Legacy Em Driver version:
*********************************************************************/
-char lem_driver_version[] = "1.0.5";
+char lem_driver_version[] = "1.0.6";
/*********************************************************************
* PCI Device ID Table
@@ -283,6 +284,9 @@ MODULE_DEPEND(lem, ether, 1, 1, 1);
#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
+#define MAX_INTS_PER_SEC 8000
+#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
+
static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
@@ -444,6 +448,11 @@ lem_attach(device_t dev)
&adapter->tx_abs_int_delay,
E1000_REGISTER(&adapter->hw, E1000_TADV),
lem_tx_abs_int_delay_dflt);
+ lem_add_int_delay_sysctl(adapter, "itr",
+ "interrupt delay limit in usecs/4",
+ &adapter->tx_itr,
+ E1000_REGISTER(&adapter->hw, E1000_ITR),
+ DEFAULT_ITR);
}
/* Sysctls for limiting the amount of work done in the taskqueue */
@@ -1339,12 +1348,16 @@ lem_handle_rxtx(void *context, int pending)
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- lem_rxeof(adapter, adapter->rx_process_limit, NULL);
+ bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
EM_TX_LOCK(adapter);
lem_txeof(adapter);
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
lem_start_locked(ifp);
EM_TX_UNLOCK(adapter);
+ if (more) {
+ taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
+ return;
+ }
}
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
@@ -1858,12 +1871,37 @@ lem_set_promisc(struct adapter *adapter)
static void
lem_disable_promisc(struct adapter *adapter)
{
- u32 reg_rctl;
+ struct ifnet *ifp = adapter->ifp;
+ u32 reg_rctl;
+ int mcnt = 0;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
-
reg_rctl &= (~E1000_RCTL_UPE);
- reg_rctl &= (~E1000_RCTL_MPE);
+ if (ifp->if_flags & IFF_ALLMULTI)
+ mcnt = MAX_NUM_MULTICAST_ADDRESSES;
+ else {
+ struct ifmultiaddr *ifma;
+#if __FreeBSD_version < 800000
+ IF_ADDR_LOCK(ifp);
+#else
+ if_maddr_rlock(ifp);
+#endif
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
+ break;
+ mcnt++;
+ }
+#if __FreeBSD_version < 800000
+ IF_ADDR_UNLOCK(ifp);
+#else
+ if_maddr_runlock(ifp);
+#endif
+ }
+ /* Don't disable if in MAX groups */
+ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
+ reg_rctl &= (~E1000_RCTL_MPE);
reg_rctl &= (~E1000_RCTL_SBP);
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}
@@ -2083,16 +2121,8 @@ lem_identify_hardware(struct adapter *adapter)
device_t dev = adapter->dev;
/* Make sure our PCI config space has the necessary stuff set */
+ pci_enable_busmaster(dev);
adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
- if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
- (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
- device_printf(dev, "Memory Access and/or Bus Master bits "
- "were not set!\n");
- adapter->hw.bus.pci_cmd_word |=
- (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
- pci_write_config(dev, PCIR_COMMAND,
- adapter->hw.bus.pci_cmd_word, 2);
- }
/* Save off the information about this board */
adapter->hw.vendor_id = pci_get_vendor(dev);
@@ -2957,10 +2987,8 @@ lem_txeof(struct adapter *adapter)
EM_TX_LOCK_ASSERT(adapter);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET);
+ if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
return;
- }
#endif /* DEV_NETMAP */
if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
return;
@@ -3248,8 +3276,6 @@ lem_setup_receive_structures(struct adapter *adapter)
* Enable receive unit.
*
**********************************************************************/
-#define MAX_INTS_PER_SEC 8000
-#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256)
static void
lem_initialize_receive_unit(struct adapter *adapter)
@@ -3340,19 +3366,13 @@ lem_initialize_receive_unit(struct adapter *adapter)
* Tail Descriptor Pointers
*/
E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0);
+ rctl = adapter->num_rx_desc - 1; /* default RDT value */
#ifdef DEV_NETMAP
/* preserve buffers already made available to clients */
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(adapter->ifp);
- struct netmap_kring *kring = &na->rx_rings[0];
- int t = na->num_rx_desc - 1 - kring->nr_hwavail;
-
- if (t >= na->num_rx_desc)
- t -= na->num_rx_desc;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), t);
- } else
+ if (ifp->if_capenable & IFCAP_NETMAP)
+ rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail;
#endif /* DEV_NETMAP */
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1);
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl);
return;
}
@@ -3436,13 +3456,8 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
BUS_DMASYNC_POSTREAD);
#ifdef DEV_NETMAP
- if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_adapter *na = NA(ifp);
- na->rx_rings[0].nr_kflags |= NKR_PENDINTR;
- selwakeuppri(&na->rx_rings[0].si, PI_NET);
- EM_RX_UNLOCK(adapter);
- return (0);
- }
+ if (netmap_rx_irq(ifp, 0 | NETMAP_LOCKED_ENTER, &rx_sent))
+ return (FALSE);
#endif /* DEV_NETMAP */
if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
@@ -4586,6 +4601,8 @@ lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
return (EINVAL);
info->value = usecs;
ticks = EM_USECS_TO_TICKS(usecs);
+ if (info->offset == E1000_ITR) /* units are 256ns here */
+ ticks *= 4;
adapter = info->adapter;
diff --git a/freebsd/sys/dev/e1000/if_lem.h b/freebsd/sys/dev/e1000/if_lem.h
index c1973e55..235277d7 100644
--- a/freebsd/sys/dev/e1000/if_lem.h
+++ b/freebsd/sys/dev/e1000/if_lem.h
@@ -363,6 +363,7 @@ struct adapter {
struct em_int_delay_info tx_abs_int_delay;
struct em_int_delay_info rx_int_delay;
struct em_int_delay_info rx_abs_int_delay;
+ struct em_int_delay_info tx_itr;
/*
* Transmit definitions
diff --git a/freebsd/sys/dev/fxp/if_fxp.c b/freebsd/sys/dev/fxp/if_fxp.c
index c67d4827..806163a2 100644
--- a/freebsd/sys/dev/fxp/if_fxp.c
+++ b/freebsd/sys/dev/fxp/if_fxp.c
@@ -292,7 +292,7 @@ static device_method_t fxp_methods[] = {
DEVMETHOD(miibus_writereg, fxp_miibus_writereg),
DEVMETHOD(miibus_statchg, fxp_miibus_statchg),
- { 0, 0 }
+ DEVMETHOD_END
};
static driver_t fxp_driver = {
@@ -303,8 +303,9 @@ static driver_t fxp_driver = {
static devclass_t fxp_devclass;
-DRIVER_MODULE(fxp, pci, fxp_driver, fxp_devclass, 0, 0);
-DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, 0, 0);
+DRIVER_MODULE_ORDERED(fxp, pci, fxp_driver, fxp_devclass, NULL, NULL,
+ SI_ORDER_ANY);
+DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, NULL, NULL);
static struct resource_spec fxp_res_spec_mem[] = {
{ SYS_RES_MEMORY, FXP_PCI_MMBA, RF_ACTIVE },
@@ -453,7 +454,6 @@ fxp_attach(device_t dev)
* Enable bus mastering.
*/
pci_enable_busmaster(dev);
- val = pci_read_config(dev, PCIR_COMMAND, 2);
/*
* Figure out which we should try first - memory mapping or i/o mapping?
@@ -525,7 +525,7 @@ fxp_attach(device_t dev)
sc->revision != FXP_REV_82559S_A) {
data = sc->eeprom[FXP_EEPROM_MAP_ID];
if ((data & 0x20) != 0 &&
- pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0)
+ pci_find_cap(sc->dev, PCIY_PMG, &pmc) == 0)
sc->flags |= FXP_FLAG_WOLCAP;
}
@@ -611,6 +611,7 @@ fxp_attach(device_t dev)
* is a valid cacheline size (8 or 16 dwords), then tell
* the board to turn on MWI.
*/
+ val = pci_read_config(dev, PCIR_COMMAND, 2);
if (val & PCIM_CMD_MWRICEN &&
pci_read_config(dev, PCIR_CACHELNSZ, 1) != 0)
sc->flags |= FXP_FLAG_MWI_ENABLE;
@@ -1068,7 +1069,7 @@ fxp_suspend(device_t dev)
FXP_LOCK(sc);
ifp = sc->ifp;
- if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) {
+ if (pci_find_cap(sc->dev, PCIY_PMG, &pmc) == 0) {
pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) {
@@ -1103,7 +1104,7 @@ fxp_resume(device_t dev)
FXP_LOCK(sc);
- if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) {
+ if (pci_find_cap(sc->dev, PCIY_PMG, &pmc) == 0) {
sc->flags &= ~FXP_FLAG_WOL;
pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
/* Disable PME and clear PME status. */
@@ -1449,7 +1450,7 @@ fxp_encap(struct fxp_softc *sc, struct mbuf **m_head)
if (M_WRITABLE(*m_head) == 0) {
/* Get a writable copy. */
- m = m_dup(*m_head, M_DONTWAIT);
+ m = m_dup(*m_head, M_NOWAIT);
m_freem(*m_head);
if (m == NULL) {
*m_head = NULL;
@@ -1565,7 +1566,7 @@ fxp_encap(struct fxp_softc *sc, struct mbuf **m_head)
error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map, *m_head,
segs, &nseg, 0);
if (error == EFBIG) {
- m = m_collapse(*m_head, M_DONTWAIT, sc->maxtxseg);
+ m = m_collapse(*m_head, M_NOWAIT, sc->maxtxseg);
if (m == NULL) {
m_freem(*m_head);
*m_head = NULL;
@@ -2599,7 +2600,7 @@ fxp_ifmedia_upd(struct ifnet *ifp)
mii = device_get_softc(sc->miibus);
FXP_LOCK(sc);
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
- mii_phy_reset(miisc);
+ PHY_RESET(miisc);
mii_mediachg(mii);
FXP_UNLOCK(sc);
return (0);
@@ -2637,7 +2638,7 @@ fxp_new_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp)
bus_dmamap_t tmp_map;
int error;
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return (ENOBUFS);
diff --git a/freebsd/sys/dev/mii/brgphy.c b/freebsd/sys/dev/mii/brgphy.c
index e21e783e..de1249c3 100644
--- a/freebsd/sys/dev/mii/brgphy.c
+++ b/freebsd/sys/dev/mii/brgphy.c
@@ -70,9 +70,6 @@ static int brgphy_attach(device_t);
struct brgphy_softc {
struct mii_softc mii_sc;
- int mii_oui;
- int mii_model;
- int mii_rev;
int serdes_flags; /* Keeps track of the serdes type used */
#define BRGPHY_5706S 0x0001
#define BRGPHY_5708S 0x0002
@@ -119,39 +116,49 @@ static void brgphy_ethernet_wirespeed(struct mii_softc *);
static void brgphy_jumbo_settings(struct mii_softc *, u_long);
static const struct mii_phydesc brgphys[] = {
- MII_PHY_DESC(xxBROADCOM, BCM5400),
- MII_PHY_DESC(xxBROADCOM, BCM5401),
- MII_PHY_DESC(xxBROADCOM, BCM5411),
- MII_PHY_DESC(xxBROADCOM, BCM54K2),
- MII_PHY_DESC(xxBROADCOM, BCM5701),
- MII_PHY_DESC(xxBROADCOM, BCM5703),
- MII_PHY_DESC(xxBROADCOM, BCM5704),
- MII_PHY_DESC(xxBROADCOM, BCM5705),
- MII_PHY_DESC(xxBROADCOM, BCM5706),
- MII_PHY_DESC(xxBROADCOM, BCM5714),
- MII_PHY_DESC(xxBROADCOM, BCM5750),
- MII_PHY_DESC(xxBROADCOM, BCM5752),
- MII_PHY_DESC(xxBROADCOM, BCM5754),
- MII_PHY_DESC(xxBROADCOM, BCM5780),
- MII_PHY_DESC(xxBROADCOM, BCM5708C),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5755),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5787),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5708S),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5709CAX),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5722),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5784),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5709C),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5761),
- MII_PHY_DESC(xxBROADCOM_ALT1, BCM5709S),
- MII_PHY_DESC(xxBROADCOM_ALT2, BCM5717C),
- MII_PHY_DESC(xxBROADCOM_ALT2, BCM5719C),
- MII_PHY_DESC(xxBROADCOM_ALT2, BCM5720C),
- MII_PHY_DESC(xxBROADCOM_ALT2, BCM57765),
- MII_PHY_DESC(xxBROADCOM_ALT2, BCM57780),
- MII_PHY_DESC(BROADCOM2, BCM5906),
+ MII_PHY_DESC(BROADCOM, BCM5400),
+ MII_PHY_DESC(BROADCOM, BCM5401),
+ MII_PHY_DESC(BROADCOM, BCM5411),
+ MII_PHY_DESC(BROADCOM, BCM54K2),
+ MII_PHY_DESC(BROADCOM, BCM5701),
+ MII_PHY_DESC(BROADCOM, BCM5703),
+ MII_PHY_DESC(BROADCOM, BCM5704),
+ MII_PHY_DESC(BROADCOM, BCM5705),
+ MII_PHY_DESC(BROADCOM, BCM5706),
+ MII_PHY_DESC(BROADCOM, BCM5714),
+ MII_PHY_DESC(BROADCOM, BCM5421),
+ MII_PHY_DESC(BROADCOM, BCM5750),
+ MII_PHY_DESC(BROADCOM, BCM5752),
+ MII_PHY_DESC(BROADCOM, BCM5780),
+ MII_PHY_DESC(BROADCOM, BCM5708C),
+ MII_PHY_DESC(BROADCOM2, BCM5482),
+ MII_PHY_DESC(BROADCOM2, BCM5708S),
+ MII_PHY_DESC(BROADCOM2, BCM5709C),
+ MII_PHY_DESC(BROADCOM2, BCM5709S),
+ MII_PHY_DESC(BROADCOM2, BCM5709CAX),
+ MII_PHY_DESC(BROADCOM2, BCM5722),
+ MII_PHY_DESC(BROADCOM2, BCM5755),
+ MII_PHY_DESC(BROADCOM2, BCM5754),
+ MII_PHY_DESC(BROADCOM2, BCM5761),
+ MII_PHY_DESC(BROADCOM2, BCM5784),
+#ifdef notyet /* better handled by ukphy(4) until WARs are implemented */
+ MII_PHY_DESC(BROADCOM2, BCM5785),
+#endif
+ MII_PHY_DESC(BROADCOM3, BCM5717C),
+ MII_PHY_DESC(BROADCOM3, BCM5719C),
+ MII_PHY_DESC(BROADCOM3, BCM5720C),
+ MII_PHY_DESC(BROADCOM3, BCM57765),
+ MII_PHY_DESC(BROADCOM3, BCM57780),
+ MII_PHY_DESC(xxBROADCOM_ALT1, BCM5906),
MII_PHY_END
};
+static const struct mii_phy_funcs brgphy_funcs = {
+ brgphy_service,
+ brgphy_status,
+ brgphy_reset
+};
+
#define HS21_PRODUCT_ID "IBM eServer BladeCenter HS21"
#define HS21_BCM_CHIPID 0x57081021
@@ -190,41 +197,17 @@ brgphy_attach(device_t dev)
struct bge_softc *bge_sc = NULL;
struct bce_softc *bce_sc = NULL;
struct mii_softc *sc;
- struct mii_attach_args *ma;
- struct mii_data *mii;
struct ifnet *ifp;
bsc = device_get_softc(dev);
sc = &bsc->mii_sc;
- ma = device_get_ivars(dev);
- sc->mii_dev = device_get_parent(dev);
- mii = ma->mii_data;
- LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
-
- /* Initialize mii_softc structure */
- sc->mii_flags = miibus_get_flags(dev);
- sc->mii_inst = mii->mii_instance++;
- sc->mii_phy = ma->mii_phyno;
- sc->mii_service = brgphy_service;
- sc->mii_pdata = mii;
- /*
- * At least some variants wedge when isolating, at least some also
- * don't support loopback.
- */
- sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOLOOP | MIIF_NOMANPAUSE;
+ mii_phy_dev_attach(dev, MIIF_NOISOLATE | MIIF_NOMANPAUSE,
+ &brgphy_funcs, 0);
- /* Initialize brgphy_softc structure */
- bsc->mii_oui = MII_OUI(ma->mii_id1, ma->mii_id2);
- bsc->mii_model = MII_MODEL(ma->mii_id2);
- bsc->mii_rev = MII_REV(ma->mii_id2);
bsc->serdes_flags = 0;
ifp = sc->mii_pdata->mii_ifp;
- if (bootverbose)
- device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n",
- bsc->mii_oui, bsc->mii_model, bsc->mii_rev);
-
/* Find the MAC driver associated with this PHY. */
if (strcmp(ifp->if_dname, "bge") == 0)
bge_sc = ifp->if_softc;
@@ -232,11 +215,11 @@ brgphy_attach(device_t dev)
bce_sc = ifp->if_softc;
/* Handle any special cases based on the PHY ID */
- switch (bsc->mii_oui) {
- case MII_OUI_xxBROADCOM:
- switch (bsc->mii_model) {
- case MII_MODEL_xxBROADCOM_BCM5706:
- case MII_MODEL_xxBROADCOM_BCM5714:
+ switch (sc->mii_mpd_oui) {
+ case MII_OUI_BROADCOM:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_BROADCOM_BCM5706:
+ case MII_MODEL_BROADCOM_BCM5714:
/*
* The 5464 PHY used in the 5706 supports both copper
* and fiber interfaces over GMII. Need to check the
@@ -254,13 +237,13 @@ brgphy_attach(device_t dev)
break;
}
break;
- case MII_OUI_xxBROADCOM_ALT1:
- switch (bsc->mii_model) {
- case MII_MODEL_xxBROADCOM_ALT1_BCM5708S:
+ case MII_OUI_BROADCOM2:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_BROADCOM2_BCM5708S:
bsc->serdes_flags |= BRGPHY_5708S;
sc->mii_flags |= MIIF_HAVEFIBER;
break;
- case MII_MODEL_xxBROADCOM_ALT1_BCM5709S:
+ case MII_MODEL_BROADCOM2_BCM5709S:
/*
* XXX
* 5720S and 5709S shares the same PHY id.
@@ -276,15 +259,15 @@ brgphy_attach(device_t dev)
break;
}
- brgphy_reset(sc);
+ PHY_RESET(sc);
/* Read the PHY's capabilities. */
- sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
+ sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & sc->mii_capmask;
if (sc->mii_capabilities & BMSR_EXTSTAT)
sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
device_printf(dev, " ");
-#define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL)
+#define ADD(m, c) ifmedia_add(&sc->mii_pdata->mii_media, (m), (c), NULL)
/* Add the supported media types */
if ((sc->mii_flags & MIIF_HAVEFIBER) == 0) {
@@ -325,7 +308,6 @@ brgphy_attach(device_t dev)
static int
brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
{
- struct brgphy_softc *bsc = (struct brgphy_softc *)sc;
struct ifmedia_entry *ife = mii->mii_media.ifm_cur;
int val;
@@ -338,7 +320,7 @@ brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
break;
/* Todo: Why is this here? Is it really needed? */
- brgphy_reset(sc); /* XXX hardware bug work-around */
+ PHY_RESET(sc); /* XXX hardware bug work-around */
switch (IFM_SUBTYPE(ife->ifm_media)) {
case IFM_AUTO:
@@ -393,7 +375,7 @@ brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
}
/* Update the media status. */
- brgphy_status(sc);
+ PHY_STATUS(sc);
/*
* Callback if something changed. Note that we need to poke
@@ -402,20 +384,20 @@ brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
if (sc->mii_media_active != mii->mii_media_active ||
sc->mii_media_status != mii->mii_media_status ||
cmd == MII_MEDIACHG) {
- switch (bsc->mii_oui) {
- case MII_OUI_xxBROADCOM:
- switch (bsc->mii_model) {
- case MII_MODEL_xxBROADCOM_BCM5400:
+ switch (sc->mii_mpd_oui) {
+ case MII_OUI_BROADCOM:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_BROADCOM_BCM5400:
bcm5401_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM5401:
- if (bsc->mii_rev == 1 || bsc->mii_rev == 3)
+ case MII_MODEL_BROADCOM_BCM5401:
+ if (sc->mii_mpd_rev == 1 || sc->mii_mpd_rev == 3)
bcm5401_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM5411:
+ case MII_MODEL_BROADCOM_BCM5411:
bcm5411_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM54K2:
+ case MII_MODEL_BROADCOM_BCM54K2:
bcm54k2_load_dspcode(sc);
break;
}
@@ -474,8 +456,7 @@ brgphy_setmedia(struct mii_softc *sc, int media)
if (IFM_SUBTYPE(media) == IFM_1000_T) {
gig |= BRGPHY_1000CTL_MSE;
- if ((media & IFM_ETH_MASTER) != 0 ||
- (sc->mii_pdata->mii_ifp->if_flags & IFF_LINK0) != 0)
+ if ((media & IFM_ETH_MASTER) != 0)
gig |= BRGPHY_1000CTL_MSC;
}
PHY_WRITE(sc, BRGPHY_MII_1000CTL, gig);
@@ -625,10 +606,9 @@ brgphy_status(struct mii_softc *sc)
static void
brgphy_mii_phy_auto(struct mii_softc *sc, int media)
{
- struct brgphy_softc *bsc = (struct brgphy_softc *)sc;
int anar, ktcr = 0;
- brgphy_reset(sc);
+ PHY_RESET(sc);
if ((sc->mii_flags & MIIF_HAVEFIBER) == 0) {
anar = BMSR_MEDIA_TO_ANAR(sc->mii_capabilities) | ANAR_CSMA;
@@ -637,7 +617,7 @@ brgphy_mii_phy_auto(struct mii_softc *sc, int media)
anar |= BRGPHY_ANAR_PC | BRGPHY_ANAR_ASP;
PHY_WRITE(sc, BRGPHY_MII_ANAR, anar);
ktcr = BRGPHY_1000CTL_AFD | BRGPHY_1000CTL_AHD;
- if (bsc->mii_model == MII_MODEL_xxBROADCOM_BCM5701)
+ if (sc->mii_mpd_model == MII_MODEL_BROADCOM_BCM5701)
ktcr |= BRGPHY_1000CTL_MSE | BRGPHY_1000CTL_MSC;
PHY_WRITE(sc, BRGPHY_MII_1000CTL, ktcr);
PHY_READ(sc, BRGPHY_MII_1000CTL);
@@ -874,12 +854,11 @@ brgphy_ethernet_wirespeed(struct mii_softc *sc)
static void
brgphy_jumbo_settings(struct mii_softc *sc, u_long mtu)
{
- struct brgphy_softc *bsc = (struct brgphy_softc *)sc;
uint32_t val;
/* Set or clear jumbo frame settings in the PHY. */
if (mtu > ETHER_MAX_LEN) {
- if (bsc->mii_model == MII_MODEL_xxBROADCOM_BCM5401) {
+ if (sc->mii_mpd_model == MII_MODEL_BROADCOM_BCM5401) {
/* BCM5401 PHY cannot read-modify-write. */
PHY_WRITE(sc, BRGPHY_MII_AUXCTL, 0x4c20);
} else {
@@ -907,7 +886,6 @@ brgphy_jumbo_settings(struct mii_softc *sc, u_long mtu)
static void
brgphy_reset(struct mii_softc *sc)
{
- struct brgphy_softc *bsc = (struct brgphy_softc *)sc;
struct bge_softc *bge_sc = NULL;
struct bce_softc *bce_sc = NULL;
struct ifnet *ifp;
@@ -929,30 +907,30 @@ brgphy_reset(struct mii_softc *sc)
}
/* Handle any PHY specific procedures following the reset. */
- switch (bsc->mii_oui) {
- case MII_OUI_xxBROADCOM:
- switch (bsc->mii_model) {
- case MII_MODEL_xxBROADCOM_BCM5400:
+ switch (sc->mii_mpd_oui) {
+ case MII_OUI_BROADCOM:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_BROADCOM_BCM5400:
bcm5401_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM5401:
- if (bsc->mii_rev == 1 || bsc->mii_rev == 3)
+ case MII_MODEL_BROADCOM_BCM5401:
+ if (sc->mii_mpd_rev == 1 || sc->mii_mpd_rev == 3)
bcm5401_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM5411:
+ case MII_MODEL_BROADCOM_BCM5411:
bcm5411_load_dspcode(sc);
break;
- case MII_MODEL_xxBROADCOM_BCM54K2:
+ case MII_MODEL_BROADCOM_BCM54K2:
bcm54k2_load_dspcode(sc);
break;
}
break;
- case MII_OUI_xxBROADCOM_ALT2:
- switch (bsc->mii_model) {
- case MII_MODEL_xxBROADCOM_ALT2_BCM5717C:
- case MII_MODEL_xxBROADCOM_ALT2_BCM5719C:
- case MII_MODEL_xxBROADCOM_ALT2_BCM5720C:
- case MII_MODEL_xxBROADCOM_ALT2_BCM57765:
+ case MII_OUI_BROADCOM3:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_BROADCOM3_BCM5717C:
+ case MII_MODEL_BROADCOM3_BCM5719C:
+ case MII_MODEL_BROADCOM3_BCM5720C:
+ case MII_MODEL_BROADCOM3_BCM57765:
return;
}
break;
diff --git a/freebsd/sys/dev/mii/icsphy.c b/freebsd/sys/dev/mii/icsphy.c
index 47474db7..3d5e6384 100644
--- a/freebsd/sys/dev/mii/icsphy.c
+++ b/freebsd/sys/dev/mii/icsphy.c
@@ -85,11 +85,6 @@ __FBSDID("$FreeBSD$");
static int icsphy_probe(device_t dev);
static int icsphy_attach(device_t dev);
-struct icsphy_softc {
- struct mii_softc mii_sc;
- int mii_model;
-};
-
static device_method_t icsphy_methods[] = {
/* device interface */
DEVMETHOD(device_probe, icsphy_probe),
@@ -104,7 +99,7 @@ static devclass_t icsphy_devclass;
static driver_t icsphy_driver = {
"icsphy",
icsphy_methods,
- sizeof(struct icsphy_softc)
+ sizeof(struct mii_softc)
};
DRIVER_MODULE(icsphy, miibus, icsphy_driver, icsphy_devclass, 0, 0);
@@ -114,13 +109,19 @@ static void icsphy_status(struct mii_softc *);
static void icsphy_reset(struct mii_softc *);
static const struct mii_phydesc icsphys[] = {
- MII_PHY_DESC(xxICS, 1889),
- MII_PHY_DESC(xxICS, 1890),
- MII_PHY_DESC(xxICS, 1892),
- MII_PHY_DESC(xxICS, 1893),
+ MII_PHY_DESC(ICS, 1889),
+ MII_PHY_DESC(ICS, 1890),
+ MII_PHY_DESC(ICS, 1892),
+ MII_PHY_DESC(ICS, 1893),
MII_PHY_END
};
+static const struct mii_phy_funcs icsphy_funcs = {
+ icsphy_service,
+ icsphy_status,
+ icsphy_reset
+};
+
static int
icsphy_probe(device_t dev)
{
@@ -131,40 +132,9 @@ icsphy_probe(device_t dev)
static int
icsphy_attach(device_t dev)
{
- struct icsphy_softc *isc;
- struct mii_softc *sc;
- struct mii_attach_args *ma;
- struct mii_data *mii;
-
- isc = device_get_softc(dev);
- sc = &isc->mii_sc;
- ma = device_get_ivars(dev);
- sc->mii_dev = device_get_parent(dev);
- mii = ma->mii_data;
- LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
-
- sc->mii_flags = miibus_get_flags(dev);
- sc->mii_inst = mii->mii_instance++;
- sc->mii_phy = ma->mii_phyno;
- sc->mii_service = icsphy_service;
- sc->mii_pdata = mii;
-
- sc->mii_flags |= MIIF_NOISOLATE | MIIF_NOMANPAUSE;
-
- ifmedia_add(&mii->mii_media,
- IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP, sc->mii_inst),
- MII_MEDIA_100_TX, NULL);
-
- isc->mii_model = MII_MODEL(ma->mii_id2);
- icsphy_reset(sc);
-
- sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask;
- device_printf(dev, " ");
- mii_phy_add_media(sc);
- printf("\n");
-
- MIIBUS_MEDIAINIT(sc->mii_dev);
+ mii_phy_dev_attach(dev, MIIF_NOISOLATE | MIIF_NOMANPAUSE,
+ &icsphy_funcs, 1);
return (0);
}
@@ -193,7 +163,7 @@ icsphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
}
/* Update the media status. */
- icsphy_status(sc);
+ PHY_STATUS(sc);
/* Callback if something changed. */
mii_phy_update(sc, cmd);
@@ -253,16 +223,15 @@ icsphy_status(struct mii_softc *sc)
static void
icsphy_reset(struct mii_softc *sc)
{
- struct icsphy_softc *isc = (struct icsphy_softc *)sc;
mii_phy_reset(sc);
/* set powerdown feature */
- switch (isc->mii_model) {
- case MII_MODEL_xxICS_1890:
- case MII_MODEL_xxICS_1893:
+ switch (sc->mii_mpd_model) {
+ case MII_MODEL_ICS_1890:
+ case MII_MODEL_ICS_1893:
PHY_WRITE(sc, MII_ICSPHY_ECR2, ECR2_100AUTOPWRDN);
break;
- case MII_MODEL_xxICS_1892:
+ case MII_MODEL_ICS_1892:
PHY_WRITE(sc, MII_ICSPHY_ECR2,
ECR2_10AUTOPWRDN|ECR2_100AUTOPWRDN);
break;
diff --git a/freebsd/sys/dev/mii/mii.c b/freebsd/sys/dev/mii/mii.c
index e3a3ec84..d1f55cb0 100644
--- a/freebsd/sys/dev/mii/mii.c
+++ b/freebsd/sys/dev/mii/mii.c
@@ -72,6 +72,7 @@ static miibus_writereg_t miibus_writereg;
static miibus_linkchg_t miibus_linkchg;
static miibus_mediainit_t miibus_mediainit;
+static unsigned char mii_bitreverse(unsigned char x);
static device_method_t miibus_methods[] = {
/* device interface */
@@ -303,19 +304,12 @@ miibus_statchg(device_t dev)
{
device_t parent;
struct mii_data *mii;
- struct ifnet *ifp;
parent = device_get_parent(dev);
MIIBUS_STATCHG(parent);
mii = device_get_softc(dev);
-
- /*
- * Note that each NIC's softc must start with an ifnet pointer.
- * XXX: EVIL HACK!
- */
- ifp = *(struct ifnet **)device_get_softc(parent);
- ifp->if_baudrate = ifmedia_baudrate(mii->mii_media_active);
+ mii->mii_ifp->if_baudrate = ifmedia_baudrate(mii->mii_media_active);
}
static void
@@ -337,11 +331,7 @@ miibus_linkchg(device_t dev)
link_state = LINK_STATE_DOWN;
} else
link_state = LINK_STATE_UNKNOWN;
- /*
- * Note that each NIC's softc must start with an ifnet pointer.
- * XXX: EVIL HACK!
- */
- if_link_state_change(*(struct ifnet**)device_get_softc(parent), link_state);
+ if_link_state_change(mii->mii_ifp, link_state);
}
static void
@@ -493,6 +483,7 @@ mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
ma.mii_id1 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR1);
ma.mii_id2 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR2);
+ ma.mii_offset = ivars->mii_offset;
args = malloc(sizeof(struct mii_attach_args), M_DEVBUF,
M_NOWAIT);
if (args == NULL)
@@ -544,21 +535,6 @@ mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
return (rv);
}
-int
-mii_phy_probe(device_t dev, device_t *child, ifm_change_cb_t ifmedia_upd,
- ifm_stat_cb_t ifmedia_sts)
-{
- struct ifnet *ifp;
-
- /*
- * Note that each NIC's softc must start with an ifnet pointer.
- * XXX: EVIL HACK!
- */
- ifp = *(struct ifnet **)device_get_softc(dev);
- return (mii_attach(dev, child, ifp, ifmedia_upd, ifmedia_sts,
- BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0));
-}
-
/*
* Media changed; notify all PHYs.
*/
@@ -588,7 +564,7 @@ mii_mediachg(struct mii_data *mii)
BMCR_ISO);
continue;
}
- rv = (*child->mii_service)(child, mii, MII_MEDIACHG);
+ rv = PHY_SERVICE(child, mii, MII_MEDIACHG);
if (rv)
return (rv);
}
@@ -611,7 +587,7 @@ mii_tick(struct mii_data *mii)
*/
if (IFM_INST(ife->ifm_media) != child->mii_inst)
continue;
- (void)(*child->mii_service)(child, mii, MII_TICK);
+ (void)PHY_SERVICE(child, mii, MII_TICK);
}
}
@@ -633,7 +609,7 @@ mii_pollstat(struct mii_data *mii)
*/
if (IFM_INST(ife->ifm_media) != child->mii_inst)
continue;
- (void)(*child->mii_service)(child, mii, MII_POLLSTAT);
+ (void)PHY_SERVICE(child, mii, MII_POLLSTAT);
}
}
@@ -648,3 +624,25 @@ mii_down(struct mii_data *mii)
LIST_FOREACH(child, &mii->mii_phys, mii_list)
mii_phy_down(child);
}
+
+static unsigned char
+mii_bitreverse(unsigned char x)
+{
+ static unsigned const char nibbletab[16] = {
+ 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15
+ };
+
+ return ((nibbletab[x & 15] << 4) | nibbletab[x >> 4]);
+}
+
+u_int
+mii_oui(u_int id1, u_int id2)
+{
+ u_int h;
+
+ h = (id1 << 6) | (id2 >> 10);
+
+ return ((mii_bitreverse(h >> 16) << 16) |
+ (mii_bitreverse((h >> 8) & 0xff) << 8) |
+ mii_bitreverse(h & 0xff));
+}
diff --git a/freebsd/sys/dev/mii/mii.h b/freebsd/sys/dev/mii/mii.h
index 5316f1eb..668fb8fb 100644
--- a/freebsd/sys/dev/mii/mii.h
+++ b/freebsd/sys/dev/mii/mii.h
@@ -106,10 +106,6 @@
#define IDR2_MODEL 0x03f0 /* vendor model */
#define IDR2_REV 0x000f /* vendor revision */
-#define MII_OUI(id1, id2) (((id1) << 6) | ((id2) >> 10))
-#define MII_MODEL(id2) (((id2) & IDR2_MODEL) >> 4)
-#define MII_REV(id2) ((id2) & IDR2_REV)
-
#define MII_ANAR 0x04 /* Autonegotiation advertisement (rw) */
/* section 28.2.4.1 and 37.2.6.1 */
#define ANAR_NP 0x8000 /* Next page (ro) */
diff --git a/freebsd/sys/dev/mii/mii_physubr.c b/freebsd/sys/dev/mii/mii_physubr.c
index 7fc4135c..e2725ba6 100644
--- a/freebsd/sys/dev/mii/mii_physubr.c
+++ b/freebsd/sys/dev/mii/mii_physubr.c
@@ -58,7 +58,7 @@ __FBSDID("$FreeBSD$");
/*
* Media to register setting conversion table. Order matters.
*/
-const struct mii_media mii_media_table[MII_NMEDIA] = {
+static const struct mii_media mii_media_table[MII_NMEDIA] = {
/* None */
{ BMCR_ISO, ANAR_CSMA,
0, },
@@ -152,9 +152,6 @@ mii_phy_setmedia(struct mii_softc *sc)
}
}
- if ((ife->ifm_media & IFM_LOOP) != 0)
- bmcr |= BMCR_LOOP;
-
PHY_WRITE(sc, MII_ANAR, anar);
PHY_WRITE(sc, MII_BMCR, bmcr);
if ((sc->mii_flags & MIIF_HAVE_GTCR) != 0)
@@ -253,7 +250,7 @@ mii_phy_tick(struct mii_softc *sc)
return (EJUSTRETURN);
sc->mii_ticks = 0;
- mii_phy_reset(sc);
+ PHY_RESET(sc);
mii_phy_auto(sc);
return (0);
}
@@ -311,99 +308,6 @@ mii_phy_update(struct mii_softc *sc, int cmd)
}
/*
- * Given an ifmedia word, return the corresponding ANAR value.
- */
-int
-mii_anar(int media)
-{
- int rv;
-
- switch (media & (IFM_TMASK|IFM_NMASK|IFM_FDX)) {
- case IFM_ETHER|IFM_10_T:
- rv = ANAR_10|ANAR_CSMA;
- break;
- case IFM_ETHER|IFM_10_T|IFM_FDX:
- rv = ANAR_10_FD|ANAR_CSMA;
- break;
- case IFM_ETHER|IFM_100_TX:
- rv = ANAR_TX|ANAR_CSMA;
- break;
- case IFM_ETHER|IFM_100_TX|IFM_FDX:
- rv = ANAR_TX_FD|ANAR_CSMA;
- break;
- case IFM_ETHER|IFM_100_T4:
- rv = ANAR_T4|ANAR_CSMA;
- break;
- default:
- rv = 0;
- break;
- }
-
- return (rv);
-}
-
-/*
- * Initialize generic PHY media based on BMSR, called when a PHY is
- * attached. We expect to be set up to print a comma-separated list
- * of media names. Does not print a newline.
- */
-void
-mii_add_media(struct mii_softc *sc)
-{
- struct mii_data *mii = sc->mii_pdata;
- const char *sep = "";
-
- if ((sc->mii_capabilities & BMSR_MEDIAMASK) == 0) {
- printf("no media present");
- return;
- }
-
-#define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL)
-#define PRINT(s) printf("%s%s", sep, s); sep = ", "
-
- if (sc->mii_capabilities & BMSR_10THDX) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, 0, sc->mii_inst), 0);
- PRINT("10baseT");
- }
- if (sc->mii_capabilities & BMSR_10TFDX) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_FDX, sc->mii_inst),
- BMCR_FDX);
- PRINT("10baseT-FDX");
- }
- if (sc->mii_capabilities & BMSR_100TXHDX) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, 0, sc->mii_inst),
- BMCR_S100);
- PRINT("100baseTX");
- }
- if (sc->mii_capabilities & BMSR_100TXFDX) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_FDX, sc->mii_inst),
- BMCR_S100|BMCR_FDX);
- PRINT("100baseTX-FDX");
- }
- if (sc->mii_capabilities & BMSR_100T4) {
- /*
- * XXX How do you enable 100baseT4? I assume we set
- * XXX BMCR_S100 and then assume the PHYs will take
- * XXX watever action is necessary to switch themselves
- * XXX into T4 mode.
- */
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_T4, 0, sc->mii_inst),
- BMCR_S100);
- PRINT("100baseT4");
- }
- if (sc->mii_capabilities & BMSR_ANEG) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, sc->mii_inst),
- BMCR_AUTOEN);
- PRINT("auto");
- }
-
-
-
-#undef ADD
-#undef PRINT
-}
-
-/*
* Initialize generic PHY media based on BMSR, called when a PHY is
* attached. We expect to be set up to print a comma-separated list
* of media names. Does not print a newline.
@@ -621,6 +525,50 @@ mii_phy_dev_probe(device_t dev, const struct mii_phydesc *mpd, int mrv)
return (ENXIO);
}
+void
+mii_phy_dev_attach(device_t dev, u_int flags, const struct mii_phy_funcs *mpf,
+ int add_media)
+{
+ struct mii_softc *sc;
+ struct mii_attach_args *ma;
+ struct mii_data *mii;
+
+ sc = device_get_softc(dev);
+ ma = device_get_ivars(dev);
+ sc->mii_dev = device_get_parent(dev);
+ mii = ma->mii_data;
+ LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list);
+
+ sc->mii_flags = flags | miibus_get_flags(dev);
+ sc->mii_mpd_oui = MII_OUI(ma->mii_id1, ma->mii_id2);
+ sc->mii_mpd_model = MII_MODEL(ma->mii_id2);
+ sc->mii_mpd_rev = MII_REV(ma->mii_id2);
+ sc->mii_capmask = ma->mii_capmask;
+ sc->mii_inst = mii->mii_instance++;
+ sc->mii_phy = ma->mii_phyno;
+ sc->mii_offset = ma->mii_offset;
+ sc->mii_funcs = mpf;
+ sc->mii_pdata = mii;
+
+ if (bootverbose)
+ device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n",
+ sc->mii_mpd_oui, sc->mii_mpd_model, sc->mii_mpd_rev);
+
+ if (add_media == 0)
+ return;
+
+ PHY_RESET(sc);
+
+ sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & sc->mii_capmask;
+ if (sc->mii_capabilities & BMSR_EXTSTAT)
+ sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
+ device_printf(dev, " ");
+ mii_phy_add_media(sc);
+ printf("\n");
+
+ MIIBUS_MEDIAINIT(sc->mii_dev);
+}
+
/*
* Return the flow control status flag from MII_ANAR & MII_ANLPAR.
*/
diff --git a/freebsd/sys/dev/mii/miivar.h b/freebsd/sys/dev/mii/miivar.h
index ac58ac20..34b0e9ed 100644
--- a/freebsd/sys/dev/mii/miivar.h
+++ b/freebsd/sys/dev/mii/miivar.h
@@ -83,10 +83,13 @@ struct mii_data {
typedef struct mii_data mii_data_t;
/*
- * This call is used by the MII layer to call into the PHY driver
- * to perform a `service request'.
+ * Functions provided by the PHY to perform various functions.
*/
-typedef int (*mii_downcall_t)(struct mii_softc *, struct mii_data *, int);
+struct mii_phy_funcs {
+ int (*pf_service)(struct mii_softc *, struct mii_data *, int);
+ void (*pf_status)(struct mii_softc *);
+ void (*pf_reset)(struct mii_softc *);
+};
/*
* Requests that can be made to the downcall.
@@ -105,10 +108,17 @@ struct mii_softc {
LIST_ENTRY(mii_softc) mii_list; /* entry on parent's PHY list */
+ uint32_t mii_mpd_oui; /* the PHY's OUI (MII_OUI())*/
+ uint32_t mii_mpd_model; /* the PHY's model (MII_MODEL())*/
+ uint32_t mii_mpd_rev; /* the PHY's revision (MII_REV())*/
+ u_int mii_capmask; /* capability mask for BMSR */
u_int mii_phy; /* our MII address */
+ u_int mii_offset; /* first PHY, second PHY, etc. */
u_int mii_inst; /* instance for ifmedia */
- mii_downcall_t mii_service; /* our downcall */
+ /* Our PHY functions. */
+ const struct mii_phy_funcs *mii_funcs;
+
struct mii_data *mii_pdata; /* pointer to parent's mii_data */
u_int mii_flags; /* misc. flags; see below */
@@ -124,7 +134,9 @@ typedef struct mii_softc mii_softc_t;
/* mii_flags */
#define MIIF_INITDONE 0x00000001 /* has been initialized (mii_data) */
#define MIIF_NOISOLATE 0x00000002 /* do not isolate the PHY */
+#if 0
#define MIIF_NOLOOP 0x00000004 /* no loopback capability */
+#endif
#define MIIF_DOINGAUTO 0x00000008 /* doing autonegotiation (mii_softc) */
#define MIIF_AUTOTSLEEP 0x00000010 /* use tsleep(), not callout() */
#define MIIF_HAVEFIBER 0x00000020 /* from parent: has fiber interface */
@@ -210,6 +222,15 @@ struct mii_media {
#define PHY_WRITE(p, r, v) \
MIIBUS_WRITEREG((p)->mii_dev, (p)->mii_phy, (r), (v))
+#define PHY_SERVICE(p, d, o) \
+ (*(p)->mii_funcs->pf_service)((p), (d), (o))
+
+#define PHY_STATUS(p) \
+ (*(p)->mii_funcs->pf_status)(p)
+
+#define PHY_RESET(p) \
+ (*(p)->mii_funcs->pf_reset)(p)
+
enum miibus_device_ivars {
MIIBUS_IVAR_FLAGS
};
@@ -227,13 +248,10 @@ extern driver_t miibus_driver;
int mii_attach(device_t, device_t *, struct ifnet *, ifm_change_cb_t,
ifm_stat_cb_t, int, int, int, int);
-int mii_anar(int);
void mii_down(struct mii_data *);
int mii_mediachg(struct mii_data *);
void mii_tick(struct mii_data *);
void mii_pollstat(struct mii_data *);
-int mii_phy_probe(device_t, device_t *, ifm_change_cb_t, ifm_stat_cb_t);
-void mii_add_media(struct mii_softc *);
void mii_phy_add_media(struct mii_softc *);
int mii_phy_auto(struct mii_softc *);
@@ -250,8 +268,16 @@ const struct mii_phydesc * mii_phy_match(const struct mii_attach_args *ma,
const struct mii_phydesc * mii_phy_match_gen(const struct mii_attach_args *ma,
const struct mii_phydesc *mpd, size_t endlen);
int mii_phy_dev_probe(device_t dev, const struct mii_phydesc *mpd, int mrv);
+void mii_phy_dev_attach(device_t dev, u_int flags,
+ const struct mii_phy_funcs *mpf, int add_media);
void ukphy_status(struct mii_softc *);
+
+u_int mii_oui(u_int, u_int);
+#define MII_OUI(id1, id2) mii_oui(id1, id2)
+#define MII_MODEL(id2) (((id2) & IDR2_MODEL) >> 4)
+#define MII_REV(id2) ((id2) & IDR2_REV)
+
#endif /* _KERNEL */
#endif /* _DEV_MII_MIIVAR_H_ */
diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c
index ea1e841e..94eeb844 100644
--- a/freebsd/sys/dev/pci/pci.c
+++ b/freebsd/sys/dev/pci/pci.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
+#include <dev/usb/controller/xhcireg.h>
#include <dev/usb/controller/ehcireg.h>
#include <dev/usb/controller/ohcireg.h>
#include <dev/usb/controller/uhcireg.h>
@@ -71,13 +72,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
#include <rtems/bsd/local/pci_if.h>
-#ifdef __HAVE_ACPI
-#include <contrib/dev/acpica/include/acpi.h>
-#include <rtems/bsd/local/acpi_if.h>
-#else
-#define ACPI_PWR_FOR_SLEEP(x, y, z)
-#endif
-
/*
* XXX: Due to a limitation of the bus_dma_tag_create() API, we cannot
* specify a 4GB boundary on 32-bit targets. Usually this does not
@@ -97,6 +91,7 @@ __FBSDID("$FreeBSD$");
(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) || \
((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
+static int pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t pci_mapbase(uint64_t mapreg);
static const char *pci_maptype(uint64_t mapreg);
static int pci_mapsize(uint64_t testval);
@@ -138,6 +133,7 @@ static void pci_enable_msix(device_t dev, u_int index,
static void pci_mask_msix(device_t dev, u_int index);
static void pci_unmask_msix(device_t dev, u_int index);
static int pci_msi_blacklisted(void);
+static int pci_msix_blacklisted(void);
static void pci_resume_msi(device_t dev);
static void pci_resume_msix(device_t dev);
static int pci_remap_intr_method(device_t bus, device_t dev,
@@ -201,7 +197,7 @@ static device_method_t pci_methods[] = {
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
static devclass_t pci_devclass;
-DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
+DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);
static char *pci_vendordata;
@@ -211,15 +207,16 @@ struct pci_quirk {
uint32_t devid; /* Vendor/device of the card */
int type;
#define PCI_QUIRK_MAP_REG 1 /* PCI map register in weird place */
-#define PCI_QUIRK_DISABLE_MSI 2 /* MSI/MSI-X doesn't work */
+#define PCI_QUIRK_DISABLE_MSI 2 /* Neither MSI nor MSI-X work */
#define PCI_QUIRK_ENABLE_MSI_VM 3 /* Older chipset in VM where MSI works */
#define PCI_QUIRK_UNMAP_REG 4 /* Ignore PCI map register */
+#define PCI_QUIRK_DISABLE_MSIX 5 /* MSI-X doesn't work */
int arg1;
int arg2;
};
static const struct pci_quirk pci_quirks[] = {
- /* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
+ /* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
{ 0x71138086, PCI_QUIRK_MAP_REG, 0x90, 0 },
{ 0x719b8086, PCI_QUIRK_MAP_REG, 0x90, 0 },
/* As does the Serverworks OSB4 (the SMBus mapping register) */
@@ -254,8 +251,8 @@ static const struct pci_quirk pci_quirks[] = {
* MSI-X allocation doesn't work properly for devices passed through
* by VMware up to at least ESXi 5.1.
*/
- { 0x079015ad, PCI_QUIRK_DISABLE_MSI, 0, 0 }, /* PCI/PCI-X */
- { 0x07a015ad, PCI_QUIRK_DISABLE_MSI, 0, 0 }, /* PCIe */
+ { 0x079015ad, PCI_QUIRK_DISABLE_MSIX, 0, 0 }, /* PCI/PCI-X */
+ { 0x07a015ad, PCI_QUIRK_DISABLE_MSIX, 0, 0 }, /* PCIe */
/*
* Some virtualization environments emulate an older chipset
@@ -297,6 +294,12 @@ SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
enable these bits correctly. We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");
+static int pci_do_realloc_bars = 0;
+TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
+SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
+ &pci_do_realloc_bars, 0,
+ "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
+
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
@@ -306,12 +309,18 @@ disable. 1 means conservatively place devices into D3 state. 2 means\n\
agressively place devices into D3 state. 3 means put absolutely everything\n\
in D3 state.");
-static int pci_do_power_resume = 1;
+int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
&pci_do_power_resume, 1,
"Transition from D3 -> D0 on resume.");
+int pci_do_power_suspend = 1;
+TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
+SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
+ &pci_do_power_suspend, 1,
+ "Transition from D0 -> D3 on suspend.");
+
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
@@ -325,7 +334,7 @@ SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
- &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
+ &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
@@ -338,6 +347,18 @@ SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
+static int
+pci_has_quirk(uint32_t devid, int quirk)
+{
+ const struct pci_quirk *q;
+
+ for (q = &pci_quirks[0]; q->devid; q++) {
+ if (q->devid == devid && q->type == quirk)
+ return (1);
+ }
+ return (0);
+}
+
/* Find a device_t by bus/slot/function in domain 0 */
device_t
@@ -515,12 +536,12 @@ pci_maprange(uint64_t mapreg)
static void
pci_fixancient(pcicfgregs *cfg)
{
- if (cfg->hdrtype != 0)
+ if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
return;
/* PCI to PCI bridges use header type 1 */
if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
- cfg->hdrtype = 1;
+ cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
/* extract header type specific config data */
@@ -529,16 +550,16 @@ static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w)
- switch (cfg->hdrtype) {
- case 0:
+ switch (cfg->hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
cfg->subvendor = REG(PCIR_SUBVEND_0, 2);
cfg->subdevice = REG(PCIR_SUBDEV_0, 2);
cfg->nummaps = PCI_MAXMAPS_0;
break;
- case 1:
+ case PCIM_HDRTYPE_BRIDGE:
cfg->nummaps = PCI_MAXMAPS_1;
break;
- case 2:
+ case PCIM_HDRTYPE_CARDBUS:
cfg->subvendor = REG(PCIR_SUBVEND_2, 2);
cfg->subdevice = REG(PCIR_SUBDEV_2, 2);
cfg->nummaps = PCI_MAXMAPS_2;
@@ -635,11 +656,11 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
int ptr, nextptr, ptrptr;
switch (cfg->hdrtype & PCIM_HDRTYPE) {
- case 0:
- case 1:
+ case PCIM_HDRTYPE_NORMAL:
+ case PCIM_HDRTYPE_BRIDGE:
ptrptr = PCIR_CAP_PTR;
break;
- case 2:
+ case PCIM_HDRTYPE_CARDBUS:
ptrptr = PCIR_CAP_PTR_2; /* cardbus capabilities ptr */
break;
default:
@@ -667,7 +688,7 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
if (cfg->pp.pp_cap == 0) {
cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
- cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
+ cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
if ((nextptr - ptr) > PCIR_POWER_DATA)
cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
}
@@ -730,7 +751,8 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
break;
case PCIY_SUBVENDOR:
/* Should always be true. */
- if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
+ if ((cfg->hdrtype & PCIM_HDRTYPE) ==
+ PCIM_HDRTYPE_BRIDGE) {
val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
cfg->subvendor = val & 0xffff;
cfg->subdevice = val >> 16;
@@ -744,7 +766,8 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
* PCI-express or HT chipsets might match on
* this check as well.
*/
- if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
+ if ((cfg->hdrtype & PCIM_HDRTYPE) ==
+ PCIM_HDRTYPE_BRIDGE)
pcix_chipset = 1;
break;
case PCIY_EXPRESS: /* PCI-express */
@@ -1202,11 +1225,11 @@ pci_find_extcap_method(device_t dev, device_t child, int capability,
* Determine the start pointer of the capabilities list.
*/
switch (cfg->hdrtype & PCIM_HDRTYPE) {
- case 0:
- case 1:
+ case PCIM_HDRTYPE_NORMAL:
+ case PCIM_HDRTYPE_BRIDGE:
ptr = PCIR_CAP_PTR;
break;
- case 2:
+ case PCIM_HDRTYPE_CARDBUS:
ptr = PCIR_CAP_PTR_2;
break;
default:
@@ -1354,8 +1377,8 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
return (ENXIO);
- /* If MSI is blacklisted for this system, fail. */
- if (pci_msi_blacklisted())
+ /* If MSI-X is blacklisted for this system, fail. */
+ if (pci_msix_blacklisted())
return (ENXIO);
/* MSI-X capability present? */
@@ -1907,38 +1930,15 @@ pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
int
pci_msi_device_blacklisted(device_t dev)
{
- const struct pci_quirk *q;
if (!pci_honor_msi_blacklist)
return (0);
- for (q = &pci_quirks[0]; q->devid; q++) {
- if (q->devid == pci_get_devid(dev) &&
- q->type == PCI_QUIRK_DISABLE_MSI)
- return (1);
- }
- return (0);
-}
-
-/*
- * Returns true if a specified chipset supports MSI when it is
- * emulated hardware in a virtual machine.
- */
-static int
-pci_msi_vm_chipset(device_t dev)
-{
- const struct pci_quirk *q;
-
- for (q = &pci_quirks[0]; q->devid; q++) {
- if (q->devid == pci_get_devid(dev) &&
- q->type == PCI_QUIRK_ENABLE_MSI_VM)
- return (1);
- }
- return (0);
+ return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
}
/*
- * Determine if MSI is blacklisted globally on this sytem. Currently,
+ * Determine if MSI is blacklisted globally on this system. Currently,
* we just check for blacklisted chipsets as represented by the
* host-PCI bridge at device 0:0:0. In the future, it may become
* necessary to check other system attributes, such as the kenv values
@@ -1955,9 +1955,14 @@ pci_msi_blacklisted(void)
/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
if (!(pcie_chipset || pcix_chipset)) {
if (vm_guest != VM_GUEST_NO) {
+ /*
+ * Whitelist older chipsets in virtual
+ * machines known to support MSI.
+ */
dev = pci_find_bsf(0, 0, 0);
if (dev != NULL)
- return (pci_msi_vm_chipset(dev) == 0);
+ return (!pci_has_quirk(pci_get_devid(dev),
+ PCI_QUIRK_ENABLE_MSI_VM));
}
return (1);
}
@@ -1969,6 +1974,45 @@ pci_msi_blacklisted(void)
}
/*
+ * Returns true if the specified device is blacklisted because MSI-X
+ * doesn't work. Note that this assumes that if MSI doesn't work,
+ * MSI-X doesn't either.
+ */
+int
+pci_msix_device_blacklisted(device_t dev)
+{
+
+ if (!pci_honor_msi_blacklist)
+ return (0);
+
+ if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
+ return (1);
+
+ return (pci_msi_device_blacklisted(dev));
+}
+
+/*
+ * Determine if MSI-X is blacklisted globally on this system. If MSI
+ * is blacklisted, assume that MSI-X is as well. Check for additional
+ * chipsets where MSI works but MSI-X does not.
+ */
+static int
+pci_msix_blacklisted(void)
+{
+ device_t dev;
+
+ if (!pci_honor_msi_blacklist)
+ return (0);
+
+ dev = pci_find_bsf(0, 0, 0);
+ if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
+ PCI_QUIRK_DISABLE_MSIX))
+ return (1);
+
+ return (pci_msi_blacklisted());
+}
+
+/*
* Attempt to allocate *count MSI messages. The actual number allocated is
* returned in *count. After this function returns, each message will be
* available to the driver as SYS_RES_IRQ resources starting at a rid 1.
@@ -2745,22 +2789,40 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
* driver for this device will later inherit this resource in
* pci_alloc_resource().
*/
- res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
+ res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
prefetch ? RF_PREFETCHABLE : 0);
+ if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
+ /*
+ * If the allocation fails, try to allocate a resource for
+ * this BAR using any available range. The firmware felt
+ * it was important enough to assign a resource, so don't
+ * disable decoding if we can help it.
+ */
+ resource_list_delete(rl, type, reg);
+ resource_list_add(rl, type, reg, 0, ~0ul, count);
+ res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
+ count, prefetch ? RF_PREFETCHABLE : 0);
+ }
if (res == NULL) {
/*
- * If the allocation fails, clear the BAR and delete
- * the resource list entry to force
- * pci_alloc_resource() to allocate resources from the
- * parent.
+ * If the allocation fails, delete the resource list entry
+ * and disable decoding for this device.
+ *
+ * If the driver requests this resource in the future,
+ * pci_reserve_map() will try to allocate a fresh
+ * resource range.
*/
resource_list_delete(rl, type, reg);
- start = 0;
+ pci_disable_io(dev, type);
+ if (bootverbose)
+ device_printf(bus,
+ "pci%d:%d:%d:%d bar %#x failed to allocate\n",
+ pci_get_domain(dev), pci_get_bus(dev),
+ pci_get_slot(dev), pci_get_function(dev), reg);
} else {
start = rman_get_start(res);
- rman_set_device(res, bus);
+ pci_write_bar(dev, pm, start);
}
- pci_write_bar(dev, pm, start);
return (barlen);
}
@@ -2798,14 +2860,12 @@ pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
} else {
rid = PCIR_BAR(0);
resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
- r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
- 8, 0);
- rman_set_device(r, bus);
+ r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
+ 0x1f7, 8, 0);
rid = PCIR_BAR(1);
resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
- r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
- 1, 0);
- rman_set_device(r, bus);
+ r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
+ 0x3f6, 1, 0);
}
if (progif & PCIP_STORAGE_IDE_MODESEC) {
pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
@@ -2815,14 +2875,12 @@ pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
} else {
rid = PCIR_BAR(2);
resource_list_add(rl, type, rid, 0x170, 0x177, 8);
- r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
- 8, 0);
- rman_set_device(r, bus);
+ r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
+ 0x177, 8, 0);
rid = PCIR_BAR(3);
resource_list_add(rl, type, rid, 0x376, 0x376, 1);
- r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
- 1, 0);
- rman_set_device(r, bus);
+ r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
+ 0x376, 1, 0);
}
pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
prefetchmask & (1 << 4));
@@ -2999,6 +3057,68 @@ ehci_early_takeover(device_t self)
bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
+/* Perform early XHCI takeover from SMM. */
+static void
+xhci_early_takeover(device_t self)
+{
+ struct resource *res;
+ uint32_t cparams;
+ uint32_t eec;
+ uint8_t eecp;
+ uint8_t bios_sem;
+ uint8_t offs;
+ int rid;
+ int i;
+
+ rid = PCIR_BAR(0);
+ res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+ if (res == NULL)
+ return;
+
+ cparams = bus_read_4(res, XHCI_HCSPARAMS0);
+
+ eec = -1;
+
+ /* Synchronise with the BIOS if it owns the controller. */
+ for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
+ eecp += XHCI_XECP_NEXT(eec) << 2) {
+ eec = bus_read_4(res, eecp);
+
+ if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
+ continue;
+
+ bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
+ if (bios_sem == 0)
+ continue;
+
+ if (bootverbose)
+ printf("xhci early: "
+ "SMM active, request owner change\n");
+
+ bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
+
+ /* wait a maximum of 5 seconds */
+
+ for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
+ DELAY(1000);
+ bios_sem = bus_read_1(res, eecp +
+ XHCI_XECP_BIOS_SEM);
+ }
+
+ if (bios_sem != 0) {
+ if (bootverbose)
+ printf("xhci early: "
+ "SMM does not respond\n");
+ }
+
+ /* Disable interrupts */
+ offs = bus_read_1(res, XHCI_CAPLENGTH);
+ bus_write_4(res, offs + XHCI_USBCMD, 0);
+ bus_read_4(res, offs + XHCI_USBSTS);
+ }
+ bus_release_resource(self, SYS_RES_MEMORY, rid, res);
+}
+
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
@@ -3062,7 +3182,9 @@ pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
- if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
+ if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
+ xhci_early_takeover(dev);
+ else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
ehci_early_takeover(dev);
else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
ohci_early_takeover(dev);
@@ -3184,20 +3306,43 @@ pci_attach(device_t dev)
return (bus_generic_attach(dev));
}
+static void
+pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
+ int state)
+{
+ device_t child, pcib;
+ struct pci_devinfo *dinfo;
+ int dstate, i;
+
+ /*
+ * Set the device to the given state. If the firmware suggests
+ * a different power state, use it instead. If power management
+ * is not present, the firmware is responsible for managing
+ * device power. Skip children who aren't attached since they
+ * are handled separately.
+ */
+ pcib = device_get_parent(dev);
+ for (i = 0; i < numdevs; i++) {
+ child = devlist[i];
+ dinfo = device_get_ivars(child);
+ dstate = state;
+ if (device_is_attached(child) &&
+ PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
+ pci_set_powerstate(child, dstate);
+ }
+}
+
int
pci_suspend(device_t dev)
{
- int dstate, error, i, numdevs;
- device_t acpi_dev, child, *devlist;
+ device_t child, *devlist;
struct pci_devinfo *dinfo;
+ int error, i, numdevs;
/*
* Save the PCI configuration space for each child and set the
* device in the appropriate power state for this sleep state.
*/
- acpi_dev = NULL;
- if (pci_do_power_resume)
- acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
return (error);
for (i = 0; i < numdevs; i++) {
@@ -3212,23 +3357,9 @@ pci_suspend(device_t dev)
free(devlist, M_TEMP);
return (error);
}
-
- /*
- * Always set the device to D3. If ACPI suggests a different
- * power state, use it instead. If ACPI is not present, the
- * firmware is responsible for managing device power. Skip
- * children who aren't attached since they are powered down
- * separately. Only manage type 0 devices for now.
- */
- for (i = 0; acpi_dev && i < numdevs; i++) {
- child = devlist[i];
- dinfo = (struct pci_devinfo *) device_get_ivars(child);
- if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
- dstate = PCI_POWERSTATE_D3;
- ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
- pci_set_powerstate(child, dstate);
- }
- }
+ if (pci_do_power_suspend)
+ pci_set_power_children(dev, devlist, numdevs,
+ PCI_POWERSTATE_D3);
free(devlist, M_TEMP);
return (0);
}
@@ -3236,51 +3367,76 @@ pci_suspend(device_t dev)
int
pci_resume(device_t dev)
{
- int i, numdevs, error;
- device_t acpi_dev, child, *devlist;
+ device_t child, *devlist;
struct pci_devinfo *dinfo;
+ int error, i, numdevs;
/*
* Set each child to D0 and restore its PCI configuration space.
*/
- acpi_dev = NULL;
- if (pci_do_power_resume)
- acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
return (error);
+ if (pci_do_power_resume)
+ pci_set_power_children(dev, devlist, numdevs,
+ PCI_POWERSTATE_D0);
+
+ /* Now the device is powered up, restore its config space. */
for (i = 0; i < numdevs; i++) {
- /*
- * Notify ACPI we're going to D0 but ignore the result. If
- * ACPI is not present, the firmware is responsible for
- * managing device power. Only manage type 0 devices for now.
- */
child = devlist[i];
- dinfo = (struct pci_devinfo *) device_get_ivars(child);
- if (acpi_dev && device_is_attached(child) &&
- dinfo->cfg.hdrtype == 0) {
- ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
- pci_set_powerstate(child, PCI_POWERSTATE_D0);
- }
+ dinfo = device_get_ivars(child);
- /* Now the device is powered up, restore its config space. */
pci_cfg_restore(child, dinfo);
+ if (!device_is_attached(child))
+ pci_cfg_save(child, dinfo, 1);
+ }
+
+ /*
+ * Resume critical devices first, then everything else later.
+ */
+ for (i = 0; i < numdevs; i++) {
+ child = devlist[i];
+ switch (pci_get_class(child)) {
+ case PCIC_DISPLAY:
+ case PCIC_MEMORY:
+ case PCIC_BRIDGE:
+ case PCIC_BASEPERIPH:
+ DEVICE_RESUME(child);
+ break;
+ }
+ }
+ for (i = 0; i < numdevs; i++) {
+ child = devlist[i];
+ switch (pci_get_class(child)) {
+ case PCIC_DISPLAY:
+ case PCIC_MEMORY:
+ case PCIC_BRIDGE:
+ case PCIC_BASEPERIPH:
+ break;
+ default:
+ DEVICE_RESUME(child);
+ }
}
free(devlist, M_TEMP);
- return (bus_generic_resume(dev));
+ return (0);
}
static void
pci_load_vendor_data(void)
{
- caddr_t vendordata, info;
-
- if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
- info = preload_search_info(vendordata, MODINFO_ADDR);
- pci_vendordata = *(char **)info;
- info = preload_search_info(vendordata, MODINFO_SIZE);
- pci_vendordata_size = *(size_t *)info;
- /* terminate the database */
- pci_vendordata[pci_vendordata_size] = '\n';
+ caddr_t data;
+ void *ptr;
+ size_t sz;
+
+ data = preload_search_by_type("pci_vendor_data");
+ if (data != NULL) {
+ ptr = preload_fetch_addr(data);
+ sz = preload_fetch_size(data);
+ if (ptr != NULL && sz != 0) {
+ pci_vendordata = ptr;
+ pci_vendordata_size = sz;
+ /* terminate the database */
+ pci_vendordata[pci_vendordata_size] = '\n';
+ }
}
}
@@ -3742,7 +3898,7 @@ pci_describe_device(device_t dev)
if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
NULL)
sprintf(desc, "%s, %s", vp, dp);
- out:
+out:
if (vp != NULL)
free(vp, M_DEVBUF);
if (dp != NULL)
@@ -3917,7 +4073,7 @@ DB_SHOW_COMMAND(pciregs, db_pci_dump)
#endif /* DDB */
static struct resource *
-pci_alloc_map(device_t dev, device_t child, int type, int *rid,
+pci_reserve_map(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
struct pci_devinfo *dinfo = device_get_ivars(child);
@@ -4003,22 +4159,22 @@ pci_alloc_map(device_t dev, device_t child, int type, int *rid,
count, *rid, type, start, end);
goto out;
}
- rman_set_device(res, dev);
resource_list_add(rl, type, *rid, start, end, count);
rle = resource_list_find(rl, type, *rid);
if (rle == NULL)
- panic("pci_alloc_map: unexpectedly can't find resource.");
+ panic("pci_reserve_map: unexpectedly can't find resource.");
rle->res = res;
rle->start = rman_get_start(res);
rle->end = rman_get_end(res);
rle->count = count;
+ rle->flags = RLE_RESERVED;
if (bootverbose)
device_printf(child,
"Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
count, *rid, type, rman_get_start(res));
map = rman_get_start(res);
pci_write_bar(child, pm, map);
-out:;
+out:
return (res);
}
@@ -4080,34 +4236,13 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
}
}
#endif
- /* Allocate resources for this BAR if needed. */
+ /* Reserve resources for this BAR if needed. */
rle = resource_list_find(rl, type, *rid);
if (rle == NULL) {
- res = pci_alloc_map(dev, child, type, rid, start, end,
+ res = pci_reserve_map(dev, child, type, rid, start, end,
count, flags);
if (res == NULL)
return (NULL);
- rle = resource_list_find(rl, type, *rid);
- }
-
- /*
- * If the resource belongs to the bus, then give it to
- * the child. We need to activate it if requested
- * since the bus always allocates inactive resources.
- */
- if (rle != NULL && rle->res != NULL &&
- rman_get_device(rle->res) == dev) {
- if (bootverbose)
- device_printf(child,
- "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
- rman_get_size(rle->res), *rid, type,
- rman_get_start(rle->res));
- rman_set_device(rle->res, child);
- if ((flags & RF_ACTIVE) &&
- bus_activate_resource(child, type, *rid,
- rle->res) != 0)
- return (NULL);
- return (rle->res);
}
}
return (resource_list_alloc(rl, dev, child, type, rid,
@@ -4115,37 +4250,6 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
}
int
-pci_release_resource(device_t dev, device_t child, int type, int rid,
- struct resource *r)
-{
- int error;
-
- if (device_get_parent(child) != dev)
- return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
- type, rid, r));
-
- /*
- * For BARs we don't actually want to release the resource.
- * Instead, we deactivate the resource if needed and then give
- * ownership of the BAR back to the bus.
- */
- switch (type) {
- case SYS_RES_IOPORT:
- case SYS_RES_MEMORY:
- if (rman_get_device(r) != child)
- return (EINVAL);
- if (rman_get_flags(r) & RF_ACTIVE) {
- error = bus_deactivate_resource(child, type, rid, r);
- if (error)
- return (error);
- }
- rman_set_device(r, dev);
- return (0);
- }
- return (bus_generic_rl_release_resource(dev, child, type, rid, r));
-}
-
-int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
struct resource *r)
{
@@ -4195,6 +4299,46 @@ pci_deactivate_resource(device_t dev, device_t child, int type,
}
void
+pci_delete_child(device_t dev, device_t child)
+{
+ struct resource_list_entry *rle;
+ struct resource_list *rl;
+ struct pci_devinfo *dinfo;
+
+ dinfo = device_get_ivars(child);
+ rl = &dinfo->resources;
+
+ if (device_is_attached(child))
+ device_detach(child);
+
+ /* Turn off access to resources we're about to free */
+ pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
+ PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
+
+ /* Free all allocated resources */
+ STAILQ_FOREACH(rle, rl, link) {
+ if (rle->res) {
+ if (rman_get_flags(rle->res) & RF_ACTIVE ||
+ resource_list_busy(rl, rle->type, rle->rid)) {
+ pci_printf(&dinfo->cfg,
+ "Resource still owned, oops. "
+ "(type=%d, rid=%d, addr=%lx)\n",
+ rle->type, rle->rid,
+ rman_get_start(rle->res));
+ bus_release_resource(child, rle->type, rle->rid,
+ rle->res);
+ }
+ resource_list_unreserve(rl, dev, child, rle->type,
+ rle->rid);
+ }
+ }
+ resource_list_free(rl);
+
+ device_delete_child(dev, child);
+ pci_freecfg(dinfo);
+}
+
+void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
struct pci_devinfo *dinfo;
@@ -4211,29 +4355,15 @@ pci_delete_resource(device_t dev, device_t child, int type, int rid)
return;
if (rle->res) {
- if (rman_get_device(rle->res) != dev ||
- rman_get_flags(rle->res) & RF_ACTIVE) {
+ if (rman_get_flags(rle->res) & RF_ACTIVE ||
+ resource_list_busy(rl, type, rid)) {
device_printf(dev, "delete_resource: "
"Resource still owned by child, oops. "
"(type=%d, rid=%d, addr=%lx)\n",
- rle->type, rle->rid,
- rman_get_start(rle->res));
+ type, rid, rman_get_start(rle->res));
return;
}
-
-#ifndef __PCI_BAR_ZERO_VALID
- /*
- * If this is a BAR, clear the BAR so it stops
- * decoding before releasing the resource.
- */
- switch (type) {
- case SYS_RES_IOPORT:
- case SYS_RES_MEMORY:
- pci_write_bar(child, pci_find_bar(child, rid), 0);
- break;
- }
-#endif
- bus_release_resource(dev, type, rid, rle->res);
+ resource_list_unreserve(rl, dev, child, type, rid);
}
resource_list_delete(rl, type, rid);
}
@@ -4344,7 +4474,7 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
* Other types are unknown, and we err on the side of safety
* by ignoring them.
*/
- if (dinfo->cfg.hdrtype != 0)
+ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
return;
/*
@@ -4388,7 +4518,7 @@ pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
* we err on the side of safety by ignoring them. Powering down
* bridges should not be undertaken lightly.
*/
- if (dinfo->cfg.hdrtype != 0)
+ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
return;
/*
diff --git a/freebsd/sys/dev/pci/pci_pci.c b/freebsd/sys/dev/pci/pci_pci.c
index b0951024..bfaabf35 100644
--- a/freebsd/sys/dev/pci/pci_pci.c
+++ b/freebsd/sys/dev/pci/pci_pci.c
@@ -40,23 +40,24 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
-#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
+#include <dev/pci/pci_private.h>
#include <dev/pci/pcib_private.h>
#include <rtems/bsd/local/pcib_if.h>
static int pcib_probe(device_t dev);
+static int pcib_suspend(device_t dev);
+static int pcib_resume(device_t dev);
+static int pcib_power_for_sleep(device_t pcib, device_t dev,
+ int *pstate);
static device_method_t pcib_methods[] = {
/* Device interface */
@@ -64,8 +65,8 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(device_attach, pcib_attach),
DEVMETHOD(device_detach, bus_generic_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
+ DEVMETHOD(device_suspend, pcib_suspend),
+ DEVMETHOD(device_resume, pcib_resume),
/* Bus interface */
DEVMETHOD(bus_read_ivar, pcib_read_ivar),
@@ -93,6 +94,7 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
DEVMETHOD(pcib_release_msix, pcib_release_msix),
DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep),
DEVMETHOD_END
};
@@ -100,7 +102,7 @@ static device_method_t pcib_methods[] = {
static devclass_t pcib_devclass;
DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc));
-DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, 0, 0);
+DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, NULL, NULL);
#ifdef NEW_PCIB
/*
@@ -365,7 +367,161 @@ pcib_is_io_open(struct pcib_softc *sc)
{
return (sc->iobase > 0 && sc->iobase < sc->iolimit);
}
+
+/*
+ * Get current I/O decode.
+ */
+static void
+pcib_get_io_decode(struct pcib_softc *sc)
+{
+ device_t dev;
+ uint32_t iolow;
+
+ dev = sc->dev;
+
+ iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1);
+ if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32)
+ sc->iobase = PCI_PPBIOBASE(
+ pci_read_config(dev, PCIR_IOBASEH_1, 2), iolow);
+ else
+ sc->iobase = PCI_PPBIOBASE(0, iolow);
+
+ iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1);
+ if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32)
+ sc->iolimit = PCI_PPBIOLIMIT(
+ pci_read_config(dev, PCIR_IOLIMITH_1, 2), iolow);
+ else
+ sc->iolimit = PCI_PPBIOLIMIT(0, iolow);
+}
+
+/*
+ * Get current memory decode.
+ */
+static void
+pcib_get_mem_decode(struct pcib_softc *sc)
+{
+ device_t dev;
+ pci_addr_t pmemlow;
+
+ dev = sc->dev;
+
+ sc->membase = PCI_PPBMEMBASE(0,
+ pci_read_config(dev, PCIR_MEMBASE_1, 2));
+ sc->memlimit = PCI_PPBMEMLIMIT(0,
+ pci_read_config(dev, PCIR_MEMLIMIT_1, 2));
+
+ pmemlow = pci_read_config(dev, PCIR_PMBASEL_1, 2);
+ if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
+ sc->pmembase = PCI_PPBMEMBASE(
+ pci_read_config(dev, PCIR_PMBASEH_1, 4), pmemlow);
+ else
+ sc->pmembase = PCI_PPBMEMBASE(0, pmemlow);
+
+ pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2);
+ if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
+ sc->pmemlimit = PCI_PPBMEMLIMIT(
+ pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow);
+ else
+ sc->pmemlimit = PCI_PPBMEMLIMIT(0, pmemlow);
+}
+
+/*
+ * Restore previous I/O decode.
+ */
+static void
+pcib_set_io_decode(struct pcib_softc *sc)
+{
+ device_t dev;
+ uint32_t iohi;
+
+ dev = sc->dev;
+
+ iohi = sc->iobase >> 16;
+ if (iohi > 0)
+ pci_write_config(dev, PCIR_IOBASEH_1, iohi, 2);
+ pci_write_config(dev, PCIR_IOBASEL_1, sc->iobase >> 8, 1);
+
+ iohi = sc->iolimit >> 16;
+ if (iohi > 0)
+ pci_write_config(dev, PCIR_IOLIMITH_1, iohi, 2);
+ pci_write_config(dev, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1);
+}
+
+/*
+ * Restore previous memory decode.
+ */
+static void
+pcib_set_mem_decode(struct pcib_softc *sc)
+{
+ device_t dev;
+ pci_addr_t pmemhi;
+
+ dev = sc->dev;
+
+ pci_write_config(dev, PCIR_MEMBASE_1, sc->membase >> 16, 2);
+ pci_write_config(dev, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2);
+
+ pmemhi = sc->pmembase >> 32;
+ if (pmemhi > 0)
+ pci_write_config(dev, PCIR_PMBASEH_1, pmemhi, 4);
+ pci_write_config(dev, PCIR_PMBASEL_1, sc->pmembase >> 16, 2);
+
+ pmemhi = sc->pmemlimit >> 32;
+ if (pmemhi > 0)
+ pci_write_config(dev, PCIR_PMLIMITH_1, pmemhi, 4);
+ pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2);
+}
+#endif
+
+/*
+ * Get current bridge configuration.
+ */
+static void
+pcib_cfg_save(struct pcib_softc *sc)
+{
+ device_t dev;
+
+ dev = sc->dev;
+
+ sc->command = pci_read_config(dev, PCIR_COMMAND, 2);
+ sc->pribus = pci_read_config(dev, PCIR_PRIBUS_1, 1);
+ sc->secbus = pci_read_config(dev, PCIR_SECBUS_1, 1);
+ sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1);
+ sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
+ sc->seclat = pci_read_config(dev, PCIR_SECLAT_1, 1);
+#ifndef NEW_PCIB
+ if (sc->command & PCIM_CMD_PORTEN)
+ pcib_get_io_decode(sc);
+ if (sc->command & PCIM_CMD_MEMEN)
+ pcib_get_mem_decode(sc);
+#endif
+}
+
+/*
+ * Restore previous bridge configuration.
+ */
+static void
+pcib_cfg_restore(struct pcib_softc *sc)
+{
+ device_t dev;
+
+ dev = sc->dev;
+
+ pci_write_config(dev, PCIR_COMMAND, sc->command, 2);
+ pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1);
+ pci_write_config(dev, PCIR_SECBUS_1, sc->secbus, 1);
+ pci_write_config(dev, PCIR_SUBBUS_1, sc->subbus, 1);
+ pci_write_config(dev, PCIR_BRIDGECTL_1, sc->bridgectl, 2);
+ pci_write_config(dev, PCIR_SECLAT_1, sc->seclat, 1);
+#ifdef NEW_PCIB
+ pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM);
+#else
+ if (sc->command & PCIM_CMD_PORTEN)
+ pcib_set_io_decode(sc);
+ if (sc->command & PCIM_CMD_MEMEN)
+ pcib_set_mem_decode(sc);
#endif
+}
/*
* Generic device interface
@@ -385,9 +541,6 @@ void
pcib_attach_common(device_t dev)
{
struct pcib_softc *sc;
-#ifndef NEW_PCIB
- uint8_t iolow;
-#endif
struct sysctl_ctx_list *sctx;
struct sysctl_oid *soid;
@@ -397,14 +550,9 @@ pcib_attach_common(device_t dev)
/*
* Get current bridge configuration.
*/
- sc->command = pci_read_config(dev, PCIR_COMMAND, 1);
- sc->domain = pci_get_domain(dev);
- sc->pribus = pci_read_config(dev, PCIR_PRIBUS_1, 1);
- sc->secbus = pci_read_config(dev, PCIR_SECBUS_1, 1);
- sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1);
- sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2);
- sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
- sc->seclat = pci_read_config(dev, PCIR_SECLAT_1, 1);
+ sc->domain = pci_get_domain(dev);
+ sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2);
+ pcib_cfg_save(sc);
/*
* Setup sysctl reporting nodes
@@ -420,53 +568,6 @@ pcib_attach_common(device_t dev)
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus",
CTLFLAG_RD, &sc->subbus, 0, "Subordinate bus number");
-#ifndef NEW_PCIB
- /*
- * Determine current I/O decode.
- */
- if (sc->command & PCIM_CMD_PORTEN) {
- iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1);
- if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) {
- sc->iobase = PCI_PPBIOBASE(pci_read_config(dev, PCIR_IOBASEH_1, 2),
- pci_read_config(dev, PCIR_IOBASEL_1, 1));
- } else {
- sc->iobase = PCI_PPBIOBASE(0, pci_read_config(dev, PCIR_IOBASEL_1, 1));
- }
-
- iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1);
- if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) {
- sc->iolimit = PCI_PPBIOLIMIT(pci_read_config(dev, PCIR_IOLIMITH_1, 2),
- pci_read_config(dev, PCIR_IOLIMITL_1, 1));
- } else {
- sc->iolimit = PCI_PPBIOLIMIT(0, pci_read_config(dev, PCIR_IOLIMITL_1, 1));
- }
- }
-
- /*
- * Determine current memory decode.
- */
- if (sc->command & PCIM_CMD_MEMEN) {
- sc->membase = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2));
- sc->memlimit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2));
- iolow = pci_read_config(dev, PCIR_PMBASEL_1, 1);
- if ((iolow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
- sc->pmembase = PCI_PPBMEMBASE(
- pci_read_config(dev, PCIR_PMBASEH_1, 4),
- pci_read_config(dev, PCIR_PMBASEL_1, 2));
- else
- sc->pmembase = PCI_PPBMEMBASE(0,
- pci_read_config(dev, PCIR_PMBASEL_1, 2));
- iolow = pci_read_config(dev, PCIR_PMLIMITL_1, 1);
- if ((iolow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
- sc->pmemlimit = PCI_PPBMEMLIMIT(
- pci_read_config(dev, PCIR_PMLIMITH_1, 4),
- pci_read_config(dev, PCIR_PMLIMITL_1, 2));
- else
- sc->pmemlimit = PCI_PPBMEMLIMIT(0,
- pci_read_config(dev, PCIR_PMLIMITL_1, 2));
- }
-#endif
-
/*
* Quirk handling.
*/
@@ -527,6 +628,9 @@ pcib_attach_common(device_t dev)
if (pci_msi_device_blacklisted(dev))
sc->flags |= PCIB_DISABLE_MSI;
+ if (pci_msix_device_blacklisted(dev))
+ sc->flags |= PCIB_DISABLE_MSIX;
+
/*
* Intel 815, 845 and other chipsets say they are PCI-PCI bridges,
* but have a ProgIF of 0x80. The 82801 family (AA, AB, BAM/CAM,
@@ -614,6 +718,37 @@ pcib_attach(device_t dev)
}
int
+pcib_suspend(device_t dev)
+{
+ device_t pcib;
+ int dstate, error;
+
+ pcib_cfg_save(device_get_softc(dev));
+ error = bus_generic_suspend(dev);
+ if (error == 0 && pci_do_power_suspend) {
+ dstate = PCI_POWERSTATE_D3;
+ pcib = device_get_parent(device_get_parent(dev));
+ if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
+ pci_set_powerstate(dev, dstate);
+ }
+ return (error);
+}
+
+int
+pcib_resume(device_t dev)
+{
+ device_t pcib;
+
+ if (pci_do_power_resume) {
+ pcib = device_get_parent(device_get_parent(dev));
+ if (PCIB_POWER_FOR_SLEEP(pcib, dev, NULL) == 0)
+ pci_set_powerstate(dev, PCI_POWERSTATE_D0);
+ }
+ pcib_cfg_restore(device_get_softc(dev));
+ return (bus_generic_resume(dev));
+}
+
+int
pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
struct pcib_softc *sc = device_get_softc(dev);
@@ -645,18 +780,6 @@ pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
}
#ifdef NEW_PCIB
-static const char *
-pcib_child_name(device_t child)
-{
- static char buf[64];
-
- if (device_get_nameunit(child) != NULL)
- return (device_get_nameunit(child));
- snprintf(buf, sizeof(buf), "pci%d:%d:%d:%d", pci_get_domain(child),
- pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
- return (buf);
-}
-
/*
* Attempt to allocate a resource from the existing resources assigned
* to a window.
@@ -1263,7 +1386,7 @@ pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
struct pcib_softc *sc = device_get_softc(pcib);
device_t bus;
- if (sc->flags & PCIB_DISABLE_MSI)
+ if (sc->flags & PCIB_DISABLE_MSIX)
return (ENXIO);
bus = device_get_parent(pcib);
return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
@@ -1296,90 +1419,12 @@ pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
return (0);
}
-/*
- * Try to read the bus number of a host-PCI bridge using appropriate config
- * registers.
- */
+/* Pass request for device power state up to parent bridge. */
int
-host_pcib_get_busno(pci_read_config_fn read_config, int bus, int slot, int func,
- uint8_t *busnum)
+pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate)
{
- uint32_t id;
-
- id = read_config(bus, slot, func, PCIR_DEVVENDOR, 4);
- if (id == 0xffffffff)
- return (0);
-
- switch (id) {
- case 0x12258086:
- /* Intel 824?? */
- /* XXX This is a guess */
- /* *busnum = read_config(bus, slot, func, 0x41, 1); */
- *busnum = bus;
- break;
- case 0x84c48086:
- /* Intel 82454KX/GX (Orion) */
- *busnum = read_config(bus, slot, func, 0x4a, 1);
- break;
- case 0x84ca8086:
- /*
- * For the 450nx chipset, there is a whole bundle of
- * things pretending to be host bridges. The MIOC will
- * be seen first and isn't really a pci bridge (the
- * actual busses are attached to the PXB's). We need to
- * read the registers of the MIOC to figure out the
- * bus numbers for the PXB channels.
- *
- * Since the MIOC doesn't have a pci bus attached, we
- * pretend it wasn't there.
- */
- return (0);
- case 0x84cb8086:
- switch (slot) {
- case 0x12:
- /* Intel 82454NX PXB#0, Bus#A */
- *busnum = read_config(bus, 0x10, func, 0xd0, 1);
- break;
- case 0x13:
- /* Intel 82454NX PXB#0, Bus#B */
- *busnum = read_config(bus, 0x10, func, 0xd1, 1) + 1;
- break;
- case 0x14:
- /* Intel 82454NX PXB#1, Bus#A */
- *busnum = read_config(bus, 0x10, func, 0xd3, 1);
- break;
- case 0x15:
- /* Intel 82454NX PXB#1, Bus#B */
- *busnum = read_config(bus, 0x10, func, 0xd4, 1) + 1;
- break;
- }
- break;
-
- /* ServerWorks -- vendor 0x1166 */
- case 0x00051166:
- case 0x00061166:
- case 0x00081166:
- case 0x00091166:
- case 0x00101166:
- case 0x00111166:
- case 0x00171166:
- case 0x01011166:
- case 0x010f1014:
- case 0x01101166:
- case 0x02011166:
- case 0x02251166:
- case 0x03021014:
- *busnum = read_config(bus, slot, func, 0x44, 1);
- break;
-
- /* Compaq/HP -- vendor 0x0e11 */
- case 0x60100e11:
- *busnum = read_config(bus, slot, func, 0xc8, 1);
- break;
- default:
- /* Don't know how to read bus number. */
- return 0;
- }
+ device_t bus;
- return 1;
+ bus = device_get_parent(pcib);
+ return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate));
}
diff --git a/freebsd/sys/dev/pci/pci_private.h b/freebsd/sys/dev/pci/pci_private.h
index b3ff50d5..b8e446ea 100644
--- a/freebsd/sys/dev/pci/pci_private.h
+++ b/freebsd/sys/dev/pci/pci_private.h
@@ -42,12 +42,16 @@ struct pci_softc {
bus_dma_tag_t sc_dma_tag;
};
+extern int pci_do_power_resume;
+extern int pci_do_power_suspend;
+
void pci_add_children(device_t dev, int domain, int busno,
size_t dinfo_size);
void pci_add_child(device_t bus, struct pci_devinfo *dinfo);
void pci_add_resources(device_t bus, device_t dev, int force,
uint32_t prefetchmask);
int pci_attach_common(device_t dev);
+void pci_delete_child(device_t dev, device_t child);
void pci_driver_added(device_t dev, driver_t *driver);
int pci_print_child(device_t dev, device_t child);
void pci_probe_nomatch(device_t dev, device_t child);
@@ -87,8 +91,6 @@ int pci_msix_count_method(device_t dev, device_t child);
struct resource *pci_alloc_resource(device_t dev, device_t child,
int type, int *rid, u_long start, u_long end, u_long count,
u_int flags);
-int pci_release_resource(device_t dev, device_t child, int type,
- int rid, struct resource *r);
int pci_activate_resource(device_t dev, device_t child, int type,
int rid, struct resource *r);
int pci_deactivate_resource(device_t dev, device_t child, int type,
diff --git a/freebsd/sys/dev/pci/pci_user.c b/freebsd/sys/dev/pci/pci_user.c
index c2723ad3..63d64c39 100644
--- a/freebsd/sys/dev/pci/pci_user.c
+++ b/freebsd/sys/dev/pci/pci_user.c
@@ -227,6 +227,51 @@ struct pci_io_old {
u_int32_t pi_data; /* data to write or result of read */
};
+#ifdef COMPAT_FREEBSD32
+struct pci_conf_old32 {
+ struct pcisel_old pc_sel; /* bus+slot+function */
+ uint8_t pc_hdr; /* PCI header type */
+ uint16_t pc_subvendor; /* card vendor ID */
+ uint16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ uint16_t pc_vendor; /* chip vendor ID */
+ uint16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ uint8_t pc_class; /* chip PCI class */
+ uint8_t pc_subclass; /* chip PCI subclass */
+ uint8_t pc_progif; /* chip PCI programming interface */
+ uint8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ uint32_t pd_unit; /* device unit number (u_long) */
+};
+
+struct pci_match_conf_old32 {
+ struct pcisel_old pc_sel; /* bus+slot+function */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ uint32_t pd_unit; /* Unit number (u_long) */
+ uint16_t pc_vendor; /* PCI Vendor ID */
+ uint16_t pc_device; /* PCI Device ID */
+ uint8_t pc_class; /* PCI class */
+ pci_getconf_flags_old flags; /* Matching expression */
+};
+
+struct pci_conf_io32 {
+ uint32_t pat_buf_len; /* pattern buffer length */
+ uint32_t num_patterns; /* number of patterns */
+ uint32_t patterns; /* pattern buffer
+ (struct pci_match_conf_old32 *) */
+ uint32_t match_buf_len; /* match buffer length */
+ uint32_t num_matches; /* number of matches returned */
+ uint32_t matches; /* match buffer
+ (struct pci_conf_old32 *) */
+ uint32_t offset; /* offset into device list */
+ uint32_t generation; /* device list generation */
+ pci_getconf_status status; /* request status */
+};
+
+#define PCIOCGETCONF_OLD32 _IOWR('p', 1, struct pci_conf_io32)
+#endif /* COMPAT_FREEBSD32 */
+
#define PCIOCGETCONF_OLD _IOWR('p', 1, struct pci_conf_io)
#define PCIOCREAD_OLD _IOWR('p', 2, struct pci_io_old)
#define PCIOCWRITE_OLD _IOWR('p', 3, struct pci_io_old)
@@ -297,7 +342,71 @@ pci_conf_match_old(struct pci_match_conf_old *matches, int num_matches,
return(1);
}
-#endif
+#ifdef COMPAT_FREEBSD32
+static int
+pci_conf_match_old32(struct pci_match_conf_old32 *matches, int num_matches,
+ struct pci_conf *match_buf)
+{
+ int i;
+
+ if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0))
+ return(1);
+
+ for (i = 0; i < num_matches; i++) {
+ if (match_buf->pc_sel.pc_domain != 0)
+ continue;
+
+ /*
+ * I'm not sure why someone would do this...but...
+ */
+ if (matches[i].flags == PCI_GETCONF_NO_MATCH_OLD)
+ continue;
+
+ /*
+ * Look at each of the match flags. If it's set, do the
+ * comparison. If the comparison fails, we don't have a
+ * match, go on to the next item if there is one.
+ */
+ if (((matches[i].flags & PCI_GETCONF_MATCH_BUS_OLD) != 0) &&
+ (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_DEV_OLD) != 0) &&
+ (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC_OLD) != 0) &&
+ (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR_OLD) != 0) &&
+ (match_buf->pc_vendor != matches[i].pc_vendor))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE_OLD) != 0) &&
+ (match_buf->pc_device != matches[i].pc_device))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS_OLD) != 0) &&
+ (match_buf->pc_class != matches[i].pc_class))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT_OLD) != 0) &&
+ ((u_int32_t)match_buf->pd_unit != matches[i].pd_unit))
+ continue;
+
+ if (((matches[i].flags & PCI_GETCONF_MATCH_NAME_OLD) != 0) &&
+ (strncmp(matches[i].pd_name, match_buf->pd_name,
+ sizeof(match_buf->pd_name)) != 0))
+ continue;
+
+ return (0);
+ }
+
+ return (1);
+}
+#endif /* COMPAT_FREEBSD32 */
+#endif /* PRE7_COMPAT */
static int
pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
@@ -306,7 +415,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
void *confdata;
const char *name;
struct devlist *devlist_head;
- struct pci_conf_io *cio;
+ struct pci_conf_io *cio = NULL;
struct pci_devinfo *dinfo;
struct pci_io *io;
struct pci_bar_io *bio;
@@ -315,13 +424,17 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
size_t confsz, iolen, pbufsz;
int error, ionum, i, num_patterns;
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ struct pci_conf_io32 *cio32 = NULL;
+ struct pci_conf_old32 conf_old32;
+ struct pci_match_conf_old32 *pattern_buf_old32 = NULL;
+#endif
struct pci_conf_old conf_old;
struct pci_io iodata;
struct pci_io_old *io_old;
- struct pci_match_conf_old *pattern_buf_old;
+ struct pci_match_conf_old *pattern_buf_old = NULL;
io_old = NULL;
- pattern_buf_old = NULL;
if (!(flag & FWRITE) && cmd != PCIOCGETBAR &&
cmd != PCIOCGETCONF && cmd != PCIOCGETCONF_OLD)
@@ -333,11 +446,36 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
switch(cmd) {
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_OLD32:
+ cio32 = (struct pci_conf_io32 *)data;
+ cio = malloc(sizeof(struct pci_conf_io), M_TEMP, M_WAITOK);
+ cio->pat_buf_len = cio32->pat_buf_len;
+ cio->num_patterns = cio32->num_patterns;
+ cio->patterns = (void *)(uintptr_t)cio32->patterns;
+ cio->match_buf_len = cio32->match_buf_len;
+ cio->num_matches = cio32->num_matches;
+ cio->matches = (void *)(uintptr_t)cio32->matches;
+ cio->offset = cio32->offset;
+ cio->generation = cio32->generation;
+ cio->status = cio32->status;
+ cio32->num_matches = 0;
+ break;
+#endif
case PCIOCGETCONF_OLD:
- /* FALLTHROUGH */
#endif
case PCIOCGETCONF:
cio = (struct pci_conf_io *)data;
+ }
+
+ switch(cmd) {
+#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ case PCIOCGETCONF_OLD32:
+#endif
+ case PCIOCGETCONF_OLD:
+#endif
+ case PCIOCGETCONF:
pattern_buf = NULL;
num_patterns = 0;
@@ -355,7 +493,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
&& (cio->generation != pci_generation)){
cio->status = PCI_GETCONF_LIST_CHANGED;
error = 0;
- break;
+ goto getconfexit;
}
/*
@@ -365,7 +503,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
if (cio->offset >= pci_numdevs) {
cio->status = PCI_GETCONF_LAST_DEVICE;
error = 0;
- break;
+ goto getconfexit;
}
/* get the head of the device queue */
@@ -378,6 +516,11 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
* didn't specify a multiple of that size.
*/
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ if (cmd == PCIOCGETCONF_OLD32)
+ confsz = sizeof(struct pci_conf_old32);
+ else
+#endif
if (cmd == PCIOCGETCONF_OLD)
confsz = sizeof(struct pci_conf_old);
else
@@ -412,6 +555,11 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
* updated their kernel but not their userland.
*/
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ if (cmd == PCIOCGETCONF_OLD32)
+ pbufsz = sizeof(struct pci_match_conf_old32);
+ else
+#endif
if (cmd == PCIOCGETCONF_OLD)
pbufsz = sizeof(struct pci_match_conf_old);
else
@@ -421,20 +569,28 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
/* The user made a mistake, return an error. */
cio->status = PCI_GETCONF_ERROR;
error = EINVAL;
- break;
+ goto getconfexit;
}
/*
* Allocate a buffer to hold the patterns.
*/
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ if (cmd == PCIOCGETCONF_OLD32) {
+ pattern_buf_old32 = malloc(cio->pat_buf_len,
+ M_TEMP, M_WAITOK);
+ error = copyin(cio->patterns,
+ pattern_buf_old32, cio->pat_buf_len);
+ } else
+#endif /* COMPAT_FREEBSD32 */
if (cmd == PCIOCGETCONF_OLD) {
pattern_buf_old = malloc(cio->pat_buf_len,
M_TEMP, M_WAITOK);
error = copyin(cio->patterns,
pattern_buf_old, cio->pat_buf_len);
} else
-#endif
+#endif /* PRE7_COMPAT */
{
pattern_buf = malloc(cio->pat_buf_len, M_TEMP,
M_WAITOK);
@@ -453,7 +609,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
*/
cio->status = PCI_GETCONF_ERROR;
error = EINVAL;
- break;
+ goto getconfexit;
}
/*
@@ -485,7 +641,14 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
}
#ifdef PRE7_COMPAT
- if ((cmd == PCIOCGETCONF_OLD &&
+ if (
+#ifdef COMPAT_FREEBSD32
+ (cmd == PCIOCGETCONF_OLD32 &&
+ (pattern_buf_old32 == NULL ||
+ pci_conf_match_old32(pattern_buf_old32,
+ num_patterns, &dinfo->conf) == 0)) ||
+#endif
+ (cmd == PCIOCGETCONF_OLD &&
(pattern_buf_old == NULL ||
pci_conf_match_old(pattern_buf_old, num_patterns,
&dinfo->conf) == 0)) ||
@@ -510,6 +673,40 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
break;
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ if (cmd == PCIOCGETCONF_OLD32) {
+ conf_old32.pc_sel.pc_bus =
+ dinfo->conf.pc_sel.pc_bus;
+ conf_old32.pc_sel.pc_dev =
+ dinfo->conf.pc_sel.pc_dev;
+ conf_old32.pc_sel.pc_func =
+ dinfo->conf.pc_sel.pc_func;
+ conf_old32.pc_hdr = dinfo->conf.pc_hdr;
+ conf_old32.pc_subvendor =
+ dinfo->conf.pc_subvendor;
+ conf_old32.pc_subdevice =
+ dinfo->conf.pc_subdevice;
+ conf_old32.pc_vendor =
+ dinfo->conf.pc_vendor;
+ conf_old32.pc_device =
+ dinfo->conf.pc_device;
+ conf_old32.pc_class =
+ dinfo->conf.pc_class;
+ conf_old32.pc_subclass =
+ dinfo->conf.pc_subclass;
+ conf_old32.pc_progif =
+ dinfo->conf.pc_progif;
+ conf_old32.pc_revid =
+ dinfo->conf.pc_revid;
+ strncpy(conf_old32.pd_name,
+ dinfo->conf.pd_name,
+ sizeof(conf_old32.pd_name));
+ conf_old32.pd_name[PCI_MAXNAMELEN] = 0;
+ conf_old32.pd_unit =
+ (uint32_t)dinfo->conf.pd_unit;
+ confdata = &conf_old32;
+ } else
+#endif /* COMPAT_FREEBSD32 */
if (cmd == PCIOCGETCONF_OLD) {
conf_old.pc_sel.pc_bus =
dinfo->conf.pc_sel.pc_bus;
@@ -542,7 +739,7 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
dinfo->conf.pd_unit;
confdata = &conf_old;
} else
-#endif
+#endif /* PRE7_COMPAT */
confdata = &dinfo->conf;
/* Only if we can copy it out do we count it. */
if (!(error = copyout(confdata,
@@ -576,12 +773,23 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
cio->status = PCI_GETCONF_MORE_DEVS;
getconfexit:
- if (pattern_buf != NULL)
- free(pattern_buf, M_TEMP);
#ifdef PRE7_COMPAT
+#ifdef COMPAT_FREEBSD32
+ if (cmd == PCIOCGETCONF_OLD32) {
+ cio32->status = cio->status;
+ cio32->generation = cio->generation;
+ cio32->offset = cio->offset;
+ cio32->num_matches = cio->num_matches;
+ free(cio, M_TEMP);
+ }
+ if (pattern_buf_old32 != NULL)
+ free(pattern_buf_old32, M_TEMP);
+#endif
if (pattern_buf_old != NULL)
free(pattern_buf_old, M_TEMP);
#endif
+ if (pattern_buf != NULL)
+ free(pattern_buf, M_TEMP);
break;
@@ -696,6 +904,16 @@ getconfexit:
bio->pbi_enabled = pci_bar_enabled(pcidev, pm);
error = 0;
break;
+ case PCIOCATTACHED:
+ error = 0;
+ io = (struct pci_io *)data;
+ pcidev = pci_find_dbsf(io->pi_sel.pc_domain, io->pi_sel.pc_bus,
+ io->pi_sel.pc_dev, io->pi_sel.pc_func);
+ if (pcidev != NULL)
+ io->pi_data = device_is_attached(pcidev);
+ else
+ error = ENODEV;
+ break;
default:
error = ENOTTY;
break;
diff --git a/freebsd/sys/dev/pci/pcib_private.h b/freebsd/sys/dev/pci/pcib_private.h
index 1574deb7..79135afa 100644
--- a/freebsd/sys/dev/pci/pcib_private.h
+++ b/freebsd/sys/dev/pci/pcib_private.h
@@ -33,6 +33,31 @@
#ifndef __PCIB_PRIVATE_H__
#define __PCIB_PRIVATE_H__
+#ifdef NEW_PCIB
+/*
+ * Data structure and routines that Host to PCI bridge drivers can use
+ * to restrict allocations for child devices to ranges decoded by the
+ * bridge.
+ */
+struct pcib_host_resources {
+ device_t hr_pcib;
+ struct resource_list hr_rl;
+};
+
+int pcib_host_res_init(device_t pcib,
+ struct pcib_host_resources *hr);
+int pcib_host_res_free(device_t pcib,
+ struct pcib_host_resources *hr);
+int pcib_host_res_decodes(struct pcib_host_resources *hr, int type,
+ u_long start, u_long end, u_int flags);
+struct resource *pcib_host_res_alloc(struct pcib_host_resources *hr,
+ device_t dev, int type, int *rid, u_long start, u_long end,
+ u_long count, u_int flags);
+int pcib_host_res_adjust(struct pcib_host_resources *hr,
+ device_t dev, int type, struct resource *r, u_long start,
+ u_long end);
+#endif
+
/*
* Export portions of generic PCI:PCI bridge support so that it can be
* used by subclasses.
@@ -66,6 +91,7 @@ struct pcib_softc
uint32_t flags; /* flags */
#define PCIB_SUBTRACTIVE 0x1
#define PCIB_DISABLE_MSI 0x2
+#define PCIB_DISABLE_MSIX 0x4
uint16_t command; /* command register */
u_int domain; /* domain number */
u_int pribus; /* primary bus number */
@@ -90,6 +116,9 @@ struct pcib_softc
typedef uint32_t pci_read_config_fn(int b, int s, int f, int reg, int width);
+#ifdef NEW_PCIB
+const char *pcib_child_name(device_t child);
+#endif
int host_pcib_get_busno(pci_read_config_fn read_config, int bus,
int slot, int func, uint8_t *busnum);
int pcib_attach(device_t dev);
diff --git a/freebsd/sys/dev/pci/pcireg.h b/freebsd/sys/dev/pci/pcireg.h
index 0127e776..ef351356 100644
--- a/freebsd/sys/dev/pci/pcireg.h
+++ b/freebsd/sys/dev/pci/pcireg.h
@@ -69,7 +69,6 @@
#define PCIM_STATUS_66CAPABLE 0x0020
#define PCIM_STATUS_BACKTOBACK 0x0080
#define PCIM_STATUS_MDPERR 0x0100
-#define PCIM_STATUS_PERRREPORT PCIM_STATUS_MDPERR
#define PCIM_STATUS_SEL_FAST 0x0000
#define PCIM_STATUS_SEL_MEDIMUM 0x0200
#define PCIM_STATUS_SEL_SLOW 0x0400
@@ -445,12 +444,16 @@
#define PCIR_POWER_CAP 0x2
#define PCIM_PCAP_SPEC 0x0007
#define PCIM_PCAP_PMEREQCLK 0x0008
-#define PCIM_PCAP_PMEREQPWR 0x0010
#define PCIM_PCAP_DEVSPECINIT 0x0020
-#define PCIM_PCAP_DYNCLOCK 0x0040
-#define PCIM_PCAP_SECCLOCK 0x00c0
-#define PCIM_PCAP_CLOCKMASK 0x00c0
-#define PCIM_PCAP_REQFULLCLOCK 0x0100
+#define PCIM_PCAP_AUXPWR_0 0x0000
+#define PCIM_PCAP_AUXPWR_55 0x0040
+#define PCIM_PCAP_AUXPWR_100 0x0080
+#define PCIM_PCAP_AUXPWR_160 0x00c0
+#define PCIM_PCAP_AUXPWR_220 0x0100
+#define PCIM_PCAP_AUXPWR_270 0x0140
+#define PCIM_PCAP_AUXPWR_320 0x0180
+#define PCIM_PCAP_AUXPWR_375 0x01c0
+#define PCIM_PCAP_AUXPWRMASK 0x01c0
#define PCIM_PCAP_D1SUPP 0x0200
#define PCIM_PCAP_D2SUPP 0x0400
#define PCIM_PCAP_D0PME 0x0800
@@ -465,16 +468,17 @@
#define PCIM_PSTAT_D2 0x0002
#define PCIM_PSTAT_D3 0x0003
#define PCIM_PSTAT_DMASK 0x0003
-#define PCIM_PSTAT_REPENABLE 0x0010
+#define PCIM_PSTAT_NOSOFTRESET 0x0008
#define PCIM_PSTAT_PMEENABLE 0x0100
#define PCIM_PSTAT_D0POWER 0x0000
#define PCIM_PSTAT_D1POWER 0x0200
#define PCIM_PSTAT_D2POWER 0x0400
#define PCIM_PSTAT_D3POWER 0x0600
#define PCIM_PSTAT_D0HEAT 0x0800
-#define PCIM_PSTAT_D1HEAT 0x1000
-#define PCIM_PSTAT_D2HEAT 0x1200
-#define PCIM_PSTAT_D3HEAT 0x1400
+#define PCIM_PSTAT_D1HEAT 0x0a00
+#define PCIM_PSTAT_D2HEAT 0x0c00
+#define PCIM_PSTAT_D3HEAT 0x0e00
+#define PCIM_PSTAT_DATASELMASK 0x1e00
#define PCIM_PSTAT_DATAUNKN 0x0000
#define PCIM_PSTAT_DATADIV10 0x2000
#define PCIM_PSTAT_DATADIV100 0x4000
@@ -482,11 +486,10 @@
#define PCIM_PSTAT_DATADIVMASK 0x6000
#define PCIM_PSTAT_PME 0x8000
-#define PCIR_POWER_PMCSR 0x6
-#define PCIM_PMCSR_DCLOCK 0x10
-#define PCIM_PMCSR_B2SUPP 0x20
-#define PCIM_BMCSR_B3SUPP 0x40
-#define PCIM_BMCSR_BPCE 0x80
+#define PCIR_POWER_BSE 0x6
+#define PCIM_PMCSR_BSE_D3B3 0x00
+#define PCIM_PMCSR_BSE_D3B2 0x40
+#define PCIM_PMCSR_BSE_BPCCE 0x80
#define PCIR_POWER_DATA 0x7
@@ -609,6 +612,10 @@
#define PCIM_HTCAP_VCSET 0xb800 /* 10111 */
#define PCIM_HTCAP_RETRY_MODE 0xc000 /* 11000 */
#define PCIM_HTCAP_X86_ENCODING 0xc800 /* 11001 */
+#define PCIM_HTCAP_GEN3 0xd000 /* 11010 */
+#define PCIM_HTCAP_FLE 0xd800 /* 11011 */
+#define PCIM_HTCAP_PM 0xe000 /* 11100 */
+#define PCIM_HTCAP_HIGH_NODE_COUNT 0xe800 /* 11101 */
/* HT MSI Mapping Capability definitions. */
#define PCIM_HTCMD_MSI_ENABLE 0x0001
@@ -770,7 +777,7 @@
#define PCIER_SLOT_CAP2 0x34
#define PCIER_SLOT_CTL2 0x38
#define PCIER_SLOT_STA2 0x3a
-
+
/* Old compatibility definitions for PCI Express registers */
#define PCIR_EXPRESS_FLAGS PCIER_FLAGS
#define PCIM_EXP_FLAGS_VERSION PCIEM_FLAGS_VERSION
diff --git a/freebsd/sys/dev/pci/pcivar.h b/freebsd/sys/dev/pci/pcivar.h
index b0359d30..84e7c871 100644
--- a/freebsd/sys/dev/pci/pcivar.h
+++ b/freebsd/sys/dev/pci/pcivar.h
@@ -42,9 +42,9 @@ typedef uint64_t pci_addr_t;
/* Interesting values for PCI power management */
struct pcicfg_pp {
uint16_t pp_cap; /* PCI power management capabilities */
- uint8_t pp_status; /* config space address of PCI power status reg */
- uint8_t pp_pmcsr; /* config space address of PMCSR reg */
- uint8_t pp_data; /* config space address of PCI power data reg */
+ uint8_t pp_status; /* conf. space addr. of PM control/status reg */
+ uint8_t pp_bse; /* conf. space addr. of PM BSE reg */
+ uint8_t pp_data; /* conf. space addr. of PM data reg */
};
struct pci_map {
@@ -465,6 +465,7 @@ device_t pci_find_class(uint8_t class, uint8_t subclass);
int pci_pending_msix(device_t dev, u_int index);
int pci_msi_device_blacklisted(device_t dev);
+int pci_msix_device_blacklisted(device_t dev);
void pci_ht_map_msi(device_t dev, uint64_t addr);
diff --git a/freebsd/sys/dev/re/if_re.c b/freebsd/sys/dev/re/if_re.c
index fb86f6a8..cde6bcee 100644
--- a/freebsd/sys/dev/re/if_re.c
+++ b/freebsd/sys/dev/re/if_re.c
@@ -298,6 +298,10 @@ static void re_setwol (struct rl_softc *);
static void re_clrwol (struct rl_softc *);
static void re_set_linkspeed (struct rl_softc *);
+#ifdef DEV_NETMAP /* see ixgbe.c for details */
+#include <dev/netmap/if_re_netmap.h>
+#endif /* !DEV_NETMAP */
+
#ifdef RE_DIAG
static int re_diag (struct rl_softc *);
#endif
@@ -756,7 +760,7 @@ re_diag(struct rl_softc *sc)
u_int8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
/* Allocate a single mbuf */
- MGETHDR(m0, M_DONTWAIT, MT_DATA);
+ MGETHDR(m0, M_NOWAIT, MT_DATA);
if (m0 == NULL)
return (ENOBUFS);
@@ -1243,7 +1247,7 @@ re_attach(device_t dev)
msic = pci_msi_count(dev);
msixc = pci_msix_count(dev);
- if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
+ if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
sc->rl_flags |= RL_FLAG_PCIE;
sc->rl_expcap = reg;
}
@@ -1585,7 +1589,8 @@ re_attach(device_t dev)
* packet has IP options so disable TX IP checksum offloading.
*/
if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C ||
- sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2)
+ sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 ||
+ sc->rl_hwrev->rl_rev == RL_HWREV_8168CP)
ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
else
ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
@@ -1622,7 +1627,7 @@ re_attach(device_t dev)
if (ifp->if_capabilities & IFCAP_HWCSUM)
ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
/* Enable WOL if PM is supported. */
- if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
+ if (pci_find_cap(sc->rl_dev, PCIY_PMG, &reg) == 0)
ifp->if_capabilities |= IFCAP_WOL;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST);
@@ -1643,6 +1648,9 @@ re_attach(device_t dev)
*/
ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+#ifdef DEV_NETMAP
+ re_netmap_attach(sc);
+#endif /* DEV_NETMAP */
#ifdef RE_DIAG
/*
* Perform hardware diagnostic on the original RTL8169.
@@ -1748,8 +1756,12 @@ re_detach(device_t dev)
bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]);
sc->rl_intrhand[0] = NULL;
}
- if (ifp != NULL)
+ if (ifp != NULL) {
+#ifdef DEV_NETMAP
+ netmap_detach(ifp);
+#endif /* DEV_NETMAP */
if_free(ifp);
+ }
if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0)
rid = 0;
else
@@ -1877,7 +1889,7 @@ re_newbuf(struct rl_softc *sc, int idx)
uint32_t cmdstat;
int error, nsegs;
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return (ENOBUFS);
@@ -1941,7 +1953,7 @@ re_jumbo_newbuf(struct rl_softc *sc, int idx)
uint32_t cmdstat;
int error, nsegs;
- m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
+ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
if (m == NULL)
return (ENOBUFS);
m->m_len = m->m_pkthdr.len = MJUM9BYTES;
@@ -2012,6 +2024,9 @@ re_tx_list_init(struct rl_softc *sc)
sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
+#ifdef DEV_NETMAP
+ re_netmap_tx_init(sc);
+#endif /* DEV_NETMAP */
/* Set EOR. */
desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
@@ -2039,6 +2054,9 @@ re_rx_list_init(struct rl_softc *sc)
if ((error = re_newbuf(sc, i)) != 0)
return (error);
}
+#ifdef DEV_NETMAP
+ re_netmap_rx_init(sc);
+#endif /* DEV_NETMAP */
/* Flush the RX descriptors */
@@ -2095,6 +2113,11 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp)
RL_LOCK_ASSERT(sc);
ifp = sc->rl_ifp;
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT),
+ &rx_npkts))
+ return 0;
+#endif /* DEV_NETMAP */
if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
jumbo = 1;
else
@@ -2336,6 +2359,10 @@ re_txeof(struct rl_softc *sc)
return;
ifp = sc->rl_ifp;
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
+ return;
+#endif /* DEV_NETMAP */
/* Invalidate the TX descriptor list */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
@@ -2672,7 +2699,7 @@ re_encap(struct rl_softc *sc, struct mbuf **m_head)
padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
if (M_WRITABLE(*m_head) == 0) {
/* Get a writable copy. */
- m_new = m_dup(*m_head, M_DONTWAIT);
+ m_new = m_dup(*m_head, M_NOWAIT);
m_freem(*m_head);
if (m_new == NULL) {
*m_head = NULL;
@@ -2682,7 +2709,7 @@ re_encap(struct rl_softc *sc, struct mbuf **m_head)
}
if ((*m_head)->m_next != NULL ||
M_TRAILINGSPACE(*m_head) < padlen) {
- m_new = m_defrag(*m_head, M_DONTWAIT);
+ m_new = m_defrag(*m_head, M_NOWAIT);
if (m_new == NULL) {
m_freem(*m_head);
*m_head = NULL;
@@ -2706,7 +2733,7 @@ re_encap(struct rl_softc *sc, struct mbuf **m_head)
error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
*m_head, segs, &nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
- m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
+ m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS);
if (m_new == NULL) {
m_freem(*m_head);
*m_head = NULL;
@@ -2854,6 +2881,21 @@ re_start_locked(struct ifnet *ifp)
sc = ifp->if_softc;
+#ifdef DEV_NETMAP
+ /* XXX is this necessary ? */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_kring *kring = &NA(ifp)->tx_rings[0];
+ if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) {
+ /* kick the tx unit */
+ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
+#ifdef RE_TX_MODERATION
+ CSR_WRITE_4(sc, RL_TIMERCNT, 1);
+#endif
+ sc->rl_watchdog_timer = 5;
+ }
+ return;
+ }
+#endif /* DEV_NETMAP */
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
return;
@@ -3376,7 +3418,8 @@ re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) {
rev = sc->rl_hwrev->rl_rev;
if (rev == RL_HWREV_8168C ||
- rev == RL_HWREV_8168C_SPIN2)
+ rev == RL_HWREV_8168C_SPIN2 ||
+ rev == RL_HWREV_8168CP)
ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
else
ifp->if_hwassist |= RE_CSUM_FEATURES;
@@ -3692,7 +3735,7 @@ re_set_linkspeed(struct rl_softc *sc)
miisc = LIST_FIRST(&mii->mii_phys);
phyno = miisc->mii_phy;
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
- mii_phy_reset(miisc);
+ PHY_RESET(miisc);
re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0);
re_miibus_writereg(sc->rl_dev, phyno,
MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA);
@@ -3742,7 +3785,7 @@ re_setwol(struct rl_softc *sc)
RL_LOCK_ASSERT(sc);
- if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
+ if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
return;
ifp = sc->rl_ifp;
@@ -3814,7 +3857,7 @@ re_clrwol(struct rl_softc *sc)
RL_LOCK_ASSERT(sc);
- if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
+ if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
return;
/* Enable config register write. */
diff --git a/freebsd/sys/dev/smc/if_smc.c b/freebsd/sys/dev/smc/if_smc.c
index b6cb58fd..073a35c1 100644
--- a/freebsd/sys/dev/smc/if_smc.c
+++ b/freebsd/sys/dev/smc/if_smc.c
@@ -690,11 +690,11 @@ smc_task_rx(void *context, int pending)
/*
* Grab an mbuf and attach a cluster.
*/
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ MGETHDR(m, M_NOWAIT, MT_DATA);
if (m == NULL) {
break;
}
- MCLGET(m, M_DONTWAIT);
+ MCLGET(m, M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_freem(m);
break;
diff --git a/freebsd/sys/dev/usb/controller/ehci.c b/freebsd/sys/dev/usb/controller/ehci.c
index 18864431..456aff61 100644
--- a/freebsd/sys/dev/usb/controller/ehci.c
+++ b/freebsd/sys/dev/usb/controller/ehci.c
@@ -96,7 +96,7 @@ static int ehcinohighspeed = 0;
static int ehciiaadbug = 0;
static int ehcilostintrbug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, ehci, CTLFLAG_RW, 0, "USB ehci");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, ehci, CTLFLAG_RW, 0, "USB ehci");
SYSCTL_INT(_hw_usb_ehci, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
&ehcidebug, 0, "Debug level");
TUNABLE_INT("hw.usb.ehci.debug", &ehcidebug);
@@ -334,14 +334,18 @@ ehci_init(ehci_softc_t *sc)
sc->sc_noport = EHCI_HCS_N_PORTS(sparams);
sc->sc_bus.usbrev = USB_REV_2_0;
- /* Reset the controller */
- DPRINTF("%s: resetting\n", device_get_nameunit(sc->sc_bus.bdev));
+ if (!(sc->sc_flags & EHCI_SCFLG_DONTRESET)) {
+ /* Reset the controller */
+ DPRINTF("%s: resetting\n",
+ device_get_nameunit(sc->sc_bus.bdev));
- err = ehci_hcreset(sc);
- if (err) {
- device_printf(sc->sc_bus.bdev, "reset timeout\n");
- return (err);
+ err = ehci_hcreset(sc);
+ if (err) {
+ device_printf(sc->sc_bus.bdev, "reset timeout\n");
+ return (err);
+ }
}
+
/*
* use current frame-list-size selection 0: 1024*4 bytes 1: 512*4
* bytes 2: 256*4 bytes 3: unknown
diff --git a/freebsd/sys/dev/usb/controller/ehci.h b/freebsd/sys/dev/usb/controller/ehci.h
index a64d48a0..f718a8a7 100644
--- a/freebsd/sys/dev/usb/controller/ehci.h
+++ b/freebsd/sys/dev/usb/controller/ehci.h
@@ -345,6 +345,8 @@ typedef struct ehci_softc {
#define EHCI_SCFLG_TT 0x0020 /* transaction translator present */
#define EHCI_SCFLG_LOSTINTRBUG 0x0040 /* workaround for VIA / ATI chipsets */
#define EHCI_SCFLG_IAADBUG 0x0080 /* workaround for nVidia chipsets */
+#define EHCI_SCFLG_DONTRESET 0x0100 /* don't reset ctrl. in ehci_init() */
+#define EHCI_SCFLG_DONEINIT 0x1000 /* ehci_init() has been called. */
uint8_t sc_offs; /* offset to operational registers */
uint8_t sc_doorbell_disable; /* set on doorbell failure */
diff --git a/freebsd/sys/dev/usb/controller/ohci.c b/freebsd/sys/dev/usb/controller/ohci.c
index 94e94fd9..42129a8e 100644
--- a/freebsd/sys/dev/usb/controller/ohci.c
+++ b/freebsd/sys/dev/usb/controller/ohci.c
@@ -82,7 +82,7 @@ __FBSDID("$FreeBSD$");
#ifdef USB_DEBUG
static int ohcidebug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, ohci, CTLFLAG_RW, 0, "USB ohci");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, ohci, CTLFLAG_RW, 0, "USB ohci");
SYSCTL_INT(_hw_usb_ohci, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
&ohcidebug, 0, "ohci debug level");
TUNABLE_INT("hw.usb.ohci.debug", &ohcidebug);
diff --git a/freebsd/sys/dev/usb/controller/usb_controller.c b/freebsd/sys/dev/usb/controller/usb_controller.c
index 3f9c8742..37f79364 100644
--- a/freebsd/sys/dev/usb/controller/usb_controller.c
+++ b/freebsd/sys/dev/usb/controller/usb_controller.c
@@ -81,7 +81,7 @@ static void usb_attach_sub(device_t, struct usb_bus *);
#ifdef USB_DEBUG
static int usb_ctrl_debug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, ctrl, CTLFLAG_RW, 0, "USB controller");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, ctrl, CTLFLAG_RW, 0, "USB controller");
SYSCTL_INT(_hw_usb_ctrl, OID_AUTO, debug, CTLFLAG_RW, &usb_ctrl_debug, 0,
"Debug level");
#endif
@@ -889,3 +889,28 @@ usb_bus_mem_free_all(struct usb_bus *bus, usb_bus_mem_cb_t *cb)
mtx_destroy(&bus->bus_mtx);
}
+
+/* convenience wrappers */
+void
+usb_proc_explore_mwait(struct usb_device *udev, void *pm1, void *pm2)
+{
+ usb_proc_mwait(&udev->bus->explore_proc, pm1, pm2);
+}
+
+void *
+usb_proc_explore_msignal(struct usb_device *udev, void *pm1, void *pm2)
+{
+ return (usb_proc_msignal(&udev->bus->explore_proc, pm1, pm2));
+}
+
+void
+usb_proc_explore_lock(struct usb_device *udev)
+{
+ USB_BUS_LOCK(udev->bus);
+}
+
+void
+usb_proc_explore_unlock(struct usb_device *udev)
+{
+ USB_BUS_UNLOCK(udev->bus);
+}
diff --git a/freebsd/sys/dev/usb/controller/xhcireg.h b/freebsd/sys/dev/usb/controller/xhcireg.h
new file mode 100644
index 00000000..85d989a6
--- /dev/null
+++ b/freebsd/sys/dev/usb/controller/xhcireg.h
@@ -0,0 +1,221 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2010 Hans Petter Selasky. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _XHCIREG_H_
+#define _XHCIREG_H_
+
+/* XHCI PCI config registers */
+#define PCI_XHCI_CBMEM 0x10 /* configuration base MEM */
+#define PCI_XHCI_USBREV 0x60 /* RO USB protocol revision */
+#define PCI_USB_REV_3_0 0x30 /* USB 3.0 */
+#define PCI_XHCI_FLADJ 0x61 /* RW frame length adjust */
+
+#define PCI_XHCI_INTEL_XUSB2PR 0xD0 /* Intel USB2 Port Routing */
+#define PCI_XHCI_INTEL_USB3_PSSEN 0xD8 /* Intel USB3 Port SuperSpeed Enable */
+
+/* XHCI capability registers */
+#define XHCI_CAPLENGTH 0x00 /* RO capability */
+#define XHCI_RESERVED 0x01 /* Reserved */
+#define XHCI_HCIVERSION 0x02 /* RO Interface version number */
+#define XHCI_HCIVERSION_0_9 0x0090 /* xHCI version 0.9 */
+#define XHCI_HCIVERSION_1_0 0x0100 /* xHCI version 1.0 */
+#define XHCI_HCSPARAMS1 0x04 /* RO structual parameters 1 */
+#define XHCI_HCS1_DEVSLOT_MAX(x)((x) & 0xFF)
+#define XHCI_HCS1_IRQ_MAX(x) (((x) >> 8) & 0x3FF)
+#define XHCI_HCS1_N_PORTS(x) (((x) >> 24) & 0xFF)
+#define XHCI_HCSPARAMS2 0x08 /* RO structual parameters 2 */
+#define XHCI_HCS2_IST(x) ((x) & 0xF)
+#define XHCI_HCS2_ERST_MAX(x) (((x) >> 4) & 0xF)
+#define XHCI_HCS2_SPR(x) (((x) >> 24) & 0x1)
+#define XHCI_HCS2_SPB_MAX(x) (((x) >> 27) & 0x7F)
+#define XHCI_HCSPARAMS3 0x0C /* RO structual parameters 3 */
+#define XHCI_HCS3_U1_DEL(x) ((x) & 0xFF)
+#define XHCI_HCS3_U2_DEL(x) (((x) >> 16) & 0xFFFF)
+#define XHCI_HCSPARAMS0 0x10 /* RO capability parameters */
+#define XHCI_HCS0_AC64(x) ((x) & 0x1) /* 64-bit capable */
+#define XHCI_HCS0_BNC(x) (((x) >> 1) & 0x1) /* BW negotiation */
+#define XHCI_HCS0_CSZ(x) (((x) >> 2) & 0x1) /* context size */
+#define XHCI_HCS0_PPC(x) (((x) >> 3) & 0x1) /* port power control */
+#define XHCI_HCS0_PIND(x) (((x) >> 4) & 0x1) /* port indicators */
+#define XHCI_HCS0_LHRC(x) (((x) >> 5) & 0x1) /* light HC reset */
+#define XHCI_HCS0_LTC(x) (((x) >> 6) & 0x1) /* latency tolerance msg */
+#define XHCI_HCS0_NSS(x) (((x) >> 7) & 0x1) /* no secondary sid */
+#define XHCI_HCS0_PSA_SZ_MAX(x) (((x) >> 12) & 0xF) /* max pri. stream array size */
+#define XHCI_HCS0_XECP(x) (((x) >> 16) & 0xFFFF) /* extended capabilities pointer */
+#define XHCI_DBOFF 0x14 /* RO doorbell offset */
+#define XHCI_RTSOFF 0x18 /* RO runtime register space offset */
+
+/* XHCI operational registers. Offset given by XHCI_CAPLENGTH register */
+#define XHCI_USBCMD 0x00 /* XHCI command */
+#define XHCI_CMD_RS 0x00000001 /* RW Run/Stop */
+#define XHCI_CMD_HCRST 0x00000002 /* RW Host Controller Reset */
+#define XHCI_CMD_INTE 0x00000004 /* RW Interrupter Enable */
+#define XHCI_CMD_HSEE 0x00000008 /* RW Host System Error Enable */
+#define XHCI_CMD_LHCRST 0x00000080 /* RO/RW Light Host Controller Reset */
+#define XHCI_CMD_CSS 0x00000100 /* RW Controller Save State */
+#define XHCI_CMD_CRS 0x00000200 /* RW Controller Restore State */
+#define XHCI_CMD_EWE 0x00000400 /* RW Enable Wrap Event */
+#define XHCI_CMD_EU3S 0x00000800 /* RW Enable U3 MFINDEX Stop */
+#define XHCI_USBSTS 0x04 /* XHCI status */
+#define XHCI_STS_HCH 0x00000001 /* RO - Host Controller Halted */
+#define XHCI_STS_HSE 0x00000004 /* RW - Host System Error */
+#define XHCI_STS_EINT 0x00000008 /* RW - Event Interrupt */
+#define XHCI_STS_PCD 0x00000010 /* RW - Port Change Detect */
+#define XHCI_STS_SSS 0x00000100 /* RO - Save State Status */
+#define XHCI_STS_RSS 0x00000200 /* RO - Restore State Status */
+#define XHCI_STS_SRE 0x00000400 /* RW - Save/Restore Error */
+#define XHCI_STS_CNR 0x00000800 /* RO - Controller Not Ready */
+#define XHCI_STS_HCE 0x00001000 /* RO - Host Controller Error */
+#define XHCI_PAGESIZE 0x08 /* XHCI page size mask */
+#define XHCI_PAGESIZE_4K 0x00000001 /* 4K Page Size */
+#define XHCI_PAGESIZE_8K 0x00000002 /* 8K Page Size */
+#define XHCI_PAGESIZE_16K 0x00000004 /* 16K Page Size */
+#define XHCI_PAGESIZE_32K 0x00000008 /* 32K Page Size */
+#define XHCI_PAGESIZE_64K 0x00000010 /* 64K Page Size */
+#define XHCI_DNCTRL 0x14 /* XHCI device notification control */
+#define XHCI_DNCTRL_MASK(n) (1U << (n))
+#define XHCI_CRCR_LO 0x18 /* XHCI command ring control */
+#define XHCI_CRCR_LO_RCS 0x00000001 /* RW - consumer cycle state */
+#define XHCI_CRCR_LO_CS 0x00000002 /* RW - command stop */
+#define XHCI_CRCR_LO_CA 0x00000004 /* RW - command abort */
+#define XHCI_CRCR_LO_CRR 0x00000008 /* RW - command ring running */
+#define XHCI_CRCR_LO_MASK 0x0000000F
+#define XHCI_CRCR_HI 0x1C /* XHCI command ring control */
+#define XHCI_DCBAAP_LO 0x30 /* XHCI dev context BA pointer */
+#define XHCI_DCBAAP_HI 0x34 /* XHCI dev context BA pointer */
+#define XHCI_CONFIG 0x38
+#define XHCI_CONFIG_SLOTS_MASK 0x000000FF /* RW - number of device slots enabled */
+
+/* XHCI port status registers */
+#define XHCI_PORTSC(n) (0x3F0 + (0x10 * (n))) /* XHCI port status */
+#define XHCI_PS_CCS 0x00000001 /* RO - current connect status */
+#define XHCI_PS_PED 0x00000002 /* RW - port enabled / disabled */
+#define XHCI_PS_OCA 0x00000008 /* RO - over current active */
+#define XHCI_PS_PR 0x00000010 /* RW - port reset */
+#define XHCI_PS_PLS_GET(x) (((x) >> 5) & 0xF) /* RW - port link state */
+#define XHCI_PS_PLS_SET(x) (((x) & 0xF) << 5) /* RW - port link state */
+#define XHCI_PS_PP 0x00000200 /* RW - port power */
+#define XHCI_PS_SPEED_GET(x) (((x) >> 10) & 0xF) /* RO - port speed */
+#define XHCI_PS_PIC_GET(x) (((x) >> 14) & 0x3) /* RW - port indicator */
+#define XHCI_PS_PIC_SET(x) (((x) & 0x3) << 14) /* RW - port indicator */
+#define XHCI_PS_LWS 0x00010000 /* RW - port link state write strobe */
+#define XHCI_PS_CSC 0x00020000 /* RW - connect status change */
+#define XHCI_PS_PEC 0x00040000 /* RW - port enable/disable change */
+#define XHCI_PS_WRC 0x00080000 /* RW - warm port reset change */
+#define XHCI_PS_OCC 0x00100000 /* RW - over-current change */
+#define XHCI_PS_PRC 0x00200000 /* RW - port reset change */
+#define XHCI_PS_PLC 0x00400000 /* RW - port link state change */
+#define XHCI_PS_CEC 0x00800000 /* RW - config error change */
+#define XHCI_PS_CAS 0x01000000 /* RO - cold attach status */
+#define XHCI_PS_WCE 0x02000000 /* RW - wake on connect enable */
+#define XHCI_PS_WDE 0x04000000 /* RW - wake on disconnect enable */
+#define XHCI_PS_WOE 0x08000000 /* RW - wake on over-current enable */
+#define XHCI_PS_DR 0x40000000 /* RO - device removable */
+#define XHCI_PS_WPR 0x80000000U /* RW - warm port reset */
+#define XHCI_PS_CLEAR 0x80FF01FFU /* command bits */
+
+#define XHCI_PORTPMSC(n) (0x3F4 + (0x10 * (n))) /* XHCI status and control */
+#define XHCI_PM3_U1TO_GET(x) (((x) >> 0) & 0xFF) /* RW - U1 timeout */
+#define XHCI_PM3_U1TO_SET(x) (((x) & 0xFF) << 0) /* RW - U1 timeout */
+#define XHCI_PM3_U2TO_GET(x) (((x) >> 8) & 0xFF) /* RW - U2 timeout */
+#define XHCI_PM3_U2TO_SET(x) (((x) & 0xFF) << 8) /* RW - U2 timeout */
+#define XHCI_PM3_FLA 0x00010000 /* RW - Force Link PM Accept */
+#define XHCI_PM2_L1S_GET(x) (((x) >> 0) & 0x7) /* RO - L1 status */
+#define XHCI_PM2_RWE 0x00000008 /* RW - remote wakup enable */
+#define XHCI_PM2_HIRD_GET(x) (((x) >> 4) & 0xF) /* RW - host initiated resume duration */
+#define XHCI_PM2_HIRD_SET(x) (((x) & 0xF) << 4) /* RW - host initiated resume duration */
+#define XHCI_PM2_L1SLOT_GET(x) (((x) >> 8) & 0xFF) /* RW - L1 device slot */
+#define XHCI_PM2_L1SLOT_SET(x) (((x) & 0xFF) << 8) /* RW - L1 device slot */
+#define XHCI_PM2_HLE 0x00010000 /* RW - hardware LPM enable */
+#define XHCI_PORTLI(n) (0x3F8 + (0x10 * (n))) /* XHCI port link info */
+#define XHCI_PLI3_ERR_GET(x) (((x) >> 0) & 0xFFFF) /* RO - port link errors */
+#define XHCI_PORTRSV(n) (0x3FC + (0x10 * (n))) /* XHCI port reserved */
+
+/* XHCI runtime registers. Offset given by XHCI_CAPLENGTH + XHCI_RTSOFF registers */
+#define XHCI_MFINDEX 0x0000 /* RO - microframe index */
+#define XHCI_MFINDEX_GET(x) ((x) & 0x3FFF)
+#define XHCI_IMAN(n) (0x0020 + (0x20 * (n))) /* XHCI interrupt management */
+#define XHCI_IMAN_INTR_PEND 0x00000001 /* RW - interrupt pending */
+#define XHCI_IMAN_INTR_ENA 0x00000002 /* RW - interrupt enable */
+#define XHCI_IMOD(n) (0x0024 + (0x20 * (n))) /* XHCI interrupt moderation */
+#define XHCI_IMOD_IVAL_GET(x) (((x) >> 0) & 0xFFFF) /* 250ns unit */
+#define XHCI_IMOD_IVAL_SET(x) (((x) & 0xFFFF) << 0) /* 250ns unit */
+#define XHCI_IMOD_ICNT_GET(x) (((x) >> 16) & 0xFFFF) /* 250ns unit */
+#define XHCI_IMOD_ICNT_SET(x) (((x) & 0xFFFF) << 16) /* 250ns unit */
+#define XHCI_IMOD_DEFAULT 0x000003E8U /* 8000 IRQ/second */
+#define XHCI_ERSTSZ(n) (0x0028 + (0x20 * (n))) /* XHCI event ring segment table size */
+#define XHCI_ERSTS_GET(x) ((x) & 0xFFFF)
+#define XHCI_ERSTS_SET(x) ((x) & 0xFFFF)
+#define XHCI_ERSTBA_LO(n) (0x0030 + (0x20 * (n))) /* XHCI event ring segment table BA */
+#define XHCI_ERSTBA_HI(n) (0x0034 + (0x20 * (n))) /* XHCI event ring segment table BA */
+#define XHCI_ERDP_LO(n) (0x0038 + (0x20 * (n))) /* XHCI event ring dequeue pointer */
+#define XHCI_ERDP_LO_SINDEX(x) ((x) & 0x7) /* RO - dequeue segment index */
+#define XHCI_ERDP_LO_BUSY 0x00000008 /* RW - event handler busy */
+#define XHCI_ERDP_HI(n) (0x003C + (0x20 * (n))) /* XHCI event ring dequeue pointer */
+
+/* XHCI doorbell registers. Offset given by XHCI_CAPLENGTH + XHCI_DBOFF registers */
+#define XHCI_DOORBELL(n) (0x0000 + (4 * (n)))
+#define XHCI_DB_TARGET_GET(x) ((x) & 0xFF) /* RW - doorbell target */
+#define XHCI_DB_TARGET_SET(x) ((x) & 0xFF) /* RW - doorbell target */
+#define XHCI_DB_SID_GET(x) (((x) >> 16) & 0xFFFF) /* RW - doorbell stream ID */
+#define XHCI_DB_SID_SET(x) (((x) & 0xFFFF) << 16) /* RW - doorbell stream ID */
+
+/* XHCI legacy support */
+#define XHCI_XECP_ID(x) ((x) & 0xFF)
+#define XHCI_XECP_NEXT(x) (((x) >> 8) & 0xFF)
+#define XHCI_XECP_BIOS_SEM 0x0002
+#define XHCI_XECP_OS_SEM 0x0003
+
+/* XHCI capability ID's */
+#define XHCI_ID_USB_LEGACY 0x0001
+#define XHCI_ID_PROTOCOLS 0x0002
+#define XHCI_ID_POWER_MGMT 0x0003
+#define XHCI_ID_VIRTUALIZATION 0x0004
+#define XHCI_ID_MSG_IRQ 0x0005
+#define XHCI_ID_USB_LOCAL_MEM 0x0006
+
+/* XHCI register R/W wrappers */
+#define XREAD1(sc, what, a) \
+ bus_space_read_1((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XREAD2(sc, what, a) \
+ bus_space_read_2((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XREAD4(sc, what, a) \
+ bus_space_read_4((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XWRITE1(sc, what, a, x) \
+ bus_space_write_1((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+#define XWRITE2(sc, what, a, x) \
+ bus_space_write_2((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+#define XWRITE4(sc, what, a, x) \
+ bus_space_write_4((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+
+#endif /* _XHCIREG_H_ */
diff --git a/freebsd/sys/dev/usb/quirk/usb_quirk.c b/freebsd/sys/dev/usb/quirk/usb_quirk.c
index 2df2de5e..9042c166 100644
--- a/freebsd/sys/dev/usb/quirk/usb_quirk.c
+++ b/freebsd/sys/dev/usb/quirk/usb_quirk.c
@@ -96,6 +96,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(SILICONPORTALS, YAPPHONE, 0x100, 0x100, UQ_AU_INP_ASYNC),
USB_QUIRK(LOGITECH, UN53B, 0x0000, 0xffff, UQ_NO_STRINGS),
USB_QUIRK(ELSA, MODEM1, 0x0000, 0xffff, UQ_CFG_INDEX_1),
+ USB_QUIRK(PLANEX2, MZKUE150N, 0x0000, 0xffff, UQ_CFG_INDEX_1),
/* Quirks for printer devices */
USB_QUIRK(HP, 895C, 0x0000, 0xffff, UQ_BROKEN_BIDIR),
USB_QUIRK(HP, 880C, 0x0000, 0xffff, UQ_BROKEN_BIDIR),
@@ -127,6 +128,8 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
/* MS keyboards do weird things */
USB_QUIRK(MICROSOFT, NATURAL4000, 0x0000, 0xFFFF, UQ_KBD_BOOTPROTO),
USB_QUIRK(MICROSOFT, WLINTELLIMOUSE, 0x0000, 0xffff, UQ_MS_LEADING_BYTE),
+ /* Quirk for Corsair Vengeance K60 keyboard */
+ USB_QUIRK(CORSAIR, K60, 0x0000, 0xffff, UQ_KBD_BOOTPROTO),
/* umodem(4) device quirks */
USB_QUIRK(METRICOM, RICOCHET_GS, 0x100, 0x100, UQ_ASSUME_CM_OVER_DATA),
USB_QUIRK(SANYO, SCP4900, 0x000, 0x000, UQ_ASSUME_CM_OVER_DATA),
@@ -254,6 +257,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(MICROTECH, DPCM, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI,
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_TEST_UNIT_READY,
UQ_MSC_NO_START_STOP),
+ USB_QUIRK(MICRON, REALSSD, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE),
USB_QUIRK(MICROTECH, SCSIDB25, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI),
USB_QUIRK(MICROTECH, SCSIHD50, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
@@ -391,6 +395,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN),
USB_QUIRK(SONY, PORTABLE_HDD_V2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI),
+ USB_QUIRK(STMICRO, ST72682, 0x0000, 0xffff, UQ_MSC_NO_PREVENT_ALLOW),
USB_QUIRK(SUPERTOP, IDE, 0x0000, 0xffff, UQ_MSC_IGNORE_RESIDUE,
UQ_MSC_NO_SYNC_CACHE),
USB_QUIRK(TAUGA, CAMERAMATE, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI),
@@ -443,6 +448,9 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(MEIZU, M6_SL, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB,
UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY, UQ_MSC_NO_SYNC_CACHE),
+ USB_QUIRK(TOSHIBA, TRANSMEMORY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE),
+ USB_QUIRK(VIALABS, USB30SATABRIDGE, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE),
+
/* Non-standard USB MIDI devices */
USB_QUIRK(ROLAND, UM1, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(ROLAND, SC8850, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
@@ -460,7 +468,13 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = {
USB_QUIRK(ROLAND, SD20, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(ROLAND, SD80, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
USB_QUIRK(ROLAND, UA700, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
+ USB_QUIRK(EGO, M4U, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
+ USB_QUIRK(LOGILINK, U2M, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
USB_QUIRK(MEDELI, DD305, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI, UQ_MATCH_VENDOR_ONLY),
+ USB_QUIRK(REDOCTANE, GHMIDI, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
+ USB_QUIRK(TEXTECH, U2M_1, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
+ USB_QUIRK(TEXTECH, U2M_2, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
+ USB_QUIRK(WCH2, U2M, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI),
/* Non-standard USB AUDIO devices */
USB_QUIRK(MAUDIO, FASTTRACKULTRA, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS),
@@ -514,6 +528,7 @@ static const char *usb_quirk_str[USB_QUIRK_MAX] = {
[UQ_MSC_NO_GETMAXLUN] = "UQ_MSC_NO_GETMAXLUN",
[UQ_MSC_NO_INQUIRY] = "UQ_MSC_NO_INQUIRY",
[UQ_MSC_NO_INQUIRY_EVPD] = "UQ_MSC_NO_INQUIRY_EVPD",
+ [UQ_MSC_NO_PREVENT_ALLOW] = "UQ_MSC_NO_PREVENT_ALLOW",
[UQ_MSC_NO_SYNC_CACHE] = "UQ_MSC_NO_SYNC_CACHE",
[UQ_MSC_SHUTTLE_INIT] = "UQ_MSC_SHUTTLE_INIT",
[UQ_MSC_ALT_IFACE_1] = "UQ_MSC_ALT_IFACE_1",
diff --git a/freebsd/sys/dev/usb/quirk/usb_quirk.h b/freebsd/sys/dev/usb/quirk/usb_quirk.h
index f2c10dd8..32a60a10 100644
--- a/freebsd/sys/dev/usb/quirk/usb_quirk.h
+++ b/freebsd/sys/dev/usb/quirk/usb_quirk.h
@@ -75,6 +75,7 @@ enum {
UQ_MSC_NO_GETMAXLUN, /* does not support get max LUN */
UQ_MSC_NO_INQUIRY, /* fake generic inq response */
UQ_MSC_NO_INQUIRY_EVPD, /* does not support inq EVPD */
+ UQ_MSC_NO_PREVENT_ALLOW, /* does not support medium removal */
UQ_MSC_NO_SYNC_CACHE, /* does not support sync cache */
UQ_MSC_SHUTTLE_INIT, /* requires Shuttle init sequence */
UQ_MSC_ALT_IFACE_1, /* switch to alternate interface 1 */
diff --git a/freebsd/sys/dev/usb/storage/umass.c b/freebsd/sys/dev/usb/storage/umass.c
index d3d2040f..76a1fcb6 100644
--- a/freebsd/sys/dev/usb/storage/umass.c
+++ b/freebsd/sys/dev/usb/storage/umass.c
@@ -139,14 +139,6 @@ __FBSDID("$FreeBSD$");
#include <cam/cam_periph.h>
-#define UMASS_EXT_BUFFER
-#ifdef UMASS_EXT_BUFFER
-/* this enables loading of virtual buffers into DMA */
-#define UMASS_USB_FLAGS .ext_buffer=1,
-#else
-#define UMASS_USB_FLAGS
-#endif
-
#ifdef USB_DEBUG
#define DIF(m, x) \
do { \
@@ -173,19 +165,21 @@ __FBSDID("$FreeBSD$");
#define UDMASS_CBI 0x00400000 /* CBI transfers */
#define UDMASS_WIRE (UDMASS_BBB|UDMASS_CBI)
#define UDMASS_ALL 0xffff0000 /* all of the above */
-static int umass_debug = 0;
+static int umass_debug;
+static int umass_throttle;
-SYSCTL_NODE(_hw_usb, OID_AUTO, umass, CTLFLAG_RW, 0, "USB umass");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, umass, CTLFLAG_RW, 0, "USB umass");
SYSCTL_INT(_hw_usb_umass, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
&umass_debug, 0, "umass debug level");
TUNABLE_INT("hw.usb.umass.debug", &umass_debug);
+SYSCTL_INT(_hw_usb_umass, OID_AUTO, throttle, CTLFLAG_RW | CTLFLAG_TUN,
+ &umass_throttle, 0, "Forced delay between commands in milliseconds");
+TUNABLE_INT("hw.usb.umass.throttle", &umass_throttle);
#else
#define DIF(...) do { } while (0)
#define DPRINTF(...) do { } while (0)
#endif
-#define UMASS_GONE ((struct umass_softc *)1)
-
#define UMASS_BULK_SIZE (1 << 17)
#define UMASS_CBI_DIAGNOSTIC_CMDLEN 12 /* bytes */
#define UMASS_MAX_CMDLEN MAX(12, CAM_MAX_CDBLEN) /* bytes */
@@ -373,6 +367,8 @@ typedef uint8_t (umass_transform_t)(struct umass_softc *sc, uint8_t *cmd_ptr,
* result.
*/
#define NO_SYNCHRONIZE_CACHE 0x4000
+ /* Device does not support 'PREVENT/ALLOW MEDIUM REMOVAL'. */
+#define NO_PREVENT_ALLOW 0x8000
struct umass_softc {
@@ -545,7 +541,7 @@ static struct usb_config umass_bbb_config[UMASS_T_BBB_MAX] = {
.endpoint = UE_ADDR_ANY,
.direction = UE_DIR_IN,
.bufsize = UMASS_BULK_SIZE,
- .flags = {.proxy_buffer = 1,.short_xfer_ok = 1, UMASS_USB_FLAGS},
+ .flags = {.proxy_buffer = 1,.short_xfer_ok = 1,.ext_buffer=1,},
.callback = &umass_t_bbb_data_read_callback,
.timeout = 0, /* overwritten later */
},
@@ -564,7 +560,7 @@ static struct usb_config umass_bbb_config[UMASS_T_BBB_MAX] = {
.endpoint = UE_ADDR_ANY,
.direction = UE_DIR_OUT,
.bufsize = UMASS_BULK_SIZE,
- .flags = {.proxy_buffer = 1,.short_xfer_ok = 1, UMASS_USB_FLAGS},
+ .flags = {.proxy_buffer = 1,.short_xfer_ok = 1,.ext_buffer=1,},
.callback = &umass_t_bbb_data_write_callback,
.timeout = 0, /* overwritten later */
},
@@ -637,7 +633,7 @@ static struct usb_config umass_cbi_config[UMASS_T_CBI_MAX] = {
.endpoint = UE_ADDR_ANY,
.direction = UE_DIR_IN,
.bufsize = UMASS_BULK_SIZE,
- .flags = {.proxy_buffer = 1,.short_xfer_ok = 1, UMASS_USB_FLAGS},
+ .flags = {.proxy_buffer = 1,.short_xfer_ok = 1,.ext_buffer=1,},
.callback = &umass_t_cbi_data_read_callback,
.timeout = 0, /* overwritten later */
},
@@ -656,7 +652,7 @@ static struct usb_config umass_cbi_config[UMASS_T_CBI_MAX] = {
.endpoint = UE_ADDR_ANY,
.direction = UE_DIR_OUT,
.bufsize = UMASS_BULK_SIZE,
- .flags = {.proxy_buffer = 1,.short_xfer_ok = 1, UMASS_USB_FLAGS},
+ .flags = {.proxy_buffer = 1,.short_xfer_ok = 1,.ext_buffer=1,},
.callback = &umass_t_cbi_data_write_callback,
.timeout = 0, /* overwritten later */
},
@@ -845,6 +841,8 @@ umass_probe_proto(device_t dev, struct usb_attach_arg *uaa)
quirks |= NO_INQUIRY;
if (usb_test_quirk(uaa, UQ_MSC_NO_INQUIRY_EVPD))
quirks |= NO_INQUIRY_EVPD;
+ if (usb_test_quirk(uaa, UQ_MSC_NO_PREVENT_ALLOW))
+ quirks |= NO_PREVENT_ALLOW;
if (usb_test_quirk(uaa, UQ_MSC_NO_SYNC_CACHE))
quirks |= NO_SYNCHRONIZE_CACHE;
if (usb_test_quirk(uaa, UQ_MSC_SHUTTLE_INIT))
@@ -891,7 +889,7 @@ umass_attach(device_t dev)
struct usb_attach_arg *uaa = device_get_ivars(dev);
struct umass_probe_proto temp = umass_probe_proto(dev, uaa);
struct usb_interface_descriptor *id;
- int32_t err;
+ int err;
/*
* NOTE: the softc struct is cleared in device_set_driver.
@@ -1004,6 +1002,24 @@ umass_attach(device_t dev)
"transfers, %s\n", usbd_errstr(err));
goto detach;
}
+#ifdef USB_DEBUG
+ if (umass_throttle > 0) {
+ uint8_t x;
+ int iv;
+
+ iv = umass_throttle;
+
+ if (iv < 1)
+ iv = 1;
+ else if (iv > 8000)
+ iv = 8000;
+
+ for (x = 0; x != UMASS_T_MAX; x++) {
+ if (sc->sc_xfer[x] != NULL)
+ usbd_xfer_set_interval(sc->sc_xfer[x], iv);
+ }
+ }
+#endif
sc->sc_transform =
(sc->sc_proto & UMASS_PROTO_SCSI) ? &umass_scsi_transform :
(sc->sc_proto & UMASS_PROTO_UFI) ? &umass_ufi_transform :
@@ -1058,14 +1074,16 @@ umass_detach(device_t dev)
usbd_transfer_unsetup(sc->sc_xfer, UMASS_T_MAX);
-#if (__FreeBSD_version >= 700037)
mtx_lock(&sc->sc_mtx);
-#endif
+
+ /* cancel any leftover CCB's */
+
+ umass_cancel_ccb(sc);
+
umass_cam_detach_sim(sc);
-#if (__FreeBSD_version >= 700037)
mtx_unlock(&sc->sc_mtx);
-#endif
+
mtx_destroy(&sc->sc_mtx);
return (0); /* success */
@@ -1206,7 +1224,6 @@ umass_t_bbb_reset1_callback(struct usb_xfer *xfer, usb_error_t error)
default: /* Error */
umass_tr_error(xfer, error);
return;
-
}
}
@@ -1245,7 +1262,6 @@ tr_transferred:
default: /* Error */
umass_tr_error(xfer, error);
return;
-
}
}
@@ -1331,7 +1347,6 @@ umass_t_bbb_command_callback(struct usb_xfer *xfer, usb_error_t error)
default: /* Error */
umass_tr_error(xfer, error);
return;
-
}
}
@@ -1340,19 +1355,12 @@ umass_t_bbb_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
{
struct umass_softc *sc = usbd_xfer_softc(xfer);
uint32_t max_bulk = usbd_xfer_max_len(xfer);
-#ifndef UMASS_EXT_BUFFER
- struct usb_page_cache *pc;
-#endif
int actlen, sumlen;
usbd_xfer_status(xfer, &actlen, &sumlen, NULL, NULL);
switch (USB_GET_STATE(xfer)) {
case USB_ST_TRANSFERRED:
-#ifndef UMASS_EXT_BUFFER
- pc = usbd_xfer_get_frame(xfer, 0);
- usbd_copy_out(pc, 0, sc->sc_transfer.data_ptr, actlen);
-#endif
sc->sc_transfer.data_rem -= actlen;
sc->sc_transfer.data_ptr += actlen;
sc->sc_transfer.actlen += actlen;
@@ -1374,12 +1382,9 @@ umass_t_bbb_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
}
usbd_xfer_set_timeout(xfer, sc->sc_transfer.data_timeout);
-#ifdef UMASS_EXT_BUFFER
usbd_xfer_set_frame_data(xfer, 0, sc->sc_transfer.data_ptr,
max_bulk);
-#else
- usbd_xfer_set_frame_len(xfer, 0, max_bulk);
-#endif
+
usbd_transfer_submit(xfer);
return;
@@ -1390,7 +1395,6 @@ umass_t_bbb_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
umass_transfer_start(sc, UMASS_T_BBB_DATA_RD_CS);
}
return;
-
}
}
@@ -1406,9 +1410,6 @@ umass_t_bbb_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
{
struct umass_softc *sc = usbd_xfer_softc(xfer);
uint32_t max_bulk = usbd_xfer_max_len(xfer);
-#ifndef UMASS_EXT_BUFFER
- struct usb_page_cache *pc;
-#endif
int actlen, sumlen;
usbd_xfer_status(xfer, &actlen, &sumlen, NULL, NULL);
@@ -1436,14 +1437,8 @@ umass_t_bbb_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
}
usbd_xfer_set_timeout(xfer, sc->sc_transfer.data_timeout);
-#ifdef UMASS_EXT_BUFFER
usbd_xfer_set_frame_data(xfer, 0, sc->sc_transfer.data_ptr,
max_bulk);
-#else
- pc = usbd_xfer_get_frame(xfer, 0);
- usbd_copy_in(pc, 0, sc->sc_transfer.data_ptr, max_bulk);
- usbd_xfer_set_frame_len(xfer, 0, max_bulk);
-#endif
usbd_transfer_submit(xfer);
return;
@@ -1455,7 +1450,6 @@ umass_t_bbb_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
umass_transfer_start(sc, UMASS_T_BBB_DATA_WR_CS);
}
return;
-
}
}
@@ -1581,7 +1575,6 @@ tr_error:
umass_transfer_start(sc, UMASS_T_BBB_DATA_RD_CS);
}
return;
-
}
}
@@ -1612,8 +1605,7 @@ umass_command_start(struct umass_softc *sc, uint8_t dir,
if (sc->sc_xfer[sc->sc_last_xfer_index]) {
usbd_transfer_start(sc->sc_xfer[sc->sc_last_xfer_index]);
} else {
- ccb->ccb_h.status = CAM_TID_INVALID;
- xpt_done(ccb);
+ umass_cancel_ccb(sc);
}
}
@@ -1733,7 +1725,6 @@ umass_t_cbi_reset1_callback(struct usb_xfer *xfer, usb_error_t error)
else
umass_transfer_start(sc, UMASS_T_CBI_RESET2);
break;
-
}
}
@@ -1788,7 +1779,6 @@ tr_transferred:
default: /* Error */
umass_tr_error(xfer, error);
break;
-
}
}
@@ -1878,19 +1868,12 @@ umass_t_cbi_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
{
struct umass_softc *sc = usbd_xfer_softc(xfer);
uint32_t max_bulk = usbd_xfer_max_len(xfer);
-#ifndef UMASS_EXT_BUFFER
- struct usb_page_cache *pc;
-#endif
int actlen, sumlen;
usbd_xfer_status(xfer, &actlen, &sumlen, NULL, NULL);
switch (USB_GET_STATE(xfer)) {
case USB_ST_TRANSFERRED:
-#ifndef UMASS_EXT_BUFFER
- pc = usbd_xfer_get_frame(xfer, 0);
- usbd_copy_out(pc, 0, sc->sc_transfer.data_ptr, actlen);
-#endif
sc->sc_transfer.data_rem -= actlen;
sc->sc_transfer.data_ptr += actlen;
sc->sc_transfer.actlen += actlen;
@@ -1912,12 +1895,9 @@ umass_t_cbi_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
}
usbd_xfer_set_timeout(xfer, sc->sc_transfer.data_timeout);
-#ifdef UMASS_EXT_BUFFER
usbd_xfer_set_frame_data(xfer, 0, sc->sc_transfer.data_ptr,
max_bulk);
-#else
- usbd_xfer_set_frame_len(xfer, 0, max_bulk);
-#endif
+
usbd_transfer_submit(xfer);
break;
@@ -1929,7 +1909,6 @@ umass_t_cbi_data_read_callback(struct usb_xfer *xfer, usb_error_t error)
umass_transfer_start(sc, UMASS_T_CBI_DATA_RD_CS);
}
break;
-
}
}
@@ -1945,9 +1924,6 @@ umass_t_cbi_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
{
struct umass_softc *sc = usbd_xfer_softc(xfer);
uint32_t max_bulk = usbd_xfer_max_len(xfer);
-#ifndef UMASS_EXT_BUFFER
- struct usb_page_cache *pc;
-#endif
int actlen, sumlen;
usbd_xfer_status(xfer, &actlen, &sumlen, NULL, NULL);
@@ -1975,14 +1951,8 @@ umass_t_cbi_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
}
usbd_xfer_set_timeout(xfer, sc->sc_transfer.data_timeout);
-#ifdef UMASS_EXT_BUFFER
usbd_xfer_set_frame_data(xfer, 0, sc->sc_transfer.data_ptr,
max_bulk);
-#else
- pc = usbd_xfer_get_frame(xfer, 0);
- usbd_copy_in(pc, 0, sc->sc_transfer.data_ptr, max_bulk);
- usbd_xfer_set_frame_len(xfer, 0, max_bulk);
-#endif
usbd_transfer_submit(xfer);
break;
@@ -1995,7 +1965,6 @@ umass_t_cbi_data_write_callback(struct usb_xfer *xfer, usb_error_t error)
umass_transfer_start(sc, UMASS_T_CBI_DATA_WR_CS);
}
break;
-
}
}
@@ -2097,7 +2066,6 @@ tr_setup:
usbd_errstr(error));
umass_tr_error(xfer, error);
break;
-
}
}
@@ -2126,9 +2094,7 @@ umass_cam_attach_sim(struct umass_softc *sc)
DEVNAME_SIM,
sc /* priv */ ,
sc->sc_unit /* unit number */ ,
-#if (__FreeBSD_version >= 700037)
&sc->sc_mtx /* mutex */ ,
-#endif
1 /* maximum device openings */ ,
0 /* maximum tagged device openings */ ,
devq);
@@ -2138,27 +2104,15 @@ umass_cam_attach_sim(struct umass_softc *sc)
return (ENOMEM);
}
-#if (__FreeBSD_version >= 700037)
mtx_lock(&sc->sc_mtx);
-#endif
-#if (__FreeBSD_version >= 700048)
- if (xpt_bus_register(sc->sc_sim, sc->sc_dev, sc->sc_unit) != CAM_SUCCESS) {
+ if (xpt_bus_register(sc->sc_sim, sc->sc_dev,
+ sc->sc_unit) != CAM_SUCCESS) {
mtx_unlock(&sc->sc_mtx);
return (ENOMEM);
}
-#else
- if (xpt_bus_register(sc->sc_sim, sc->sc_unit) != CAM_SUCCESS) {
-#if (__FreeBSD_version >= 700037)
- mtx_unlock(&sc->sc_mtx);
-#endif
- return (ENOMEM);
- }
-#endif
-
-#if (__FreeBSD_version >= 700037)
mtx_unlock(&sc->sc_mtx);
-#endif
+
return (0);
}
@@ -2186,7 +2140,7 @@ umass_cam_detach_sim(struct umass_softc *sc)
if (sc->sc_sim != NULL) {
if (xpt_bus_deregister(cam_sim_path(sc->sc_sim))) {
/* accessing the softc is not possible after this */
- sc->sc_sim->softc = UMASS_GONE;
+ sc->sc_sim->softc = NULL;
cam_sim_free(sc->sc_sim, /* free_devq */ TRUE);
} else {
panic("%s: CAM layer is busy\n",
@@ -2205,68 +2159,11 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
{
struct umass_softc *sc = (struct umass_softc *)sim->softc;
- if (sc == UMASS_GONE ||
- (sc != NULL && !usbd_device_attached(sc->sc_udev))) {
+ if (sc == NULL) {
ccb->ccb_h.status = CAM_SEL_TIMEOUT;
xpt_done(ccb);
return;
}
- if (sc) {
-#if (__FreeBSD_version < 700037)
- mtx_lock(&sc->sc_mtx);
-#endif
- }
- /*
- * Verify, depending on the operation to perform, that we either got
- * a valid sc, because an existing target was referenced, or
- * otherwise the SIM is addressed.
- *
- * This avoids bombing out at a printf and does give the CAM layer some
- * sensible feedback on errors.
- */
- switch (ccb->ccb_h.func_code) {
- case XPT_SCSI_IO:
- case XPT_RESET_DEV:
- case XPT_GET_TRAN_SETTINGS:
- case XPT_SET_TRAN_SETTINGS:
- case XPT_CALC_GEOMETRY:
- /* the opcodes requiring a target. These should never occur. */
- if (sc == NULL) {
- DPRINTF(sc, UDMASS_GEN, "%s:%d:%d:%d:func_code 0x%04x: "
- "Invalid target (target needed)\n",
- DEVNAME_SIM, cam_sim_path(sc->sc_sim),
- ccb->ccb_h.target_id, ccb->ccb_h.target_lun,
- ccb->ccb_h.func_code);
-
- ccb->ccb_h.status = CAM_TID_INVALID;
- xpt_done(ccb);
- goto done;
- }
- break;
- case XPT_PATH_INQ:
- case XPT_NOOP:
- /*
- * The opcodes sometimes aimed at a target (sc is valid),
- * sometimes aimed at the SIM (sc is invalid and target is
- * CAM_TARGET_WILDCARD)
- */
- if ((sc == NULL) &&
- (ccb->ccb_h.target_id != CAM_TARGET_WILDCARD)) {
- DPRINTF(sc, UDMASS_SCSI, "%s:%d:%d:%d:func_code 0x%04x: "
- "Invalid target (no wildcard)\n",
- DEVNAME_SIM, cam_sim_path(sc->sc_sim),
- ccb->ccb_h.target_id, ccb->ccb_h.target_lun,
- ccb->ccb_h.func_code);
-
- ccb->ccb_h.status = CAM_TID_INVALID;
- xpt_done(ccb);
- goto done;
- }
- break;
- default:
- /* XXX Hm, we should check the input parameters */
- break;
- }
/* Perform the requested action */
switch (ccb->ccb_h.func_code) {
@@ -2353,17 +2250,20 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
*/
if ((sc->sc_quirks & (NO_INQUIRY_EVPD | NO_INQUIRY)) &&
(sc->sc_transfer.cmd_data[1] & SI_EVPD)) {
- struct scsi_sense_data *sense;
-
- sense = &ccb->csio.sense_data;
- bzero(sense, sizeof(*sense));
- sense->error_code = SSD_CURRENT_ERROR;
- sense->flags = SSD_KEY_ILLEGAL_REQUEST;
- sense->add_sense_code = 0x24;
- sense->extra_len = 10;
+
+ scsi_set_sense_data(&ccb->csio.sense_data,
+ /*sense_format*/ SSD_TYPE_NONE,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x24,
+ /*ascq*/ 0x00,
+ /*extra args*/ SSD_ELEM_NONE);
ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
- ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR |
- CAM_AUTOSNS_VALID;
+ ccb->ccb_h.status =
+ CAM_SCSI_STATUS_ERROR |
+ CAM_AUTOSNS_VALID |
+ CAM_DEV_QFRZN;
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
xpt_done(ccb);
goto done;
}
@@ -2382,6 +2282,13 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
if (sc->sc_quirks & FORCE_SHORT_INQUIRY) {
ccb->csio.dxfer_len = SHORT_INQUIRY_LENGTH;
}
+ } else if (sc->sc_transfer.cmd_data[0] == PREVENT_ALLOW) {
+ if (sc->sc_quirks & NO_PREVENT_ALLOW) {
+ ccb->csio.scsi_status = SCSI_STATUS_OK;
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ goto done;
+ }
} else if (sc->sc_transfer.cmd_data[0] == SYNCHRONIZE_CACHE) {
if (sc->sc_quirks & NO_SYNCHRONIZE_CACHE) {
ccb->csio.scsi_status = SCSI_STATUS_OK;
@@ -2418,12 +2325,11 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
cpi->unit_number = cam_sim_unit(sim);
cpi->bus_id = sc->sc_unit;
-#if (__FreeBSD_version >= 700025)
cpi->protocol = PROTO_SCSI;
cpi->protocol_version = SCSI_REV_2;
cpi->transport = XPORT_USB;
cpi->transport_version = 0;
-#endif
+
if (sc == NULL) {
cpi->base_transfer_speed = 0;
cpi->max_lun = 0;
@@ -2475,16 +2381,12 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
cam_sim_path(sc->sc_sim), ccb->ccb_h.target_id,
ccb->ccb_h.target_lun);
-#if (__FreeBSD_version >= 700025)
cts->protocol = PROTO_SCSI;
cts->protocol_version = SCSI_REV_2;
cts->transport = XPORT_USB;
cts->transport_version = 0;
cts->xport_specific.valid = 0;
-#else
- cts->valid = 0;
- cts->flags = 0; /* no disconnection, tagging */
-#endif
+
ccb->ccb_h.status = CAM_REQ_CMP;
xpt_done(ccb);
break;
@@ -2529,11 +2431,6 @@ umass_cam_action(struct cam_sim *sim, union ccb *ccb)
}
done:
-#if (__FreeBSD_version < 700037)
- if (sc) {
- mtx_unlock(&sc->sc_mtx);
- }
-#endif
return;
}
@@ -2542,7 +2439,7 @@ umass_cam_poll(struct cam_sim *sim)
{
struct umass_softc *sc = (struct umass_softc *)sim->softc;
- if (sc == UMASS_GONE)
+ if (sc == NULL)
return;
DPRINTF(sc, UDMASS_SCSI, "CAM poll\n");
@@ -2628,7 +2525,8 @@ umass_cam_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
* recovered. We return an error to CAM and let CAM
* retry the command if necessary.
*/
- ccb->ccb_h.status = CAM_REQ_CMP_ERR;
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
+ ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
xpt_done(ccb);
break;
}
@@ -2642,12 +2540,17 @@ umass_cam_sense_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
uint8_t status)
{
uint8_t *cmd;
- uint8_t key;
switch (status) {
case STATUS_CMD_OK:
case STATUS_CMD_UNKNOWN:
- case STATUS_CMD_FAILED:
+ case STATUS_CMD_FAILED: {
+ int key, sense_len;
+
+ ccb->csio.sense_resid = residue;
+ sense_len = ccb->csio.sense_len - ccb->csio.sense_resid;
+ key = scsi_get_sense_key(&ccb->csio.sense_data, sense_len,
+ /*show_errors*/ 1);
if (ccb->csio.ccb_h.flags & CAM_CDB_POINTER) {
cmd = (uint8_t *)(ccb->csio.cdb_io.cdb_ptr);
@@ -2655,8 +2558,6 @@ umass_cam_sense_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
cmd = (uint8_t *)(ccb->csio.cdb_io.cdb_bytes);
}
- key = (ccb->csio.sense_data.flags & SSD_KEY);
-
/*
* Getting sense data always succeeds (apart from wire
* failures):
@@ -2688,8 +2589,9 @@ umass_cam_sense_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
* usual.
*/
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR
- | CAM_AUTOSNS_VALID;
+ | CAM_AUTOSNS_VALID | CAM_DEV_QFRZN;
ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
#if 0
@@ -2700,34 +2602,40 @@ umass_cam_sense_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
/* the rest of the command was filled in at attach */
- if (umass_std_transform(sc, ccb,
+ if ((sc->sc_transform)(sc,
&sc->cam_scsi_test_unit_ready.opcode,
- sizeof(sc->cam_scsi_test_unit_ready))) {
+ sizeof(sc->cam_scsi_test_unit_ready)) == 1) {
umass_command_start(sc, DIR_NONE, NULL, 0,
ccb->ccb_h.timeout,
&umass_cam_quirk_cb, ccb);
+ break;
}
- break;
} else {
- ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR
- | CAM_AUTOSNS_VALID;
- ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
+ if (key >= 0) {
+ ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR
+ | CAM_AUTOSNS_VALID | CAM_DEV_QFRZN;
+ ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
+ } else
+ ccb->ccb_h.status = CAM_AUTOSENSE_FAIL
+ | CAM_DEV_QFRZN;
}
xpt_done(ccb);
break;
-
+ }
default:
DPRINTF(sc, UDMASS_SCSI, "Autosense failed, "
"status %d\n", status);
- ccb->ccb_h.status = CAM_AUTOSENSE_FAIL;
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
+ ccb->ccb_h.status = CAM_AUTOSENSE_FAIL | CAM_DEV_QFRZN;
xpt_done(ccb);
}
}
/*
* This completion code just handles the fact that we sent a test-unit-ready
- * after having previously failed a READ CAPACITY with CHECK_COND. Even
- * though this command succeeded, we have to tell CAM to retry.
+ * after having previously failed a READ CAPACITY with CHECK_COND. The CCB
+ * status for CAM is already set earlier.
*/
static void
umass_cam_quirk_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
@@ -2736,9 +2644,6 @@ umass_cam_quirk_cb(struct umass_softc *sc, union ccb *ccb, uint32_t residue,
DPRINTF(sc, UDMASS_SCSI, "Test unit ready "
"returned status %d\n", status);
- ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR
- | CAM_AUTOSNS_VALID;
- ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND;
xpt_done(ccb);
}
@@ -3027,7 +2932,8 @@ umass_std_transform(struct umass_softc *sc, union ccb *ccb,
xpt_done(ccb);
return (0);
} else if (retval == 0) {
- ccb->ccb_h.status = CAM_REQ_INVALID;
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
+ ccb->ccb_h.status = CAM_REQ_INVALID | CAM_DEV_QFRZN;
xpt_done(ccb);
return (0);
}
diff --git a/freebsd/sys/dev/usb/usb.h b/freebsd/sys/dev/usb/usb.h
index 2f216d27..9492d491 100644
--- a/freebsd/sys/dev/usb/usb.h
+++ b/freebsd/sys/dev/usb/usb.h
@@ -223,7 +223,8 @@ typedef struct usb_device_request usb_device_request_t;
#define UR_RESET_TT 0x09
#define UR_GET_TT_STATE 0x0a
#define UR_STOP_TT 0x0b
-#define UR_SET_HUB_DEPTH 0x0c
+#define UR_SET_AND_TEST 0x0c /* USB 2.0 only */
+#define UR_SET_HUB_DEPTH 0x0c /* USB 3.0 only */
#define USB_SS_HUB_DEPTH_MAX 5
#define UR_GET_PORT_ERR_COUNT 0x0d
@@ -246,6 +247,7 @@ typedef struct usb_device_request usb_device_request_t;
#define UHF_PORT_LINK_STATE 5
#define UHF_PORT_POWER 8
#define UHF_PORT_LOW_SPEED 9
+#define UHF_PORT_L1 10
#define UHF_C_PORT_CONNECTION 16
#define UHF_C_PORT_ENABLE 17
#define UHF_C_PORT_SUSPEND 18
@@ -253,6 +255,7 @@ typedef struct usb_device_request usb_device_request_t;
#define UHF_C_PORT_RESET 20
#define UHF_PORT_TEST 21
#define UHF_PORT_INDICATOR 22
+#define UHF_C_PORT_L1 23
/* SuperSpeed HUB specific features */
#define UHF_PORT_U1_TIMEOUT 23
@@ -322,7 +325,12 @@ struct usb_devcap_usb2ext_descriptor {
uByte bDescriptorType;
uByte bDevCapabilityType;
uDWord bmAttributes;
-#define USB_V2EXT_LPM 0x02
+#define USB_V2EXT_LPM (1U << 1)
+#define USB_V2EXT_BESL_SUPPORTED (1U << 2)
+#define USB_V2EXT_BESL_BASELINE_VALID (1U << 3)
+#define USB_V2EXT_BESL_DEEP_VALID (1U << 4)
+#define USB_V2EXT_BESL_BASELINE_GET(x) (((x) >> 8) & 0xF)
+#define USB_V2EXT_BESL_DEEP_GET(x) (((x) >> 12) & 0xF)
} __packed;
typedef struct usb_devcap_usb2ext_descriptor usb_devcap_usb2ext_descriptor_t;
@@ -669,6 +677,7 @@ struct usb_port_status {
#define UPS_SUSPEND 0x0004
#define UPS_OVERCURRENT_INDICATOR 0x0008
#define UPS_RESET 0x0010
+#define UPS_PORT_L1 0x0020 /* USB 2.0 only */
/* The link-state bits are valid for Super-Speed USB HUBs */
#define UPS_PORT_LINK_STATE_GET(x) (((x) >> 5) & 0xF)
#define UPS_PORT_LINK_STATE_SET(x) (((x) & 0xF) << 5)
@@ -699,7 +708,8 @@ struct usb_port_status {
#define UPS_C_SUSPEND 0x0004
#define UPS_C_OVERCURRENT_INDICATOR 0x0008
#define UPS_C_PORT_RESET 0x0010
-#define UPS_C_BH_PORT_RESET 0x0020
+#define UPS_C_PORT_L1 0x0020 /* USB 2.0 only */
+#define UPS_C_BH_PORT_RESET 0x0020 /* USB 3.0 only */
#define UPS_C_PORT_LINK_STATE 0x0040
#define UPS_C_PORT_CONFIG_ERROR 0x0080
} __packed;
diff --git a/freebsd/sys/dev/usb/usb_dev.c b/freebsd/sys/dev/usb/usb_dev.c
index 7ca10d7b..7697b64c 100644
--- a/freebsd/sys/dev/usb/usb_dev.c
+++ b/freebsd/sys/dev/usb/usb_dev.c
@@ -83,7 +83,7 @@
#ifdef USB_DEBUG
static int usb_fifo_debug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, dev, CTLFLAG_RW, 0, "USB device");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, dev, CTLFLAG_RW, 0, "USB device");
SYSCTL_INT(_hw_usb_dev, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
&usb_fifo_debug, 0, "Debug Level");
TUNABLE_INT("hw.usb.dev.debug", &usb_fifo_debug);
@@ -770,7 +770,7 @@ usb_fifo_close(struct usb_fifo *f, int fflags)
/* check if a thread wants SIGIO */
if (f->async_p != NULL) {
PROC_LOCK(f->async_p);
- psignal(f->async_p, SIGIO);
+ kern_psignal(f->async_p, SIGIO);
PROC_UNLOCK(f->async_p);
f->async_p = NULL;
}
@@ -844,7 +844,7 @@ usb_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
struct usb_cdev_privdata *cpd;
int err, ep;
- DPRINTFN(2, "%s fflags=0x%08x\n", dev->si_name, fflags);
+ DPRINTFN(2, "%s fflags=0x%08x\n", devtoname(dev), fflags);
KASSERT(fflags & (FREAD|FWRITE), ("invalid open flags"));
if (((fflags & FREAD) && !(pd->mode & FREAD)) ||
@@ -1584,7 +1584,7 @@ usb_fifo_wakeup(struct usb_fifo *f)
}
if (f->async_p != NULL) {
PROC_LOCK(f->async_p);
- psignal(f->async_p, SIGIO);
+ kern_psignal(f->async_p, SIGIO);
PROC_UNLOCK(f->async_p);
}
}
diff --git a/freebsd/sys/dev/usb/usb_device.c b/freebsd/sys/dev/usb/usb_device.c
index 3a58158d..78ae3d8c 100644
--- a/freebsd/sys/dev/usb/usb_device.c
+++ b/freebsd/sys/dev/usb/usb_device.c
@@ -2431,77 +2431,6 @@ usbd_get_device_index(struct usb_device *udev)
}
#if USB_HAVE_DEVCTL
-/*------------------------------------------------------------------------*
- * usb_notify_addq
- *
- * This function will generate events for dev.
- *------------------------------------------------------------------------*/
-#ifndef BURN_BRIDGES
-static void
-usb_notify_addq_compat(const char *type, struct usb_device *udev)
-{
- char *data = NULL;
- const char *ntype;
- struct malloc_type *mt;
- const size_t buf_size = 512;
-
- /* Convert notify type */
- if (strcmp(type, "ATTACH") == 0)
- ntype = "+";
- else if (strcmp(type, "DETACH") == 0)
- ntype = "-";
- else
- return;
-
- mtx_lock(&malloc_mtx);
- mt = malloc_desc2type("bus"); /* XXX M_BUS */
- mtx_unlock(&malloc_mtx);
- if (mt == NULL)
- return;
-
- data = malloc(buf_size, mt, M_NOWAIT);
- if (data == NULL)
- return;
-
- /* String it all together. */
- snprintf(data, buf_size,
- "%s"
-#if USB_HAVE_UGEN
- "%s "
-#endif
- "at port=%u "
- "vendor=0x%04x "
- "product=0x%04x "
- "devclass=0x%02x "
- "devsubclass=0x%02x "
- "sernum=\"%s\" "
- "release=0x%04x "
-#if USB_HAVE_UGEN
- "on %s\n"
-#endif
- "",
- ntype,
-#if USB_HAVE_UGEN
- udev->ugen_name,
-#endif
- udev->port_no,
- UGETW(udev->ddesc.idVendor),
- UGETW(udev->ddesc.idProduct),
- udev->ddesc.bDeviceClass,
- udev->ddesc.bDeviceSubClass,
- usb_get_serial(udev),
- UGETW(udev->ddesc.bcdDevice)
-#if USB_HAVE_UGEN
- , udev->parent_hub != NULL ?
- udev->parent_hub->ugen_name :
- device_get_nameunit(device_get_parent(udev->bus->bdev))
-#endif
- );
-
- devctl_queue_data(data);
-}
-#endif
-
static void
usb_notify_addq(const char *type, struct usb_device *udev)
{
@@ -2509,10 +2438,6 @@ usb_notify_addq(const char *type, struct usb_device *udev)
struct sbuf *sb;
int i;
-#ifndef BURN_BRIDGES
- usb_notify_addq_compat(type, udev);
-#endif
-
/* announce the device */
sb = sbuf_new_auto();
sbuf_printf(sb,
diff --git a/freebsd/sys/dev/usb/usb_device.h b/freebsd/sys/dev/usb/usb_device.h
index 03ddf1e6..8e13e3de 100644
--- a/freebsd/sys/dev/usb/usb_device.h
+++ b/freebsd/sys/dev/usb/usb_device.h
@@ -215,6 +215,7 @@ struct usb_device {
uint16_t power; /* mA the device uses */
uint16_t langid; /* language for strings */
+ uint16_t autoQuirk[USB_MAX_AUTO_QUIRK]; /* dynamic quirks */
uint8_t address; /* device addess */
uint8_t device_index; /* device index in "bus->devices" */
@@ -257,8 +258,6 @@ struct usb_device {
uint32_t clear_stall_errors; /* number of clear-stall failures */
- uint16_t autoQuirk[USB_MAX_AUTO_QUIRK]; /* dynamic quirks */
-
union usb_device_scratch scratch;
};
diff --git a/freebsd/sys/dev/usb/usb_freebsd.h b/freebsd/sys/dev/usb/usb_freebsd.h
index 8f9bb4c6..06369a25 100644
--- a/freebsd/sys/dev/usb/usb_freebsd.h
+++ b/freebsd/sys/dev/usb/usb_freebsd.h
@@ -48,7 +48,15 @@
#define USB_TD_GET_PROC(td) (td)->td_proc
#define USB_PROC_GET_GID(td) (td)->p_pgid
+#if (!defined(USB_HOST_ALIGN)) || (USB_HOST_ALIGN <= 0)
+/* Use default value. */
+#undef USB_HOST_ALIGN
#define USB_HOST_ALIGN 8 /* bytes, must be power of two */
+#endif
+/* Sanity check for USB_HOST_ALIGN: Verify power of two. */
+#if ((-USB_HOST_ALIGN) & USB_HOST_ALIGN) != USB_HOST_ALIGN
+#error "USB_HOST_ALIGN is not power of two."
+#endif
#define USB_FS_ISOC_UFRAME_MAX 4 /* exclusive unit */
#define USB_BUS_MAX 256 /* units */
#define USB_MAX_DEVICES 128 /* units */
diff --git a/freebsd/sys/dev/usb/usb_generic.c b/freebsd/sys/dev/usb/usb_generic.c
index 361ee2ef..a0b7f007 100644
--- a/freebsd/sys/dev/usb/usb_generic.c
+++ b/freebsd/sys/dev/usb/usb_generic.c
@@ -128,7 +128,7 @@ struct usb_fifo_methods usb_ugen_methods = {
#ifdef USB_DEBUG
static int ugen_debug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, ugen, CTLFLAG_RW, 0, "USB generic");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, ugen, CTLFLAG_RW, 0, "USB generic");
SYSCTL_INT(_hw_usb_ugen, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, &ugen_debug,
0, "Debug level");
TUNABLE_INT("hw.usb.ugen.debug", &ugen_debug);
@@ -1833,6 +1833,17 @@ ugen_get_power_mode(struct usb_fifo *f)
}
static int
+ugen_get_power_usage(struct usb_fifo *f)
+{
+ struct usb_device *udev = f->udev;
+
+ if (udev == NULL)
+ return (0);
+
+ return (udev->power);
+}
+
+static int
ugen_do_port_feature(struct usb_fifo *f, uint8_t port_no,
uint8_t set, uint16_t feature)
{
@@ -2194,6 +2205,10 @@ ugen_ioctl_post(struct usb_fifo *f, u_long cmd, void *addr, int fflags)
*u.pint = ugen_get_power_mode(f);
break;
+ case USB_GET_POWER_USAGE:
+ *u.pint = ugen_get_power_usage(f);
+ break;
+
case USB_SET_PORT_ENABLE:
error = ugen_do_port_feature(f,
*u.pint, 1, UHF_PORT_ENABLE);
diff --git a/freebsd/sys/dev/usb/usb_hid.c b/freebsd/sys/dev/usb/usb_hid.c
index 86fa27b0..737237bd 100644
--- a/freebsd/sys/dev/usb/usb_hid.c
+++ b/freebsd/sys/dev/usb/usb_hid.c
@@ -847,3 +847,79 @@ usbd_req_get_hid_desc(struct usb_device *udev, struct mtx *mtx,
}
return (USB_ERR_NORMAL_COMPLETION);
}
+
+/*------------------------------------------------------------------------*
+ * hid_is_mouse
+ *
+ * This function will decide if a USB descriptor belongs to a USB mouse.
+ *
+ * Return values:
+ * Zero: Not a USB mouse.
+ * Else: Is a USB mouse.
+ *------------------------------------------------------------------------*/
+int
+hid_is_mouse(const void *d_ptr, uint16_t d_len)
+{
+ struct hid_data *hd;
+ struct hid_item hi;
+ int mdepth;
+ int found;
+
+ hd = hid_start_parse(d_ptr, d_len, 1 << hid_input);
+ if (hd == NULL)
+ return (0);
+
+ mdepth = 0;
+ found = 0;
+
+ while (hid_get_item(hd, &hi)) {
+ switch (hi.kind) {
+ case hid_collection:
+ if (mdepth != 0)
+ mdepth++;
+ else if (hi.collection == 1 &&
+ hi.usage ==
+ HID_USAGE2(HUP_GENERIC_DESKTOP, HUG_MOUSE))
+ mdepth++;
+ break;
+ case hid_endcollection:
+ if (mdepth != 0)
+ mdepth--;
+ break;
+ case hid_input:
+ if (mdepth == 0)
+ break;
+ if (hi.usage ==
+ HID_USAGE2(HUP_GENERIC_DESKTOP, HUG_X) &&
+ (hi.flags & (HIO_CONST|HIO_RELATIVE)) == HIO_RELATIVE)
+ found++;
+ if (hi.usage ==
+ HID_USAGE2(HUP_GENERIC_DESKTOP, HUG_Y) &&
+ (hi.flags & (HIO_CONST|HIO_RELATIVE)) == HIO_RELATIVE)
+ found++;
+ break;
+ default:
+ break;
+ }
+ }
+ hid_end_parse(hd);
+ return (found);
+}
+
+/*------------------------------------------------------------------------*
+ * hid_is_keyboard
+ *
+ * This function will decide if a USB descriptor belongs to a USB keyboard.
+ *
+ * Return values:
+ * Zero: Not a USB keyboard.
+ * Else: Is a USB keyboard.
+ *------------------------------------------------------------------------*/
+int
+hid_is_keyboard(const void *d_ptr, uint16_t d_len)
+{
+ if (hid_is_collection(d_ptr, d_len,
+ HID_USAGE2(HUP_GENERIC_DESKTOP, HUG_KEYBOARD)))
+ return (1);
+ return (0);
+}
diff --git a/freebsd/sys/dev/usb/usb_hub.c b/freebsd/sys/dev/usb/usb_hub.c
index 97b3a4dd..2dee6784 100644
--- a/freebsd/sys/dev/usb/usb_hub.c
+++ b/freebsd/sys/dev/usb/usb_hub.c
@@ -78,7 +78,7 @@
#ifdef USB_DEBUG
static int uhub_debug = 0;
-SYSCTL_NODE(_hw_usb, OID_AUTO, uhub, CTLFLAG_RW, 0, "USB HUB");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, uhub, CTLFLAG_RW, 0, "USB HUB");
SYSCTL_INT(_hw_usb_uhub, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, &uhub_debug, 0,
"Debug level");
TUNABLE_INT("hw.usb.uhub.debug", &uhub_debug);
diff --git a/freebsd/sys/dev/usb/usb_ioctl.h b/freebsd/sys/dev/usb/usb_ioctl.h
index 9af6ee5c..9e66bd5a 100644
--- a/freebsd/sys/dev/usb/usb_ioctl.h
+++ b/freebsd/sys/dev/usb/usb_ioctl.h
@@ -270,7 +270,8 @@ struct usb_gen_quirk {
#define USB_IFACE_DRIVER_DETACH _IOW ('U', 125, int)
#define USB_GET_PLUGTIME _IOR ('U', 126, uint32_t)
#define USB_READ_DIR _IOW ('U', 127, struct usb_read_dir)
-/* 128 - 135 unused */
+/* 128 - 134 unused */
+#define USB_GET_POWER_USAGE _IOR ('U', 135, int)
#define USB_SET_TX_FORCE_SHORT _IOW ('U', 136, int)
#define USB_SET_TX_TIMEOUT _IOW ('U', 137, int)
#define USB_GET_TX_FRAME_SIZE _IOR ('U', 138, int)
diff --git a/freebsd/sys/dev/usb/usb_msctest.c b/freebsd/sys/dev/usb/usb_msctest.c
index 5e8eebb3..fcb9f026 100644
--- a/freebsd/sys/dev/usb/usb_msctest.c
+++ b/freebsd/sys/dev/usb/usb_msctest.c
@@ -64,7 +64,6 @@
#include <dev/usb/usb_transfer.h>
#include <dev/usb/usb_msctest.h>
#include <dev/usb/usb_debug.h>
-#include <dev/usb/usb_busdma.h>
#include <dev/usb/usb_device.h>
#include <dev/usb/usb_request.h>
#include <dev/usb/usb_util.h>
@@ -106,6 +105,8 @@ static uint8_t scsi_sync_cache[] = { 0x35, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00 };
static uint8_t scsi_request_sense[] = { 0x03, 0x00, 0x00, 0x00, 0x12, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static uint8_t scsi_read_capacity[] = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 };
#define BULK_SIZE 64 /* dummy */
#define ERR_CSW_FAILED -1
@@ -481,7 +482,7 @@ bbb_command_start(struct bbb_transfer *sc, uint8_t dir, uint8_t lun,
sc->cmd_len = cmd_len;
memset(&sc->cbw.CBWCDB, 0, sizeof(sc->cbw.CBWCDB));
memcpy(&sc->cbw.CBWCDB, cmd_ptr, cmd_len);
- DPRINTFN(1, "SCSI cmd = %*D\n", (int)cmd_len, &sc->cbw.CBWCDB, ":");
+ DPRINTFN(1, "SCSI cmd = %*D\n", (int)cmd_len, (char *)sc->cbw.CBWCDB, ":");
mtx_lock(&sc->mtx);
usbd_transfer_start(sc->xfer[sc->state]);
@@ -650,7 +651,7 @@ usb_msc_auto_quirk(struct usb_device *udev, uint8_t iface_index)
}
is_no_direct = 1;
- for (timeout = 4; timeout; timeout--) {
+ for (timeout = 4; timeout != 0; timeout--) {
err = bbb_command_start(sc, DIR_IN, 0, sc->buffer,
SCSI_INQ_LEN, &scsi_inquiry, sizeof(scsi_inquiry),
USB_MS_HZ);
@@ -660,8 +661,11 @@ usb_msc_auto_quirk(struct usb_device *udev, uint8_t iface_index)
if (sid_type == 0x00)
is_no_direct = 0;
break;
- } else if (err != ERR_CSW_FAILED)
- break; /* non retryable error */
+ } else if (err != ERR_CSW_FAILED) {
+ DPRINTF("Device is not responding "
+ "properly to SCSI INQUIRY command.\n");
+ goto error; /* non retryable error */
+ }
usb_pause_mtx(NULL, hz);
}
@@ -679,7 +683,9 @@ usb_msc_auto_quirk(struct usb_device *udev, uint8_t iface_index)
if (err != ERR_CSW_FAILED)
goto error;
}
+ timeout = 1;
+retry_sync_cache:
err = bbb_command_start(sc, DIR_IN, 0, NULL, 0,
&scsi_sync_cache, sizeof(scsi_sync_cache),
USB_MS_HZ);
@@ -692,6 +698,42 @@ usb_msc_auto_quirk(struct usb_device *udev, uint8_t iface_index)
DPRINTF("Device doesn't handle synchronize cache\n");
usbd_add_dynamic_quirk(udev, UQ_MSC_NO_SYNC_CACHE);
+
+ } else {
+
+ /*
+ * Certain Kingston memory sticks fail the first
+ * read capacity after a synchronize cache command
+ * has been issued. Disable the synchronize cache
+ * command for such devices.
+ */
+
+ err = bbb_command_start(sc, DIR_IN, 0, sc->buffer, 8,
+ &scsi_read_capacity, sizeof(scsi_read_capacity),
+ USB_MS_HZ);
+
+ if (err != 0) {
+ if (err != ERR_CSW_FAILED)
+ goto error;
+
+ err = bbb_command_start(sc, DIR_IN, 0, sc->buffer, 8,
+ &scsi_read_capacity, sizeof(scsi_read_capacity),
+ USB_MS_HZ);
+
+ if (err == 0) {
+ if (timeout--)
+ goto retry_sync_cache;
+
+ DPRINTF("Device most likely doesn't "
+ "handle synchronize cache\n");
+
+ usbd_add_dynamic_quirk(udev,
+ UQ_MSC_NO_SYNC_CACHE);
+ } else {
+ if (err != ERR_CSW_FAILED)
+ goto error;
+ }
+ }
}
/* clear sense status of any failed commands on the device */
diff --git a/freebsd/sys/dev/usb/usb_process.c b/freebsd/sys/dev/usb/usb_process.c
index fb422df4..59b26567 100644
--- a/freebsd/sys/dev/usb/usb_process.c
+++ b/freebsd/sys/dev/usb/usb_process.c
@@ -69,17 +69,13 @@ static int usb_pcount;
#define USB_THREAD_CREATE(f, s, p, ...) \
kproc_kthread_add((f), (s), &usbproc, (p), RFHIGHPID, \
0, "usb", __VA_ARGS__)
-#if (__FreeBSD_version >= 900000)
#define USB_THREAD_SUSPEND_CHECK() kthread_suspend_check()
-#else
-#define USB_THREAD_SUSPEND_CHECK() kthread_suspend_check(curthread)
-#endif
#define USB_THREAD_SUSPEND(p) kthread_suspend(p,0)
#define USB_THREAD_EXIT(err) kthread_exit()
#else
#define USB_THREAD_CREATE(f, s, p, ...) \
kthread_create((f), (s), (p), RFHIGHPID, 0, __VA_ARGS__)
-#define USB_THREAD_SUSPEND_CHECK() kthread_suspend_check(curproc)
+#define USB_THREAD_SUSPEND_CHECK() kthread_suspend_check()
#define USB_THREAD_SUSPEND(p) kthread_suspend(p,0)
#define USB_THREAD_EXIT(err) kthread_exit(err)
#endif
@@ -87,7 +83,7 @@ static int usb_pcount;
#ifdef USB_DEBUG
static int usb_proc_debug;
-SYSCTL_NODE(_hw_usb, OID_AUTO, proc, CTLFLAG_RW, 0, "USB process");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, proc, CTLFLAG_RW, 0, "USB process");
SYSCTL_INT(_hw_usb_proc, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, &usb_proc_debug, 0,
"Debug level");
TUNABLE_INT("hw.usb.proc.debug", &usb_proc_debug);
diff --git a/freebsd/sys/dev/usb/usb_process.h b/freebsd/sys/dev/usb/usb_process.h
index 23cf6607..9b1a8534 100644
--- a/freebsd/sys/dev/usb/usb_process.h
+++ b/freebsd/sys/dev/usb/usb_process.h
@@ -42,6 +42,7 @@
/* structure prototypes */
struct usb_proc_msg;
+struct usb_device;
/*
* The following structure defines the USB process.
@@ -79,4 +80,9 @@ void usb_proc_free(struct usb_process *up);
void *usb_proc_msignal(struct usb_process *up, void *pm0, void *pm1);
void usb_proc_rewakeup(struct usb_process *up);
+void usb_proc_explore_mwait(struct usb_device *, void *, void *);
+void *usb_proc_explore_msignal(struct usb_device *, void *, void *);
+void usb_proc_explore_lock(struct usb_device *);
+void usb_proc_explore_unlock(struct usb_device *);
+
#endif /* _USB_PROCESS_H_ */
diff --git a/freebsd/sys/dev/usb/usb_request.c b/freebsd/sys/dev/usb/usb_request.c
index 2897cadb..167a7228 100644
--- a/freebsd/sys/dev/usb/usb_request.c
+++ b/freebsd/sys/dev/usb/usb_request.c
@@ -2158,3 +2158,57 @@ usbd_req_set_port_link_state(struct usb_device *udev, struct mtx *mtx,
USETW(req.wLength, 0);
return (usbd_do_request(udev, mtx, &req, 0));
}
+
+/*------------------------------------------------------------------------*
+ * usbd_req_set_lpm_info
+ *
+ * USB 2.0 specific request for Link Power Management.
+ *
+ * Returns:
+ * 0: Success
+ * USB_ERR_PENDING_REQUESTS: NYET
+ * USB_ERR_TIMEOUT: TIMEOUT
+ * USB_ERR_STALL: STALL
+ * Else: Failure
+ *------------------------------------------------------------------------*/
+usb_error_t
+usbd_req_set_lpm_info(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port, uint8_t besl, uint8_t addr, uint8_t rwe)
+{
+ struct usb_device_request req;
+ usb_error_t err;
+ uint8_t buf[1];
+
+ req.bmRequestType = UT_WRITE_CLASS_OTHER;
+ req.bRequest = UR_SET_AND_TEST;
+ USETW(req.wValue, UHF_PORT_L1);
+ req.wIndex[0] = (port & 0xF) | ((besl & 0xF) << 4);
+ req.wIndex[1] = (addr & 0x7F) | (rwe ? 0x80 : 0x00);
+ USETW(req.wLength, sizeof(buf));
+
+ /* set default value in case of short transfer */
+ buf[0] = 0x00;
+
+ err = usbd_do_request(udev, mtx, &req, buf);
+ if (err)
+ return (err);
+
+ switch (buf[0]) {
+ case 0x00: /* SUCCESS */
+ break;
+ case 0x10: /* NYET */
+ err = USB_ERR_PENDING_REQUESTS;
+ break;
+ case 0x11: /* TIMEOUT */
+ err = USB_ERR_TIMEOUT;
+ break;
+ case 0x30: /* STALL */
+ err = USB_ERR_STALLED;
+ break;
+ default: /* reserved */
+ err = USB_ERR_IOERROR;
+ break;
+ }
+ return (err);
+}
+
diff --git a/freebsd/sys/dev/usb/usb_request.h b/freebsd/sys/dev/usb/usb_request.h
index 74823af2..5fcedd5e 100644
--- a/freebsd/sys/dev/usb/usb_request.h
+++ b/freebsd/sys/dev/usb/usb_request.h
@@ -91,5 +91,7 @@ usb_error_t usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx,
uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint);
usb_error_t usbd_req_set_port_link_state(struct usb_device *udev,
struct mtx *mtx, uint8_t port, uint8_t link_state);
+usb_error_t usbd_req_set_lpm_info(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port, uint8_t besl, uint8_t addr, uint8_t rwe);
#endif /* _USB_REQUEST_H_ */
diff --git a/freebsd/sys/dev/usb/usb_transfer.c b/freebsd/sys/dev/usb/usb_transfer.c
index d3d41709..b2528186 100644
--- a/freebsd/sys/dev/usb/usb_transfer.c
+++ b/freebsd/sys/dev/usb/usb_transfer.c
@@ -2206,7 +2206,7 @@ usbd_callback_wrapper(struct usb_xfer_queue *pq)
struct usb_xfer_root *info = xfer->xroot;
USB_BUS_LOCK_ASSERT(info->bus, MA_OWNED);
- if (!mtx_owned(info->xfer_mtx)) {
+ if (!mtx_owned(info->xfer_mtx) && !SCHEDULER_STOPPED()) {
/*
* Cases that end up here:
*
@@ -3179,14 +3179,14 @@ usbd_transfer_poll(struct usb_xfer **ppxfer, uint16_t max)
/* make sure that the BUS mutex is not locked */
drop_bus = 0;
- while (mtx_owned(&xroot->udev->bus->bus_mtx)) {
+ while (mtx_owned(&xroot->udev->bus->bus_mtx) && !SCHEDULER_STOPPED()) {
mtx_unlock(&xroot->udev->bus->bus_mtx);
drop_bus++;
}
/* make sure that the transfer mutex is not locked */
drop_xfer = 0;
- while (mtx_owned(xroot->xfer_mtx)) {
+ while (mtx_owned(xroot->xfer_mtx) && !SCHEDULER_STOPPED()) {
mtx_unlock(xroot->xfer_mtx);
drop_xfer++;
}
diff --git a/freebsd/sys/dev/usb/usb_util.c b/freebsd/sys/dev/usb/usb_util.c
index 5c56da87..24558b84 100644
--- a/freebsd/sys/dev/usb/usb_util.c
+++ b/freebsd/sys/dev/usb/usb_util.c
@@ -60,31 +60,6 @@
#include <dev/usb/usb_bus.h>
/*------------------------------------------------------------------------*
- * device_delete_all_children - delete all children of a device
- *------------------------------------------------------------------------*/
-#ifndef device_delete_all_children
-int
-device_delete_all_children(device_t dev)
-{
- device_t *devlist;
- int devcount;
- int error;
-
- error = device_get_children(dev, &devlist, &devcount);
- if (error == 0) {
- while (devcount-- > 0) {
- error = device_delete_child(dev, devlist[devcount]);
- if (error) {
- break;
- }
- }
- free(devlist, M_TEMP);
- }
- return (error);
-}
-#endif
-
-/*------------------------------------------------------------------------*
* device_set_usb_desc
*
* This function can be called at probe or attach to set the USB
@@ -150,33 +125,21 @@ device_set_usb_desc(device_t dev)
*
* This function will delay the code by the passed number of system
* ticks. The passed mutex "mtx" will be dropped while waiting, if
- * "mtx" is not NULL.
+ * "mtx" is different from NULL.
*------------------------------------------------------------------------*/
void
-usb_pause_mtx(struct mtx *mtx, int _ticks)
+usb_pause_mtx(struct mtx *mtx, int timo)
{
if (mtx != NULL)
mtx_unlock(mtx);
- if (cold) {
- /* convert to milliseconds */
- _ticks = (_ticks * 1000) / hz;
- /* convert to microseconds, rounded up */
- _ticks = (_ticks + 1) * 1000;
- DELAY(_ticks);
-
- } else {
+ /*
+ * Add one tick to the timeout so that we don't return too
+ * early! Note that pause() will assert that the passed
+ * timeout is positive and non-zero!
+ */
+ pause("USBWAIT", timo + 1);
- /*
- * Add one to the number of ticks so that we don't return
- * too early!
- */
- _ticks++;
-
- if (pause("USBWAIT", _ticks)) {
- /* ignore */
- }
- }
if (mtx != NULL)
mtx_lock(mtx);
}
diff --git a/freebsd/sys/dev/usb/usb_util.h b/freebsd/sys/dev/usb/usb_util.h
index 35abeddd..7e52404f 100644
--- a/freebsd/sys/dev/usb/usb_util.h
+++ b/freebsd/sys/dev/usb/usb_util.h
@@ -27,7 +27,6 @@
#ifndef _USB_UTIL_H_
#define _USB_UTIL_H_
-int device_delete_all_children(device_t dev);
uint8_t usb_make_str_desc(void *ptr, uint16_t max_len, const char *s);
void usb_printbcd(char *p, uint16_t p_len, uint16_t bcd);
void usb_trim_spaces(char *p);
diff --git a/freebsd/sys/dev/usb/usbhid.h b/freebsd/sys/dev/usb/usbhid.h
index f40232aa..f6c447ca 100644
--- a/freebsd/sys/dev/usb/usbhid.h
+++ b/freebsd/sys/dev/usb/usbhid.h
@@ -242,5 +242,7 @@ struct usb_hid_descriptor *hid_get_descriptor_from_usb(
usb_error_t usbd_req_get_hid_desc(struct usb_device *udev, struct mtx *mtx,
void **descp, uint16_t *sizep, struct malloc_type *mem,
uint8_t iface_index);
+int hid_is_mouse(const void *d_ptr, uint16_t d_len);
+int hid_is_keyboard(const void *d_ptr, uint16_t d_len);
#endif /* _KERNEL */
#endif /* _USB_HID_H_ */
diff --git a/freebsd/sys/fs/devfs/devfs_int.h b/freebsd/sys/fs/devfs/devfs_int.h
index aa06e374..429a7e3f 100644
--- a/freebsd/sys/fs/devfs/devfs_int.h
+++ b/freebsd/sys/fs/devfs/devfs_int.h
@@ -38,6 +38,7 @@
#ifdef _KERNEL
struct devfs_dirent;
+struct devfs_mount;
struct cdev_privdata {
struct file *cdpd_fp;
@@ -72,11 +73,17 @@ struct cdev_priv {
#define cdev2priv(c) member2struct(cdev_priv, cdp_c, c)
-struct cdev *devfs_alloc(int);
-void devfs_free(struct cdev *);
-void devfs_create(struct cdev *dev);
-void devfs_destroy(struct cdev *dev);
-void devfs_destroy_cdevpriv(struct cdev_privdata *p);
+struct cdev *devfs_alloc(int);
+int devfs_dev_exists(const char *);
+void devfs_free(struct cdev *);
+void devfs_create(struct cdev *);
+void devfs_destroy(struct cdev *);
+void devfs_destroy_cdevpriv(struct cdev_privdata *);
+
+int devfs_dir_find(const char *);
+void devfs_dir_ref_de(struct devfs_mount *, struct devfs_dirent *);
+void devfs_dir_unref_de(struct devfs_mount *, struct devfs_dirent *);
+int devfs_pathpath(const char *, const char *);
extern struct unrhdr *devfs_inos;
extern struct mtx devmtx;
diff --git a/freebsd/sys/h8300/h8300/legacy.c b/freebsd/sys/h8300/h8300/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/h8300/h8300/legacy.c
+++ b/freebsd/sys/h8300/h8300/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/h8300/include/machine/in_cksum.h b/freebsd/sys/h8300/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/h8300/include/machine/in_cksum.h
+++ b/freebsd/sys/h8300/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/h8300/include/machine/pci_cfgreg.h b/freebsd/sys/h8300/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/h8300/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/h8300/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/h8300/pci/pci_bus.c b/freebsd/sys/h8300/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/h8300/pci/pci_bus.c
+++ b/freebsd/sys/h8300/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/i386/i386/legacy.c b/freebsd/sys/i386/i386/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/i386/i386/legacy.c
+++ b/freebsd/sys/i386/i386/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/i386/include/machine/cpufunc.h b/freebsd/sys/i386/include/machine/cpufunc.h
index 50314ecf..b41e8fea 100644
--- a/freebsd/sys/i386/include/machine/cpufunc.h
+++ b/freebsd/sys/i386/include/machine/cpufunc.h
@@ -55,13 +55,13 @@ extern u_int read_eflags(void);
struct region_descriptor;
-#define readb(va) (*(volatile u_int8_t *) (va))
-#define readw(va) (*(volatile u_int16_t *) (va))
-#define readl(va) (*(volatile u_int32_t *) (va))
+#define readb(va) (*(volatile uint8_t *) (va))
+#define readw(va) (*(volatile uint16_t *) (va))
+#define readl(va) (*(volatile uint32_t *) (va))
-#define writeb(va, d) (*(volatile u_int8_t *) (va) = (d))
-#define writew(va, d) (*(volatile u_int16_t *) (va) = (d))
-#define writel(va, d) (*(volatile u_int32_t *) (va) = (d))
+#define writeb(va, d) (*(volatile uint8_t *) (va) = (d))
+#define writew(va, d) (*(volatile uint16_t *) (va) = (d))
+#define writel(va, d) (*(volatile uint32_t *) (va) = (d))
#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE)
@@ -99,6 +99,13 @@ clflush(u_long addr)
}
static __inline void
+clts(void)
+{
+
+ __asm __volatile("clts");
+}
+
+static __inline void
disable_intr(void)
{
#ifdef XEN
@@ -135,16 +142,18 @@ enable_intr(void)
}
static __inline void
-cpu_monitor(const void *addr, int extensions, int hints)
+cpu_monitor(const void *addr, u_long extensions, u_int hints)
{
- __asm __volatile("monitor;"
- : :"a" (addr), "c" (extensions), "d"(hints));
+
+ __asm __volatile("monitor"
+ : : "a" (addr), "c" (extensions), "d" (hints));
}
static __inline void
-cpu_mwait(int extensions, int hints)
+cpu_mwait(u_long extensions, u_int hints)
{
- __asm __volatile("mwait;" : :"a" (hints), "c" (extensions));
+
+ __asm __volatile("mwait" : : "a" (hints), "c" (extensions));
}
static __inline void
@@ -200,7 +209,7 @@ inb(u_int port)
{
u_char data;
- __asm volatile("inb %w1, %0" : "=a" (data) : "Nd" (port));
+ __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
@@ -209,33 +218,33 @@ inl(u_int port)
{
u_int data;
- __asm volatile("inl %w1, %0" : "=a" (data) : "Nd" (port));
+ __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
static __inline void
-insb(u_int port, void *addr, size_t cnt)
+insb(u_int port, void *addr, size_t count)
{
__asm __volatile("cld; rep; insb"
- : "+D" (addr), "+c" (cnt)
+ : "+D" (addr), "+c" (count)
: "d" (port)
: "memory");
}
static __inline void
-insw(u_int port, void *addr, size_t cnt)
+insw(u_int port, void *addr, size_t count)
{
__asm __volatile("cld; rep; insw"
- : "+D" (addr), "+c" (cnt)
+ : "+D" (addr), "+c" (count)
: "d" (port)
: "memory");
}
static __inline void
-insl(u_int port, void *addr, size_t cnt)
+insl(u_int port, void *addr, size_t count)
{
__asm __volatile("cld; rep; insl"
- : "+D" (addr), "+c" (cnt)
+ : "+D" (addr), "+c" (count)
: "d" (port)
: "memory");
}
@@ -251,7 +260,7 @@ inw(u_int port)
{
u_short data;
- __asm volatile("inw %w1, %0" : "=a" (data) : "Nd" (port));
+ __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port));
return (data);
}
@@ -264,37 +273,37 @@ outb(u_int port, u_char data)
static __inline void
outl(u_int port, u_int data)
{
- __asm volatile("outl %0, %w1" : : "a" (data), "Nd" (port));
+ __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port));
}
static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
+outsb(u_int port, const void *addr, size_t count)
{
__asm __volatile("cld; rep; outsb"
- : "+S" (addr), "+c" (cnt)
+ : "+S" (addr), "+c" (count)
: "d" (port));
}
static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
+outsw(u_int port, const void *addr, size_t count)
{
__asm __volatile("cld; rep; outsw"
- : "+S" (addr), "+c" (cnt)
+ : "+S" (addr), "+c" (count)
: "d" (port));
}
static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
+outsl(u_int port, const void *addr, size_t count)
{
__asm __volatile("cld; rep; outsl"
- : "+S" (addr), "+c" (cnt)
+ : "+S" (addr), "+c" (count)
: "d" (port));
}
static __inline void
outw(u_int port, u_short data)
{
- __asm volatile("outw %0, %w1" : : "a" (data), "Nd" (port));
+ __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port));
}
static __inline void
@@ -343,6 +352,15 @@ rdtsc(void)
return (rv);
}
+static __inline uint32_t
+rdtsc32(void)
+{
+ uint32_t rv;
+
+ __asm __volatile("rdtsc" : "=a" (rv) : : "edx");
+ return (rv);
+}
+
static __inline void
wbinvd(void)
{
@@ -455,11 +473,11 @@ invlpg(u_int addr)
#endif
}
-static __inline u_int
+static __inline u_short
rfs(void)
{
- u_int sel;
- __asm __volatile("mov %%fs,%0" : "=rm" (sel));
+ u_short sel;
+ __asm __volatile("movw %%fs,%0" : "=rm" (sel));
return (sel);
}
@@ -471,11 +489,11 @@ rgdt(void)
return (gdtr);
}
-static __inline u_int
+static __inline u_short
rgs(void)
{
- u_int sel;
- __asm __volatile("mov %%gs,%0" : "=rm" (sel));
+ u_short sel;
+ __asm __volatile("movw %%gs,%0" : "=rm" (sel));
return (sel);
}
@@ -495,11 +513,11 @@ rldt(void)
return (ldtr);
}
-static __inline u_int
+static __inline u_short
rss(void)
{
- u_int sel;
- __asm __volatile("mov %%ss,%0" : "=rm" (sel));
+ u_short sel;
+ __asm __volatile("movw %%ss,%0" : "=rm" (sel));
return (sel);
}
@@ -512,15 +530,15 @@ rtr(void)
}
static __inline void
-load_fs(u_int sel)
+load_fs(u_short sel)
{
- __asm __volatile("mov %0,%%fs" : : "rm" (sel));
+ __asm __volatile("movw %0,%%fs" : : "rm" (sel));
}
static __inline void
-load_gs(u_int sel)
+load_gs(u_short sel)
{
- __asm __volatile("mov %0,%%gs" : : "rm" (sel));
+ __asm __volatile("movw %0,%%gs" : : "rm" (sel));
}
static __inline void
@@ -690,6 +708,9 @@ int breakpoint(void);
#endif
u_int bsfl(u_int mask);
u_int bsrl(u_int mask);
+void clflush(u_long addr);
+void clts(void);
+void cpuid_count(u_int ax, u_int cx, u_int *p);
void disable_intr(void);
void do_cpuid(u_int ax, u_int *p);
void enable_intr(void);
@@ -697,9 +718,9 @@ void halt(void);
void ia32_pause(void);
u_char inb(u_int port);
u_int inl(u_int port);
-void insb(u_int port, void *addr, size_t cnt);
-void insl(u_int port, void *addr, size_t cnt);
-void insw(u_int port, void *addr, size_t cnt);
+void insb(u_int port, void *addr, size_t count);
+void insl(u_int port, void *addr, size_t count);
+void insw(u_int port, void *addr, size_t count);
register_t intr_disable(void);
void intr_restore(register_t ef);
void invd(void);
@@ -719,14 +740,14 @@ void load_dr4(u_int dr4);
void load_dr5(u_int dr5);
void load_dr6(u_int dr6);
void load_dr7(u_int dr7);
-void load_fs(u_int sel);
-void load_gs(u_int sel);
+void load_fs(u_short sel);
+void load_gs(u_short sel);
void ltr(u_short sel);
void outb(u_int port, u_char data);
void outl(u_int port, u_int data);
-void outsb(u_int port, const void *addr, size_t cnt);
-void outsl(u_int port, const void *addr, size_t cnt);
-void outsw(u_int port, const void *addr, size_t cnt);
+void outsb(u_int port, const void *addr, size_t count);
+void outsl(u_int port, const void *addr, size_t count);
+void outsw(u_int port, const void *addr, size_t count);
void outw(u_int port, u_short data);
u_int rcr0(void);
u_int rcr2(void);
diff --git a/freebsd/sys/i386/include/machine/in_cksum.h b/freebsd/sys/i386/include/machine/in_cksum.h
index c692f69a..34d85be2 100644
--- a/freebsd/sys/i386/include/machine/in_cksum.h
+++ b/freebsd/sys/i386/include/machine/in_cksum.h
@@ -54,6 +54,7 @@
* therefore always exactly five 32-bit words.
*/
#if defined(__GNUCLIKE_ASM) && !defined(__INTEL_COMPILER)
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline u_int
in_cksum_hdr(const struct ip *ip)
{
@@ -88,6 +89,7 @@ in_cksum_update(struct ip *ip)
__tmpsum = (int)ntohs(ip->ip_sum) + 256;
ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16));
}
+#endif
static __inline u_short
in_addword(u_short sum, u_short b)
@@ -121,6 +123,7 @@ in_pseudo(u_int sum, u_int b, u_int c)
}
#else
+#if defined(IPVERSION) && (IPVERSION == 4)
#define in_cksum_update(ip) \
do { \
int __tmpsum; \
@@ -129,10 +132,13 @@ in_pseudo(u_int sum, u_int b, u_int c)
} while(0)
#endif
+#endif
#ifdef _KERNEL
#if !defined(__GNUCLIKE_ASM) || defined(__INTEL_COMPILER)
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
#endif
diff --git a/freebsd/sys/i386/include/machine/intr_machdep.h b/freebsd/sys/i386/include/machine/intr_machdep.h
index 535fcf8f..123966eb 100644
--- a/freebsd/sys/i386/include/machine/intr_machdep.h
+++ b/freebsd/sys/i386/include/machine/intr_machdep.h
@@ -123,14 +123,15 @@ struct trapframe;
extern struct mtx icu_lock;
extern int elcr_found;
+#ifndef DEV_ATPIC
+void atpic_reset(void);
+#endif
/* XXX: The elcr_* prototypes probably belong somewhere else. */
int elcr_probe(void);
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
-#ifdef SMP
void intr_add_cpu(u_int cpu);
-#endif
int intr_add_handler(const char *name, int vector, driver_filter_t filter,
driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
#ifdef SMP
diff --git a/freebsd/sys/i386/include/machine/md_var.h b/freebsd/sys/i386/include/machine/md_var.h
index 168ee629..3563e6ce 100644
--- a/freebsd/sys/i386/include/machine/md_var.h
+++ b/freebsd/sys/i386/include/machine/md_var.h
@@ -91,6 +91,7 @@ void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs));
void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
+void initializecpu(void);
void enable_sse(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
void i686_pagezero(void *addr);
diff --git a/freebsd/sys/i386/include/machine/pci_cfgreg.h b/freebsd/sys/i386/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/i386/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/i386/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/i386/include/machine/specialreg.h b/freebsd/sys/i386/include/machine/specialreg.h
index 1dfbbf4e..3ca5e7ae 100644
--- a/freebsd/sys/i386/include/machine/specialreg.h
+++ b/freebsd/sys/i386/include/machine/specialreg.h
@@ -209,6 +209,12 @@
#define CPUID_HTT_CORES 0x00ff0000
#define CPUID_LOCAL_APIC_ID 0xff000000
+/*
+ * CPUID instruction 6 ecx info
+ */
+#define CPUID_PERF_STAT 0x00000001
+#define CPUID_PERF_BIAS 0x00000008
+
/*
* CPUID instruction 0xb ebx info.
*/
@@ -217,6 +223,11 @@
#define CPUID_TYPE_CORE 2
/*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define CPUID_EXTSTATE_XSAVEOPT 0x00000001
+
+/*
* AMD extended function 8000_0007h edx info
*/
#define AMDPM_TS 0x00000001
diff --git a/freebsd/sys/i386/pci/pci_bus.c b/freebsd/sys/i386/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/i386/pci/pci_bus.c
+++ b/freebsd/sys/i386/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index 23170bb6..e694f479 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -57,11 +57,13 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/ktr.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/loginclass.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
@@ -103,9 +105,11 @@ struct vmspace vmspace0;
struct proc *initproc;
int boothowto = 0; /* initialized so that it can be patched */
-SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0,
+ "Boot control flags, passed from loader");
int bootverbose;
-SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
+ "Control the output of verbose kernel messages");
/*
* This ensures that there is at least one entry so that the sysinit_set
@@ -160,6 +164,24 @@ sysinit_add(struct sysinit **set, struct sysinit **set_end)
}
#endif /* __rtems__ */
+#if defined (DDB) && defined(VERBOSE_SYSINIT)
+static const char *
+symbol_name(vm_offset_t va, db_strategy_t strategy)
+{
+ const char *name;
+ c_db_sym_t sym;
+ db_expr_t offset;
+
+ if (va == 0)
+ return (NULL);
+ sym = db_search_symbol(va, strategy, &offset);
+ if (offset != 0)
+ return (NULL);
+ db_symbol_values(sym, &name, NULL);
+ return (name);
+}
+#endif
+
/*
* System startup; initialize the world, create process 0, mount root
* filesystem, and fork to create init and pagedaemon. Most of the
@@ -248,15 +270,16 @@ restart:
}
if (verbose) {
#if defined(DDB)
- const char *name;
- c_db_sym_t sym;
- db_expr_t offset;
-
- sym = db_search_symbol((vm_offset_t)(*sipp)->func,
- DB_STGY_PROC, &offset);
- db_symbol_values(sym, &name, NULL);
- if (name != NULL)
- printf(" %s(%p)... ", name, (*sipp)->udata);
+ const char *func, *data;
+
+ func = symbol_name((vm_offset_t)(*sipp)->func,
+ DB_STGY_PROC);
+ data = symbol_name((vm_offset_t)(*sipp)->udata,
+ DB_STGY_ANY);
+ if (func != NULL && data != NULL)
+ printf(" %s(&%s)... ", func, data);
+ else if (func != NULL)
+ printf(" %s(%p)... ", func, (*sipp)->udata);
else
#endif
printf(" %p(%p)... ", (*sipp)->func,
@@ -418,8 +441,9 @@ proc0_init(void *dummy __unused)
{
#ifndef __rtems__
struct proc *p;
- unsigned i;
struct thread *td;
+ vm_paddr_t pageablemem;
+ int i;
GIANT_REQUIRED;
p = &proc0;
@@ -474,11 +498,14 @@ proc0_init(void *dummy __unused)
knlist_init_mtx(&p->p_klist, &p->p_mtx);
STAILQ_INIT(&p->p_ktr);
p->p_nice = NZERO;
+ /* pid_max cannot be greater than PID_MAX */
td->td_tid = PID_MAX + 1;
+ LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
td->td_state = TDS_RUNNING;
td->td_pri_class = PRI_TIMESHARE;
td->td_user_pri = PUSER;
td->td_base_user_pri = PUSER;
+ td->td_lend_user_pri = PRI_MAX;
td->td_priority = PVM;
td->td_base_pri = PVM;
td->td_oncpu = 0;
@@ -492,7 +519,7 @@ proc0_init(void *dummy __unused)
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
strncpy(td->td_name, "swapper", sizeof (td->td_name));
- callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
+ callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
@@ -502,6 +529,7 @@ proc0_init(void *dummy __unused)
p->p_ucred->cr_uidinfo = uifind(0);
p->p_ucred->cr_ruidinfo = uifind(0);
p->p_ucred->cr_prison = &prison0;
+ p->p_ucred->cr_loginclass = loginclass_find("default");
#ifdef AUDIT
audit_cred_kproc0(p->p_ucred);
#endif
@@ -529,12 +557,21 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
- i = ptoa(cnt.v_free_count);
- p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
- p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
- p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
+ p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
+ p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
+ p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
+ p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
+ /* Cast to avoid overflow on i386/PAE. */
+ pageablemem = ptoa((vm_paddr_t)cnt.v_free_count);
+ p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
+ p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
+ p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
+ p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
+ /* Initialize resource accounting structures. */
+ racct_create(&p->p_racct);
+
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */
@@ -546,14 +583,12 @@ proc0_init(void *dummy __unused)
* proc0 is not expected to enter usermode, so there is no special
* handling for sv_minuser here, like is done for exec_new_vmspace().
*/
- vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
- p->p_sysent->sv_maxuser);
- vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
-
- /*-
- * call the init and ctor for the new thread and proc
- * we wait to do this until all other structures
- * are fairly sane.
+ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0),
+ p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser);
+
+ /*
+ * Call the init and ctor for the new thread and proc. We wait
+ * to do this until all other structures are fairly sane.
*/
EVENTHANDLER_INVOKE(process_init, p);
EVENTHANDLER_INVOKE(thread_init, td);
@@ -564,6 +599,9 @@ proc0_init(void *dummy __unused)
* Charge root for one process.
*/
(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
+ PROC_LOCK(p);
+ racct_add_force(p, RACCT_NPROC, 1);
+ PROC_UNLOCK(p);
#endif /* __rtems__ */
}
SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
@@ -661,7 +699,8 @@ SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
#endif
static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
- CTLFLAG_RW, &init_shutdown_timeout, 0, "");
+ CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). "
+ "Unused within kernel, but used to control init(8)");
/*
* Start the initial user process; try exec'ing each pathname in init_path.
@@ -770,7 +809,7 @@ start_init(void *dummy)
* Otherwise, return via fork_trampoline() all the way
* to user mode as init!
*/
- if ((error = execve(td, &args)) == 0) {
+ if ((error = sys_execve(td, &args)) == 0) {
mtx_unlock(&Giant);
return;
}
@@ -795,7 +834,8 @@ create_init(const void *udata __unused)
struct ucred *newcred, *oldcred;
int error;
- error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
+ error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc,
+ NULL, 0);
if (error)
panic("cannot fork init: %d\n", error);
KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 449bc991..69c47246 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/capability.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
@@ -131,6 +132,8 @@ static struct fileops kqueueops = {
.fo_kqfilter = kqueue_kqfilter,
.fo_stat = kqueue_stat,
.fo_close = kqueue_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
};
#else /* __rtems__ */
static const rtems_filesystem_file_handlers_r kqueueops;
@@ -162,17 +165,30 @@ static int filt_user(struct knote *kn, long hint);
static void filt_usertouch(struct knote *kn, struct kevent *kev,
u_long type);
-static struct filterops file_filtops =
- { 1, filt_fileattach, NULL, NULL };
-static struct filterops kqread_filtops =
- { 1, NULL, filt_kqdetach, filt_kqueue };
+static struct filterops file_filtops = {
+ .f_isfd = 1,
+ .f_attach = filt_fileattach,
+};
+static struct filterops kqread_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_kqdetach,
+ .f_event = filt_kqueue,
+};
/* XXX - move to kern_proc.c? */
#ifndef __rtems__
-static struct filterops proc_filtops =
- { 0, filt_procattach, filt_procdetach, filt_proc };
+static struct filterops proc_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_procattach,
+ .f_detach = filt_procdetach,
+ .f_event = filt_proc,
+};
#endif /* __rtems__ */
-static struct filterops timer_filtops =
- { 0, filt_timerattach, filt_timerdetach, filt_timer };
+static struct filterops timer_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_timerattach,
+ .f_detach = filt_timerdetach,
+ .f_event = filt_timer,
+};
static struct filterops user_filtops = {
.f_attach = filt_userattach,
.f_detach = filt_userdetach,
@@ -256,8 +272,10 @@ filt_nullattach(struct knote *kn)
return (ENXIO);
};
-struct filterops null_filtops =
- { 0, filt_nullattach, NULL, NULL };
+struct filterops null_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_nullattach,
+};
/* XXX - make SYSINIT to add these, and move into respective modules. */
extern struct filterops sig_filtops;
@@ -703,13 +721,11 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type)
}
}
-#ifndef __rtems__
-int
-kqueue(struct thread *td, struct kqueue_args *uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_kqueue(struct thread *td, struct kqueue_args *uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
struct filedesc *fdp;
struct kqueue *kq;
@@ -721,7 +737,7 @@ rtems_bsd_kqueue(struct thread *td, struct kqueue_args *uap)
#else /* __rtems__ */
(void) fdp;
#endif /* __rtems__ */
- error = falloc(td, &fp, &fd);
+ error = falloc(td, &fp, &fd, 0);
if (error)
goto done2;
@@ -761,7 +777,7 @@ kqueue(void)
int error;
if (td != NULL) {
- error = rtems_bsd_kqueue(td, &ua);
+ error = sys_kqueue(td, &ua);
} else {
error = ENOMEM;
}
@@ -784,17 +800,15 @@ struct kevent_args {
const struct timespec *timeout;
};
#endif
-#ifndef __rtems__
-int
-kevent(struct thread *td, struct kevent_args *uap)
-#else /* __rtems__ */
+#ifdef __rtems__
static int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct
kevent_copyops *k_ops, const struct timespec *timeout);
-static int
-rtems_bsd_kevent(struct thread *td, struct kevent_args *uap)
+static
#endif /* __rtems__ */
+int
+sys_kevent(struct thread *td, struct kevent_args *uap)
{
struct timespec ts, *tsp;
struct kevent_copyops k_ops = { uap,
@@ -864,7 +878,7 @@ kevent(int kq, const struct kevent *changelist, int nchanges,
int error;
if (td != NULL) {
- error = rtems_bsd_kevent(td, &ua);
+ error = sys_kevent(td, &ua);
} else {
error = ENOMEM;
}
@@ -923,7 +937,7 @@ kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
struct file *fp;
int i, n, nerrors, error;
- if ((error = fget(td, fd, &fp)) != 0)
+ if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0)
return (error);
if ((error = kqueue_acquire(fp, &kq)) != 0)
goto done_norel;
@@ -1079,7 +1093,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
findkn:
if (fops->f_isfd) {
KASSERT(td != NULL, ("td is NULL"));
- error = fget(td, kev->ident, &fp);
+ error = fget(td, kev->ident, CAP_POLL_EVENT, &fp);
if (error)
goto done;
@@ -1348,7 +1362,7 @@ kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
size = kq->kq_knlistsize;
while (size <= fd)
size += KQEXTENT;
- list = malloc(size * sizeof list, M_KQUEUE, mflag);
+ list = malloc(size * sizeof(*list), M_KQUEUE, mflag);
if (list == NULL)
return ENOMEM;
KQ_LOCK(kq);
@@ -1358,13 +1372,13 @@ kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident,
} else {
if (kq->kq_knlist != NULL) {
bcopy(kq->kq_knlist, list,
- kq->kq_knlistsize * sizeof list);
+ kq->kq_knlistsize * sizeof(*list));
to_free = kq->kq_knlist;
kq->kq_knlist = NULL;
}
bzero((caddr_t)list +
- kq->kq_knlistsize * sizeof list,
- (size - kq->kq_knlistsize) * sizeof list);
+ kq->kq_knlistsize * sizeof(*list),
+ (size - kq->kq_knlistsize) * sizeof(*list));
kq->kq_knlistsize = size;
kq->kq_knlist = list;
}
@@ -2373,7 +2387,7 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok)
struct file *fp;
int error;
- if ((error = fget(td, fd, &fp)) != 0)
+ if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0)
return (error);
if ((error = kqueue_acquire(fp, &kq)) != 0)
goto noacquire;
diff --git a/freebsd/sys/kern/kern_hhook.c b/freebsd/sys/kern/kern_hhook.c
index f6c9e73e..21239b24 100644
--- a/freebsd/sys/kern/kern_hhook.c
+++ b/freebsd/sys/kern/kern_hhook.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010,2013 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
@@ -63,15 +63,20 @@ struct hhook {
static MALLOC_DEFINE(M_HHOOK, "hhook", "Helper hooks are linked off hhook_head lists");
LIST_HEAD(hhookheadhead, hhook_head);
-VNET_DEFINE(struct hhookheadhead, hhook_head_list);
-#define V_hhook_head_list VNET(hhook_head_list)
+struct hhookheadhead hhook_head_list;
+VNET_DEFINE(struct hhookheadhead, hhook_vhead_list);
+#define V_hhook_vhead_list VNET(hhook_vhead_list)
static struct mtx hhook_head_list_lock;
MTX_SYSINIT(hhookheadlistlock, &hhook_head_list_lock, "hhook_head list lock",
MTX_DEF);
+/* Protected by hhook_head_list_lock. */
+static uint32_t n_hhookheads;
+
/* Private function prototypes. */
static void hhook_head_destroy(struct hhook_head *hhh);
+void khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags);
#define HHHLIST_LOCK() mtx_lock(&hhook_head_list_lock)
#define HHHLIST_UNLOCK() mtx_unlock(&hhook_head_list_lock)
@@ -166,21 +171,71 @@ hhook_add_hook(struct hhook_head *hhh, struct hookinfo *hki, uint32_t flags)
}
/*
- * Lookup a helper hook point and register a new helper hook function with it.
+ * Register a helper hook function with a helper hook point (including all
+ * virtual instances of the hook point if it is virtualised).
+ *
+ * The logic is unfortunately far more complex than for
+ * hhook_remove_hook_lookup() because hhook_add_hook() can call malloc() with
+ * M_WAITOK and thus we cannot call hhook_add_hook() with the
+ * hhook_head_list_lock held.
+ *
+ * The logic assembles an array of hhook_head structs that correspond to the
+ * helper hook point being hooked and bumps the refcount on each (all done with
+ * the hhook_head_list_lock held). The hhook_head_list_lock is then dropped, and
+ * hhook_add_hook() is called and the refcount dropped for each hhook_head
+ * struct in the array.
*/
int
hhook_add_hook_lookup(struct hookinfo *hki, uint32_t flags)
{
- struct hhook_head *hhh;
- int error;
+ struct hhook_head **heads_to_hook, *hhh;
+ int error, i, n_heads_to_hook;
- hhh = hhook_head_get(hki->hook_type, hki->hook_id);
+tryagain:
+ error = i = 0;
+ /*
+ * Accessing n_hhookheads without hhook_head_list_lock held opens up a
+ * race with hhook_head_register() which we are unlikely to lose, but
+ * nonetheless have to cope with - hence the complex goto logic.
+ */
+ n_heads_to_hook = n_hhookheads;
+ heads_to_hook = malloc(n_heads_to_hook * sizeof(struct hhook_head *),
+ M_HHOOK, flags & HHOOK_WAITOK ? M_WAITOK : M_NOWAIT);
+ if (heads_to_hook == NULL)
+ return (ENOMEM);
- if (hhh == NULL)
- return (ENOENT);
+ HHHLIST_LOCK();
+ LIST_FOREACH(hhh, &hhook_head_list, hhh_next) {
+ if (hhh->hhh_type == hki->hook_type &&
+ hhh->hhh_id == hki->hook_id) {
+ if (i < n_heads_to_hook) {
+ heads_to_hook[i] = hhh;
+ refcount_acquire(&heads_to_hook[i]->hhh_refcount);
+ i++;
+ } else {
+ /*
+ * We raced with hhook_head_register() which
+ * inserted a hhook_head that we need to hook
+ * but did not malloc space for. Abort this run
+ * and try again.
+ */
+ for (i--; i >= 0; i--)
+ refcount_release(&heads_to_hook[i]->hhh_refcount);
+ free(heads_to_hook, M_HHOOK);
+ HHHLIST_UNLOCK();
+ goto tryagain;
+ }
+ }
+ }
+ HHHLIST_UNLOCK();
- error = hhook_add_hook(hhh, hki, flags);
- hhook_head_release(hhh);
+ for (i--; i >= 0; i--) {
+ if (!error)
+ error = hhook_add_hook(heads_to_hook[i], hki, flags);
+ refcount_release(&heads_to_hook[i]->hhh_refcount);
+ }
+
+ free(heads_to_hook, M_HHOOK);
return (error);
}
@@ -212,20 +267,21 @@ hhook_remove_hook(struct hhook_head *hhh, struct hookinfo *hki)
}
/*
- * Lookup a helper hook point and remove a helper hook function from it.
+ * Remove a helper hook function from a helper hook point (including all
+ * virtual instances of the hook point if it is virtualised).
*/
int
hhook_remove_hook_lookup(struct hookinfo *hki)
{
struct hhook_head *hhh;
- hhh = hhook_head_get(hki->hook_type, hki->hook_id);
-
- if (hhh == NULL)
- return (ENOENT);
-
- hhook_remove_hook(hhh, hki);
- hhook_head_release(hhh);
+ HHHLIST_LOCK();
+ LIST_FOREACH(hhh, &hhook_head_list, hhh_next) {
+ if (hhh->hhh_type == hki->hook_type &&
+ hhh->hhh_id == hki->hook_id)
+ hhook_remove_hook(hhh, hki);
+ }
+ HHHLIST_UNLOCK();
return (0);
}
@@ -247,13 +303,6 @@ hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hh
return (EEXIST);
}
- /* XXXLAS: Need to implement support for non-virtualised hooks. */
- if ((flags & HHOOK_HEADISINVNET) == 0) {
- printf("%s: only vnet-style virtualised hooks can be used\n",
- __func__);
- return (EINVAL);
- }
-
tmphhh = malloc(sizeof(struct hhook_head), M_HHOOK,
M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT));
@@ -265,22 +314,27 @@ hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hh
tmphhh->hhh_nhooks = 0;
STAILQ_INIT(&tmphhh->hhh_hooks);
HHH_LOCK_INIT(tmphhh);
+ refcount_init(&tmphhh->hhh_refcount, 1);
- if (hhh != NULL)
- refcount_init(&tmphhh->hhh_refcount, 1);
- else
- refcount_init(&tmphhh->hhh_refcount, 0);
-
+ HHHLIST_LOCK();
if (flags & HHOOK_HEADISINVNET) {
tmphhh->hhh_flags |= HHH_ISINVNET;
- HHHLIST_LOCK();
- LIST_INSERT_HEAD(&V_hhook_head_list, tmphhh, hhh_next);
- HHHLIST_UNLOCK();
- } else {
- /* XXXLAS: Add tmphhh to the non-virtualised list. */
+#ifdef VIMAGE
+ KASSERT(curvnet != NULL, ("curvnet is NULL"));
+ tmphhh->hhh_vid = (uintptr_t)curvnet;
+ LIST_INSERT_HEAD(&V_hhook_vhead_list, tmphhh, hhh_vnext);
+#endif
}
+ LIST_INSERT_HEAD(&hhook_head_list, tmphhh, hhh_next);
+ n_hhookheads++;
+ HHHLIST_UNLOCK();
+
+ khelp_new_hhook_registered(tmphhh, flags);
- *hhh = tmphhh;
+ if (hhh != NULL)
+ *hhh = tmphhh;
+ else
+ refcount_release(&tmphhh->hhh_refcount);
return (0);
}
@@ -291,14 +345,20 @@ hhook_head_destroy(struct hhook_head *hhh)
struct hhook *tmp, *tmp2;
HHHLIST_LOCK_ASSERT();
+ KASSERT(n_hhookheads > 0, ("n_hhookheads should be > 0"));
LIST_REMOVE(hhh, hhh_next);
+#ifdef VIMAGE
+ if (hhook_head_is_virtualised(hhh) == HHOOK_HEADISINVNET)
+ LIST_REMOVE(hhh, hhh_vnext);
+#endif
HHH_WLOCK(hhh);
STAILQ_FOREACH_SAFE(tmp, &hhh->hhh_hooks, hhk_next, tmp2)
free(tmp, M_HHOOK);
HHH_WUNLOCK(hhh);
HHH_LOCK_DESTROY(hhh);
free(hhh, M_HHOOK);
+ n_hhookheads--;
}
/*
@@ -350,10 +410,17 @@ hhook_head_get(int32_t hhook_type, int32_t hhook_id)
{
struct hhook_head *hhh;
- /* XXXLAS: Pick hhook_head_list based on hhook_head flags. */
HHHLIST_LOCK();
- LIST_FOREACH(hhh, &V_hhook_head_list, hhh_next) {
+ LIST_FOREACH(hhh, &hhook_head_list, hhh_next) {
if (hhh->hhh_type == hhook_type && hhh->hhh_id == hhook_id) {
+#ifdef VIMAGE
+ if (hhook_head_is_virtualised(hhh) ==
+ HHOOK_HEADISINVNET) {
+ KASSERT(curvnet != NULL, ("curvnet is NULL"));
+ if (hhh->hhh_vid != (uintptr_t)curvnet)
+ continue;
+ }
+#endif
refcount_acquire(&hhh->hhh_refcount);
break;
}
@@ -415,7 +482,7 @@ static void
hhook_vnet_init(const void *unused __unused)
{
- LIST_INIT(&V_hhook_head_list);
+ LIST_INIT(&V_hhook_vhead_list);
}
/*
@@ -432,7 +499,7 @@ hhook_vnet_uninit(const void *unused __unused)
* subsystem should have already called hhook_head_deregister().
*/
HHHLIST_LOCK();
- LIST_FOREACH_SAFE(hhh, &V_hhook_head_list, hhh_next, tmphhh) {
+ LIST_FOREACH_SAFE(hhh, &V_hhook_vhead_list, hhh_vnext, tmphhh) {
printf("%s: hhook_head type=%d, id=%d cleanup required\n",
__func__, hhh->hhh_type, hhh->hhh_id);
hhook_head_destroy(hhh);
@@ -442,9 +509,9 @@ hhook_vnet_uninit(const void *unused __unused)
/*
- * When a vnet is created and being initialised, init the V_hhook_head_list.
+ * When a vnet is created and being initialised, init the V_hhook_vhead_list.
*/
-VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+VNET_SYSINIT(hhook_vnet_init, SI_SUB_MBUF, SI_ORDER_FIRST,
hhook_vnet_init, NULL);
/*
@@ -452,5 +519,5 @@ VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
* points to clean up on vnet tear down, but in case the KPI is misused,
* provide a function to clean up and free memory for a vnet being destroyed.
*/
-VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_MBUF, SI_ORDER_ANY,
hhook_vnet_uninit, NULL);
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 2fd12773..b8074022 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -84,6 +84,7 @@ struct intr_thread {
/* Interrupt thread flags kept in it_flags */
#define IT_DEAD 0x000001 /* Thread is waiting to exit. */
+#define IT_WAIT 0x000002 /* Thread is waiting for completion. */
struct intr_entropy {
struct thread *td;
@@ -576,17 +577,6 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
}
}
- /* Add the new handler to the event in priority order. */
- TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
- if (temp_ih->ih_pri > ih->ih_pri)
- break;
- }
- if (temp_ih == NULL)
- TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
- else
- TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
- intr_event_update(ie);
-
/* Create a thread if we need one. */
while (ie->ie_thread == NULL && handler != NULL) {
if (ie->ie_flags & IE_ADDING_THREAD)
@@ -603,6 +593,18 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
wakeup(ie);
}
}
+
+ /* Add the new handler to the event in priority order. */
+ TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
+ if (temp_ih->ih_pri > ih->ih_pri)
+ break;
+ }
+ if (temp_ih == NULL)
+ TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
+ else
+ TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
+ intr_event_update(ie);
+
CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
ie->ie_name);
mtx_unlock(&ie->ie_lock);
@@ -650,23 +652,12 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
}
}
- /* Add the new handler to the event in priority order. */
- TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
- if (temp_ih->ih_pri > ih->ih_pri)
- break;
- }
- if (temp_ih == NULL)
- TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
- else
- TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
- intr_event_update(ie);
-
/* For filtered handlers, create a private ithread to run on. */
- if (filter != NULL && handler != NULL) {
+ if (filter != NULL && handler != NULL) {
mtx_unlock(&ie->ie_lock);
- it = ithread_create("intr: newborn", ih);
+ it = ithread_create("intr: newborn", ih);
mtx_lock(&ie->ie_lock);
- it->it_event = ie;
+ it->it_event = ie;
ih->ih_thread = it;
ithread_update(it); // XXX - do we really need this?!?!?
} else { /* Create the global per-event thread if we need one. */
@@ -686,6 +677,18 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
}
}
}
+
+ /* Add the new handler to the event in priority order. */
+ TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
+ if (temp_ih->ih_pri > ih->ih_pri)
+ break;
+ }
+ if (temp_ih == NULL)
+ TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
+ else
+ TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
+ intr_event_update(ie);
+
CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
ie->ie_name);
mtx_unlock(&ie->ie_lock);
@@ -773,7 +776,47 @@ intr_handler_source(void *cookie)
return (ie->ie_source);
}
+/*
+ * Sleep until an ithread finishes executing an interrupt handler.
+ *
+ * XXX Doesn't currently handle interrupt filters or fast interrupt
+ * handlers. This is intended for compatibility with linux drivers
+ * only. Do not use in BSD code.
+ */
+void
+_intr_drain(int irq)
+{
+ struct intr_event *ie;
+ struct intr_thread *ithd;
+ struct thread *td;
+
+ ie = intr_lookup(irq);
+ if (ie == NULL)
+ return;
+ if (ie->ie_thread == NULL)
+ return;
+ ithd = ie->ie_thread;
+ td = ithd->it_thread;
+ /*
+ * We set the flag and wait for it to be cleared to avoid
+ * long delays with potentially busy interrupt handlers
+ * were we to only sample TD_AWAITING_INTR() every tick.
+ */
+ thread_lock(td);
+ if (!TD_AWAITING_INTR(td)) {
+ ithd->it_flags |= IT_WAIT;
+ while (ithd->it_flags & IT_WAIT) {
+ thread_unlock(td);
+ pause("idrain", 1);
+ thread_lock(td);
+ }
+ }
+ thread_unlock(td);
+ return;
+}
#endif /* __rtems__ */
+
+
#ifndef INTR_FILTER
#ifndef __rtems__
int
@@ -835,7 +878,7 @@ ok:
* again and remove this handler if it has already passed
* it on the list.
*/
- ie->ie_thread->it_need = 1;
+ atomic_store_rel_int(&ie->ie_thread->it_need, 1);
} else
TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
thread_unlock(ie->ie_thread->it_thread);
@@ -911,7 +954,7 @@ intr_event_schedule_thread(struct intr_event *ie)
* running. Then, lock the thread and see if we actually need to
* put it on the runqueue.
*/
- it->it_need = 1;
+ atomic_store_rel_int(&it->it_need, 1);
thread_lock(td);
#ifndef __rtems__
if (TD_AWAITING_INTR(td)) {
@@ -998,7 +1041,7 @@ ok:
* again and remove this handler if it has already passed
* it on the list.
*/
- it->it_need = 1;
+ atomic_store_rel_int(&it->it_need, 1);
} else
TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
thread_unlock(it->it_thread);
@@ -1078,7 +1121,7 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
* running. Then, lock the thread and see if we actually need to
* put it on the runqueue.
*/
- it->it_need = 1;
+ atomic_store_rel_int(&it->it_need, 1);
thread_lock(td);
if (TD_AWAITING_INTR(td)) {
CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
@@ -1161,11 +1204,21 @@ swi_sched(void *cookie, int flags)
{
struct intr_handler *ih = (struct intr_handler *)cookie;
struct intr_event *ie = ih->ih_event;
+ struct intr_entropy entropy;
int error;
CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
ih->ih_need);
+ if (harvest.swi) {
+ CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy",
+ curproc->p_pid, curthread->td_name);
+ entropy.event = (uintptr_t)ih;
+ entropy.td = curthread;
+ random_harvest(&entropy, sizeof(entropy), 1, 0,
+ RANDOM_INTERRUPT);
+ }
+
/*
* Set ih_need for this handler so that if the ithread is already
* running it will execute this handler on the next pass. Otherwise,
@@ -1267,7 +1320,7 @@ intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
* interrupt threads always invoke all of their handlers.
*/
if (ie->ie_flags & IE_SOFT) {
- if (!ih->ih_need)
+ if (atomic_load_acq_int(&ih->ih_need) == 0)
continue;
else
atomic_store_rel_int(&ih->ih_need, 0);
@@ -1345,6 +1398,7 @@ ithread_loop(void *arg)
struct intr_event *ie;
struct thread *td;
struct proc *p;
+ int wake;
td = curthread;
#ifndef __rtems__
@@ -1357,6 +1411,7 @@ ithread_loop(void *arg)
("%s: ithread and proc linkage out of sync", __func__));
ie = ithd->it_event;
ie->ie_count = 0;
+ wake = 0;
/*
* As long as we have interrupts outstanding, go through the
@@ -1378,7 +1433,7 @@ ithread_loop(void *arg)
* we are running, it will set it_need to note that we
* should make another pass.
*/
- while (ithd->it_need) {
+ while (atomic_load_acq_int(&ithd->it_need) != 0) {
/*
* This might need a full read and write barrier
* to make sure that this write posts before any
@@ -1397,7 +1452,8 @@ ithread_loop(void *arg)
* set again, so we have to check it again.
*/
thread_lock(td);
- if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
+ if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
+ !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
#ifndef __rtems__
TD_SET_IWAIT(td);
ie->ie_count = 0;
@@ -1415,7 +1471,15 @@ ithread_loop(void *arg)
BSD_ASSERT(sc == RTEMS_SUCCESSFUL);
#endif /* __rtems__ */
}
+ if (ithd->it_flags & IT_WAIT) {
+ wake = 1;
+ ithd->it_flags &= ~IT_WAIT;
+ }
thread_unlock(td);
+ if (wake) {
+ wakeup(ithd);
+ wake = 0;
+ }
}
}
#ifndef __rtems__
@@ -1435,6 +1499,7 @@ int
intr_event_handle(struct intr_event *ie, struct trapframe *frame)
{
struct intr_handler *ih;
+ struct trapframe *oldframe;
struct thread *td;
int error, ret, thread;
@@ -1454,6 +1519,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
thread = 0;
ret = 0;
critical_enter();
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = frame;
TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (ih->ih_filter == NULL) {
thread = 1;
@@ -1491,6 +1558,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
thread = 1;
}
}
+ td->td_intr_frame = oldframe;
if (thread) {
if (ie->ie_pre_ithread != NULL)
@@ -1529,6 +1597,7 @@ ithread_loop(void *arg)
struct thread *td;
struct proc *p;
int priv;
+ int wake;
td = curthread;
p = td->td_proc;
@@ -1539,6 +1608,7 @@ ithread_loop(void *arg)
("%s: ithread and proc linkage out of sync", __func__));
ie = ithd->it_event;
ie->ie_count = 0;
+ wake = 0;
/*
* As long as we have interrupts outstanding, go through the
@@ -1560,7 +1630,7 @@ ithread_loop(void *arg)
* we are running, it will set it_need to note that we
* should make another pass.
*/
- while (ithd->it_need) {
+ while (atomic_load_acq_int(&ithd->it_need) != 0) {
/*
* This might need a full read and write barrier
* to make sure that this write posts before any
@@ -1582,12 +1652,21 @@ ithread_loop(void *arg)
* set again, so we have to check it again.
*/
thread_lock(td);
- if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
+ if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
+ !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
+ if (ithd->it_flags & IT_WAIT) {
+ wake = 1;
+ ithd->it_flags &= ~IT_WAIT;
+ }
thread_unlock(td);
+ if (wake) {
+ wakeup(ithd);
+ wake = 0;
+ }
}
}
@@ -1682,6 +1761,7 @@ int
intr_event_handle(struct intr_event *ie, struct trapframe *frame)
{
struct intr_thread *ithd;
+ struct trapframe *oldframe;
struct thread *td;
int thread;
@@ -1694,6 +1774,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
td->td_intr_nesting_level++;
thread = 0;
critical_enter();
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = frame;
thread = intr_filter_loop(ie, frame, &ithd);
if (thread & FILTER_HANDLED) {
if (ie->ie_post_filter != NULL)
@@ -1702,6 +1784,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
if (ie->ie_pre_ithread != NULL)
ie->ie_pre_ithread(ie->ie_source);
}
+ td->td_intr_frame = oldframe;
critical_exit();
/* Interrupt storm logic */
@@ -1760,7 +1843,16 @@ db_dump_intrhand(struct intr_handler *ih)
break;
}
db_printf(" ");
- db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
+ if (ih->ih_filter != NULL) {
+ db_printf("[F]");
+ db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC);
+ }
+ if (ih->ih_handler != NULL) {
+ if (ih->ih_filter != NULL)
+ db_printf(",");
+ db_printf("[H]");
+ db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
+ }
db_printf("(%p)", ih->ih_argument);
if (ih->ih_need ||
(ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD |
@@ -1896,8 +1988,7 @@ SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr,
static int
sysctl_intrnames(SYSCTL_HANDLER_ARGS)
{
- return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames,
- req));
+ return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req));
}
SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
@@ -1906,8 +1997,7 @@ SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
static int
sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
{
- return (sysctl_handle_opaque(oidp, intrcnt,
- (char *)eintrcnt - (char *)intrcnt, req));
+ return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req));
}
SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
@@ -1921,9 +2011,12 @@ DB_SHOW_COMMAND(intrcnt, db_show_intrcnt)
{
u_long *i;
char *cp;
+ u_int j;
cp = intrnames;
- for (i = intrcnt; i != eintrcnt && !db_pager_quit; i++) {
+ j = 0;
+ for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit;
+ i++, j++) {
if (*cp == '\0')
break;
if (*i != 0)
diff --git a/freebsd/sys/kern/kern_khelp.c b/freebsd/sys/kern/kern_khelp.c
index 9e4127da..e1192ae8 100644
--- a/freebsd/sys/kern/kern_khelp.c
+++ b/freebsd/sys/kern/kern_khelp.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010,2013 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
#include <sys/hhook.h>
-#include <sys/jail.h>
#include <sys/khelp.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
@@ -54,8 +53,6 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/systm.h>
-#include <net/vnet.h>
-
static struct rwlock khelp_list_lock;
RW_SYSINIT(khelplistlock, &khelp_list_lock, "helper list lock");
@@ -63,6 +60,7 @@ static TAILQ_HEAD(helper_head, helper) helpers = TAILQ_HEAD_INITIALIZER(helpers)
/* Private function prototypes. */
static inline void khelp_remove_osd(struct helper *h, struct osd *hosd);
+void khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags);
#define KHELP_LIST_WLOCK() rw_wlock(&khelp_list_lock)
#define KHELP_LIST_WUNLOCK() rw_wunlock(&khelp_list_lock)
@@ -76,33 +74,32 @@ khelp_register_helper(struct helper *h)
struct helper *tmph;
int error, i, inserted;
- error = 0;
- inserted = 0;
+ error = inserted = 0;
refcount_init(&h->h_refcount, 0);
h->h_id = osd_register(OSD_KHELP, NULL, NULL);
/* It's only safe to add the hooks after osd_register(). */
- if (h->h_nhooks > 0) {
- for (i = 0; i < h->h_nhooks && !error; i++) {
- /* We don't require the module to assign hook_helper. */
- h->h_hooks[i].hook_helper = h;
- error = khelp_add_hhook(&h->h_hooks[i], HHOOK_NOWAIT);
- }
-
- if (error) {
- for (i--; i >= 0; i--)
- khelp_remove_hhook(&h->h_hooks[i]);
-
- osd_deregister(OSD_KHELP, h->h_id);
- }
+ for (i = 0; i < h->h_nhooks && !error; i++) {
+ /* We don't require the module to assign hook_helper. */
+ h->h_hooks[i].hook_helper = h;
+ error = hhook_add_hook_lookup(&h->h_hooks[i], HHOOK_WAITOK);
+ if (error)
+ printf("%s: \"%s\" khelp module unable to "
+ "hook type %d id %d due to error %d\n", __func__,
+ h->h_name, h->h_hooks[i].hook_type,
+ h->h_hooks[i].hook_id, error);
}
- if (!error) {
+ if (error) {
+ for (i--; i >= 0; i--)
+ hhook_remove_hook_lookup(&h->h_hooks[i]);
+ osd_deregister(OSD_KHELP, h->h_id);
+ } else {
KHELP_LIST_WLOCK();
/*
* Keep list of helpers sorted in descending h_id order. Due to
* the way osd_set() works, a sorted list ensures
- * init_helper_osd() will operate with improved efficiency.
+ * khelp_init_osd() will operate with improved efficiency.
*/
TAILQ_FOREACH(tmph, &helpers, h_next) {
if (tmph->h_id < h->h_id) {
@@ -126,8 +123,6 @@ khelp_deregister_helper(struct helper *h)
struct helper *tmph;
int error, i;
- error = 0;
-
KHELP_LIST_WLOCK();
if (h->h_refcount > 0)
error = EBUSY;
@@ -144,10 +139,8 @@ khelp_deregister_helper(struct helper *h)
KHELP_LIST_WUNLOCK();
if (!error) {
- if (h->h_nhooks > 0) {
- for (i = 0; i < h->h_nhooks; i++)
- khelp_remove_hhook(&h->h_hooks[i]);
- }
+ for (i = 0; i < h->h_nhooks; i++)
+ hhook_remove_hook_lookup(&h->h_hooks[i]);
osd_deregister(OSD_KHELP, h->h_id);
}
@@ -265,28 +258,13 @@ khelp_get_id(char *hname)
int
khelp_add_hhook(struct hookinfo *hki, uint32_t flags)
{
- VNET_ITERATOR_DECL(vnet_iter);
int error;
- error = 0;
-
/*
- * XXXLAS: If a helper is dynamically adding a helper hook function at
- * runtime using this function, we should update the helper's h_hooks
- * struct member to include the additional hookinfo struct.
+ * XXXLAS: Should probably include the functionality to update the
+ * helper's h_hooks struct member.
*/
-
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- error = hhook_add_hook_lookup(hki, flags);
- CURVNET_RESTORE();
-#ifdef VIMAGE
- if (error)
- break;
-#endif
- }
- VNET_LIST_RUNLOCK_NOSLEEP();
+ error = hhook_add_hook_lookup(hki, flags);
return (error);
}
@@ -294,32 +272,47 @@ khelp_add_hhook(struct hookinfo *hki, uint32_t flags)
int
khelp_remove_hhook(struct hookinfo *hki)
{
- VNET_ITERATOR_DECL(vnet_iter);
int error;
- error = 0;
-
/*
- * XXXLAS: If a helper is dynamically removing a helper hook function at
- * runtime using this function, we should update the helper's h_hooks
- * struct member to remove the defunct hookinfo struct.
+ * XXXLAS: Should probably include the functionality to update the
+ * helper's h_hooks struct member.
*/
-
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- error = hhook_remove_hook_lookup(hki);
- CURVNET_RESTORE();
-#ifdef VIMAGE
- if (error)
- break;
-#endif
- }
- VNET_LIST_RUNLOCK_NOSLEEP();
+ error = hhook_remove_hook_lookup(hki);
return (error);
}
+/*
+ * Private KPI between hhook and khelp that allows khelp modules to insert hook
+ * functions into hhook points which register after the modules were loaded.
+ */
+void
+khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags)
+{
+ struct helper *h;
+ int error, i;
+
+ KHELP_LIST_RLOCK();
+ TAILQ_FOREACH(h, &helpers, h_next) {
+ for (i = 0; i < h->h_nhooks; i++) {
+ if (hhh->hhh_type != h->h_hooks[i].hook_type ||
+ hhh->hhh_id != h->h_hooks[i].hook_id)
+ continue;
+ error = hhook_add_hook(hhh, &h->h_hooks[i], flags);
+ if (error) {
+ printf("%s: \"%s\" khelp module unable to "
+ "hook type %d id %d due to error %d\n",
+ __func__, h->h_name,
+ h->h_hooks[i].hook_type,
+ h->h_hooks[i].hook_id, error);
+ error = 0;
+ }
+ }
+ }
+ KHELP_LIST_RUNLOCK();
+}
+
#ifndef __rtems__
int
khelp_modevent(module_t mod, int event_type, void *data)
@@ -381,95 +374,3 @@ khelp_modevent(module_t mod, int event_type, void *data)
return (error);
}
#endif /* __rtems__ */
-
-/*
- * This function is called in two separate situations:
- *
- * - When the kernel is booting, it is called directly by the SYSINIT framework
- * to allow Khelp modules which were compiled into the kernel or loaded by the
- * boot loader to insert their non-virtualised hook functions into the kernel.
- *
- * - When the kernel is booting or a vnet is created, this function is also
- * called indirectly through khelp_vnet_init() by the vnet initialisation code.
- * In this situation, Khelp modules are able to insert their virtualised hook
- * functions into the virtualised hook points in the vnet which is being
- * initialised. In the case where the kernel is not compiled with "options
- * VIMAGE", this step is still run once at boot, but the hook functions get
- * transparently inserted into the standard unvirtualised network stack.
- */
-static void
-khelp_init(const void *vnet)
-{
- struct helper *h;
- int error, i, vinit;
- int32_t htype, hid;
-
- error = 0;
- vinit = vnet != NULL;
-
- KHELP_LIST_RLOCK();
- TAILQ_FOREACH(h, &helpers, h_next) {
- for (i = 0; i < h->h_nhooks && !error; i++) {
- htype = h->h_hooks[i].hook_type;
- hid = h->h_hooks[i].hook_id;
-
- /*
- * If we're doing a virtualised init (vinit != 0) and
- * the hook point is virtualised, or we're doing a plain
- * sysinit at boot and the hook point is not
- * virtualised, insert the hook.
- */
- if ((hhook_head_is_virtualised_lookup(htype, hid) ==
- HHOOK_HEADISINVNET && vinit) ||
- (!hhook_head_is_virtualised_lookup(htype, hid) &&
- !vinit)) {
- error = hhook_add_hook_lookup(&h->h_hooks[i],
- HHOOK_NOWAIT);
- }
- }
-
- if (error) {
- /* Remove any helper's hooks we successfully added. */
- for (i--; i >= 0; i--)
- hhook_remove_hook_lookup(&h->h_hooks[i]);
-
- printf("%s: Failed to add hooks for helper \"%s\" (%p)",
- __func__, h->h_name, h);
- if (vinit)
- printf(" to vnet %p.\n", vnet);
- else
- printf(".\n");
-
- error = 0;
- }
- }
- KHELP_LIST_RUNLOCK();
-}
-
-/*
- * Vnet created and being initialised.
- */
-static void
-khelp_vnet_init(const void *unused __unused)
-{
-
- khelp_init(TD_TO_VNET(curthread));
-}
-
-
-/*
- * As the kernel boots, allow Khelp modules which were compiled into the kernel
- * or loaded by the boot loader to insert their non-virtualised hook functions
- * into the kernel.
- */
-SYSINIT(khelp_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, khelp_init, NULL);
-
-/*
- * When a vnet is created and being initialised, we need to insert the helper
- * hook functions for all currently registered Khelp modules into the vnet's
- * helper hook points. The hhook KPI provides a mechanism for subsystems which
- * export helper hook points to clean up on vnet shutdown, so we don't need a
- * VNET_SYSUNINIT for Khelp.
- */
-VNET_SYSINIT(khelp_vnet_init, SI_SUB_PROTO_END, SI_ORDER_FIRST,
- khelp_vnet_init, NULL);
diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c
index d6975a4a..b1b46d7a 100644
--- a/freebsd/sys/kern/kern_linker.c
+++ b/freebsd/sys/kern/kern_linker.c
@@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$");
#ifndef __rtems__
#ifdef KLD_DEBUG
int kld_debug = 0;
+SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW,
+ &kld_debug, 0, "Set various levels of KLD debug");
#endif
#define KLD_LOCK() sx_xlock(&kld_sx)
@@ -743,6 +745,9 @@ linker_file_add_dependency(linker_file_t file, linker_file_t dep)
file->deps = newdeps;
file->deps[file->ndeps] = dep;
file->ndeps++;
+ KLD_DPF(FILE, ("linker_file_add_dependency:"
+ " adding %s as dependency for %s\n",
+ dep->filename, file->filename));
return (0);
}
@@ -1071,7 +1076,7 @@ done:
}
int
-kldload(struct thread *td, struct kldload_args *uap)
+sys_kldload(struct thread *td, struct kldload_args *uap)
{
char *pathname = NULL;
int error, fileid;
@@ -1151,14 +1156,14 @@ kern_kldunload(struct thread *td, int fileid, int flags)
}
int
-kldunload(struct thread *td, struct kldunload_args *uap)
+sys_kldunload(struct thread *td, struct kldunload_args *uap)
{
return (kern_kldunload(td, uap->fileid, LINKER_UNLOAD_NORMAL));
}
int
-kldunloadf(struct thread *td, struct kldunloadf_args *uap)
+sys_kldunloadf(struct thread *td, struct kldunloadf_args *uap)
{
if (uap->flags != LINKER_UNLOAD_NORMAL &&
@@ -1168,7 +1173,7 @@ kldunloadf(struct thread *td, struct kldunloadf_args *uap)
}
int
-kldfind(struct thread *td, struct kldfind_args *uap)
+sys_kldfind(struct thread *td, struct kldfind_args *uap)
{
char *pathname;
const char *filename;
@@ -1201,7 +1206,7 @@ out:
}
int
-kldnext(struct thread *td, struct kldnext_args *uap)
+sys_kldnext(struct thread *td, struct kldnext_args *uap)
{
linker_file_t lf;
int error = 0;
@@ -1238,7 +1243,7 @@ out:
}
int
-kldstat(struct thread *td, struct kldstat_args *uap)
+sys_kldstat(struct thread *td, struct kldstat_args *uap)
{
struct kld_file_stat stat;
int error, version;
@@ -1300,7 +1305,7 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat)
}
int
-kldfirstmod(struct thread *td, struct kldfirstmod_args *uap)
+sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap)
{
linker_file_t lf;
module_t mp;
@@ -1329,7 +1334,7 @@ kldfirstmod(struct thread *td, struct kldfirstmod_args *uap)
}
int
-kldsym(struct thread *td, struct kldsym_args *uap)
+sys_kldsym(struct thread *td, struct kldsym_args *uap)
{
char *symstr = NULL;
c_linker_sym_t sym;
@@ -1631,6 +1636,12 @@ restart:
modname = mp->md_cval;
verinfo = mp->md_data;
mod = modlist_lookup2(modname, verinfo);
+ if (mod == NULL) {
+ printf("KLD file %s - cannot find "
+ "dependency \"%s\"\n",
+ lf->filename, modname);
+ goto fail;
+ }
/* Don't count self-dependencies */
if (lf == mod->container)
continue;
@@ -1647,11 +1658,9 @@ restart:
*/
error = LINKER_LINK_PRELOAD_FINISH(lf);
if (error) {
- TAILQ_REMOVE(&depended_files, lf, loaded);
printf("KLD file %s - could not finalize loading\n",
lf->filename);
- linker_file_unload(lf, LINKER_UNLOAD_FORCE);
- continue;
+ goto fail;
}
linker_file_register_modules(lf);
if (linker_file_lookup_set(lf, "sysinit_set", &si_start,
@@ -1659,6 +1668,10 @@ restart:
sysinit_add(si_start, si_stop);
linker_file_register_sysctls(lf);
lf->flags |= LINKER_FILE_LINKED;
+ continue;
+fail:
+ TAILQ_REMOVE(&depended_files, lf, loaded);
+ linker_file_unload(lf, LINKER_UNLOAD_FORCE);
}
/* woohoo! we made it! */
}
@@ -1765,7 +1778,8 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname,
struct vattr vattr, mattr;
u_char *hints = NULL;
u_char *cp, *recptr, *bufend, *result, *best, *pathbuf, *sep;
- int error, ival, bestver, *intp, reclen, found, flags, clen, blen;
+ int error, ival, bestver, *intp, found, flags, clen, blen;
+ ssize_t reclen;
int vfslocked = 0;
result = NULL;
@@ -1810,7 +1824,7 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname,
VFS_UNLOCK_GIANT(vfslocked);
nd.ni_vp = NULL;
if (reclen != 0) {
- printf("can't read %d\n", reclen);
+ printf("can't read %zd\n", reclen);
goto bad;
}
intp = (int *)hints;
@@ -2184,6 +2198,6 @@ sysctl_kern_function_list(SYSCTL_HANDLER_ARGS)
return (SYSCTL_OUT(req, "", 1));
}
-SYSCTL_PROC(_kern, OID_AUTO, function_list, CTLFLAG_RD,
+SYSCTL_PROC(_kern, OID_AUTO, function_list, CTLTYPE_OPAQUE | CTLFLAG_RD,
NULL, 0, sysctl_kern_function_list, "", "kernel function list");
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c
index 5ad62894..98cfb1f0 100644
--- a/freebsd/sys/kern/kern_mbuf.c
+++ b/freebsd/sys/kern/kern_mbuf.c
@@ -112,14 +112,23 @@ struct mbstat mbstat;
static void
tunable_mbinit(void *dummy)
{
- TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
/* This has to be done before VM init. */
+ TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
if (nmbclusters == 0)
nmbclusters = 1024 + maxusers * 64;
- nmbjumbop = nmbclusters / 2;
- nmbjumbo9 = nmbjumbop / 2;
- nmbjumbo16 = nmbjumbo9 / 2;
+
+ TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
+ if (nmbjumbop == 0)
+ nmbjumbop = nmbclusters / 2;
+
+ TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
+ if (nmbjumbo9 == 0)
+ nmbjumbo9 = nmbclusters / 4;
+
+ TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
+ if (nmbjumbo16 == 0)
+ nmbjumbo16 = nmbclusters / 8;
}
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
@@ -237,7 +246,7 @@ static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
static void mbuf_init(void *);
-static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int);
+static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
@@ -356,7 +365,7 @@ mbuf_init(void *dummy)
* pages.
*/
static void *
-mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index 7a364a5e..1450f61c 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -59,7 +59,7 @@ __FBSDID("$FreeBSD$");
SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0,
"Sysctl internal magic");
-SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0,
+SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
"High kernel, proc, limits &c");
#ifndef __rtems__
SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0,
@@ -99,10 +99,10 @@ SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0,
SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE,
kern_ident, 0, "Kernel identifier");
-SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE,
- osrelease, 0, "Operating system release");
+SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE|
+ CTLFLAG_CAPRD, osrelease, 0, "Operating system release");
-SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD,
0, BSD, "Operating system revision");
SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE,
@@ -111,14 +111,14 @@ SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE,
SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE,
compiler_version, 0, "Version of compiler used to compile kernel");
-SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE,
- ostype, 0, "Operating system type");
+SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE|
+ CTLFLAG_CAPRD, ostype, 0, "Operating system type");
/*
* NOTICE: The *userland* release date is available in
* /usr/include/osreldate.h
*/
-SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD,
&osreldate, 0, "Kernel release date");
SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN,
@@ -130,24 +130,24 @@ SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN,
&maxusers, 0, "Hint for kernel tuning");
-SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD,
0, ARG_MAX, "Maximum bytes of argument to execve(2)");
-SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD,
0, _POSIX_VERSION, "Version of POSIX attempting to comply to");
-SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN,
+SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&ngroups_max, 0,
"Maximum number of supplemental groups a user can belong to");
-SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD,
0, 1, "Whether job control is available");
#ifdef _POSIX_SAVED_IDS
-SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
0, 1, "Whether saved set-group/user ID is available");
#else
-SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD,
+SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
0, 0, "Whether saved set-group/user ID is available");
#endif
@@ -156,13 +156,13 @@ char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW,
kernelname, sizeof kernelname, "Name of kernel file booted");
-SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD,
+SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD,
&mp_ncpus, 0, "Number of active CPUs");
-SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD,
+SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD|CTLFLAG_CAPRD,
0, BYTE_ORDER, "System byte order");
-SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD,
+SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD|CTLFLAG_CAPRD,
0, PAGE_SIZE, "System memory page size");
static int
@@ -179,7 +179,7 @@ sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_kern, KERN_ARND, arandom,
- CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, NULL, 0,
sysctl_kern_arnd, "", "arc4rand");
static int
@@ -215,7 +215,7 @@ sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD,
0, 0, sysctl_hw_usermem, "LU", "");
-SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
+SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
@@ -462,6 +462,8 @@ FEATURE(compat_freebsd7, "Compatible with FreeBSD 7");
* This is really cheating. These actually live in the libc, something
* which I'm not quite sure is a good idea anyway, but in order for
* getnext and friends to actually work, we define dummies here.
+ *
+ * XXXRW: These probably should be CTLFLAG_CAPRD.
*/
SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD,
"", 0, "PATH that finds all the standard utilities");
@@ -511,6 +513,34 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD,
SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD,
0, sizeof(struct proc), "sizeof(struct proc)");
+static int
+sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS)
+{
+ int error, pm;
+
+ pm = pid_max;
+ error = sysctl_handle_int(oidp, &pm, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ sx_xlock(&proctree_lock);
+ sx_xlock(&allproc_lock);
+
+ /*
+ * Only permit the values less then PID_MAX.
+ * As a safety measure, do not allow to limit the pid_max too much.
+ */
+ if (pm < 300 || pm > PID_MAX)
+ error = EINVAL;
+ else
+ pid_max = pm;
+ sx_xunlock(&allproc_lock);
+ sx_xunlock(&proctree_lock);
+ return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_TUN |
+ CTLFLAG_MPSAFE, 0, 0, sysctl_kern_pid_max, "I",
+ "Maximum allowed pid");
+
#include <sys/bio.h>
#include <sys/buf.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD,
diff --git a/freebsd/sys/kern/kern_module.c b/freebsd/sys/kern/kern_module.c
index 40e64371..72c9d99d 100644
--- a/freebsd/sys/kern/kern_module.c
+++ b/freebsd/sys/kern/kern_module.c
@@ -337,7 +337,7 @@ module_file(module_t mod)
* Syscalls.
*/
int
-modnext(struct thread *td, struct modnext_args *uap)
+sys_modnext(struct thread *td, struct modnext_args *uap)
{
module_t mod;
int error = 0;
@@ -368,7 +368,7 @@ done2:
}
int
-modfnext(struct thread *td, struct modfnext_args *uap)
+sys_modfnext(struct thread *td, struct modfnext_args *uap)
{
module_t mod;
int error;
@@ -398,7 +398,7 @@ struct module_stat_v1 {
};
int
-modstat(struct thread *td, struct modstat_args *uap)
+sys_modstat(struct thread *td, struct modstat_args *uap)
{
module_t mod;
modspecific_t data;
@@ -451,7 +451,7 @@ modstat(struct thread *td, struct modstat_args *uap)
}
int
-modfind(struct thread *td, struct modfind_args *uap)
+sys_modfind(struct thread *td, struct modfind_args *uap)
{
int error = 0;
char name[MAXMODNAME];
@@ -482,9 +482,9 @@ MODULE_VERSION(kernel, __FreeBSD_version);
typedef union modspecific32 {
int intval;
- u_int32_t uintval;
+ uint32_t uintval;
int longval;
- u_int32_t ulongval;
+ uint32_t ulongval;
} modspecific32_t;
struct module_stat32 {
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index 2fb986bb..0903dd74 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -40,12 +40,14 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_capsicum.h>
#include <rtems/bsd/local/opt_compat.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <rtems/bsd/sys/param.h>
#include <sys/fail.h>
#include <sys/systm.h>
+#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
@@ -367,10 +369,31 @@ sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
return (error);
}
+int
+sysctl_remove_name(struct sysctl_oid *parent, const char *name,
+ int del, int recurse)
+{
+ struct sysctl_oid *p, *tmp;
+ int error;
+
+ error = ENOENT;
+ SYSCTL_XLOCK();
+ SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
+ if (strcmp(p->oid_name, name) == 0) {
+ error = sysctl_remove_oid_locked(p, del, recurse);
+ break;
+ }
+ }
+ SYSCTL_XUNLOCK();
+
+ return (error);
+}
+
+
static int
sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
{
- struct sysctl_oid *p;
+ struct sysctl_oid *p, *tmp;
int error;
SYSCTL_ASSERT_XLOCKED();
@@ -389,7 +412,8 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
*/
if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
if (oidp->oid_refcnt == 1) {
- SLIST_FOREACH(p, SYSCTL_CHILDREN(oidp), oid_link) {
+ SLIST_FOREACH_SAFE(p,
+ SYSCTL_CHILDREN(oidp), oid_link, tmp) {
if (!recurse)
return (ENOTEMPTY);
error = sysctl_remove_oid_locked(p, del,
@@ -430,14 +454,13 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
}
return (0);
}
-
/*
* Create new sysctls at run time.
* clist may point to a valid context initialized with sysctl_ctx_init().
*/
struct sysctl_oid *
sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
- int number, const char *name, int kind, void *arg1, int arg2,
+ int number, const char *name, int kind, void *arg1, intptr_t arg2,
int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
{
struct sysctl_oid *oidp;
@@ -475,6 +498,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list),
M_SYSCTLOID, M_WAITOK));
SLIST_INIT(SYSCTL_CHILDREN(oidp));
+ oidp->oid_arg2 = arg2;
} else {
oidp->oid_arg1 = arg1;
oidp->oid_arg2 = arg2;
@@ -603,8 +627,12 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
}
break;
case CTLTYPE_INT: printf(" Int\n"); break;
+ case CTLTYPE_UINT: printf(" u_int\n"); break;
+ case CTLTYPE_LONG: printf(" Long\n"); break;
+ case CTLTYPE_ULONG: printf(" u_long\n"); break;
case CTLTYPE_STRING: printf(" String\n"); break;
- case CTLTYPE_QUAD: printf(" Quad\n"); break;
+ case CTLTYPE_U64: printf(" uint64_t\n"); break;
+ case CTLTYPE_S64: printf(" int64_t\n"); break;
case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
default: printf("\n");
}
@@ -687,7 +715,12 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
+/*
+ * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
+ * capability mode.
+ */
+static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD,
+ sysctl_sysctl_name, "");
static int
sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
@@ -768,7 +801,12 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
+/*
+ * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
+ * capability mode.
+ */
+static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD,
+ sysctl_sysctl_next, "");
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
@@ -813,7 +851,7 @@ static int
sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
{
char *p;
- int error, oid[CTL_MAXNAME], len;
+ int error, oid[CTL_MAXNAME], len = 0;
struct sysctl_oid *op = 0;
if (!req->newlen)
@@ -830,7 +868,7 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
}
p [req->newlen] = '\0';
- len = 0;
+
SYSCTL_XLOCK();
error = name2oid(p, oid, &len, &op);
SYSCTL_XUNLOCK();
@@ -844,8 +882,13 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
- 0, 0, sysctl_sysctl_name2oid, "I", "");
+/*
+ * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
+ * capability mode.
+ */
+SYSCTL_PROC(_sysctl, 3, name2oid,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
+ | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");
static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
@@ -872,7 +915,7 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
}
-static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE,
+static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
sysctl_sysctl_oidfmt, "");
static int
@@ -896,7 +939,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD, sysctl_sysctl_oiddescr, "");
+static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD,
+ sysctl_sysctl_oiddescr, "");
/*
* Default "handler" functions.
@@ -1012,9 +1056,8 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS)
* a variable: point arg1 at it.
* a constant: pass it in arg2.
*/
-
int
-sysctl_handle_quad(SYSCTL_HANDLER_ARGS)
+sysctl_handle_64(SYSCTL_HANDLER_ARGS)
{
int error = 0;
uint64_t tmpout;
@@ -1198,7 +1241,7 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
req.oldfunc = sysctl_old_kernel;
req.newfunc = sysctl_new_kernel;
- req.lock = REQ_LOCKED;
+ req.lock = REQ_UNWIRED;
SYSCTL_XLOCK();
error = sysctl_root(0, name, namelen, &req);
@@ -1314,7 +1357,7 @@ sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
ret = 0;
- if (req->lock == REQ_LOCKED && req->oldptr &&
+ if (req->lock != REQ_WIRED && req->oldptr &&
req->oldfunc == sysctl_old_user) {
if (wiredlen != 0) {
ret = vslock(req->oldptr, wiredlen);
@@ -1350,8 +1393,6 @@ sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
return (ENOENT);
indx++;
- if (oid->oid_kind & CTLFLAG_NOLOCK)
- req->lock = REQ_UNLOCKED;
if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
if (oid->oid_handler != NULL || indx == namelen) {
*noid = oid;
@@ -1410,6 +1451,19 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
#ifndef __rtems__
KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
+#ifdef CAPABILITY_MODE
+ /*
+ * If the process is in capability mode, then don't permit reading or
+ * writing unless specifically granted for the node.
+ */
+ if (IN_CAPABILITY_MODE(req->td)) {
+ if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD))
+ return (EPERM);
+ if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))
+ return (EPERM);
+ }
+#endif
+
/* Is this sysctl sensitive to securelevels? */
if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
@@ -1487,7 +1541,7 @@ struct sysctl_args {
};
#endif
int
-__sysctl(struct thread *td, struct sysctl_args *uap)
+sys___sysctl(struct thread *td, struct sysctl_args *uap)
{
int error, i, name[CTL_MAXNAME];
size_t j;
@@ -1555,7 +1609,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
req.oldfunc = sysctl_old_user;
req.newfunc = sysctl_new_user;
- req.lock = REQ_LOCKED;
+ req.lock = REQ_UNWIRED;
#ifdef KTRACE
if (KTRPOINT(curthread, KTR_SYSCTL))
@@ -1577,7 +1631,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
SYSCTL_XUNLOCK();
if (error != EAGAIN)
break;
- uio_yield();
+ kern_yield(PRI_USER);
}
CURVNET_RESTORE();
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index 344c379e..e113aef6 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -178,7 +178,7 @@ struct clock_gettime_args {
#ifndef __rtems__
/* ARGSUSED */
int
-clock_gettime(struct thread *td, struct clock_gettime_args *uap)
+sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap)
{
struct timespec ats;
int error;
@@ -265,7 +265,7 @@ struct clock_settime_args {
#ifndef __rtems__
/* ARGSUSED */
int
-clock_settime(struct thread *td, struct clock_settime_args *uap)
+sys_clock_settime(struct thread *td, struct clock_settime_args *uap)
{
struct timespec ats;
int error;
@@ -302,7 +302,7 @@ struct clock_getres_args {
#endif
#ifndef __rtems__
int
-clock_getres(struct thread *td, struct clock_getres_args *uap)
+sys_clock_getres(struct thread *td, struct clock_getres_args *uap)
{
struct timespec ts;
int error;
@@ -407,7 +407,7 @@ struct nanosleep_args {
#endif
/* ARGSUSED */
int
-nanosleep(struct thread *td, struct nanosleep_args *uap)
+sys_nanosleep(struct thread *td, struct nanosleep_args *uap)
{
struct timespec rmt, rqt;
int error;
@@ -438,7 +438,7 @@ struct gettimeofday_args {
#endif
/* ARGSUSED */
int
-gettimeofday(struct thread *td, struct gettimeofday_args *uap)
+sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
{
struct timeval atv;
struct timezone rtz;
@@ -464,7 +464,7 @@ struct settimeofday_args {
#endif
/* ARGSUSED */
int
-settimeofday(struct thread *td, struct settimeofday_args *uap)
+sys_settimeofday(struct thread *td, struct settimeofday_args *uap)
{
struct timeval atv, *tvp;
struct timezone atz, *tzp;
@@ -536,7 +536,7 @@ struct getitimer_args {
};
#endif
int
-getitimer(struct thread *td, struct getitimer_args *uap)
+sys_getitimer(struct thread *td, struct getitimer_args *uap)
{
struct itimerval aitv;
int error;
@@ -588,14 +588,14 @@ struct setitimer_args {
};
#endif
int
-setitimer(struct thread *td, struct setitimer_args *uap)
+sys_setitimer(struct thread *td, struct setitimer_args *uap)
{
struct itimerval aitv, oitv;
int error;
if (uap->itv == NULL) {
uap->itv = uap->oitv;
- return (getitimer(td, (struct getitimer_args *)uap));
+ return (sys_getitimer(td, (struct getitimer_args *)uap));
}
if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval))))
@@ -672,13 +672,11 @@ realitexpire(void *arg)
struct timeval ctv, ntv;
p = (struct proc *)arg;
- PROC_LOCK(p);
- psignal(p, SIGALRM);
+ kern_psignal(p, SIGALRM);
if (!timevalisset(&p->p_realtimer.it_interval)) {
timevalclear(&p->p_realtimer.it_value);
if (p->p_flag & P_WEXIT)
wakeup(&p->p_itcallout);
- PROC_UNLOCK(p);
return;
}
for (;;) {
@@ -690,7 +688,6 @@ realitexpire(void *arg)
timevalsub(&ntv, &ctv);
callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1,
realitexpire, p);
- PROC_UNLOCK(p);
return;
}
}
@@ -940,7 +937,7 @@ struct ktimer_create_args {
};
#endif
int
-ktimer_create(struct thread *td, struct ktimer_create_args *uap)
+sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap)
{
struct sigevent *evp1, ev;
int id;
@@ -1079,7 +1076,7 @@ struct ktimer_delete_args {
};
#endif
int
-ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
+sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
{
return (kern_timer_delete(td, uap->timerid));
}
@@ -1144,7 +1141,7 @@ struct ktimer_settime_args {
};
#endif
int
-ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
+sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
{
struct proc *p = td->td_proc;
struct itimer *it;
@@ -1185,7 +1182,7 @@ struct ktimer_gettime_args {
};
#endif
int
-ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
+sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
{
struct proc *p = td->td_proc;
struct itimer *it;
@@ -1216,7 +1213,7 @@ struct timer_getoverrun_args {
};
#endif
int
-ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
+sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
{
struct proc *p = td->td_proc;
struct itimer *it;
@@ -1419,28 +1416,22 @@ void
itimer_fire(struct itimer *it)
{
struct proc *p = it->it_proc;
- int ret;
+ struct thread *td;
if (it->it_sigev.sigev_notify == SIGEV_SIGNAL ||
it->it_sigev.sigev_notify == SIGEV_THREAD_ID) {
- PROC_LOCK(p);
+ if (sigev_findtd(p, &it->it_sigev, &td) != 0) {
+ ITIMER_LOCK(it);
+ timespecclear(&it->it_time.it_value);
+ timespecclear(&it->it_time.it_interval);
+ callout_stop(&it->it_callout);
+ ITIMER_UNLOCK(it);
+ return;
+ }
if (!KSI_ONQ(&it->it_ksi)) {
it->it_ksi.ksi_errno = 0;
- ret = psignal_event(p, &it->it_sigev, &it->it_ksi);
- if (__predict_false(ret != 0)) {
- it->it_overrun++;
- /*
- * Broken userland code, thread went
- * away, disarm the timer.
- */
- if (ret == ESRCH) {
- ITIMER_LOCK(it);
- timespecclear(&it->it_time.it_value);
- timespecclear(&it->it_time.it_interval);
- callout_stop(&it->it_callout);
- ITIMER_UNLOCK(it);
- }
- }
+ ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev);
+ tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi);
} else {
if (it->it_overrun < INT_MAX)
it->it_overrun++;
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index 38871b03..4ef7909f 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -136,6 +136,7 @@ struct callout_cpu {
int cc_softticks;
int cc_cancel;
int cc_waiting;
+ int cc_firsttick;
};
#ifdef SMP
@@ -158,8 +159,9 @@ struct callout_cpu cc_cpu;
#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
static int timeout_cpu;
+void (*callout_new_inserted)(int cpu, int ticks) = NULL;
-MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
+static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
/**
* Locked by cc_lock:
@@ -352,8 +354,6 @@ kern_timeout_callwheel_init(void)
/*
* Start standard softclock thread.
*/
-void *softclock_ih;
-
static void
start_softclock(void *dummy)
{
@@ -364,9 +364,8 @@ start_softclock(void *dummy)
cc = CC_CPU(timeout_cpu);
if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
- INTR_MPSAFE, &softclock_ih))
+ INTR_MPSAFE, &cc->cc_cookie))
panic("died while creating standard software ithreads");
- cc->cc_cookie = softclock_ih;
#ifdef SMP
CPU_FOREACH(cpu) {
if (cpu == timeout_cpu)
@@ -400,7 +399,7 @@ callout_tick(void)
need_softclock = 0;
cc = CC_SELF();
mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_ticks++;
+ cc->cc_firsttick = cc->cc_ticks = ticks;
for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
bucket = cc->cc_softticks & callwheelmask;
if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
@@ -417,6 +416,33 @@ callout_tick(void)
swi_sched(cc->cc_cookie, 0);
}
+int
+callout_tickstofirst(int limit)
+{
+ struct callout_cpu *cc;
+ struct callout *c;
+ struct callout_tailq *sc;
+ int curticks;
+ int skip = 1;
+
+ cc = CC_SELF();
+ mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ curticks = cc->cc_ticks;
+ while( skip < ncallout && skip < limit ) {
+ sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
+ /* search scanning ticks */
+ TAILQ_FOREACH( c, sc, c_links.tqe ){
+ if (c->c_time - curticks <= ncallout)
+ goto out;
+ }
+ skip++;
+ }
+out:
+ cc->cc_firsttick = curticks + skip;
+ mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ return (skip);
+}
+
static struct callout_cpu *
callout_lock(struct callout *c)
{
@@ -453,24 +479,28 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
c->c_arg = arg;
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_func = func;
- c->c_time = cc->cc_ticks + to_ticks;
- TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+ c->c_time = ticks + to_ticks;
+ TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
c, c_links.tqe);
+ if ((c->c_time - cc->cc_firsttick) < 0 &&
+ callout_new_inserted != NULL) {
+ cc->cc_firsttick = c->c_time;
+ (*callout_new_inserted)(cpu,
+ to_ticks + (ticks - cc->cc_ticks));
+ }
}
static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if (cc->cc_next == c)
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
+ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
+ return;
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
-static struct callout *
+static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
int *lockcalls, int *gcalls)
{
@@ -492,7 +522,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
static timeout_t *lastfunc;
#endif
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
+ (CALLOUT_PENDING | CALLOUT_ACTIVE),
+ ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
c_lock = c->c_lock;
@@ -564,20 +596,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
- /*
- * If the current callout is locally allocated (from
- * timeout(9)) then put it on the freelist.
- *
- * Note: we need to check the cached copy of c_flags because
- * if it was not local, then it's not safe to deref the
- * callout pointer.
- */
- if (c_flags & CALLOUT_LOCAL_ALLOC) {
- KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
+ KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
cc->cc_curr = NULL;
if (cc->cc_waiting) {
/*
@@ -586,13 +605,22 @@ skip:
* If the callout was scheduled for
* migration just cancel it.
*/
- if (cc_cme_migrating(cc))
+ if (cc_cme_migrating(cc)) {
cc_cme_cleanup(cc);
+
+ /*
+ * It should be asserted here that the callout is not
+ * destroyed, but that is not easy.
+ */
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ }
cc->cc_waiting = 0;
CC_UNLOCK(cc);
wakeup(&cc->cc_waiting);
CC_LOCK(cc);
} else if (cc_cme_migrating(cc)) {
+ KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
+ ("Migrating legacy callout %p", c));
#ifdef SMP
/*
* If the callout was scheduled for
@@ -605,23 +633,20 @@ skip:
cc_cme_cleanup(cc);
/*
- * Handle deferred callout stops
+ * It should be asserted here that the callout is not destroyed,
+ * but that is not easy.
+ *
+ * As first thing, handle deferred callout stops.
*/
if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
CTR3(KTR_CALLOUT,
"deferred cancelled %p func %p arg %p",
c, new_func, new_arg);
callout_cc_del(c, cc);
- goto nextc;
+ return;
}
-
c->c_flags &= ~CALLOUT_DFRMIGRATION;
- /*
- * It should be assert here that the
- * callout is not destroyed but that
- * is not easy.
- */
new_cc = callout_cpu_switch(c, cc, new_cpu);
callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
new_cpu);
@@ -631,10 +656,19 @@ skip:
panic("migration should not happen");
#endif
}
-#ifdef SMP
-nextc:
-#endif
- return (cc->cc_next);
+ /*
+ * If the current callout is locally allocated (from
+ * timeout(9)) then put it on the freelist.
+ *
+ * Note: we need to check the cached copy of c_flags because
+ * if it was not local, then it's not safe to deref the
+ * callout pointer.
+ */
+ KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
+ c->c_flags == CALLOUT_LOCAL_ALLOC,
+ ("corrupted callout"));
+ if (c_flags & CALLOUT_LOCAL_ALLOC)
+ callout_cc_del(c, cc);
}
/*
@@ -701,10 +735,12 @@ softclock(void *arg)
steps = 0;
}
} else {
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(bucket, c, c_links.tqe);
- c = softclock_call_cc(c, cc, &mpcalls,
+ softclock_call_cc(c, cc, &mpcalls,
&lockcalls, &gcalls);
steps = 0;
+ c = cc->cc_next;
}
}
}
@@ -1073,6 +1109,8 @@ again:
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
+ if (cc->cc_next == c)
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
c_links.tqe);
callout_cc_del(c, cc);
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index 3d1bd2bc..951a63c6 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -55,6 +55,8 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <net/vnet.h>
+
#include <machine/stdarg.h>
#include <vm/uma.h>
@@ -124,7 +126,7 @@ struct device {
char* desc; /**< driver specific description */
int busy; /**< count of calls to device_busy() */
device_state_t state; /**< current device state */
- u_int32_t devflags; /**< api level flags for device_get_flags() */
+ uint32_t devflags; /**< api level flags for device_get_flags() */
u_int flags; /**< internal device flags */
#define DF_ENABLED 0x01 /* device should be probed/attached */
#define DF_FIXEDCLASS 0x02 /* devclass specified at create time */
@@ -235,7 +237,7 @@ devclass_sysctl_init(devclass_t dc)
SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, dc->name,
CTLFLAG_RD, NULL, "");
SYSCTL_ADD_PROC(&dc->sysctl_ctx, SYSCTL_CHILDREN(dc->sysctl_tree),
- OID_AUTO, "%parent", CTLFLAG_RD,
+ OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD,
dc, DEVCLASS_SYSCTL_PARENT, devclass_sysctl_handler, "A",
"parent class");
}
@@ -300,23 +302,23 @@ device_sysctl_init(device_t dev)
dev->nameunit + strlen(dc->name),
CTLFLAG_RD, NULL, "");
SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree),
- OID_AUTO, "%desc", CTLFLAG_RD,
+ OID_AUTO, "%desc", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_DESC, device_sysctl_handler, "A",
"device description");
SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree),
- OID_AUTO, "%driver", CTLFLAG_RD,
+ OID_AUTO, "%driver", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_DRIVER, device_sysctl_handler, "A",
"device driver name");
SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree),
- OID_AUTO, "%location", CTLFLAG_RD,
+ OID_AUTO, "%location", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_LOCATION, device_sysctl_handler, "A",
"device location relative to parent");
SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree),
- OID_AUTO, "%pnpinfo", CTLFLAG_RD,
+ OID_AUTO, "%pnpinfo", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_PNPINFO, device_sysctl_handler, "A",
"device identification");
SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree),
- OID_AUTO, "%parent", CTLFLAG_RD,
+ OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_PARENT, device_sysctl_handler, "A",
"parent device");
#endif /* __rtems__ */
@@ -605,7 +607,7 @@ devctl_queue_data_f(char *data, int flags)
p = devsoftc.async_proc;
if (p != NULL) {
PROC_LOCK(p);
- psignal(p, SIGIO);
+ kern_psignal(p, SIGIO);
PROC_UNLOCK(p);
}
return;
@@ -742,25 +744,7 @@ bad:
static void
devadded(device_t dev)
{
- char *pnp = NULL;
- char *tmp = NULL;
-
- pnp = malloc(1024, M_BUS, M_NOWAIT);
- if (pnp == NULL)
- goto fail;
- tmp = malloc(1024, M_BUS, M_NOWAIT);
- if (tmp == NULL)
- goto fail;
- *pnp = '\0';
- bus_child_pnpinfo_str(dev, pnp, 1024);
- snprintf(tmp, 1024, "%s %s", device_get_nameunit(dev), pnp);
- devaddq("+", tmp, dev);
-fail:
- if (pnp != NULL)
- free(pnp, M_BUS);
- if (tmp != NULL)
- free(tmp, M_BUS);
- return;
+ devaddq("+", device_get_nameunit(dev), dev);
}
/*
@@ -770,25 +754,7 @@ fail:
static void
devremoved(device_t dev)
{
- char *pnp = NULL;
- char *tmp = NULL;
-
- pnp = malloc(1024, M_BUS, M_NOWAIT);
- if (pnp == NULL)
- goto fail;
- tmp = malloc(1024, M_BUS, M_NOWAIT);
- if (tmp == NULL)
- goto fail;
- *pnp = '\0';
- bus_child_pnpinfo_str(dev, pnp, 1024);
- snprintf(tmp, 1024, "%s %s", device_get_nameunit(dev), pnp);
- devaddq("-", tmp, dev);
-fail:
- if (pnp != NULL)
- free(pnp, M_BUS);
- if (tmp != NULL)
- free(tmp, M_BUS);
- return;
+ devaddq("-", device_get_nameunit(dev), dev);
}
/*
@@ -796,7 +762,7 @@ fail:
* the first time that no match happens, so we don't keep getting this
* message. Should that prove to be undesirable, we can change it.
* This is called when all drivers that can attach to a given bus
- * decline to accept this device. Other errrors may not be detected.
+ * decline to accept this device. Other errors may not be detected.
*/
static void
devnomatch(device_t dev)
@@ -1110,7 +1076,7 @@ devclass_driver_added(devclass_t dc, driver_t *driver)
* @param dc the devclass to edit
* @param driver the driver to register
*/
-static int
+int
devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp)
{
driverlink_t dl;
@@ -1243,7 +1209,7 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
* @param dc the devclass to edit
* @param driver the driver to unregister
*/
-static int
+int
devclass_delete_driver(devclass_t busclass, driver_t *driver)
{
devclass_t dc = devclass_find(driver->name);
@@ -1953,6 +1919,8 @@ device_delete_child(device_t dev, device_t child)
return (error);
if (child->devclass)
devclass_delete_device(child->devclass, child);
+ if (child->parent)
+ BUS_CHILD_DELETED(dev, child);
TAILQ_REMOVE(&dev->children, child, link);
TAILQ_REMOVE(&bus_data_devices, child, devlink);
kobj_delete((kobj_t) child, M_BUS);
@@ -2350,7 +2318,7 @@ device_get_desc(device_t dev)
/**
* @brief Return the device's flags
*/
-u_int32_t
+uint32_t
device_get_flags(device_t dev)
{
return (dev->devflags);
@@ -2466,7 +2434,7 @@ device_set_desc_copy(device_t dev, const char* desc)
* @brief Set the device's flags
*/
void
-device_set_flags(device_t dev, u_int32_t flags)
+device_set_flags(device_t dev, uint32_t flags)
{
dev->devflags = flags;
}
@@ -2502,6 +2470,35 @@ device_set_softc(device_t dev, void *softc)
}
/**
+ * @brief Free claimed softc
+ *
+ * Most drivers do not need to use this since the softc is freed
+ * automatically when the driver is detached.
+ */
+void
+device_free_softc(void *softc)
+{
+ free(softc, M_BUS_SC);
+}
+
+/**
+ * @brief Claim softc
+ *
+ * This function can be used to let the driver free the automatically
+ * allocated softc using "device_free_softc()". This function is
+ * useful when the driver is refcounting the softc and the softc
+ * cannot be freed when the "device_detach" method is called.
+ */
+void
+device_claim_softc(device_t dev)
+{
+ if (dev->softc)
+ dev->flags |= DF_EXTERNALSOFTC;
+ else
+ dev->flags &= ~DF_EXTERNALSOFTC;
+}
+
+/**
* @brief Get the device's ivars field
*
* The ivars field is used by the parent device to store per-device
@@ -2790,7 +2787,11 @@ device_probe_and_attach(device_t dev)
return (0);
else if (error != 0)
return (error);
- return (device_attach(dev));
+
+ CURVNET_SET_QUIET(vnet0);
+ error = device_attach(dev);
+ CURVNET_RESTORE();
+ return error;
}
/**
@@ -3061,6 +3062,7 @@ resource_list_add(struct resource_list *rl, int type, int rid,
rle->type = type;
rle->rid = rid;
rle->res = NULL;
+ rle->flags = 0;
}
if (rle->res)
@@ -3073,6 +3075,58 @@ resource_list_add(struct resource_list *rl, int type, int rid,
}
/**
+ * @brief Determine if a resource entry is busy.
+ *
+ * Returns true if a resource entry is busy meaning that it has an
+ * associated resource that is not an unallocated "reserved" resource.
+ *
+ * @param rl the resource list to search
+ * @param type the resource entry type (e.g. SYS_RES_MEMORY)
+ * @param rid the resource identifier
+ *
+ * @returns Non-zero if the entry is busy, zero otherwise.
+ */
+int
+resource_list_busy(struct resource_list *rl, int type, int rid)
+{
+ struct resource_list_entry *rle;
+
+ rle = resource_list_find(rl, type, rid);
+ if (rle == NULL || rle->res == NULL)
+ return (0);
+ if ((rle->flags & (RLE_RESERVED | RLE_ALLOCATED)) == RLE_RESERVED) {
+ KASSERT(!(rman_get_flags(rle->res) & RF_ACTIVE),
+ ("reserved resource is active"));
+ return (0);
+ }
+ return (1);
+}
+
+/**
+ * @brief Determine if a resource entry is reserved.
+ *
+ * Returns true if a resource entry is reserved meaning that it has an
+ * associated "reserved" resource. The resource can either be
+ * allocated or unallocated.
+ *
+ * @param rl the resource list to search
+ * @param type the resource entry type (e.g. SYS_RES_MEMORY)
+ * @param rid the resource identifier
+ *
+ * @returns Non-zero if the entry is reserved, zero otherwise.
+ */
+int
+resource_list_reserved(struct resource_list *rl, int type, int rid)
+{
+ struct resource_list_entry *rle;
+
+ rle = resource_list_find(rl, type, rid);
+ if (rle != NULL && rle->flags & RLE_RESERVED)
+ return (1);
+ return (0);
+}
+
+/**
* @brief Find a resource entry by type and rid.
*
* @param rl the resource list to search
@@ -3115,6 +3169,66 @@ resource_list_delete(struct resource_list *rl, int type, int rid)
}
/**
+ * @brief Allocate a reserved resource
+ *
+ * This can be used by busses to force the allocation of resources
+ * that are always active in the system even if they are not allocated
+ * by a driver (e.g. PCI BARs). This function is usually called when
+ * adding a new child to the bus. The resource is allocated from the
+ * parent bus when it is reserved. The resource list entry is marked
+ * with RLE_RESERVED to note that it is a reserved resource.
+ *
+ * Subsequent attempts to allocate the resource with
+ * resource_list_alloc() will succeed the first time and will set
+ * RLE_ALLOCATED to note that it has been allocated. When a reserved
+ * resource that has been allocated is released with
+ * resource_list_release() the resource RLE_ALLOCATED is cleared, but
+ * the actual resource remains allocated. The resource can be released to
+ * the parent bus by calling resource_list_unreserve().
+ *
+ * @param rl the resource list to allocate from
+ * @param bus the parent device of @p child
+ * @param child the device for which the resource is being reserved
+ * @param type the type of resource to allocate
+ * @param rid a pointer to the resource identifier
+ * @param start hint at the start of the resource range - pass
+ * @c 0UL for any start address
+ * @param end hint at the end of the resource range - pass
+ * @c ~0UL for any end address
+ * @param count hint at the size of range required - pass @c 1
+ * for any size
+ * @param flags any extra flags to control the resource
+ * allocation - see @c RF_XXX flags in
+ * <sys/rman.h> for details
+ *
+ * @returns the resource which was allocated or @c NULL if no
+ * resource could be allocated
+ */
+struct resource *
+resource_list_reserve(struct resource_list *rl, device_t bus, device_t child,
+ int type, int *rid, u_long start, u_long end, u_long count, u_int flags)
+{
+ struct resource_list_entry *rle = NULL;
+ int passthrough = (device_get_parent(child) != bus);
+ struct resource *r;
+
+ if (passthrough)
+ panic(
+ "resource_list_reserve() should only be called for direct children");
+ if (flags & RF_ACTIVE)
+ panic(
+ "resource_list_reserve() should only reserve inactive resources");
+
+ r = resource_list_alloc(rl, bus, child, type, rid, start, end, count,
+ flags);
+ if (r != NULL) {
+ rle = resource_list_find(rl, type, *rid);
+ rle->flags |= RLE_RESERVED;
+ }
+ return (r);
+}
+
+/**
* @brief Helper function for implementing BUS_ALLOC_RESOURCE()
*
* Implement BUS_ALLOC_RESOURCE() by looking up a resource from the list
@@ -3165,8 +3279,19 @@ resource_list_alloc(struct resource_list *rl, device_t bus, device_t child,
if (!rle)
return (NULL); /* no resource of that type/rid */
- if (rle->res)
+ if (rle->res) {
+ if (rle->flags & RLE_RESERVED) {
+ if (rle->flags & RLE_ALLOCATED)
+ return (NULL);
+ if ((flags & RF_ACTIVE) &&
+ bus_activate_resource(child, type, *rid,
+ rle->res) != 0)
+ return (NULL);
+ rle->flags |= RLE_ALLOCATED;
+ return (rle->res);
+ }
panic("resource_list_alloc: resource entry is busy");
+ }
if (isdefault) {
start = rle->start;
@@ -3198,7 +3323,7 @@ resource_list_alloc(struct resource_list *rl, device_t bus, device_t child,
* @param rl the resource list which was allocated from
* @param bus the parent device of @p child
* @param child the device which is requesting a release
- * @param type the type of resource to allocate
+ * @param type the type of resource to release
* @param rid the resource identifier
* @param res the resource to release
*
@@ -3225,6 +3350,19 @@ resource_list_release(struct resource_list *rl, device_t bus, device_t child,
panic("resource_list_release: can't find resource");
if (!rle->res)
panic("resource_list_release: resource entry is not busy");
+ if (rle->flags & RLE_RESERVED) {
+ if (rle->flags & RLE_ALLOCATED) {
+ if (rman_get_flags(res) & RF_ACTIVE) {
+ error = bus_deactivate_resource(child, type,
+ rid, res);
+ if (error)
+ return (error);
+ }
+ rle->flags &= ~RLE_ALLOCATED;
+ return (0);
+ }
+ return (EINVAL);
+ }
error = BUS_RELEASE_RESOURCE(device_get_parent(bus), child,
type, rid, res);
@@ -3236,6 +3374,45 @@ resource_list_release(struct resource_list *rl, device_t bus, device_t child,
}
/**
+ * @brief Fully release a reserved resource
+ *
+ * Fully releases a resource reserved via resource_list_reserve().
+ *
+ * @param rl the resource list which was allocated from
+ * @param bus the parent device of @p child
+ * @param child the device whose reserved resource is being released
+ * @param type the type of resource to release
+ * @param rid the resource identifier
+ * @param res the resource to release
+ *
+ * @retval 0 success
+ * @retval non-zero a standard unix error code indicating what
+ * error condition prevented the operation
+ */
+int
+resource_list_unreserve(struct resource_list *rl, device_t bus, device_t child,
+ int type, int rid)
+{
+ struct resource_list_entry *rle = NULL;
+ int passthrough = (device_get_parent(child) != bus);
+
+ if (passthrough)
+ panic(
+ "resource_list_unreserve() should only be called for direct children");
+
+ rle = resource_list_find(rl, type, rid);
+
+ if (!rle)
+ panic("resource_list_unreserve: can't find resource");
+ if (!(rle->flags & RLE_RESERVED))
+ return (EINVAL);
+ if (rle->flags & RLE_ALLOCATED)
+ return (EBUSY);
+ rle->flags &= ~RLE_RESERVED;
+ return (resource_list_release(rl, bus, child, type, rid, rle->res));
+}
+
+/**
* @brief Print a description of resources in a resource list
*
* Print all resources of a specified type, for use in BUS_PRINT_CHILD().
@@ -3331,7 +3508,7 @@ bus_generic_probe(device_t dev)
* on early-pass busses during BUS_NEW_PASS().
*/
if (dl->pass > bus_current_pass)
- continue;
+ continue;
DEVICE_IDENTIFY(dl->driver, dev);
}
@@ -3864,6 +4041,10 @@ bus_generic_rl_release_resource(device_t dev, device_t child, int type,
{
struct resource_list * rl = NULL;
+ if (device_get_parent(child) != dev)
+ return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
+ type, rid, r));
+
rl = BUS_GET_RESOURCE_LIST(dev, child);
if (!rl)
return (EINVAL);
@@ -3884,6 +4065,10 @@ bus_generic_rl_alloc_resource(device_t dev, device_t child, int type,
{
struct resource_list * rl = NULL;
+ if (device_get_parent(child) != dev)
+ return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
+ type, rid, start, end, count, flags));
+
rl = BUS_GET_RESOURCE_LIST(dev, child);
if (!rl)
return (NULL);
@@ -4038,15 +4223,6 @@ bus_setup_intr(device_t dev, struct resource *r, int flags,
return (error);
if (handler != NULL && !(flags & INTR_MPSAFE))
device_printf(dev, "[GIANT-LOCKED]\n");
- if (bootverbose && (flags & INTR_MPSAFE))
- device_printf(dev, "[MPSAFE]\n");
- if (filter != NULL) {
- if (handler == NULL)
- device_printf(dev, "[FILTER]\n");
- else
- device_printf(dev, "[FILTER+ITHREAD]\n");
- } else
- device_printf(dev, "[ITHREAD]\n");
return (0);
}
diff --git a/freebsd/sys/kern/subr_hash.c b/freebsd/sys/kern/subr_hash.c
new file mode 100644
index 00000000..e526a866
--- /dev/null
+++ b/freebsd/sys/kern/subr_hash.c
@@ -0,0 +1,130 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+/*
+ * General routine to allocate a hash table with control of memory flags.
+ */
+void *
+hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask,
+ int flags)
+{
+ long hashsize;
+ LIST_HEAD(generic, generic) *hashtbl;
+ int i;
+
+ KASSERT(elements > 0, ("%s: bad elements", __func__));
+ /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */
+ KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT),
+ ("Bad flags (0x%x) passed to hashinit_flags", flags));
+
+ for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
+ continue;
+ hashsize >>= 1;
+
+ if (flags & HASH_NOWAIT)
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl),
+ type, M_NOWAIT);
+ else
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl),
+ type, M_WAITOK);
+
+ if (hashtbl != NULL) {
+ for (i = 0; i < hashsize; i++)
+ LIST_INIT(&hashtbl[i]);
+ *hashmask = hashsize - 1;
+ }
+ return (hashtbl);
+}
+
+/*
+ * Allocate and initialize a hash table with default flag: may sleep.
+ */
+void *
+hashinit(int elements, struct malloc_type *type, u_long *hashmask)
+{
+
+ return (hashinit_flags(elements, type, hashmask, HASH_WAITOK));
+}
+
+void
+hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
+{
+ LIST_HEAD(generic, generic) *hashtbl, *hp;
+
+ hashtbl = vhashtbl;
+ for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
+ KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
+ free(hashtbl, type);
+}
+
+static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531,
+ 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143,
+ 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 };
+#define NPRIMES (sizeof(primes) / sizeof(primes[0]))
+
+/*
+ * General routine to allocate a prime number sized hash table.
+ */
+void *
+phashinit(int elements, struct malloc_type *type, u_long *nentries)
+{
+ long hashsize;
+ LIST_HEAD(generic, generic) *hashtbl;
+ int i;
+
+ KASSERT(elements > 0, ("%s: bad elements", __func__));
+ for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
+ i++;
+ if (i == NPRIMES)
+ break;
+ hashsize = primes[i];
+ }
+ hashsize = primes[i - 1];
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
+ for (i = 0; i < hashsize; i++)
+ LIST_INIT(&hashtbl[i]);
+ *nentries = hashsize;
+ return (hashtbl);
+}
diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c
index 9cfe868b..5666f274 100644
--- a/freebsd/sys/kern/subr_kobj.c
+++ b/freebsd/sys/kern/subr_kobj.c
@@ -66,7 +66,7 @@ static int kobj_next_id = 1;
#define KOBJ_UNLOCK() mtx_unlock(&kobj_mtx)
#define KOBJ_ASSERT(what) mtx_assert(&kobj_mtx, what);
-SYSCTL_UINT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD,
+SYSCTL_INT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD,
&kobj_next_id, 0, "");
static void
diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c
index 592bd083..f2aa7026 100644
--- a/freebsd/sys/kern/subr_module.c
+++ b/freebsd/sys/kern/subr_module.c
@@ -37,7 +37,8 @@ __FBSDID("$FreeBSD$");
* Preloaded module support
*/
-caddr_t preload_metadata;
+vm_offset_t preload_addr_relocate = 0;
+caddr_t preload_metadata;
/*
* Search for the preloaded module (name)
@@ -46,24 +47,24 @@ caddr_t
preload_search_by_name(const char *name)
{
caddr_t curp;
- u_int32_t *hdr;
+ uint32_t *hdr;
int next;
if (preload_metadata != NULL) {
curp = preload_metadata;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
if (hdr[0] == 0 && hdr[1] == 0)
break;
/* Search for a MODINFO_NAME field */
if ((hdr[0] == MODINFO_NAME) &&
- !strcmp(name, curp + sizeof(u_int32_t) * 2))
+ !strcmp(name, curp + sizeof(uint32_t) * 2))
return(curp);
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
@@ -78,7 +79,7 @@ caddr_t
preload_search_by_type(const char *type)
{
caddr_t curp, lname;
- u_int32_t *hdr;
+ uint32_t *hdr;
int next;
if (preload_metadata != NULL) {
@@ -86,7 +87,7 @@ preload_search_by_type(const char *type)
curp = preload_metadata;
lname = NULL;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
if (hdr[0] == 0 && hdr[1] == 0)
break;
@@ -96,11 +97,11 @@ preload_search_by_type(const char *type)
/* Search for a MODINFO_TYPE field */
if ((hdr[0] == MODINFO_TYPE) &&
- !strcmp(type, curp + sizeof(u_int32_t) * 2))
+ !strcmp(type, curp + sizeof(uint32_t) * 2))
return(lname);
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
@@ -115,7 +116,7 @@ caddr_t
preload_search_next_name(caddr_t base)
{
caddr_t curp;
- u_int32_t *hdr;
+ uint32_t *hdr;
int next;
if (preload_metadata != NULL) {
@@ -124,15 +125,15 @@ preload_search_next_name(caddr_t base)
if (base) {
/* skip to next field */
curp = base;
- hdr = (u_int32_t *)curp;
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ hdr = (uint32_t *)curp;
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
} else
curp = preload_metadata;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
if (hdr[0] == 0 && hdr[1] == 0)
break;
@@ -141,7 +142,7 @@ preload_search_next_name(caddr_t base)
return curp;
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
@@ -157,13 +158,13 @@ caddr_t
preload_search_info(caddr_t mod, int inf)
{
caddr_t curp;
- u_int32_t *hdr;
- u_int32_t type = 0;
+ uint32_t *hdr;
+ uint32_t type = 0;
int next;
curp = mod;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
/* end of module data? */
if (hdr[0] == 0 && hdr[1] == 0)
break;
@@ -184,10 +185,10 @@ preload_search_info(caddr_t mod, int inf)
* data.
*/
if (hdr[0] == inf)
- return(curp + (sizeof(u_int32_t) * 2));
+ return(curp + (sizeof(uint32_t) * 2));
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
@@ -201,7 +202,7 @@ void
preload_delete_name(const char *name)
{
caddr_t curp;
- u_int32_t *hdr;
+ uint32_t *hdr;
int next;
int clearing;
@@ -210,13 +211,13 @@ preload_delete_name(const char *name)
clearing = 0;
curp = preload_metadata;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
if (hdr[0] == 0 && hdr[1] == 0)
break;
/* Search for a MODINFO_NAME field */
if (hdr[0] == MODINFO_NAME) {
- if (!strcmp(name, curp + sizeof(u_int32_t) * 2))
+ if (!strcmp(name, curp + sizeof(uint32_t) * 2))
clearing = 1; /* got it, start clearing */
else if (clearing)
clearing = 0; /* at next one now.. better stop */
@@ -225,19 +226,41 @@ preload_delete_name(const char *name)
hdr[0] = MODINFO_EMPTY;
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
}
}
+void *
+preload_fetch_addr(caddr_t mod)
+{
+ caddr_t *mdp;
+
+ mdp = (caddr_t *)preload_search_info(mod, MODINFO_ADDR);
+ if (mdp == NULL)
+ return (NULL);
+ return (*mdp + preload_addr_relocate);
+}
+
+size_t
+preload_fetch_size(caddr_t mod)
+{
+ size_t *mdp;
+
+ mdp = (size_t *)preload_search_info(mod, MODINFO_SIZE);
+ if (mdp == NULL)
+ return (0);
+ return (*mdp);
+}
+
/* Called from locore on i386. Convert physical pointers to kvm. Sigh. */
void
preload_bootstrap_relocate(vm_offset_t offset)
{
caddr_t curp;
- u_int32_t *hdr;
+ uint32_t *hdr;
vm_offset_t *ptr;
int next;
@@ -245,7 +268,7 @@ preload_bootstrap_relocate(vm_offset_t offset)
curp = preload_metadata;
for (;;) {
- hdr = (u_int32_t *)curp;
+ hdr = (uint32_t *)curp;
if (hdr[0] == 0 && hdr[1] == 0)
break;
@@ -254,14 +277,14 @@ preload_bootstrap_relocate(vm_offset_t offset)
case MODINFO_ADDR:
case MODINFO_METADATA|MODINFOMD_SSYM:
case MODINFO_METADATA|MODINFOMD_ESYM:
- ptr = (vm_offset_t *)(curp + (sizeof(u_int32_t) * 2));
+ ptr = (vm_offset_t *)(curp + (sizeof(uint32_t) * 2));
*ptr += offset;
break;
}
/* The rest is beyond us for now */
/* skip to next field */
- next = sizeof(u_int32_t) * 2 + hdr[1];
+ next = sizeof(uint32_t) * 2 + hdr[1];
next = roundup(next, sizeof(u_long));
curp += next;
}
diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c
index 5480201c..668201a9 100644
--- a/freebsd/sys/kern/subr_rman.c
+++ b/freebsd/sys/kern/subr_rman.c
@@ -1087,11 +1087,21 @@ found:
return (error);
}
-SYSCTL_NODE(_hw_bus, OID_AUTO, rman, CTLFLAG_RD, sysctl_rman,
+static SYSCTL_NODE(_hw_bus, OID_AUTO, rman, CTLFLAG_RD, sysctl_rman,
"kernel resource manager");
#ifdef DDB
static void
+dump_rman_header(struct rman *rm)
+{
+
+ if (db_pager_quit)
+ return;
+ db_printf("rman %p: %s (0x%lx-0x%lx full range)\n",
+ rm, rm->rm_descr, rm->rm_start, rm->rm_end);
+}
+
+static void
dump_rman(struct rman *rm)
{
struct resource_i *r;
@@ -1099,8 +1109,6 @@ dump_rman(struct rman *rm)
if (db_pager_quit)
return;
- db_printf("rman: %s\n", rm->rm_descr);
- db_printf(" 0x%lx-0x%lx (full range)\n", rm->rm_start, rm->rm_end);
TAILQ_FOREACH(r, &rm->rm_list, r_link) {
if (r->r_dev != NULL) {
devname = device_get_nameunit(r->r_dev);
@@ -1121,16 +1129,29 @@ dump_rman(struct rman *rm)
DB_SHOW_COMMAND(rman, db_show_rman)
{
- if (have_addr)
+ if (have_addr) {
+ dump_rman_header((struct rman *)addr);
dump_rman((struct rman *)addr);
+ }
+}
+
+DB_SHOW_COMMAND(rmans, db_show_rmans)
+{
+ struct rman *rm;
+
+ TAILQ_FOREACH(rm, &rman_head, rm_link) {
+ dump_rman_header(rm);
+ }
}
DB_SHOW_ALL_COMMAND(rman, db_show_all_rman)
{
struct rman *rm;
- TAILQ_FOREACH(rm, &rman_head, rm_link)
+ TAILQ_FOREACH(rm, &rman_head, rm_link) {
+ dump_rman_header(rm);
dump_rman(rm);
+ }
}
DB_SHOW_ALIAS(allrman, db_show_all_rman);
#endif
diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c
index a92c09c1..9ea11990 100644
--- a/freebsd/sys/kern/subr_sbuf.c
+++ b/freebsd/sys/kern/subr_sbuf.c
@@ -52,12 +52,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
-struct sbuf_drain {
- sbuf_drain_func *s_func; /* drain function */
- void *s_arg; /* user-supplied drain argument */
- int s_error; /* current error code */
-};
-
#ifdef _KERNEL
static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK)
@@ -74,10 +68,10 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_ISDYNAMIC(s) ((s)->s_flags & SBUF_DYNAMIC)
#define SBUF_ISDYNSTRUCT(s) ((s)->s_flags & SBUF_DYNSTRUCT)
#define SBUF_ISFINISHED(s) ((s)->s_flags & SBUF_FINISHED)
-#define SBUF_HASOVERFLOWED(s) ((s)->s_flags & SBUF_OVERFLOWED)
#define SBUF_HASROOM(s) ((s)->s_len < (s)->s_size - 1)
-#define SBUF_FREESPACE(s) ((s)->s_size - (s)->s_len - 1)
+#define SBUF_FREESPACE(s) ((s)->s_size - ((s)->s_len + 1))
#define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND)
+#define SBUF_ISSECTION(s) ((s)->s_flags & SBUF_INSECTION)
/*
* Set / clear flags
@@ -86,8 +80,14 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_CLEARFLAG(s, f) do { (s)->s_flags &= ~(f); } while (0)
#define SBUF_MINEXTENDSIZE 16 /* Should be power of 2. */
+
+#ifdef PAGE_SIZE
#define SBUF_MAXEXTENDSIZE PAGE_SIZE
#define SBUF_MAXEXTENDINCR PAGE_SIZE
+#else
+#define SBUF_MAXEXTENDSIZE 4096
+#define SBUF_MAXEXTENDINCR 4096
+#endif
/*
* Debugging support
@@ -103,7 +103,8 @@ _assert_sbuf_integrity(const char *fun, struct sbuf *s)
KASSERT(s->s_buf != NULL,
("%s called with uninitialized or corrupt sbuf", fun));
KASSERT(s->s_len < s->s_size,
- ("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size));
+ ("wrote past end of sbuf (%jd >= %jd)",
+ (intmax_t)s->s_len, (intmax_t)s->s_size));
}
static void
@@ -146,7 +147,6 @@ sbuf_extendsize(int size)
return (newsize);
}
-
/*
* Extend an sbuf.
*/
@@ -162,7 +162,7 @@ sbuf_extend(struct sbuf *s, int addlen)
newbuf = SBMALLOC(newsize);
if (newbuf == NULL)
return (-1);
- bcopy(s->s_buf, newbuf, s->s_size);
+ memcpy(newbuf, s->s_buf, s->s_size);
if (SBUF_ISDYNAMIC(s))
SBFREE(s->s_buf);
else
@@ -173,6 +173,38 @@ sbuf_extend(struct sbuf *s, int addlen)
}
/*
+ * Initialize the internals of an sbuf.
+ * If buf is non-NULL, it points to a static or already-allocated string
+ * big enough to hold at least length characters.
+ */
+static struct sbuf *
+sbuf_newbuf(struct sbuf *s, char *buf, int length, int flags)
+{
+
+ memset(s, 0, sizeof(*s));
+ s->s_flags = flags;
+ s->s_size = length;
+ s->s_buf = buf;
+
+ if ((s->s_flags & SBUF_AUTOEXTEND) == 0) {
+ KASSERT(s->s_size >= 0,
+ ("attempt to create a too small sbuf"));
+ }
+
+ if (s->s_buf != NULL)
+ return (s);
+
+ if ((flags & SBUF_AUTOEXTEND) != 0)
+ s->s_size = sbuf_extendsize(s->s_size);
+
+ s->s_buf = SBMALLOC(s->s_size);
+ if (s->s_buf == NULL)
+ return (NULL);
+ SBUF_SETFLAG(s, SBUF_DYNAMIC);
+ return (s);
+}
+
+/*
* Initialize an sbuf.
* If buf is non-NULL, it points to a static or already-allocated string
* big enough to hold at least length characters.
@@ -187,31 +219,17 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags)
("%s called with invalid flags", __func__));
flags &= SBUF_USRFLAGMSK;
- if (s == NULL) {
- s = SBMALLOC(sizeof(*s));
- if (s == NULL)
- return (NULL);
- bzero(s, sizeof(*s));
- s->s_flags = flags;
- SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
- } else {
- bzero(s, sizeof(*s));
- s->s_flags = flags;
- }
- s->s_size = length;
- if (buf != NULL) {
- s->s_buf = buf;
- return (s);
- }
- if ((flags & SBUF_AUTOEXTEND) != 0)
- s->s_size = sbuf_extendsize(s->s_size);
- s->s_buf = SBMALLOC(s->s_size);
- if (s->s_buf == NULL) {
- if (SBUF_ISDYNSTRUCT(s))
- SBFREE(s);
+ if (s != NULL)
+ return (sbuf_newbuf(s, buf, length, flags));
+
+ s = SBMALLOC(sizeof(*s));
+ if (s == NULL)
+ return (NULL);
+ if (sbuf_newbuf(s, buf, length, flags) == NULL) {
+ SBFREE(s);
return (NULL);
}
- SBUF_SETFLAG(s, SBUF_DYNAMIC);
+ SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
return (s);
}
@@ -239,6 +257,8 @@ sbuf_uionew(struct sbuf *s, struct uio *uio, int *error)
return (NULL);
}
s->s_len = s->s_size - 1;
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len = s->s_size - 1;
*error = 0;
return (s);
}
@@ -255,10 +275,9 @@ sbuf_clear(struct sbuf *s)
/* don't care if it's finished or not */
SBUF_CLEARFLAG(s, SBUF_FINISHED);
- SBUF_CLEARFLAG(s, SBUF_OVERFLOWED);
- if (s->s_drain != NULL)
- s->s_drain->s_error = 0;
+ s->s_error = 0;
s->s_len = 0;
+ s->s_sect_len = 0;
}
/*
@@ -266,16 +285,19 @@ sbuf_clear(struct sbuf *s)
* Effectively truncates the sbuf at the new position.
*/
int
-sbuf_setpos(struct sbuf *s, int pos)
+sbuf_setpos(struct sbuf *s, ssize_t pos)
{
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
KASSERT(pos >= 0,
- ("attempt to seek to a negative position (%d)", pos));
+ ("attempt to seek to a negative position (%jd)", (intmax_t)pos));
KASSERT(pos < s->s_size,
- ("attempt to seek past end of sbuf (%d >= %d)", pos, s->s_size));
+ ("attempt to seek past end of sbuf (%jd >= %jd)",
+ (intmax_t)pos, (intmax_t)s->s_size));
+ KASSERT(!SBUF_ISSECTION(s),
+ ("attempt to seek when in a section"));
if (pos < 0 || pos > s->s_len)
return (-1);
@@ -293,22 +315,10 @@ sbuf_set_drain(struct sbuf *s, sbuf_drain_func *func, void *ctx)
assert_sbuf_state(s, 0);
assert_sbuf_integrity(s);
- KASSERT((s->s_drain != NULL && func == s->s_drain->s_func) ||
- s->s_len == 0,
+ KASSERT(func == s->s_drain_func || s->s_len == 0,
("Cannot change drain to %p on non-empty sbuf %p", func, s));
- if (func == NULL) {
- SBFREE(s->s_drain);
- s->s_drain = NULL;
- return;
- }
- if (s->s_drain == NULL) {
- s->s_drain = SBMALLOC(sizeof(*s->s_drain));
- if (s->s_drain == NULL)
- return;
- }
- s->s_drain->s_func = func;
- s->s_drain->s_arg = ctx;
- s->s_drain->s_error = 0;
+ s->s_drain_func = func;
+ s->s_drain_arg = ctx;
}
/*
@@ -320,11 +330,11 @@ sbuf_drain(struct sbuf *s)
int len;
KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s));
- len = s->s_drain->s_func(s->s_drain->s_arg, s->s_buf, s->s_len);
+ KASSERT(s->s_error == 0, ("Called %s with error on %p", __func__, s));
+ len = s->s_drain_func(s->s_drain_arg, s->s_buf, s->s_len);
if (len < 0) {
- s->s_drain->s_error = -len;
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- return (s->s_drain->s_error);
+ s->s_error = -len;
+ return (s->s_error);
}
KASSERT(len > 0 && len <= s->s_len,
("Bad drain amount %d for sbuf %p", len, s));
@@ -349,39 +359,29 @@ sbuf_drain(struct sbuf *s)
* buffer and marking overflow.
*/
static void
-sbuf_put_byte(int c, struct sbuf *s)
+sbuf_put_byte(struct sbuf *s, int c)
{
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return;
if (SBUF_FREESPACE(s) <= 0) {
- /*
+ /*
* If there is a drain, use it, otherwise extend the
* buffer.
*/
- if (s->s_drain != NULL)
+ if (s->s_drain_func != NULL)
(void)sbuf_drain(s);
else if (sbuf_extend(s, 1) < 0)
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- if (SBUF_HASOVERFLOWED(s))
+ s->s_error = ENOMEM;
+ if (s->s_error != 0)
return;
}
s->s_buf[s->s_len++] = c;
-}
-
-/*
- * Append a non-NUL character to an sbuf. This prototype signature is
- * suitable for use with kvprintf(9).
- */
-static void
-sbuf_putc_func(int c, void *arg)
-{
-
- if (c != '\0')
- sbuf_put_byte(c, arg);
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len++;
}
/*
@@ -396,13 +396,13 @@ sbuf_bcat(struct sbuf *s, const void *buf, size_t len)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
for (; str < end; str++) {
- sbuf_put_byte(*str, s);
- if (SBUF_HASOVERFLOWED(s))
+ sbuf_put_byte(s, *str);
+ if (s->s_error != 0)
return (-1);
- }
+ }
return (0);
}
@@ -416,10 +416,10 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- KASSERT(s->s_drain == NULL,
+ KASSERT(s->s_drain_func == NULL,
("Nonsensical copyin to sbuf %p with a drain", s));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
if (len == 0)
return (0);
@@ -460,12 +460,12 @@ sbuf_cat(struct sbuf *s, const char *str)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
while (*str != '\0') {
- sbuf_put_byte(*str++, s);
- if (SBUF_HASOVERFLOWED(s))
+ sbuf_put_byte(s, *str++);
+ if (s->s_error != 0)
return (-1);
}
return (0);
@@ -482,10 +482,10 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- KASSERT(s->s_drain == NULL,
+ KASSERT(s->s_drain_func == NULL,
("Nonsensical copyin to sbuf %p with a drain", s));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
if (len == 0)
@@ -497,10 +497,12 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len)
}
switch (copyinstr(uaddr, s->s_buf + s->s_len, len + 1, &done)) {
case ENAMETOOLONG:
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
+ s->s_error = ENOMEM;
/* fall through */
case 0:
s->s_len += done - 1;
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len += done - 1;
break;
default:
return (-1); /* XXX */
@@ -528,6 +530,19 @@ sbuf_cpy(struct sbuf *s, const char *str)
* Format the given argument list and append the resulting string to an sbuf.
*/
#if defined(_KERNEL) && !defined(__rtems__)
+
+/*
+ * Append a non-NUL character to an sbuf. This prototype signature is
+ * suitable for use with kvprintf(9).
+ */
+static void
+sbuf_putc_func(int c, void *arg)
+{
+
+ if (c != '\0')
+ sbuf_put_byte(arg, c);
+}
+
int
sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
{
@@ -539,7 +554,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
("%s called with a NULL format string", __func__));
(void)kvprintf(fmt, sbuf_putc_func, s, 10, ap);
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
return (0);
}
@@ -556,7 +571,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
KASSERT(fmt != NULL,
("%s called with a NULL format string", __func__));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
/*
@@ -580,7 +595,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
if (SBUF_FREESPACE(s) >= len)
break;
/* Cannot print with the current available space. */
- if (s->s_drain != NULL && s->s_len > 0)
+ if (s->s_drain_func != NULL && s->s_len > 0)
error = sbuf_drain(s);
else
error = sbuf_extend(s, len - SBUF_FREESPACE(s));
@@ -598,13 +613,15 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
if (SBUF_FREESPACE(s) < len)
len = SBUF_FREESPACE(s);
s->s_len += len;
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len += len;
if (!SBUF_HASROOM(s) && !SBUF_CANEXTEND(s))
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
+ s->s_error = ENOMEM;
KASSERT(s->s_len < s->s_size,
("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
return (0);
}
@@ -632,8 +649,8 @@ int
sbuf_putc(struct sbuf *s, int c)
{
- sbuf_putc_func(c, s);
- if (SBUF_HASOVERFLOWED(s))
+ sbuf_put_byte(s, c);
+ if (s->s_error != 0)
return (-1);
return (0);
}
@@ -647,26 +664,29 @@ sbuf_trim(struct sbuf *s)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- KASSERT(s->s_drain == NULL,
+ KASSERT(s->s_drain_func == NULL,
("%s makes no sense on sbuf %p with drain", __func__, s));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
- while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1]))
+ while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1])) {
--s->s_len;
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len--;
+ }
return (0);
}
/*
- * Check if an sbuf overflowed
+ * Check if an sbuf has an error.
*/
int
-sbuf_overflowed(struct sbuf *s)
+sbuf_error(const struct sbuf *s)
{
- return (SBUF_HASOVERFLOWED(s));
+ return (s->s_error);
}
/*
@@ -675,28 +695,23 @@ sbuf_overflowed(struct sbuf *s)
int
sbuf_finish(struct sbuf *s)
{
- int error = 0;
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
- if (s->s_drain != NULL) {
- error = s->s_drain->s_error;
- while (s->s_len > 0 && error == 0)
- error = sbuf_drain(s);
- } else if (SBUF_HASOVERFLOWED(s))
- error = ENOMEM;
+ if (s->s_drain_func != NULL) {
+ while (s->s_len > 0 && s->s_error == 0)
+ s->s_error = sbuf_drain(s);
+ }
s->s_buf[s->s_len] = '\0';
- SBUF_CLEARFLAG(s, SBUF_OVERFLOWED);
SBUF_SETFLAG(s, SBUF_FINISHED);
#ifdef _KERNEL
- return (error);
+ return (s->s_error);
#else
- /*XXX*/if (error) {
- errno = error;
+ errno = s->s_error;
+ if (s->s_error)
return (-1);
- } else
- return (0);
+ return (0);
#endif
}
@@ -709,7 +724,7 @@ sbuf_data(struct sbuf *s)
assert_sbuf_integrity(s);
assert_sbuf_state(s, SBUF_FINISHED);
- KASSERT(s->s_drain == NULL,
+ KASSERT(s->s_drain_func == NULL,
("%s makes no sense on sbuf %p with drain", __func__, s));
return (s->s_buf);
@@ -718,16 +733,16 @@ sbuf_data(struct sbuf *s)
/*
* Return the length of the sbuf data.
*/
-int
+ssize_t
sbuf_len(struct sbuf *s)
{
assert_sbuf_integrity(s);
/* don't care if it's finished or not */
- KASSERT(s->s_drain == NULL,
+ KASSERT(s->s_drain_func == NULL,
("%s makes no sense on sbuf %p with drain", __func__, s));
- if (SBUF_HASOVERFLOWED(s))
+ if (s->s_error != 0)
return (-1);
return (s->s_len);
}
@@ -745,10 +760,8 @@ sbuf_delete(struct sbuf *s)
if (SBUF_ISDYNAMIC(s))
SBFREE(s->s_buf);
- if (s->s_drain != NULL)
- SBFREE(s->s_drain);
isdyn = SBUF_ISDYNSTRUCT(s);
- bzero(s, sizeof(*s));
+ memset(s, 0, sizeof(*s));
if (isdyn)
SBFREE(s);
}
@@ -757,8 +770,63 @@ sbuf_delete(struct sbuf *s)
* Check if an sbuf has been finished.
*/
int
-sbuf_done(struct sbuf *s)
+sbuf_done(const struct sbuf *s)
{
return (SBUF_ISFINISHED(s));
}
+
+/*
+ * Start a section.
+ */
+void
+sbuf_start_section(struct sbuf *s, ssize_t *old_lenp)
+{
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+
+ if (!SBUF_ISSECTION(s)) {
+ KASSERT(s->s_sect_len == 0,
+ ("s_sect_len != 0 when starting a section"));
+ if (old_lenp != NULL)
+ *old_lenp = -1;
+ SBUF_SETFLAG(s, SBUF_INSECTION);
+ } else {
+ KASSERT(old_lenp != NULL,
+ ("s_sect_len should be saved when starting a subsection"));
+ *old_lenp = s->s_sect_len;
+ s->s_sect_len = 0;
+ }
+}
+
+/*
+ * End the section padding to the specified length with the specified
+ * character.
+ */
+ssize_t
+sbuf_end_section(struct sbuf *s, ssize_t old_len, size_t pad, int c)
+{
+ ssize_t len;
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+ KASSERT(SBUF_ISSECTION(s),
+ ("attempt to end a section when not in a section"));
+
+ if (pad > 1) {
+ len = roundup(s->s_sect_len, pad) - s->s_sect_len;
+ for (; s->s_error == 0 && len > 0; len--)
+ sbuf_put_byte(s, c);
+ }
+ len = s->s_sect_len;
+ if (old_len == -1) {
+ s->s_sect_len = 0;
+ SBUF_CLEARFLAG(s, SBUF_INSECTION);
+ } else {
+ s->s_sect_len += old_len;
+ }
+ if (s->s_error != 0)
+ return (-1);
+ return (len);
+}
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index be19c5b4..867b0e6b 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -68,34 +68,52 @@ struct taskqueue {
int tq_spin;
#endif /* __rtems__ */
int tq_flags;
+ int tq_callouts;
};
#define TQ_FLAGS_ACTIVE (1 << 0)
#define TQ_FLAGS_BLOCKED (1 << 1)
#define TQ_FLAGS_PENDING (1 << 2)
-static void taskqueue_run_locked(struct taskqueue *);
+#define DT_CALLOUT_ARMED (1 << 0)
-static __inline void
-TQ_LOCK(struct taskqueue *tq)
-{
#ifndef __rtems__
- if (tq->tq_spin)
- mtx_lock_spin(&tq->tq_mutex);
- else
+#define TQ_LOCK(tq) \
+ do { \
+ if ((tq)->tq_spin) \
+ mtx_lock_spin(&(tq)->tq_mutex); \
+ else \
+ mtx_lock(&(tq)->tq_mutex); \
+ } while (0)
+
+#define TQ_UNLOCK(tq) \
+ do { \
+ if ((tq)->tq_spin) \
+ mtx_unlock_spin(&(tq)->tq_mutex); \
+ else \
+ mtx_unlock(&(tq)->tq_mutex); \
+ } while (0)
+#else /* __rtems__ */
+#define TQ_LOCK(tq) \
+ do { \
+ mtx_lock(&(tq)->tq_mutex); \
+ } while (0)
+
+#define TQ_UNLOCK(tq) \
+ do { \
+ mtx_unlock(&(tq)->tq_mutex); \
+ } while (0)
#endif /* __rtems__ */
- mtx_lock(&tq->tq_mutex);
-}
-static __inline void
-TQ_UNLOCK(struct taskqueue *tq)
+void
+_timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
+ int priority, task_fn_t func, void *context)
{
-#ifndef __rtems__
- if (tq->tq_spin)
- mtx_unlock_spin(&tq->tq_mutex);
- else
-#endif /* __rtems__ */
- mtx_unlock(&tq->tq_mutex);
+
+ TASK_INIT(&timeout_task->t, priority, func, context);
+ callout_init_mtx(&timeout_task->c, &queue->tq_mutex, 0);
+ timeout_task->q = queue;
+ timeout_task->f = 0;
}
static __inline int
@@ -153,7 +171,7 @@ static void
taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
{
- while (tq->tq_tcount > 0) {
+ while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
wakeup(tq);
TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
}
@@ -165,30 +183,27 @@ taskqueue_free(struct taskqueue *queue)
TQ_LOCK(queue);
queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
- taskqueue_run_locked(queue);
taskqueue_terminate(queue->tq_threads, queue);
KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+ KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
mtx_destroy(&queue->tq_mutex);
free(queue->tq_threads, M_TASKQUEUE);
free(queue, M_TASKQUEUE);
}
-int
-taskqueue_enqueue(struct taskqueue *queue, struct task *task)
+static int
+taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
{
struct task *ins;
struct task *prev;
- TQ_LOCK(queue);
-
/*
* Count multiple enqueues.
*/
if (task->ta_pending) {
if (task->ta_pending < USHRT_MAX)
task->ta_pending++;
- TQ_UNLOCK(queue);
- return 0;
+ return (0);
}
/*
@@ -216,9 +231,64 @@ taskqueue_enqueue(struct taskqueue *queue, struct task *task)
else
queue->tq_flags |= TQ_FLAGS_PENDING;
+ return (0);
+}
+int
+taskqueue_enqueue(struct taskqueue *queue, struct task *task)
+{
+ int res;
+
+ TQ_LOCK(queue);
+ res = taskqueue_enqueue_locked(queue, task);
TQ_UNLOCK(queue);
- return 0;
+ return (res);
+}
+
+static void
+taskqueue_timeout_func(void *arg)
+{
+ struct taskqueue *queue;
+ struct timeout_task *timeout_task;
+
+ timeout_task = arg;
+ queue = timeout_task->q;
+ KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray timeout"));
+ timeout_task->f &= ~DT_CALLOUT_ARMED;
+ queue->tq_callouts--;
+ taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t);
+}
+
+int
+taskqueue_enqueue_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task, int ticks)
+{
+ int res;
+
+ TQ_LOCK(queue);
+ KASSERT(timeout_task->q == NULL || timeout_task->q == queue,
+ ("Migrated queue"));
+ KASSERT(!queue->tq_spin, ("Timeout for spin-queue"));
+ timeout_task->q = queue;
+ res = timeout_task->t.ta_pending;
+ if (ticks == 0) {
+ taskqueue_enqueue_locked(queue, &timeout_task->t);
+ } else {
+ if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
+ res++;
+ } else {
+ queue->tq_callouts++;
+ timeout_task->f |= DT_CALLOUT_ARMED;
+ if (ticks < 0)
+ ticks = -ticks; /* Ignore overflow. */
+ }
+ if (ticks > 0) {
+ callout_reset(&timeout_task->c, ticks,
+ taskqueue_timeout_func, timeout_task);
+ }
+ }
+ TQ_UNLOCK(queue);
+ return (res);
}
void
@@ -297,26 +367,76 @@ task_is_running(struct taskqueue *queue, struct task *task)
return (0);
}
+static int
+taskqueue_cancel_locked(struct taskqueue *queue, struct task *task,
+ u_int *pendp)
+{
+
+ if (task->ta_pending > 0)
+ STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
+ if (pendp != NULL)
+ *pendp = task->ta_pending;
+ task->ta_pending = 0;
+ return (task_is_running(queue, task) ? EBUSY : 0);
+}
+
+int
+taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp)
+{
+ u_int pending;
+ int error;
+
+ TQ_LOCK(queue);
+ pending = task->ta_pending;
+ error = taskqueue_cancel_locked(queue, task, pendp);
+ TQ_UNLOCK(queue);
+
+ return (error);
+}
+
+int
+taskqueue_cancel_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task, u_int *pendp)
+{
+ u_int pending, pending1;
+ int error;
+
+ TQ_LOCK(queue);
+ pending = !!callout_stop(&timeout_task->c);
+ error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1);
+ if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
+ timeout_task->f &= ~DT_CALLOUT_ARMED;
+ queue->tq_callouts--;
+ }
+ TQ_UNLOCK(queue);
+
+ if (pendp != NULL)
+ *pendp = pending + pending1;
+ return (error);
+}
+
void
taskqueue_drain(struct taskqueue *queue, struct task *task)
{
-#ifndef __rtems__
- if (queue->tq_spin) { /* XXX */
- mtx_lock_spin(&queue->tq_mutex);
- while (task->ta_pending != 0 || task_is_running(queue, task))
- msleep_spin(task, &queue->tq_mutex, "-", 0);
- mtx_unlock_spin(&queue->tq_mutex);
- } else {
-#endif /* __rtems__ */
- WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
- mtx_lock(&queue->tq_mutex);
- while (task->ta_pending != 0 || task_is_running(queue, task))
- msleep(task, &queue->tq_mutex, PWAIT, "-", 0);
- mtx_unlock(&queue->tq_mutex);
#ifndef __rtems__
- }
+ if (!queue->tq_spin)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
#endif /* __rtems__ */
+
+ TQ_LOCK(queue);
+ while (task->ta_pending != 0 || task_is_running(queue, task))
+ TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_UNLOCK(queue);
+}
+
+void
+taskqueue_drain_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task)
+{
+
+ callout_drain(&timeout_task->c);
+ taskqueue_drain(queue, &timeout_task->t);
}
static void
@@ -423,6 +543,7 @@ taskqueue_thread_loop(void *arg)
break;
TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
}
+ taskqueue_run_locked(tq);
/* rendezvous with thread that asked us to terminate */
tq->tq_tcount--;
diff --git a/freebsd/sys/kern/subr_uio.c b/freebsd/sys/kern/subr_uio.c
new file mode 100644
index 00000000..74f01ffa
--- /dev/null
+++ b/freebsd/sys/kern/subr_uio.c
@@ -0,0 +1,629 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_zero.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#ifdef ZERO_COPY_SOCKETS
+#include <vm/vm_object.h>
+#endif
+
+#ifndef __rtems__
+SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
+ "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
+#endif /* __rtems__ */
+
+static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
+
+#ifndef __rtems__
+#ifdef ZERO_COPY_SOCKETS
+/* Declared in uipc_socket.c */
+extern int so_zero_copy_receive;
+
+/*
+ * Identify the physical page mapped at the given kernel virtual
+ * address. Insert this physical page into the given address space at
+ * the given virtual address, replacing the physical page, if any,
+ * that already exists there.
+ */
+static int
+vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
+{
+ vm_map_t map = mapa;
+ vm_page_t kern_pg, user_pg;
+ vm_object_t uobject;
+ vm_map_entry_t entry;
+ vm_pindex_t upindex;
+ vm_prot_t prot;
+ boolean_t wired;
+
+ KASSERT((uaddr & PAGE_MASK) == 0,
+ ("vm_pgmoveco: uaddr is not page aligned"));
+
+ /*
+ * Herein the physical page is validated and dirtied. It is
+ * unwired in sf_buf_mext().
+ */
+ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+ kern_pg->valid = VM_PAGE_BITS_ALL;
+ KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
+ ("vm_pgmoveco: kern_pg is not correctly wired"));
+
+ if ((vm_map_lookup(&map, uaddr,
+ VM_PROT_WRITE, &entry, &uobject,
+ &upindex, &prot, &wired)) != KERN_SUCCESS) {
+ return(EFAULT);
+ }
+ VM_OBJECT_LOCK(uobject);
+retry:
+ if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
+ if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
+ goto retry;
+ vm_page_lock(user_pg);
+ pmap_remove_all(user_pg);
+ vm_page_free(user_pg);
+ vm_page_unlock(user_pg);
+ } else {
+ /*
+ * Even if a physical page does not exist in the
+ * object chain's first object, a physical page from a
+ * backing object may be mapped read only.
+ */
+ if (uobject->backing_object != NULL)
+ pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
+ }
+ vm_page_insert(kern_pg, uobject, upindex);
+ vm_page_dirty(kern_pg);
+ VM_OBJECT_UNLOCK(uobject);
+ vm_map_lookup_done(map, entry);
+ return(KERN_SUCCESS);
+}
+#endif /* ZERO_COPY_SOCKETS */
+
+int
+copyin_nofault(const void *udaddr, void *kaddr, size_t len)
+{
+ int error, save;
+
+ save = vm_fault_disable_pagefaults();
+ error = copyin(udaddr, kaddr, len);
+ vm_fault_enable_pagefaults(save);
+ return (error);
+}
+
+int
+copyout_nofault(const void *kaddr, void *udaddr, size_t len)
+{
+ int error, save;
+
+ save = vm_fault_disable_pagefaults();
+ error = copyout(kaddr, udaddr, len);
+ vm_fault_enable_pagefaults(save);
+ return (error);
+}
+
+#define PHYS_PAGE_COUNT(len) (howmany(len, PAGE_SIZE) + 1)
+
+int
+physcopyin(void *src, vm_paddr_t dst, size_t len)
+{
+ vm_page_t m[PHYS_PAGE_COUNT(len)];
+ struct iovec iov[1];
+ struct uio uio;
+ int i;
+
+ iov[0].iov_base = src;
+ iov[0].iov_len = len;
+ uio.uio_iov = iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = len;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_WRITE;
+ for (i = 0; i < PHYS_PAGE_COUNT(len); i++, dst += PAGE_SIZE)
+ m[i] = PHYS_TO_VM_PAGE(dst);
+ return (uiomove_fromphys(m, dst & PAGE_MASK, len, &uio));
+}
+
+int
+physcopyout(vm_paddr_t src, void *dst, size_t len)
+{
+ vm_page_t m[PHYS_PAGE_COUNT(len)];
+ struct iovec iov[1];
+ struct uio uio;
+ int i;
+
+ iov[0].iov_base = dst;
+ iov[0].iov_len = len;
+ uio.uio_iov = iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = len;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_READ;
+ for (i = 0; i < PHYS_PAGE_COUNT(len); i++, src += PAGE_SIZE)
+ m[i] = PHYS_TO_VM_PAGE(src);
+ return (uiomove_fromphys(m, src & PAGE_MASK, len, &uio));
+}
+
+#undef PHYS_PAGE_COUNT
+#endif /* __rtems__ */
+
+int
+uiomove(void *cp, int n, struct uio *uio)
+{
+
+ return (uiomove_faultflag(cp, n, uio, 0));
+}
+
+int
+uiomove_nofault(void *cp, int n, struct uio *uio)
+{
+
+ return (uiomove_faultflag(cp, n, uio, 1));
+}
+
+static int
+uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
+{
+#ifndef __rtems__
+ struct thread *td;
+#endif /* __rtems__ */
+ struct iovec *iov;
+ size_t cnt;
+ int error, newflags, save;
+
+#ifndef __rtems__
+ td = curthread;
+#endif /* __rtems__ */
+ error = 0;
+
+ KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
+ ("uiomove: mode"));
+ KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
+ ("uiomove proc"));
+ if (!nofault)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Calling uiomove()");
+
+#ifndef __rtems__
+ /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
+ newflags = TDP_DEADLKTREAT;
+ if (uio->uio_segflg == UIO_USERSPACE && nofault) {
+ /*
+ * Fail if a non-spurious page fault occurs.
+ */
+ newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
+ }
+ save = curthread_pflags_set(newflags);
+#else /* __rtems__ */
+ (void) newflags;
+ (void) save;
+#endif /* __rtems__ */
+
+ while (n > 0 && uio->uio_resid) {
+ iov = uio->uio_iov;
+ cnt = iov->iov_len;
+ if (cnt == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ continue;
+ }
+ if (cnt > n)
+ cnt = n;
+
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+#ifndef __rtems__
+ maybe_yield();
+#endif /* __rtems__ */
+ if (uio->uio_rw == UIO_READ)
+ error = copyout(cp, iov->iov_base, cnt);
+ else
+ error = copyin(iov->iov_base, cp, cnt);
+ if (error)
+ goto out;
+ break;
+
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy(cp, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, cp, cnt);
+ break;
+ case UIO_NOCOPY:
+ break;
+ }
+ iov->iov_base = (char *)iov->iov_base + cnt;
+ iov->iov_len -= cnt;
+ uio->uio_resid -= cnt;
+ uio->uio_offset += cnt;
+ cp = (char *)cp + cnt;
+ n -= cnt;
+ }
+out:
+#ifndef __rtems__
+ curthread_pflags_restore(save);
+#endif /* __rtems__ */
+ return (error);
+}
+
+#ifndef __rtems__
+/*
+ * Wrapper for uiomove() that validates the arguments against a known-good
+ * kernel buffer. Currently, uiomove accepts a signed (n) argument, which
+ * is almost definitely a bad thing, so we catch that here as well. We
+ * return a runtime failure, but it might be desirable to generate a runtime
+ * assertion failure instead.
+ */
+int
+uiomove_frombuf(void *buf, int buflen, struct uio *uio)
+{
+ size_t offset, n;
+
+ if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
+ (offset = uio->uio_offset) != uio->uio_offset)
+ return (EINVAL);
+ if (buflen <= 0 || offset >= buflen)
+ return (0);
+ if ((n = buflen - offset) > IOSIZE_MAX)
+ return (EINVAL);
+ return (uiomove((char *)buf + offset, n, uio));
+}
+
+#ifdef ZERO_COPY_SOCKETS
+/*
+ * Experimental support for zero-copy I/O
+ */
+static int
+userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable)
+{
+ struct iovec *iov;
+ int error;
+
+ iov = uio->uio_iov;
+ if (uio->uio_rw == UIO_READ) {
+ if ((so_zero_copy_receive != 0)
+ && ((cnt & PAGE_MASK) == 0)
+ && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
+ && ((uio->uio_offset & PAGE_MASK) == 0)
+ && ((((intptr_t) cp) & PAGE_MASK) == 0)
+ && (disposable != 0)) {
+ /* SOCKET: use page-trading */
+ /*
+ * We only want to call vm_pgmoveco() on
+ * disposeable pages, since it gives the
+ * kernel page to the userland process.
+ */
+ error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
+ (vm_offset_t)cp, (vm_offset_t)iov->iov_base);
+
+ /*
+ * If we get an error back, attempt
+ * to use copyout() instead. The
+ * disposable page should be freed
+ * automatically if we weren't able to move
+ * it into userland.
+ */
+ if (error != 0)
+ error = copyout(cp, iov->iov_base, cnt);
+ } else {
+ error = copyout(cp, iov->iov_base, cnt);
+ }
+ } else {
+ error = copyin(iov->iov_base, cp, cnt);
+ }
+ return (error);
+}
+
+int
+uiomoveco(void *cp, int n, struct uio *uio, int disposable)
+{
+ struct iovec *iov;
+ u_int cnt;
+ int error;
+
+ KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
+ ("uiomoveco: mode"));
+ KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
+ ("uiomoveco proc"));
+
+ while (n > 0 && uio->uio_resid) {
+ iov = uio->uio_iov;
+ cnt = iov->iov_len;
+ if (cnt == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ continue;
+ }
+ if (cnt > n)
+ cnt = n;
+
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+ maybe_yield();
+ error = userspaceco(cp, cnt, uio, disposable);
+ if (error)
+ return (error);
+ break;
+
+ case UIO_SYSSPACE:
+ if (uio->uio_rw == UIO_READ)
+ bcopy(cp, iov->iov_base, cnt);
+ else
+ bcopy(iov->iov_base, cp, cnt);
+ break;
+ case UIO_NOCOPY:
+ break;
+ }
+ iov->iov_base = (char *)iov->iov_base + cnt;
+ iov->iov_len -= cnt;
+ uio->uio_resid -= cnt;
+ uio->uio_offset += cnt;
+ cp = (char *)cp + cnt;
+ n -= cnt;
+ }
+ return (0);
+}
+#endif /* ZERO_COPY_SOCKETS */
+
+/*
+ * Give next character to user as result of read.
+ */
+int
+ureadc(int c, struct uio *uio)
+{
+ struct iovec *iov;
+ char *iov_base;
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Calling ureadc()");
+
+again:
+ if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
+ panic("ureadc");
+ iov = uio->uio_iov;
+ if (iov->iov_len == 0) {
+ uio->uio_iovcnt--;
+ uio->uio_iov++;
+ goto again;
+ }
+ switch (uio->uio_segflg) {
+
+ case UIO_USERSPACE:
+ if (subyte(iov->iov_base, c) < 0)
+ return (EFAULT);
+ break;
+
+ case UIO_SYSSPACE:
+ iov_base = iov->iov_base;
+ *iov_base = c;
+ iov->iov_base = iov_base;
+ break;
+
+ case UIO_NOCOPY:
+ break;
+ }
+ iov->iov_base = (char *)iov->iov_base + 1;
+ iov->iov_len--;
+ uio->uio_resid--;
+ uio->uio_offset++;
+ return (0);
+}
+
+int
+copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
+ int seg)
+{
+ int error = 0;
+
+ switch (seg) {
+ case UIO_USERSPACE:
+ error = copyin(src, dst, len);
+ break;
+ case UIO_SYSSPACE:
+ bcopy(src, dst, len);
+ break;
+ default:
+ panic("copyinfrom: bad seg %d\n", seg);
+ }
+ return (error);
+}
+
+int
+copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
+ size_t * __restrict copied, int seg)
+{
+ int error = 0;
+
+ switch (seg) {
+ case UIO_USERSPACE:
+ error = copyinstr(src, dst, len, copied);
+ break;
+ case UIO_SYSSPACE:
+ error = copystr(src, dst, len, copied);
+ break;
+ default:
+ panic("copyinstrfrom: bad seg %d\n", seg);
+ }
+ return (error);
+}
+#endif /* __rtems__ */
+
+int
+copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
+{
+ u_int iovlen;
+
+ *iov = NULL;
+ if (iovcnt > UIO_MAXIOV)
+ return (error);
+ iovlen = iovcnt * sizeof (struct iovec);
+ *iov = malloc(iovlen, M_IOV, M_WAITOK);
+ error = copyin(iovp, *iov, iovlen);
+ if (error) {
+ free(*iov, M_IOV);
+ *iov = NULL;
+ }
+ return (error);
+}
+
+#ifndef __rtems__
+int
+copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
+{
+ struct iovec *iov;
+ struct uio *uio;
+ u_int iovlen;
+ int error, i;
+
+ *uiop = NULL;
+ if (iovcnt > UIO_MAXIOV)
+ return (EINVAL);
+ iovlen = iovcnt * sizeof (struct iovec);
+ uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
+ iov = (struct iovec *)(uio + 1);
+ error = copyin(iovp, iov, iovlen);
+ if (error) {
+ free(uio, M_IOV);
+ return (error);
+ }
+ uio->uio_iov = iov;
+ uio->uio_iovcnt = iovcnt;
+ uio->uio_segflg = UIO_USERSPACE;
+ uio->uio_offset = -1;
+ uio->uio_resid = 0;
+ for (i = 0; i < iovcnt; i++) {
+ if (iov->iov_len > IOSIZE_MAX - uio->uio_resid) {
+ free(uio, M_IOV);
+ return (EINVAL);
+ }
+ uio->uio_resid += iov->iov_len;
+ iov++;
+ }
+ *uiop = uio;
+ return (0);
+}
+
+struct uio *
+cloneuio(struct uio *uiop)
+{
+ struct uio *uio;
+ int iovlen;
+
+ iovlen = uiop->uio_iovcnt * sizeof (struct iovec);
+ uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
+ *uio = *uiop;
+ uio->uio_iov = (struct iovec *)(uio + 1);
+ bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
+ return (uio);
+}
+
+/*
+ * Map some anonymous memory in user space of size sz, rounded up to the page
+ * boundary.
+ */
+int
+copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
+{
+ struct vmspace *vms;
+ int error;
+ vm_size_t size;
+
+ vms = td->td_proc->p_vmspace;
+
+ /*
+ * Map somewhere after heap in process memory.
+ */
+ PROC_LOCK(td->td_proc);
+ *addr = round_page((vm_offset_t)vms->vm_daddr +
+ lim_max(td->td_proc, RLIMIT_DATA));
+ PROC_UNLOCK(td->td_proc);
+
+ /* round size up to page boundry */
+ size = (vm_size_t)round_page(sz);
+
+ error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
+ VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
+
+ return (error);
+}
+
+/*
+ * Unmap memory in user space.
+ */
+int
+copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
+{
+ vm_map_t map;
+ vm_size_t size;
+
+ if (sz == 0)
+ return (0);
+
+ map = &td->td_proc->p_vmspace->vm_map;
+ size = (vm_size_t)round_page(sz);
+
+ if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
+ return (EINVAL);
+
+ return (0);
+}
+#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index 7cc2a516..fefc94d9 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -39,12 +39,14 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_capsicum.h>
#include <rtems/bsd/local/opt_compat.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
+#include <sys/capability.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
@@ -78,6 +80,16 @@ __FBSDID("$FreeBSD$");
#endif /* __rtems__ */
#ifndef __rtems__
+int iosize_max_clamp = 1;
+SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW,
+ &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX");
+/*
+ * Assert that the return value of read(2) and write(2) syscalls fits
+ * into a register. If not, an architecture will need to provide the
+ * usermode wrappers to reconstruct the result.
+ */
+CTASSERT(sizeof(register_t) >= sizeof(size_t));
+
static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
#endif /* __rtems__ */
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
@@ -147,7 +159,7 @@ struct read_args {
};
#endif
int
-read(td, uap)
+sys_read(td, uap)
struct thread *td;
struct read_args *uap;
{
@@ -155,7 +167,7 @@ read(td, uap)
struct iovec aiov;
int error;
- if (uap->nbyte > INT_MAX)
+ if (uap->nbyte > IOSIZE_MAX)
return (EINVAL);
aiov.iov_base = uap->buf;
aiov.iov_len = uap->nbyte;
@@ -180,7 +192,7 @@ struct pread_args {
};
#endif
int
-pread(td, uap)
+sys_pread(td, uap)
struct thread *td;
struct pread_args *uap;
{
@@ -188,7 +200,7 @@ pread(td, uap)
struct iovec aiov;
int error;
- if (uap->nbyte > INT_MAX)
+ if (uap->nbyte > IOSIZE_MAX)
return (EINVAL);
aiov.iov_base = uap->buf;
aiov.iov_len = uap->nbyte;
@@ -211,7 +223,7 @@ freebsd6_pread(td, uap)
oargs.buf = uap->buf;
oargs.nbyte = uap->nbyte;
oargs.offset = uap->offset;
- return (pread(td, &oargs));
+ return (sys_pread(td, &oargs));
}
/*
@@ -225,7 +237,7 @@ struct readv_args {
};
#endif
int
-readv(struct thread *td, struct readv_args *uap)
+sys_readv(struct thread *td, struct readv_args *uap)
{
struct uio *auio;
int error;
@@ -244,7 +256,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio)
struct file *fp;
int error;
- error = fget_read(td, fd, &fp);
+ error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
if (error)
return (error);
error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
@@ -264,7 +276,7 @@ struct preadv_args {
};
#endif
int
-preadv(struct thread *td, struct preadv_args *uap)
+sys_preadv(struct thread *td, struct preadv_args *uap)
{
struct uio *auio;
int error;
@@ -287,7 +299,7 @@ kern_preadv(td, fd, auio, offset)
struct file *fp;
int error;
- error = fget_read(td, fd, &fp);
+ error = fget_read(td, fd, CAP_READ, &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -356,7 +368,7 @@ struct write_args {
};
#endif
int
-write(td, uap)
+sys_write(td, uap)
struct thread *td;
struct write_args *uap;
{
@@ -364,7 +376,7 @@ write(td, uap)
struct iovec aiov;
int error;
- if (uap->nbyte > INT_MAX)
+ if (uap->nbyte > IOSIZE_MAX)
return (EINVAL);
aiov.iov_base = (void *)(uintptr_t)uap->buf;
aiov.iov_len = uap->nbyte;
@@ -389,7 +401,7 @@ struct pwrite_args {
};
#endif
int
-pwrite(td, uap)
+sys_pwrite(td, uap)
struct thread *td;
struct pwrite_args *uap;
{
@@ -397,7 +409,7 @@ pwrite(td, uap)
struct iovec aiov;
int error;
- if (uap->nbyte > INT_MAX)
+ if (uap->nbyte > IOSIZE_MAX)
return (EINVAL);
aiov.iov_base = (void *)(uintptr_t)uap->buf;
aiov.iov_len = uap->nbyte;
@@ -420,7 +432,7 @@ freebsd6_pwrite(td, uap)
oargs.buf = uap->buf;
oargs.nbyte = uap->nbyte;
oargs.offset = uap->offset;
- return (pwrite(td, &oargs));
+ return (sys_pwrite(td, &oargs));
}
/*
@@ -434,7 +446,7 @@ struct writev_args {
};
#endif
int
-writev(struct thread *td, struct writev_args *uap)
+sys_writev(struct thread *td, struct writev_args *uap)
{
struct uio *auio;
int error;
@@ -453,7 +465,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio)
struct file *fp;
int error;
- error = fget_write(td, fd, &fp);
+ error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
if (error)
return (error);
error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
@@ -473,7 +485,7 @@ struct pwritev_args {
};
#endif
int
-pwritev(struct thread *td, struct pwritev_args *uap)
+sys_pwritev(struct thread *td, struct pwritev_args *uap)
{
struct uio *auio;
int error;
@@ -496,7 +508,7 @@ kern_pwritev(td, fd, auio, offset)
struct file *fp;
int error;
- error = fget_write(td, fd, &fp);
+ error = fget_write(td, fd, CAP_WRITE, &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -536,7 +548,8 @@ dofilewrite(td, fd, fp, auio, offset, flags)
ktruio = cloneuio(auio);
#endif
cnt = auio->uio_resid;
- if (fp->f_type == DTYPE_VNODE)
+ if (fp->f_type == DTYPE_VNODE &&
+ (fp->f_vnread_flags & FDEVFS_VNODE) == 0)
bwillwrite();
if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
if (auio->uio_resid != cnt && (error == ERESTART ||
@@ -545,7 +558,7 @@ dofilewrite(td, fd, fp, auio, offset, flags)
/* Socket layer is responsible for issuing SIGPIPE. */
if (fp->f_type != DTYPE_SOCKET && error == EPIPE) {
PROC_LOCK(td->td_proc);
- tdksignal(td, SIGPIPE, NULL);
+ tdsignal(td, SIGPIPE);
PROC_UNLOCK(td->td_proc);
}
}
@@ -578,7 +591,7 @@ kern_ftruncate(td, fd, length)
AUDIT_ARG_FD(fd);
if (length < 0)
return (EINVAL);
- error = fget(td, fd, &fp);
+ error = fget(td, fd, CAP_FTRUNCATE, &fp);
if (error)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
@@ -599,7 +612,7 @@ struct ftruncate_args {
};
#endif
int
-ftruncate(td, uap)
+sys_ftruncate(td, uap)
struct thread *td;
struct ftruncate_args *uap;
{
@@ -633,7 +646,7 @@ struct ioctl_args {
#endif
/* ARGSUSED */
int
-ioctl(struct thread *td, struct ioctl_args *uap)
+sys_ioctl(struct thread *td, struct ioctl_args *uap)
{
u_long com;
int arg, error;
@@ -708,7 +721,7 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
AUDIT_ARG_FD(fd);
AUDIT_ARG_CMD(com);
- if ((error = fget(td, fd, &fp)) != 0)
+ if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0)
return (error);
if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
fdrop(fp, td);
@@ -765,7 +778,7 @@ poll_no_poll(int events)
}
int
-pselect(struct thread *td, struct pselect_args *uap)
+sys_pselect(struct thread *td, struct pselect_args *uap)
{
struct timespec ts;
struct timeval tv, *tvp;
@@ -824,7 +837,7 @@ struct select_args {
};
#endif
int
-select(struct thread *td, struct select_args *uap)
+sys_select(struct thread *td, struct select_args *uap)
{
struct timeval tv, *tvp;
int error;
@@ -1157,6 +1170,37 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
return (n);
}
+static __inline int
+getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp)
+{
+ struct file *fp;
+#ifdef CAPABILITIES
+ struct file *fp_fromcap;
+ int error;
+#endif
+
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+#ifdef CAPABILITIES
+ /*
+ * If the file descriptor is for a capability, test rights and use
+ * the file descriptor references by the capability.
+ */
+ error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap);
+ if (error) {
+ fdrop(fp, curthread);
+ return (error);
+ }
+ if (fp != fp_fromcap) {
+ fhold(fp_fromcap);
+ fdrop(fp, curthread);
+ fp = fp_fromcap;
+ }
+#endif /* CAPABILITIES */
+ *fpp = fp;
+ return (0);
+}
+
/*
* Traverse the list of fds attached to this thread's seltd and check for
* completion.
@@ -1172,6 +1216,7 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
struct file *fp;
fd_mask bit;
int fd, ev, n, idx;
+ int error;
#ifndef __rtems__
fdp = td->td_proc->p_fd;
@@ -1187,8 +1232,9 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
/* If the selinfo wasn't cleared the event didn't fire. */
if (si != NULL)
continue;
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
+ error = getselfd_cap(fdp, fd, &fp);
+ if (error)
+ return (error);
idx = fd / NFDBITS;
bit = (fd_mask)1 << (fd % NFDBITS);
ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
@@ -1216,6 +1262,7 @@ selscan(td, ibits, obits, nfd)
fd_mask bit;
int ev, flags, end, fd;
int n, idx;
+ int error;
#ifndef __rtems__
fdp = td->td_proc->p_fd;
@@ -1230,8 +1277,9 @@ selscan(td, ibits, obits, nfd)
flags = selflags(ibits, idx, bit);
if (flags == 0)
continue;
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
+ error = getselfd_cap(fdp, fd, &fp);
+ if (error)
+ return (error);
selfdalloc(td, (void *)(uintptr_t)fd);
ev = fo_poll(fp, flags, td->td_ucred, td);
fdrop(fp, td);
@@ -1251,13 +1299,11 @@ struct poll_args {
int timeout;
};
#endif
-#ifndef __rtems__
-int
-poll(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_poll(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_poll(td, uap)
struct thread *td;
struct poll_args *uap;
{
@@ -1350,7 +1396,7 @@ poll(struct pollfd fds[], nfds_t nfds, int timeout)
int error;
if (td != NULL) {
- error = rtems_bsd_poll(td, &ua);
+ error = sys_poll(td, &ua);
} else {
error = ENOMEM;
}
@@ -1395,11 +1441,17 @@ pollrescan(struct thread *td)
#else /* __rtems__ */
fp = fget_unlocked(fdp, fd->fd);
#endif /* __rtems__ */
+#ifdef CAPABILITIES
+ if ((fp == NULL)
+ || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+#else
if (fp == NULL) {
+#endif
fd->revents = POLLNVAL;
n++;
continue;
}
+
/*
* Note: backend also returns POLLHUP and
* POLLERR if appropriate.
@@ -1472,7 +1524,12 @@ pollscan(td, fds, nfd)
#else /* __rtems__ */
fp = fget_unlocked(fdp, fds->fd);
#endif /* __rtems__ */
+#ifdef CAPABILITIES
+ if ((fp == NULL)
+ || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+#else
if (fp == NULL) {
+#endif
fds->revents = POLLNVAL;
n++;
} else {
@@ -1514,11 +1571,11 @@ struct openbsd_poll_args {
};
#endif
int
-openbsd_poll(td, uap)
+sys_openbsd_poll(td, uap)
register struct thread *td;
register struct openbsd_poll_args *uap;
{
- return (poll(td, (struct poll_args *)uap));
+ return (sys_poll(td, (struct poll_args *)uap));
}
/*
diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c
index 0c1efdf0..08777991 100644
--- a/freebsd/sys/kern/sys_socket.c
+++ b/freebsd/sys/kern/sys_socket.c
@@ -67,6 +67,8 @@ struct fileops socketops = {
.fo_kqfilter = soo_kqfilter,
.fo_stat = soo_stat,
.fo_close = soo_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
.fo_flags = DFLAG_PASSABLE
};
#endif /* __rtems__ */
@@ -145,7 +147,7 @@ soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
#ifndef __rtems__
PROC_LOCK(uio->uio_td->td_proc);
- tdksignal(uio->uio_td, SIGPIPE, NULL);
+ tdsignal(uio->uio_td, SIGPIPE);
PROC_UNLOCK(uio->uio_td->td_proc);
#else /* __rtems__ */
/* FIXME: Determine if we really want to use signals */
diff --git a/freebsd/sys/kern/uipc_domain.c b/freebsd/sys/kern/uipc_domain.c
index 450c1d8c..3af4448c 100644
--- a/freebsd/sys/kern/uipc_domain.c
+++ b/freebsd/sys/kern/uipc_domain.c
@@ -241,28 +241,11 @@ domain_add(void *data)
mtx_unlock(&dom_mtx);
}
-static void
-socket_zone_change(void *tag)
-{
-
- uma_zone_set_max(socket_zone, maxsockets);
-}
-
/* ARGSUSED*/
static void
domaininit(void *dummy)
{
- /*
- * Before we do any setup, make sure to initialize the
- * zone allocator we get struct sockets from.
- */
- socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(socket_zone, maxsockets);
- EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
- EVENTHANDLER_PRI_FIRST);
-
if (max_linkhdr < 16) /* XXX */
max_linkhdr = 16;
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index 3b0a5fe9..abf4dd3e 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -913,8 +913,8 @@ m_cat(struct mbuf *m, struct mbuf *n)
while (m->m_next)
m = m->m_next;
while (n) {
- if (m->m_flags & M_EXT ||
- m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+ if (!M_WRITABLE(m) ||
+ M_TRAILINGSPACE(m) < n->m_len) {
/* just join the two chains */
m->m_next = n;
return;
@@ -1586,7 +1586,7 @@ again:
n = m->m_next;
if (n == NULL)
break;
- if ((m->m_flags & M_RDONLY) == 0 &&
+ if (M_WRITABLE(m) &&
n->m_len < M_TRAILINGSPACE(m)) {
bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
n->m_len);
@@ -1728,7 +1728,8 @@ struct mbuf *
m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
{
struct mbuf *m, *mb;
- int error, length, total;
+ int error, length;
+ ssize_t total;
int progress = 0;
/*
@@ -2035,7 +2036,7 @@ mbprof_textify(void)
{
int offset;
char *c;
- u_int64_t *p;
+ uint64_t *p;
p = &mbprof.wasted[0];
diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c
index a2e4e395..a44ddc27 100644
--- a/freebsd/sys/kern/uipc_mbuf2.c
+++ b/freebsd/sys/kern/uipc_mbuf2.c
@@ -316,7 +316,7 @@ m_tag_free_default(struct m_tag *t)
/* Get a packet tag structure along with specified data following. */
struct m_tag *
-m_tag_alloc(u_int32_t cookie, int type, int len, int wait)
+m_tag_alloc(uint32_t cookie, int type, int len, int wait)
{
struct m_tag *t;
@@ -378,7 +378,7 @@ m_tag_delete_nonpersistent(struct mbuf *m)
/* Find a tag, starting from a given position. */
struct m_tag *
-m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t)
+m_tag_locate(struct mbuf *m, uint32_t cookie, int type, struct m_tag *t)
{
struct m_tag *p;
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index b89abc84..2a0e527d 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -63,7 +63,7 @@ void (*aio_swake)(struct socket *, struct sockbuf *);
u_long sb_max = SB_MAX;
u_long sb_max_adj =
- SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
+ (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
@@ -945,6 +945,13 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
/* Return closest mbuf in chain for current offset. */
*moff = off - sb->sb_sndptroff;
m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
+ if (*moff == m->m_len) {
+ *moff = 0;
+ sb->sb_sndptroff += m->m_len;
+ m = ret = m->m_next;
+ KASSERT(ret->m_len > 0,
+ ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
+ }
/* Advance by len to be as close as possible for the next transmit. */
for (off = off - sb->sb_sndptroff + len - 1;
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 227e6dac..9ca2c14c 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -137,6 +137,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
+#include <sys/syslog.h>
+#include <netinet/in.h>
#include <net/vnet.h>
@@ -159,18 +161,24 @@ static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);
-static struct filterops solisten_filtops =
- { 1, NULL, filt_sordetach, filt_solisten };
-static struct filterops soread_filtops =
- { 1, NULL, filt_sordetach, filt_soread };
-static struct filterops sowrite_filtops =
- { 1, NULL, filt_sowdetach, filt_sowrite };
+static struct filterops solisten_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sordetach,
+ .f_event = filt_solisten,
+};
+static struct filterops soread_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sordetach,
+ .f_event = filt_soread,
+};
+static struct filterops sowrite_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sowdetach,
+ .f_event = filt_sowrite,
+};
-uma_zone_t socket_zone;
so_gen_t so_gencnt; /* generation count for sockets */
-int maxsockets;
-
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
@@ -178,15 +186,37 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
VNET_ASSERT(curvnet != NULL, \
("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+/*
+ * Limit on the number of connections in the listen queue waiting
+ * for accept(2).
+ */
static int somaxconn = SOMAXCONN;
-static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
-/* XXX: we dont have SYSCTL_USHORT */
+
+static int
+sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int val;
+
+ val = somaxconn;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr )
+ return (error);
+
+ if (val < 1 || val > USHRT_MAX)
+ return (EINVAL);
+
+ somaxconn = val;
+ return (0);
+}
SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
- 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection "
- "queue size");
+ 0, sizeof(int), sysctl_somaxconn, "I",
+ "Maximum listen socket pending connection accept queue size");
+
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
&numopensockets, 0, "Number of open sockets");
+
#ifdef ZERO_COPY_SOCKETS
/* These aren't static because they're used in other files. */
int so_zero_copy_send = 1;
@@ -220,6 +250,49 @@ MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
/*
+ * Initialize the socket subsystem and set up the socket
+ * memory allocator.
+ */
+uma_zone_t socket_zone;
+int maxsockets;
+
+static void
+socket_zone_change(void *tag)
+{
+
+ uma_zone_set_max(socket_zone, maxsockets);
+}
+
+static void
+socket_init(void *tag)
+{
+
+ socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(socket_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
+ EVENTHANDLER_PRI_FIRST);
+}
+SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
+
+/*
+ * Initialise maxsockets. This SYSINIT must be run after
+ * tunable_mbinit().
+ */
+static void
+init_maxsockets(void *ignored)
+{
+
+ TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
+#ifndef __rtems__
+ maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
+#else /* __rtems__ */
+ maxsockets = imax(maxsockets, nmbclusters);
+#endif /* __rtems__ */
+}
+SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
+
+/*
* Sysctl to get and set the maximum global sockets limit. Notify protocols
* of the change so that they can update their dependent limits as required.
*/
@@ -245,29 +318,11 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
}
return (error);
}
-
SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
&maxsockets, 0, sysctl_maxsockets, "IU",
"Maximum number of sockets avaliable");
/*
- * Initialise maxsockets. This SYSINIT must be run after
- * tunable_mbinit().
- */
-static void
-init_maxsockets(void *ignored)
-{
-
- TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-#ifndef __rtems__
- maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
-#else /* __rtems__ */
- maxsockets = imax(maxsockets, nmbclusters);
-#endif /* __rtems__ */
-}
-SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
-
-/*
* Socket operation routines. These routines are called by the routines in
* sys_socket.c or from a system process, and implement the semantics of
* socket operations by switching out to the protocol specific routines.
@@ -450,16 +505,24 @@ sonewconn(struct socket *head, int connstatus)
over = (head->so_qlen > 3 * head->so_qlimit / 2);
ACCEPT_UNLOCK();
#ifdef REGRESSION
- if (regression_sonewconn_earlytest && over)
+ if (regression_sonewconn_earlytest && over) {
#else
- if (over)
+ if (over) {
#endif
+ log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
+ "%i already in queue awaiting acceptance\n",
+ __func__, head->so_pcb, head->so_qlen);
return (NULL);
+ }
VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
__func__, __LINE__, head));
so = soalloc(head->so_vnet);
- if (so == NULL)
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
return (NULL);
+ }
if ((head->so_options & SO_ACCEPTFILTER) != 0)
connstatus = 0;
so->so_head = head;
@@ -476,9 +539,16 @@ sonewconn(struct socket *head, int connstatus)
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
- (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
return (NULL);
}
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
@@ -489,6 +559,20 @@ sonewconn(struct socket *head, int connstatus)
so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
ACCEPT_LOCK();
+ /*
+ * The accept socket may be tearing down but we just
+ * won a race on the ACCEPT_LOCK.
+ * However, if sctp_peeloff() is called on a 1-to-many
+ * style socket, the SO_ACCEPTCONN doesn't need to be set.
+ */
+ if (!(head->so_options & SO_ACCEPTCONN) &&
+ ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
+ (head->so_type != SOCK_SEQPACKET))) {
+ SOCK_LOCK(so);
+ so->so_head = NULL;
+ sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
+ return (NULL);
+ }
if (connstatus) {
TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
so->so_qstate |= SQ_COMP;
@@ -583,9 +667,12 @@ solisten_proto(struct socket *so, int backlog)
}
/*
- * Attempt to free a socket. This should really be sotryfree().
+ * Evaluate the reference count and named references on a socket; if no
+ * references remain, free it. This should be called whenever a reference is
+ * released, such as in sorele(), but also when named reference flags are
+ * cleared in socket or protocol code.
*
- * sofree() will succeed if:
+ * sofree() will free the socket if:
*
* - There are no outstanding file descriptor references or related consumers
* (so_count == 0).
@@ -598,9 +685,6 @@ solisten_proto(struct socket *so, int backlog)
* - The socket is not in a completed connection queue, so a process has been
* notified that it is present. If it is removed, the user process may
* block in accept() despite select() saying the socket was ready.
- *
- * Otherwise, it will quietly abort so that a future call to sofree(), when
- * conditions are right, can succeed.
*/
void
sofree(struct socket *so)
@@ -714,9 +798,14 @@ soclose(struct socket *so)
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
+ ACCEPT_LOCK();
if (so->so_options & SO_ACCEPTCONN) {
struct socket *sp;
- ACCEPT_LOCK();
+ /*
+ * Prevent new additions to the accept queues due
+ * to ACCEPT_LOCK races while we are draining them.
+ */
+ so->so_options &= ~SO_ACCEPTCONN;
while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
TAILQ_REMOVE(&so->so_incomp, sp, so_list);
so->so_incqlen--;
@@ -735,13 +824,15 @@ drop:
soabort(sp);
ACCEPT_LOCK();
}
- ACCEPT_UNLOCK();
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("%s: so_comp populated", __func__));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("%s: so_incomp populated", __func__));
}
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so);
+ sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
CURVNET_RESTORE();
return (error);
}
@@ -863,12 +954,6 @@ struct so_zerocopy_stats{
int found_ifp;
};
struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-#include <netinet/in.h>
-#include <net/route.h>
-#include <netinet/in_pcb.h>
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
/*
* sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
@@ -890,11 +975,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
int flags)
{
struct mbuf *m, **mp, *top;
- long len, resid;
+ long len;
+ ssize_t resid;
int error;
-#ifdef ZERO_COPY_SOCKETS
int cow_send;
-#endif
*retmp = top = NULL;
mp = &top;
@@ -902,11 +986,8 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
resid = uio->uio_resid;
error = 0;
do {
-#ifdef ZERO_COPY_SOCKETS
cow_send = 0;
-#endif /* ZERO_COPY_SOCKETS */
if (resid >= MINCLSIZE) {
-#ifdef ZERO_COPY_SOCKETS
if (top == NULL) {
m = m_gethdr(M_WAITOK, MT_DATA);
m->m_pkthdr.len = 0;
@@ -914,9 +995,9 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
} else
m = m_get(M_WAITOK, MT_DATA);
if (so_zero_copy_send &&
- resid>=PAGE_SIZE &&
- *space>=PAGE_SIZE &&
- uio->uio_iov->iov_len>=PAGE_SIZE) {
+ resid >= PAGE_SIZE &&
+ *space >= PAGE_SIZE &&
+ uio->uio_iov->iov_len >= PAGE_SIZE) {
so_zerocp_stats.size_ok++;
so_zerocp_stats.align_ok++;
cow_send = socow_setup(m, uio);
@@ -926,15 +1007,6 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
m_clget(m, M_WAITOK);
len = min(min(MCLBYTES, resid), *space);
}
-#else /* ZERO_COPY_SOCKETS */
- if (top == NULL) {
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
- } else
- m = m_getcl(M_WAIT, MT_DATA, 0);
- len = min(min(MCLBYTES, resid), *space);
-#endif /* ZERO_COPY_SOCKETS */
} else {
if (top == NULL) {
m = m_gethdr(M_WAIT, MT_DATA);
@@ -959,12 +1031,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
}
*space -= len;
-#ifdef ZERO_COPY_SOCKETS
if (cow_send)
error = 0;
else
-#endif /* ZERO_COPY_SOCKETS */
- error = uiomove(mtod(m, void *), (int)len, uio);
+ error = uiomove(mtod(m, void *), (int)len, uio);
resid = uio->uio_resid;
m->m_len = len;
*mp = m;
@@ -982,7 +1052,7 @@ out:
*retmp = top;
return (error);
}
-#endif /*ZERO_COPY_SOCKETS*/
+#endif /* ZERO_COPY_SOCKETS */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
@@ -990,7 +1060,8 @@ int
sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
- long space, resid;
+ long space;
+ ssize_t resid;
int clen = 0, error, dontroute;
#ifdef ZERO_COPY_SOCKETS
int atomic = sosendallatonce(so) || top;
@@ -1164,7 +1235,8 @@ int
sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
- long space, resid;
+ long space;
+ ssize_t resid;
int clen = 0, error, dontroute;
int atomic = sosendallatonce(so) || top;
@@ -1463,11 +1535,12 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
struct mbuf *m, **mp;
- int flags, len, error, offset;
+ int flags, error, offset;
+ ssize_t len;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
int moff, type = 0;
- int orig_resid = uio->uio_resid;
+ ssize_t orig_resid = uio->uio_resid;
mp = mp0;
if (psa != NULL)
@@ -1920,6 +1993,7 @@ release:
/*
* Optimized version of soreceive() for stream (TCP) sockets.
+ * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
*/
int
soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
@@ -1960,20 +2034,9 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
}
oresid = uio->uio_resid;
- /* We will never ever get anything unless we are connected. */
+ /* We will never ever get anything unless we are or were connected. */
if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
- /* When disconnecting there may be still some data left. */
- if (sb->sb_cc > 0)
- goto deliver;
- if (!(so->so_state & SS_ISDISCONNECTED))
- error = ENOTCONN;
- goto out;
- }
-
- /* Socket buffer is empty and we shall not block. */
- if (sb->sb_cc == 0 &&
- ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
- error = EAGAIN;
+ error = ENOTCONN;
goto out;
}
@@ -2000,6 +2063,13 @@ restart:
goto out;
}
+ /* Socket buffer is empty and we shall not block. */
+ if (sb->sb_cc == 0 &&
+ ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
+ error = EAGAIN;
+ goto out;
+ }
+
/* Socket buffer got some data that we shall deliver now. */
if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
((sb->sb_flags & SS_NBIO) ||
@@ -2012,7 +2082,7 @@ restart:
/* On MSG_WAITALL we must wait until all data or error arrives. */
if ((flags & MSG_WAITALL) &&
- (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
+ (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
goto deliver;
/*
@@ -2040,7 +2110,11 @@ deliver:
if (mp0 != NULL) {
/* Dequeue as many mbufs as possible. */
if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
- for (*mp0 = m = sb->sb_mb;
+ if (*mp0 == NULL)
+ *mp0 = sb->sb_mb;
+ else
+ m_cat(*mp0, sb->sb_mb);
+ for (m = sb->sb_mb;
m != NULL && m->m_len <= len;
m = m->m_next) {
len -= m->m_len;
@@ -2048,10 +2122,11 @@ deliver:
sbfree(sb, m);
n = m;
}
+ n->m_next = NULL;
sb->sb_mb = m;
+ sb->sb_lastrecord = sb->sb_mb;
if (sb->sb_mb == NULL)
SB_EMPTY_FIXUP(sb);
- n->m_next = NULL;
}
/* Copy the remainder. */
if (len > 0) {
@@ -2062,9 +2137,9 @@ deliver:
if (m == NULL)
len = 0; /* Don't flush data from sockbuf. */
else
- uio->uio_resid -= m->m_len;
+ uio->uio_resid -= len;
if (*mp0 != NULL)
- n->m_next = m;
+ m_cat(*mp0, m);
else
*mp0 = m;
if (*mp0 == NULL) {
@@ -2128,7 +2203,8 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
struct mbuf *m, *m2;
- int flags, len, error;
+ int flags, error;
+ ssize_t len;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
@@ -2334,9 +2410,11 @@ soshutdown(struct socket *so, int how)
sorflush(so);
if (how != SHUT_RD) {
error = (*pr->pr_usrreqs->pru_shutdown)(so);
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (error);
}
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (0);
}
@@ -2445,6 +2523,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
struct linger l;
struct timeval tv;
u_long val;
+ uint32_t val32;
#ifdef MAC
struct mac extmac;
#endif
@@ -2509,21 +2588,29 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
+ if (error)
+ goto bad;
+
if (optval < 0 || optval >= rt_numfibs) {
error = EINVAL;
goto bad;
}
if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
(so->so_proto->pr_domain->dom_family == PF_INET6) ||
- (so->so_proto->pr_domain->dom_family == PF_ROUTE))) {
+ (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
so->so_fibnum = optval;
- /* Note: ignore error */
- if (so->so_proto->pr_ctloutput)
- (*so->so_proto->pr_ctloutput)(so, sopt);
- } else {
+ else
so->so_fibnum = 0;
- }
break;
+
+ case SO_USER_COOKIE:
+ error = sooptcopyin(sopt, &val32, sizeof val32,
+ sizeof val32);
+ if (error)
+ goto bad;
+ so->so_user_cookie = val32;
+ break;
+
case SO_SNDBUF:
case SO_RCVBUF:
case SO_SNDLOWAT:
@@ -3289,24 +3376,6 @@ socheckuid(struct socket *so, uid_t uid)
}
#endif /* __rtems__ */
-static int
-sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
-{
- int error;
- int val;
-
- val = somaxconn;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error || !req->newptr )
- return (error);
-
- if (val < 1 || val > USHRT_MAX)
- return (EINVAL);
-
- somaxconn = val;
- return (0);
-}
-
/*
* These functions are used by protocols to notify the socket layer (and its
* consumers) of state changes in the sockets driven by protocol-side events.
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index c62cc9b5..738b5c3c 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -37,6 +37,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_capsicum.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_sctp.h>
@@ -45,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/capability.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
@@ -82,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
@@ -96,6 +99,16 @@ __FBSDID("$FreeBSD$");
#endif /* INET || INET6 */
#ifdef __rtems__
#include <machine/rtems-bsd-syscall-api.h>
+
+static int kern_bind(struct thread *, int, struct sockaddr *);
+
+static int kern_connect(struct thread *, int, struct sockaddr *);
+
+static int kern_setsockopt( struct thread *td, int s, int level, int name,
+ void *val, enum uio_seg valseg, socklen_t valsize);
+
+static int kern_getsockopt( struct thread *td, int s, int level, int name,
+ void *val, enum uio_seg valseg, socklen_t *valsize);
#endif /* __rtems__ */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
@@ -124,33 +137,47 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
/*
- * Convert a user file descriptor to a kernel file entry. A reference on the
- * file entry is held upon returning. This is lighter weight than
- * fgetsock(), which bumps the socket reference drops the file reference
- * count instead, as this approach avoids several additional mutex operations
- * associated with the additional reference count. If requested, return the
- * open file flags.
+ * Convert a user file descriptor to a kernel file entry and check that, if
+ * it is a capability, the right rights are present. A reference on the file
+ * entry is held upon returning.
*/
static int
-getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
+getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
+ struct file **fpp, u_int *fflagp)
{
struct file *fp;
+#ifdef CAPABILITIES
+ struct file *fp_fromcap;
int error;
+#endif
fp = NULL;
- if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
- error = EBADF;
- } else if (fp->f_type != DTYPE_SOCKET) {
+ if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
+ return (EBADF);
+#ifdef CAPABILITIES
+ /*
+ * If the file descriptor is for a capability, test rights and use
+ * the file descriptor referenced by the capability.
+ */
+ error = cap_funwrap(fp, rights, &fp_fromcap);
+ if (error) {
fdrop(fp, curthread);
- fp = NULL;
- error = ENOTSOCK;
- } else {
- if (fflagp != NULL)
- *fflagp = fp->f_flag;
- error = 0;
+ return (error);
}
+ if (fp != fp_fromcap) {
+ fhold(fp_fromcap);
+ fdrop(fp, curthread);
+ fp = fp_fromcap;
+ }
+#endif /* CAPABILITIES */
+ if (fp->f_type != DTYPE_SOCKET) {
+ fdrop(fp, curthread);
+ return (ENOTSOCK);
+ }
+ if (fflagp != NULL)
+ *fflagp = fp->f_flag;
*fpp = fp;
- return (error);
+ return (0);
}
#else /* __rtems__ */
static int
@@ -185,7 +212,7 @@ rtems_bsd_getsock(int fd, struct file **fpp, u_int *fflagp)
return (error);
}
-#define getsock(fdp, fd, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp)
+#define getsock_cap(fdp, fd, rights, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp)
#endif /* __rtems__ */
/*
@@ -195,13 +222,11 @@ rtems_bsd_getsock(int fd, struct file **fpp, u_int *fflagp)
#define COMPAT_OLDSOCK
#endif
-#ifndef __rtems__
-int
-socket(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_socket(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_socket(td, uap)
struct thread *td;
struct socket_args /* {
int domain;
@@ -226,7 +251,7 @@ rtems_bsd_socket(td, uap)
#ifndef __rtems__
fdp = td->td_proc->p_fd;
#endif /* __rtems__ */
- error = falloc(td, &fp, &fd);
+ error = falloc(td, &fp, &fd, 0);
if (error)
return (error);
/* An extra reference on `fp' has been held for us by falloc(). */
@@ -254,7 +279,7 @@ socket(int domain, int type, int protocol)
int error;
if (td != NULL) {
- error = rtems_bsd_socket(td, &ua);
+ error = sys_socket(td, &ua);
} else {
error = ENOMEM;
}
@@ -267,16 +292,12 @@ socket(int domain, int type, int protocol)
}
#endif /* __rtems__ */
-#ifndef __rtems__
/* ARGSUSED */
-int
-bind(td, uap)
-#else /* __rtems__ */
-static int kern_bind(struct thread *, int, struct sockaddr *);
-
-static int
-rtems_bsd_bind(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_bind(td, uap)
struct thread *td;
struct bind_args /* {
int s;
@@ -307,7 +328,7 @@ bind(int socket, const struct sockaddr *address, socklen_t address_len)
int error;
if (td != NULL) {
- error = rtems_bsd_bind(td, &ua);
+ error = sys_bind(td, &ua);
} else {
error = ENOMEM;
}
@@ -327,7 +348,7 @@ kern_bind(td, fd, sa)
int error;
AUDIT_ARG_FD(fd);
- error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -345,13 +366,11 @@ kern_bind(td, fd, sa)
}
/* ARGSUSED */
-#ifndef __rtems__
-int
-listen(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_listen(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_listen(td, uap)
struct thread *td;
struct listen_args /* {
int s;
@@ -363,7 +382,7 @@ rtems_bsd_listen(td, uap)
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
if (error == 0) {
so = fp->f_data;
#ifdef MAC
@@ -387,7 +406,7 @@ listen(int socket, int backlog)
int error;
if (td != NULL) {
- error = rtems_bsd_listen(td, &ua);
+ error = sys_listen(td, &ua);
} else {
error = ENOMEM;
}
@@ -508,7 +527,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
#ifndef __rtems__
fdp = td->td_proc->p_fd;
#endif /* __rtems__ */
- error = getsock(fdp, s, &headfp, &fflag);
+ error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
if (error)
return (error);
head = headfp->f_data;
@@ -521,7 +540,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
if (error != 0)
goto done;
#endif
- error = falloc(td, &nfp, &fd);
+ error = falloc(td, &nfp, &fd, 0);
if (error)
goto done;
ACCEPT_LOCK();
@@ -643,7 +662,7 @@ done:
#ifndef __rtems__
int
-accept(td, uap)
+sys_accept(td, uap)
struct thread *td;
struct accept_args *uap;
{
@@ -664,15 +683,11 @@ oaccept(td, uap)
#endif /* __rtems__ */
/* ARGSUSED */
-#ifndef __rtems__
-int
-connect(td, uap)
-#else /* __rtems__ */
-static int kern_connect(struct thread *, int, struct sockaddr *);
-
-static int
-rtems_bsd_connect(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_connect(td, uap)
struct thread *td;
struct connect_args /* {
int s;
@@ -704,7 +719,7 @@ connect(int socket, const struct sockaddr *address, socklen_t address_len)
int error;
if (td != NULL) {
- error = rtems_bsd_connect(td, &ua);
+ error = sys_connect(td, &ua);
} else {
error = ENOMEM;
}
@@ -726,7 +741,7 @@ kern_connect(td, fd, sa)
int interrupted = 0;
AUDIT_ARG_FD(fd);
- error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -800,12 +815,12 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol,
if (error)
goto free1;
/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
- error = falloc(td, &fp1, &fd);
+ error = falloc(td, &fp1, &fd, 0);
if (error)
goto free2;
rsv[0] = fd;
fp1->f_data = so1; /* so1 already has ref count */
- error = falloc(td, &fp2, &fd);
+ error = falloc(td, &fp2, &fd, 0);
if (error)
goto free3;
fp2->f_data = so2; /* so2 already has ref count */
@@ -842,7 +857,7 @@ free1:
}
int
-socketpair(struct thread *td, struct socketpair_args *uap)
+sys_socketpair(struct thread *td, struct socketpair_args *uap)
{
int error, sv[2];
@@ -875,6 +890,11 @@ sendit(td, s, mp, flags)
struct sockaddr *to;
int error;
+#ifdef CAPABILITY_MODE
+ if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
+ return (ECAPMODE);
+#endif
+
if (mp->msg_name != NULL) {
error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
if (error) {
@@ -935,14 +955,18 @@ kern_sendit(td, s, mp, flags, control, segflg)
struct uio auio;
struct iovec *iov;
struct socket *so;
- int i;
- int len, error;
+ int i, error;
+ ssize_t len;
+ cap_rights_t rights;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
AUDIT_ARG_FD(s);
- error = getsock(td->td_proc->p_fd, s, &fp, NULL);
+ rights = CAP_WRITE;
+ if (mp->msg_name != NULL)
+ rights |= CAP_CONNECT;
+ error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
if (error)
return (error);
so = (struct socket *)fp->f_data;
@@ -992,7 +1016,7 @@ kern_sendit(td, s, mp, flags, control, segflg)
!(flags & MSG_NOSIGNAL)) {
#ifndef __rtems__
PROC_LOCK(td->td_proc);
- tdksignal(td, SIGPIPE, NULL);
+ tdsignal(td, SIGPIPE);
PROC_UNLOCK(td->td_proc);
#else /* __rtems__ */
/* FIXME: Determine if we really want to use signals */
@@ -1012,13 +1036,11 @@ bad:
return (error);
}
-#ifndef __rtems__
-int
-sendto(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_sendto(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_sendto(td, uap)
struct thread *td;
struct sendto_args /* {
int s;
@@ -1063,7 +1085,7 @@ sendto(int socket, const void *message, size_t length, int flags,
int error;
if (td != NULL) {
- error = rtems_bsd_sendto(td, &ua);
+ error = sys_sendto(td, &ua);
} else {
error = ENOMEM;
}
@@ -1132,13 +1154,11 @@ osendmsg(td, uap)
#endif
#endif /* __rtems__ */
-#ifndef __rtems__
-int
-sendmsg(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_sendmsg(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_sendmsg(td, uap)
struct thread *td;
struct sendmsg_args /* {
int s;
@@ -1177,7 +1197,7 @@ sendmsg(int socket, const struct msghdr *message, int flags)
int error;
if (td != NULL) {
- error = rtems_bsd_sendmsg(td, &ua);
+ error = sys_sendmsg(td, &ua);
} else {
error = ENOMEM;
}
@@ -1204,7 +1224,7 @@ kern_recvit(td, s, mp, fromseg, controlp)
struct uio auio;
struct iovec *iov;
int i;
- socklen_t len;
+ ssize_t len;
int error;
struct mbuf *m, *control = 0;
caddr_t ctlbuf;
@@ -1215,11 +1235,11 @@ kern_recvit(td, s, mp, fromseg, controlp)
struct uio *ktruio = NULL;
#endif
- if(controlp != NULL)
- *controlp = 0;
+ if (controlp != NULL)
+ *controlp = NULL;
AUDIT_ARG_FD(s);
- error = getsock(td->td_proc->p_fd, s, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -1255,19 +1275,19 @@ kern_recvit(td, s, mp, fromseg, controlp)
(mp->msg_control || controlp) ? &control : (struct mbuf **)0,
&mp->msg_flags);
if (error) {
- if (auio.uio_resid != (int)len && (error == ERESTART ||
+ if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
}
#ifdef KTRACE
if (ktruio != NULL) {
- ktruio->uio_resid = (int)len - auio.uio_resid;
+ ktruio->uio_resid = len - auio.uio_resid;
ktrgenio(s, UIO_READ, ktruio, error);
}
#endif
if (error)
goto out;
- td->td_retval[0] = (int)len - auio.uio_resid;
+ td->td_retval[0] = len - auio.uio_resid;
if (mp->msg_name) {
len = mp->msg_namelen;
if (len <= 0 || fromsa == 0)
@@ -1375,13 +1395,11 @@ recvit(td, s, mp, namelenp)
return (error);
}
-#ifndef __rtems__
-int
-recvfrom(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_recvfrom(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_recvfrom(td, uap)
struct thread *td;
struct recvfrom_args /* {
int s;
@@ -1432,7 +1450,7 @@ recvfrom(int socket, void *__restrict buffer, size_t length, int flags,
int error;
if (td != NULL) {
- error = rtems_bsd_recvfrom(td, &ua);
+ error = sys_recvfrom(td, &ua);
} else {
error = ENOMEM;
}
@@ -1454,7 +1472,7 @@ orecvfrom(td, uap)
{
uap->flags |= MSG_COMPAT;
- return (recvfrom(td, uap));
+ return (sys_recvfrom(td, uap));
}
#endif
@@ -1521,13 +1539,11 @@ orecvmsg(td, uap)
#endif
#endif /* __rtems__ */
-#ifndef __rtems__
-int
-recvmsg(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_recvmsg(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_recvmsg(td, uap)
struct thread *td;
struct recvmsg_args /* {
int s;
@@ -1572,7 +1588,7 @@ recvmsg(int socket, struct msghdr *message, int flags)
int error;
if (td != NULL) {
- error = rtems_bsd_recvmsg(td, &ua);
+ error = sys_recvmsg(td, &ua);
} else {
error = ENOMEM;
}
@@ -1586,13 +1602,11 @@ recvmsg(int socket, struct msghdr *message, int flags)
#endif /* __rtems__ */
/* ARGSUSED */
-#ifndef __rtems__
-int
-shutdown(td, uap)
-#else /* __rtems__ */
-static int
-rtems_bsd_shutdown(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_shutdown(td, uap)
struct thread *td;
struct shutdown_args /* {
int s;
@@ -1604,7 +1618,8 @@ rtems_bsd_shutdown(td, uap)
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
+ NULL);
if (error == 0) {
so = fp->f_data;
error = soshutdown(so, uap->how);
@@ -1620,24 +1635,18 @@ shutdown(int socket, int how)
.s = socket,
.how = how
};
- int error = rtems_bsd_shutdown(NULL, &ua);
+ int error = sys_shutdown(NULL, &ua);
return rtems_bsd_error_to_status_and_errno(error);
}
#endif /* __rtems__ */
/* ARGSUSED */
-#ifndef __rtems__
-int
-setsockopt(td, uap)
-#else /* __rtems__ */
-static int
-kern_setsockopt( struct thread *td, int s, int level, int name, void *val,
- enum uio_seg valseg, socklen_t valsize);
-
-static int
-rtems_bsd_setsockopt(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_setsockopt(td, uap)
struct thread *td;
struct setsockopt_args /* {
int s;
@@ -1667,7 +1676,7 @@ setsockopt(int socket, int level, int option_name, const void *option_value,
int error;
if (td != NULL) {
- error = rtems_bsd_setsockopt(td, &ua);
+ error = sys_setsockopt(td, &ua);
} else {
error = ENOMEM;
}
@@ -1713,7 +1722,7 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
}
AUDIT_ARG_FD(s);
- error = getsock(td->td_proc->p_fd, s, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sosetopt(so, &sopt);
@@ -1723,17 +1732,11 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
}
/* ARGSUSED */
-#ifndef __rtems__
-int
-getsockopt(td, uap)
-#else /* __rtems__ */
-static int
-kern_getsockopt( struct thread *td, int s, int level, int name, void *val,
- enum uio_seg valseg, socklen_t *valsize);
-
-static int
-rtems_bsd_getsockopt(td, uap)
+#ifdef __rtems__
+static
#endif /* __rtems__ */
+int
+sys_getsockopt(td, uap)
struct thread *td;
struct getsockopt_args /* {
int s;
@@ -1775,7 +1778,7 @@ getsockopt(int socket, int level, int option_name, void *__restrict
int error;
if (td != NULL) {
- error = rtems_bsd_getsockopt(td, &ua);
+ error = sys_getsockopt(td, &ua);
} else {
error = ENOMEM;
}
@@ -1825,7 +1828,7 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
}
AUDIT_ARG_FD(s);
- error = getsock(td->td_proc->p_fd, s, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sogetopt(so, &sopt);
@@ -1914,7 +1917,7 @@ kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
return (EINVAL);
AUDIT_ARG_FD(fd);
- error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -1944,7 +1947,7 @@ bad:
#ifndef __rtems__
int
-getsockname(td, uap)
+sys_getsockname(td, uap)
struct thread *td;
struct getsockname_args *uap;
{
@@ -2043,7 +2046,7 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
return (EINVAL);
AUDIT_ARG_FD(fd);
- error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -2078,7 +2081,7 @@ done:
#ifndef __rtems__
int
-getpeername(td, uap)
+sys_getpeername(td, uap)
struct thread *td;
struct getpeername_args *uap;
{
@@ -2188,7 +2191,7 @@ sf_buf_mext(void *addr, void *args)
m = sf_buf_page(args);
sf_buf_free(args);
- vm_page_lock_queues();
+ vm_page_lock(m);
vm_page_unwire(m, 0);
/*
* Check for the object going away on us. This can
@@ -2197,7 +2200,7 @@ sf_buf_mext(void *addr, void *args)
*/
if (m->wire_count == 0 && m->object == NULL)
vm_page_free(m);
- vm_page_unlock_queues();
+ vm_page_unlock(m);
if (addr == NULL)
return;
sfs = addr;
@@ -2220,7 +2223,7 @@ sf_buf_mext(void *addr, void *args)
* specified, write the total number of bytes sent into *sbytes.
*/
int
-sendfile(struct thread *td, struct sendfile_args *uap)
+sys_sendfile(struct thread *td, struct sendfile_args *uap)
{
return (do_sendfile(td, uap, 0));
@@ -2290,9 +2293,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
struct mbuf *m = NULL;
struct sf_buf *sf;
struct vm_page *pg;
+ struct vattr va;
off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
int error, hdrlen = 0, mnw = 0;
int vfslocked;
+ int bsize;
struct sendfile_sync *sfs = NULL;
/*
@@ -2302,11 +2307,23 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* we send only the header/trailer and no payload data.
*/
AUDIT_ARG_FD(uap->fd);
- if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
+ if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
goto out;
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_SHARED | LK_RETRY);
if (vp->v_type == VREG) {
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ if (uap->nbytes == 0) {
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0) {
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ obj = NULL;
+ goto out;
+ }
+ rem = va.va_size;
+ } else
+ rem = uap->nbytes;
obj = vp->v_object;
if (obj != NULL) {
/*
@@ -2324,7 +2341,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
obj = NULL;
}
}
- }
+ } else
+ bsize = 0; /* silence gcc */
VOP_UNLOCK(vp, 0);
VFS_UNLOCK_GIANT(vfslocked);
if (obj == NULL) {
@@ -2340,8 +2358,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* The socket must be a stream socket and connected.
* Remember if it a blocking or non-blocking socket.
*/
- if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
- NULL)) != 0)
+ if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
+ &sock_fp, NULL)) != 0)
goto out;
so = sock_fp->f_data;
if (so->so_type != SOCK_STREAM) {
@@ -2361,8 +2379,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
mnw = 1;
if (uap->flags & SF_SYNC) {
- sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
- memset(sfs, 0, sizeof *sfs);
+ sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
cv_init(&sfs->cv, "sendfile");
}
@@ -2418,10 +2435,20 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* The outer loop checks the state and available space of the socket
* and takes care of the overall progress.
*/
- for (off = uap->offset, rem = uap->nbytes; ; ) {
- int loopbytes = 0;
- int space = 0;
- int done = 0;
+ for (off = uap->offset; ; ) {
+ struct mbuf *mtail;
+ int loopbytes;
+ int space;
+ int done;
+
+ if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
+ (uap->nbytes == 0 && va.va_size == fsbytes))
+ break;
+
+ mtail = NULL;
+ loopbytes = 0;
+ space = 0;
+ done = 0;
/*
* Check the socket state for ongoing connection,
@@ -2489,34 +2516,42 @@ retry_space:
*/
space -= hdrlen;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0) {
+ VFS_UNLOCK_GIANT(vfslocked);
+ goto done;
+ }
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0 || off >= va.va_size) {
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ goto done;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+
/*
* Loop and construct maximum sized mbuf chain to be bulk
* dumped into socket buffer.
*/
- while(space > loopbytes) {
+ while (space > loopbytes) {
vm_pindex_t pindex;
vm_offset_t pgoff;
struct mbuf *m0;
- VM_OBJECT_LOCK(obj);
/*
* Calculate the amount to transfer.
* Not to exceed a page, the EOF,
* or the passed in nbytes.
*/
pgoff = (vm_offset_t)(off & PAGE_MASK);
- xfsize = omin(PAGE_SIZE - pgoff,
- obj->un_pager.vnp.vnp_size - uap->offset -
- fsbytes - loopbytes);
- if (uap->nbytes)
- rem = (uap->nbytes - fsbytes - loopbytes);
- else
- rem = obj->un_pager.vnp.vnp_size -
- uap->offset - fsbytes - loopbytes;
- xfsize = omin(rem, xfsize);
+ rem = va.va_size - uap->offset;
+ if (uap->nbytes != 0)
+ rem = omin(rem, uap->nbytes);
+ rem -= fsbytes + loopbytes;
+ xfsize = omin(PAGE_SIZE - pgoff, rem);
xfsize = omin(space - loopbytes, xfsize);
if (xfsize <= 0) {
- VM_OBJECT_UNLOCK(obj);
done = 1; /* all data sent */
break;
}
@@ -2526,6 +2561,7 @@ retry_space:
* if not found or wait and loop if busy.
*/
pindex = OFF_TO_IDX(off);
+ VM_OBJECT_LOCK(obj);
pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
@@ -2543,45 +2579,29 @@ retry_space:
else if (uap->flags & SF_NODISKIO)
error = EBUSY;
else {
- int bsize, resid;
+ ssize_t resid;
- /*
- * Ensure that our page is still around
- * when the I/O completes.
- */
- vm_page_io_start(pg);
VM_OBJECT_UNLOCK(obj);
/*
* Get the page from backing store.
- */
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_lock(vp, LK_SHARED);
- if (error != 0)
- goto after_read;
- bsize = vp->v_mount->mnt_stat.f_iosize;
-
- /*
* XXXMAC: Because we don't have fp->f_cred
* here, we pass in NOCRED. This is probably
* wrong, but is consistent with our original
* implementation.
*/
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
td->td_ucred, NOCRED, &resid, td);
- VOP_UNLOCK(vp, 0);
- after_read:
VFS_UNLOCK_GIANT(vfslocked);
- VM_OBJECT_LOCK(obj);
- vm_page_io_finish(pg);
- if (!error)
- VM_OBJECT_UNLOCK(obj);
+ if (error)
+ VM_OBJECT_LOCK(obj);
mbstat.sf_iocnt++;
}
if (error) {
- vm_page_lock_queues();
+ vm_page_lock(pg);
vm_page_unwire(pg, 0);
/*
* See if anyone else might know about
@@ -2589,11 +2609,9 @@ retry_space:
* then free it.
*/
if (pg->wire_count == 0 && pg->valid == 0 &&
- pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
- pg->hold_count == 0) {
+ pg->busy == 0 && !(pg->oflags & VPO_BUSY))
vm_page_free(pg);
- }
- vm_page_unlock_queues();
+ vm_page_unlock(pg);
VM_OBJECT_UNLOCK(obj);
if (error == EAGAIN)
error = 0; /* not a real error */
@@ -2613,14 +2631,11 @@ retry_space:
SFB_CATCH);
if (sf == NULL) {
mbstat.sf_allocfail++;
- vm_page_lock_queues();
+ vm_page_lock(pg);
vm_page_unwire(pg, 0);
- /*
- * XXX: Not same check as above!?
- */
- if (pg->wire_count == 0 && pg->object == NULL)
- vm_page_free(pg);
- vm_page_unlock_queues();
+ KASSERT(pg->object != NULL,
+ ("kern_sendfile: object disappeared"));
+ vm_page_unlock(pg);
if (m == NULL)
error = (mnw ? EAGAIN : EINTR);
break;
@@ -2633,7 +2648,7 @@ retry_space:
m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
if (m0 == NULL) {
error = (mnw ? EAGAIN : ENOBUFS);
- sf_buf_mext((void *)sf_buf_kva(sf), sf);
+ sf_buf_mext(NULL, sf);
break;
}
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
@@ -2642,10 +2657,13 @@ retry_space:
m0->m_len = xfsize;
/* Append to mbuf chain. */
- if (m != NULL)
- m_cat(m, m0);
+ if (mtail != NULL)
+ mtail->m_next = m0;
+ else if (m != NULL)
+ m_last(m)->m_next = m0;
else
m = m0;
+ mtail = m0;
/* Keep track of bits processed. */
loopbytes += xfsize;
@@ -2658,6 +2676,8 @@ retry_space:
}
}
+ VOP_UNLOCK(vp, 0);
+
/* Add the buffer chain to the socket buffer. */
if (m != NULL) {
int mlen, err;
@@ -2762,7 +2782,7 @@ out:
* XXX: We should make this loadable one day.
*/
int
-sctp_peeloff(td, uap)
+sys_sctp_peeloff(td, uap)
struct thread *td;
struct sctp_peeloff_args /* {
int sd;
@@ -2779,7 +2799,7 @@ sctp_peeloff(td, uap)
fdp = td->td_proc->p_fd;
AUDIT_ARG_FD(uap->sd);
- error = fgetsock(td, uap->sd, &head, &fflag);
+ error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
if (error)
goto done2;
if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
@@ -2795,15 +2815,17 @@ sctp_peeloff(td, uap)
* but that is ok.
*/
- error = falloc(td, &nfp, &fd);
+ error = falloc(td, &nfp, &fd, 0);
if (error)
goto done;
td->td_retval[0] = fd;
CURVNET_SET(head->so_vnet);
so = sonewconn(head, SS_ISCONNECTED);
- if (so == NULL)
+ if (so == NULL) {
+ error = ENOMEM;
goto noconnection;
+ }
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
@@ -2853,7 +2875,7 @@ done2:
}
int
-sctp_generic_sendmsg (td, uap)
+sys_sctp_generic_sendmsg (td, uap)
struct thread *td;
struct sctp_generic_sendmsg_args /* {
int sd,
@@ -2876,6 +2898,7 @@ sctp_generic_sendmsg (td, uap)
#endif
struct uio auio;
struct iovec iov[1];
+ cap_rights_t rights;
if (uap->sinfo) {
error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
@@ -2883,16 +2906,19 @@ sctp_generic_sendmsg (td, uap)
return (error);
u_sinfo = &sinfo;
}
+
+ rights = CAP_WRITE;
if (uap->tolen) {
error = getsockaddr(&to, uap->to, uap->tolen);
if (error) {
to = NULL;
goto sctp_bad2;
}
+ rights |= CAP_CONNECT;
}
AUDIT_ARG_FD(uap->sd);
- error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
if (error)
goto sctp_bad;
#ifdef KTRACE
@@ -2935,7 +2961,7 @@ sctp_generic_sendmsg (td, uap)
if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
!(uap->flags & MSG_NOSIGNAL)) {
PROC_LOCK(td->td_proc);
- tdksignal(td, SIGPIPE, NULL);
+ tdsignal(td, SIGPIPE);
PROC_UNLOCK(td->td_proc);
}
}
@@ -2960,7 +2986,7 @@ sctp_bad2:
}
int
-sctp_generic_sendmsg_iov(td, uap)
+sys_sctp_generic_sendmsg_iov(td, uap)
struct thread *td;
struct sctp_generic_sendmsg_iov_args /* {
int sd,
@@ -2976,13 +3002,15 @@ sctp_generic_sendmsg_iov(td, uap)
struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
struct socket *so;
struct file *fp = NULL;
- int error=0, len, i;
+ int error=0, i;
+ ssize_t len;
struct sockaddr *to = NULL;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
struct uio auio;
struct iovec *iov, *tiov;
+ cap_rights_t rights;
if (uap->sinfo) {
error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
@@ -2990,16 +3018,18 @@ sctp_generic_sendmsg_iov(td, uap)
return (error);
u_sinfo = &sinfo;
}
+ rights = CAP_WRITE;
if (uap->tolen) {
error = getsockaddr(&to, uap->to, uap->tolen);
if (error) {
to = NULL;
goto sctp_bad2;
}
+ rights |= CAP_CONNECT;
}
AUDIT_ARG_FD(uap->sd);
- error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
if (error)
goto sctp_bad1;
@@ -3056,7 +3086,7 @@ sctp_generic_sendmsg_iov(td, uap)
if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
!(uap->flags & MSG_NOSIGNAL)) {
PROC_LOCK(td->td_proc);
- tdksignal(td, SIGPIPE, NULL);
+ tdsignal(td, SIGPIPE);
PROC_UNLOCK(td->td_proc);
}
}
@@ -3083,7 +3113,7 @@ sctp_bad2:
}
int
-sctp_generic_recvmsg(td, uap)
+sys_sctp_generic_recvmsg(td, uap)
struct thread *td;
struct sctp_generic_recvmsg_args /* {
int sd,
@@ -3096,7 +3126,7 @@ sctp_generic_recvmsg(td, uap)
} */ *uap;
{
#if (defined(INET) || defined(INET6)) && defined(SCTP)
- u_int8_t sockbufstore[256];
+ uint8_t sockbufstore[256];
struct uio auio;
struct iovec *iov, *tiov;
struct sctp_sndrcvinfo sinfo;
@@ -3104,14 +3134,15 @@ sctp_generic_recvmsg(td, uap)
struct file *fp = NULL;
struct sockaddr *fromsa;
int fromlen;
- int len, i, msg_flags;
+ ssize_t len;
+ int i, msg_flags;
int error = 0;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
AUDIT_ARG_FD(uap->sd);
- error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
if (error) {
return (error);
}
@@ -3182,7 +3213,7 @@ sctp_generic_recvmsg(td, uap)
(struct sctp_sndrcvinfo *)&sinfo, 1);
CURVNET_RESTORE();
if (error) {
- if (auio.uio_resid != (int)len && (error == ERESTART ||
+ if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
} else {
@@ -3191,13 +3222,13 @@ sctp_generic_recvmsg(td, uap)
}
#ifdef KTRACE
if (ktruio != NULL) {
- ktruio->uio_resid = (int)len - auio.uio_resid;
+ ktruio->uio_resid = len - auio.uio_resid;
ktrgenio(uap->sd, UIO_READ, ktruio, error);
}
#endif /* KTRACE */
if (error)
goto out;
- td->td_retval[0] = (int)len - auio.uio_resid;
+ td->td_retval[0] = len - auio.uio_resid;
if (fromlen && uap->from) {
len = fromlen;
@@ -3205,7 +3236,7 @@ sctp_generic_recvmsg(td, uap)
len = 0;
else {
len = MIN(len, fromsa->sa_len);
- error = copyout(fromsa, uap->from, (unsigned)len);
+ error = copyout(fromsa, uap->from, (size_t)len);
if (error)
goto out;
}
diff --git a/freebsd/sys/libkern/arc4random.c b/freebsd/sys/libkern/arc4random.c
index ab6357f7..32ca0157 100644
--- a/freebsd/sys/libkern/arc4random.c
+++ b/freebsd/sys/libkern/arc4random.c
@@ -26,6 +26,8 @@ __FBSDID("$FreeBSD$");
#define ARC4_RESEED_SECONDS 300
#define ARC4_KEYBYTES (256 / 8)
+int arc4rand_iniseed_state = ARC4_ENTR_NONE;
+
static u_int8_t arc4_i, arc4_j;
static int arc4_numruns = 0;
static u_int8_t arc4_sbox[256];
@@ -132,7 +134,8 @@ arc4rand(void *ptr, u_int len, int reseed)
struct timeval tv;
getmicrouptime(&tv);
- if (reseed ||
+ if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE,
+ ARC4_ENTR_SEED) || reseed ||
(arc4_numruns > ARC4_RESEED_BYTES) ||
(tv.tv_sec > arc4_t_reseed))
arc4_randomstir();
diff --git a/freebsd/sys/lm32/include/machine/in_cksum.h b/freebsd/sys/lm32/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/lm32/include/machine/in_cksum.h
+++ b/freebsd/sys/lm32/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/lm32/include/machine/pci_cfgreg.h b/freebsd/sys/lm32/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/lm32/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/lm32/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/lm32/lm32/legacy.c b/freebsd/sys/lm32/lm32/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/lm32/lm32/legacy.c
+++ b/freebsd/sys/lm32/lm32/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/lm32/pci/pci_bus.c b/freebsd/sys/lm32/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/lm32/pci/pci_bus.c
+++ b/freebsd/sys/lm32/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/m32c/include/machine/in_cksum.h b/freebsd/sys/m32c/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/m32c/include/machine/in_cksum.h
+++ b/freebsd/sys/m32c/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/m32c/include/machine/pci_cfgreg.h b/freebsd/sys/m32c/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/m32c/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/m32c/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/m32c/m32c/legacy.c b/freebsd/sys/m32c/m32c/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/m32c/m32c/legacy.c
+++ b/freebsd/sys/m32c/m32c/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/m32c/pci/pci_bus.c b/freebsd/sys/m32c/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/m32c/pci/pci_bus.c
+++ b/freebsd/sys/m32c/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/m32r/include/machine/in_cksum.h b/freebsd/sys/m32r/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/m32r/include/machine/in_cksum.h
+++ b/freebsd/sys/m32r/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/m32r/include/machine/pci_cfgreg.h b/freebsd/sys/m32r/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/m32r/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/m32r/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/m32r/m32r/legacy.c b/freebsd/sys/m32r/m32r/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/m32r/m32r/legacy.c
+++ b/freebsd/sys/m32r/m32r/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/m32r/pci/pci_bus.c b/freebsd/sys/m32r/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/m32r/pci/pci_bus.c
+++ b/freebsd/sys/m32r/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/m68k/include/machine/in_cksum.h b/freebsd/sys/m68k/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/m68k/include/machine/in_cksum.h
+++ b/freebsd/sys/m68k/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/m68k/include/machine/pci_cfgreg.h b/freebsd/sys/m68k/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/m68k/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/m68k/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/m68k/m68k/legacy.c b/freebsd/sys/m68k/m68k/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/m68k/m68k/legacy.c
+++ b/freebsd/sys/m68k/m68k/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/m68k/pci/pci_bus.c b/freebsd/sys/m68k/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/m68k/pci/pci_bus.c
+++ b/freebsd/sys/m68k/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/mips/include/machine/cpufunc.h b/freebsd/sys/mips/include/machine/cpufunc.h
index d32e4920..bfabe90b 100644
--- a/freebsd/sys/mips/include/machine/cpufunc.h
+++ b/freebsd/sys/mips/include/machine/cpufunc.h
@@ -69,6 +69,9 @@
static __inline void
mips_barrier(void)
{
+#ifdef CPU_CNMIPS
+ __asm __volatile("" : : : "memory");
+#else
__asm __volatile (".set noreorder\n\t"
"nop\n\t"
"nop\n\t"
@@ -80,6 +83,7 @@ mips_barrier(void)
"nop\n\t"
".set reorder\n\t"
: : : "memory");
+#endif
}
static __inline void
@@ -91,8 +95,15 @@ mips_cp0_sync(void)
static __inline void
mips_wbflush(void)
{
+#if defined(CPU_CNMIPS)
+ __asm __volatile (".set noreorder\n\t"
+ "syncw\n\t"
+ ".set reorder\n"
+ : : : "memory");
+#else
__asm __volatile ("sync" : : : "memory");
mips_barrier();
+#endif
}
static __inline void
@@ -108,8 +119,13 @@ mips_write_membar(void)
}
#ifdef _KERNEL
+/*
+ * XXX
+ * It would be nice to add variants that read/write register_t, to avoid some
+ * ABI checks.
+ */
#if defined(__mips_n32) || defined(__mips_n64)
-#define MIPS_RDRW64_COP0(n,r) \
+#define MIPS_RW64_COP0(n,r) \
static __inline uint64_t \
mips_rd_ ## n (void) \
{ \
@@ -131,18 +147,49 @@ mips_wr_ ## n (uint64_t a0) \
mips_barrier(); \
} struct __hack
+#define MIPS_RW64_COP0_SEL(n,r,s) \
+static __inline uint64_t \
+mips_rd_ ## n(void) \
+{ \
+ int v0; \
+ __asm __volatile ("dmfc0 %[v0], $"__XSTRING(r)", "__XSTRING(s)";" \
+ : [v0] "=&r"(v0)); \
+ mips_barrier(); \
+ return (v0); \
+} \
+static __inline void \
+mips_wr_ ## n(uint64_t a0) \
+{ \
+ __asm __volatile ("dmtc0 %[a0], $"__XSTRING(r)", "__XSTRING(s)";" \
+ __XSTRING(COP0_SYNC)";" \
+ : \
+ : [a0] "r"(a0)); \
+ mips_barrier(); \
+} struct __hack
+
#if defined(__mips_n64)
-MIPS_RDRW64_COP0(entrylo0, MIPS_COP_0_TLB_LO0);
-MIPS_RDRW64_COP0(entrylo1, MIPS_COP_0_TLB_LO1);
-MIPS_RDRW64_COP0(entryhi, MIPS_COP_0_TLB_HI);
-MIPS_RDRW64_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+MIPS_RW64_COP0(excpc, MIPS_COP_0_EXC_PC);
+MIPS_RW64_COP0(entryhi, MIPS_COP_0_TLB_HI);
+MIPS_RW64_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+#ifdef CPU_CNMIPS
+MIPS_RW64_COP0_SEL(cvmcount, MIPS_COP_0_COUNT, 6);
+MIPS_RW64_COP0_SEL(cvmctl, MIPS_COP_0_COUNT, 7);
+MIPS_RW64_COP0_SEL(cvmmemctl, MIPS_COP_0_COMPARE, 7);
+MIPS_RW64_COP0_SEL(icache_err, MIPS_COP_0_CACHE_ERR, 0);
+MIPS_RW64_COP0_SEL(dcache_err, MIPS_COP_0_CACHE_ERR, 1);
#endif
-MIPS_RDRW64_COP0(xcontext, MIPS_COP_0_TLB_XCONTEXT);
+#endif
+#if defined(__mips_n64) || defined(__mips_n32) /* PHYSADDR_64_BIT */
+MIPS_RW64_COP0(entrylo0, MIPS_COP_0_TLB_LO0);
+MIPS_RW64_COP0(entrylo1, MIPS_COP_0_TLB_LO1);
+#endif
+MIPS_RW64_COP0(xcontext, MIPS_COP_0_TLB_XCONTEXT);
-#undef MIPS_RDRW64_COP0
+#undef MIPS_RW64_COP0
+#undef MIPS_RW64_COP0_SEL
#endif
-#define MIPS_RDRW32_COP0(n,r) \
+#define MIPS_RW32_COP0(n,r) \
static __inline uint32_t \
mips_rd_ ## n (void) \
{ \
@@ -164,7 +211,7 @@ mips_wr_ ## n (uint32_t a0) \
mips_barrier(); \
} struct __hack
-#define MIPS_RDRW32_COP0_SEL(n,r,s) \
+#define MIPS_RW32_COP0_SEL(n,r,s) \
static __inline uint32_t \
mips_rd_ ## n(void) \
{ \
@@ -199,42 +246,55 @@ static __inline void mips_sync_icache (void)
}
#endif
-MIPS_RDRW32_COP0(compare, MIPS_COP_0_COMPARE);
-MIPS_RDRW32_COP0(config, MIPS_COP_0_CONFIG);
-MIPS_RDRW32_COP0_SEL(config1, MIPS_COP_0_CONFIG, 1);
-MIPS_RDRW32_COP0_SEL(config2, MIPS_COP_0_CONFIG, 2);
-MIPS_RDRW32_COP0_SEL(config3, MIPS_COP_0_CONFIG, 3);
-MIPS_RDRW32_COP0(count, MIPS_COP_0_COUNT);
-MIPS_RDRW32_COP0(index, MIPS_COP_0_TLB_INDEX);
-MIPS_RDRW32_COP0(wired, MIPS_COP_0_TLB_WIRED);
-MIPS_RDRW32_COP0(cause, MIPS_COP_0_CAUSE);
-MIPS_RDRW32_COP0(status, MIPS_COP_0_STATUS);
+MIPS_RW32_COP0(compare, MIPS_COP_0_COMPARE);
+MIPS_RW32_COP0(config, MIPS_COP_0_CONFIG);
+MIPS_RW32_COP0_SEL(config1, MIPS_COP_0_CONFIG, 1);
+MIPS_RW32_COP0_SEL(config2, MIPS_COP_0_CONFIG, 2);
+MIPS_RW32_COP0_SEL(config3, MIPS_COP_0_CONFIG, 3);
+#ifdef CPU_CNMIPS
+MIPS_RW32_COP0_SEL(config4, MIPS_COP_0_CONFIG, 4);
+#endif
+#ifdef CPU_NLM
+MIPS_RW32_COP0_SEL(config6, MIPS_COP_0_CONFIG, 6);
+MIPS_RW32_COP0_SEL(config7, MIPS_COP_0_CONFIG, 7);
+#endif
+MIPS_RW32_COP0(count, MIPS_COP_0_COUNT);
+MIPS_RW32_COP0(index, MIPS_COP_0_TLB_INDEX);
+MIPS_RW32_COP0(wired, MIPS_COP_0_TLB_WIRED);
+MIPS_RW32_COP0(cause, MIPS_COP_0_CAUSE);
+#if !defined(__mips_n64)
+MIPS_RW32_COP0(excpc, MIPS_COP_0_EXC_PC);
+#endif
+MIPS_RW32_COP0(status, MIPS_COP_0_STATUS);
/* XXX: Some of these registers are specific to MIPS32. */
#if !defined(__mips_n64)
-MIPS_RDRW32_COP0(entrylo0, MIPS_COP_0_TLB_LO0);
-MIPS_RDRW32_COP0(entrylo1, MIPS_COP_0_TLB_LO1);
-MIPS_RDRW32_COP0(entryhi, MIPS_COP_0_TLB_HI);
-MIPS_RDRW32_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+MIPS_RW32_COP0(entryhi, MIPS_COP_0_TLB_HI);
+MIPS_RW32_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+#endif
+#if !defined(__mips_n64) && !defined(__mips_n32) /* !PHYSADDR_64_BIT */
+MIPS_RW32_COP0(entrylo0, MIPS_COP_0_TLB_LO0);
+MIPS_RW32_COP0(entrylo1, MIPS_COP_0_TLB_LO1);
#endif
-MIPS_RDRW32_COP0(prid, MIPS_COP_0_PRID);
+MIPS_RW32_COP0(prid, MIPS_COP_0_PRID);
/* XXX 64-bit? */
-MIPS_RDRW32_COP0_SEL(ebase, MIPS_COP_0_PRID, 1);
-MIPS_RDRW32_COP0(watchlo, MIPS_COP_0_WATCH_LO);
-MIPS_RDRW32_COP0_SEL(watchlo1, MIPS_COP_0_WATCH_LO, 1);
-MIPS_RDRW32_COP0_SEL(watchlo2, MIPS_COP_0_WATCH_LO, 2);
-MIPS_RDRW32_COP0_SEL(watchlo3, MIPS_COP_0_WATCH_LO, 3);
-MIPS_RDRW32_COP0(watchhi, MIPS_COP_0_WATCH_HI);
-MIPS_RDRW32_COP0_SEL(watchhi1, MIPS_COP_0_WATCH_HI, 1);
-MIPS_RDRW32_COP0_SEL(watchhi2, MIPS_COP_0_WATCH_HI, 2);
-MIPS_RDRW32_COP0_SEL(watchhi3, MIPS_COP_0_WATCH_HI, 3);
-
-MIPS_RDRW32_COP0_SEL(perfcnt0, MIPS_COP_0_PERFCNT, 0);
-MIPS_RDRW32_COP0_SEL(perfcnt1, MIPS_COP_0_PERFCNT, 1);
-MIPS_RDRW32_COP0_SEL(perfcnt2, MIPS_COP_0_PERFCNT, 2);
-MIPS_RDRW32_COP0_SEL(perfcnt3, MIPS_COP_0_PERFCNT, 3);
-
-#undef MIPS_RDRW32_COP0
+MIPS_RW32_COP0_SEL(ebase, MIPS_COP_0_PRID, 1);
+MIPS_RW32_COP0(watchlo, MIPS_COP_0_WATCH_LO);
+MIPS_RW32_COP0_SEL(watchlo1, MIPS_COP_0_WATCH_LO, 1);
+MIPS_RW32_COP0_SEL(watchlo2, MIPS_COP_0_WATCH_LO, 2);
+MIPS_RW32_COP0_SEL(watchlo3, MIPS_COP_0_WATCH_LO, 3);
+MIPS_RW32_COP0(watchhi, MIPS_COP_0_WATCH_HI);
+MIPS_RW32_COP0_SEL(watchhi1, MIPS_COP_0_WATCH_HI, 1);
+MIPS_RW32_COP0_SEL(watchhi2, MIPS_COP_0_WATCH_HI, 2);
+MIPS_RW32_COP0_SEL(watchhi3, MIPS_COP_0_WATCH_HI, 3);
+
+MIPS_RW32_COP0_SEL(perfcnt0, MIPS_COP_0_PERFCNT, 0);
+MIPS_RW32_COP0_SEL(perfcnt1, MIPS_COP_0_PERFCNT, 1);
+MIPS_RW32_COP0_SEL(perfcnt2, MIPS_COP_0_PERFCNT, 2);
+MIPS_RW32_COP0_SEL(perfcnt3, MIPS_COP_0_PERFCNT, 3);
+
+#undef MIPS_RW32_COP0
+#undef MIPS_RW32_COP0_SEL
static __inline register_t
intr_disable(void)
diff --git a/freebsd/sys/mips/include/machine/cpuregs.h b/freebsd/sys/mips/include/machine/cpuregs.h
index 456c545c..01d710d2 100644
--- a/freebsd/sys/mips/include/machine/cpuregs.h
+++ b/freebsd/sys/mips/include/machine/cpuregs.h
@@ -198,12 +198,10 @@
#endif
/* CPU dependent mtc0 hazard hook */
-#ifdef CPU_CNMIPS
-#define COP0_SYNC nop; nop; nop; nop; nop;
+#if defined(CPU_CNMIPS) || defined(CPU_RMI)
+#define COP0_SYNC
#elif defined(CPU_SB1)
#define COP0_SYNC ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop
-#elif defined(CPU_RMI)
-#define COP0_SYNC
#else
/*
* Pick a reasonable default based on the "typical" spacing described in the
@@ -571,6 +569,7 @@
* 16/1 MIPS_COP_0_CONFIG1 ..33 Configuration register 1.
* 16/2 MIPS_COP_0_CONFIG2 ..33 Configuration register 2.
* 16/3 MIPS_COP_0_CONFIG3 ..33 Configuration register 3.
+ * 16/4 MIPS_COP_0_CONFIG4 ..33 Configuration register 4.
* 17 MIPS_COP_0_LLADDR .336 Load Linked Address.
* 18 MIPS_COP_0_WATCH_LO .336 WatchLo register.
* 19 MIPS_COP_0_WATCH_HI .333 WatchHi register.
@@ -656,7 +655,7 @@
#define MIPS_CONFIG1_TLBSZ_MASK 0x7E000000 /* bits 30..25 # tlb entries minus one */
#define MIPS_CONFIG1_TLBSZ_SHIFT 25
-#define MIPS_MAX_TLB_ENTRIES 64
+#define MIPS_MAX_TLB_ENTRIES 128
#define MIPS_CONFIG1_IS_MASK 0x01C00000 /* bits 24..22 icache sets per way */
#define MIPS_CONFIG1_IS_SHIFT 22
@@ -679,6 +678,10 @@
#define MIPS_CONFIG1_EP 0x00000002 /* EJTAG implemented */
#define MIPS_CONFIG1_FP 0x00000001 /* FPU implemented */
+#define MIPS_CONFIG4_MMUSIZEEXT 0x000000FF /* bits 7.. 0 MMU Size Extension */
+#define MIPS_CONFIG4_MMUEXTDEF 0x0000C000 /* bits 15.14 MMU Extension Definition */
+#define MIPS_CONFIG4_MMUEXTDEF_MMUSIZEEXT 0x00004000 /* This values denotes CONFIG4 bits */
+
/*
* Values for the code field in a break instruction.
*/
diff --git a/freebsd/sys/mips/include/machine/in_cksum.h b/freebsd/sys/mips/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/mips/include/machine/in_cksum.h
+++ b/freebsd/sys/mips/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/mips/include/machine/pci_cfgreg.h b/freebsd/sys/mips/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/mips/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/mips/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/mips/mips/legacy.c b/freebsd/sys/mips/mips/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/mips/mips/legacy.c
+++ b/freebsd/sys/mips/mips/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/mips/pci/pci_bus.c b/freebsd/sys/mips/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/mips/pci/pci_bus.c
+++ b/freebsd/sys/mips/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index 6b1e4b8a..55e60e9e 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -95,12 +95,16 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
#define PRINET 26 /* interruptible */
+#define SIZEOF_BPF_HDR(type) \
+ (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))
+
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))
+#ifndef BURN_BRIDGES
/*
* 32-bit version of structure prepended to each packet. We use this header
* instead of the standard one for 32-bit streams. We mark the a stream as
@@ -113,6 +117,7 @@ struct bpf_hdr32 {
uint16_t bh_hdrlen; /* length of bpf header (this struct
plus alignment padding) */
};
+#endif
struct bpf_program32 {
u_int bf_len;
@@ -125,11 +130,11 @@ struct bpf_dltlist32 {
};
#define BIOCSETF32 _IOW('B', 103, struct bpf_program32)
-#define BIOCSRTIMEOUT32 _IOW('B',109, struct timeval32)
-#define BIOCGRTIMEOUT32 _IOR('B',110, struct timeval32)
-#define BIOCGDLTLIST32 _IOWR('B',121, struct bpf_dltlist32)
-#define BIOCSETWF32 _IOW('B',123, struct bpf_program32)
-#define BIOCSETFNR32 _IOW('B',130, struct bpf_program32)
+#define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32)
+#define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32)
+#define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32)
+#define BIOCSETWF32 _IOW('B', 123, struct bpf_program32)
+#define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32)
#endif
/*
@@ -154,7 +159,7 @@ static __inline void
bpf_wakeup(struct bpf_d *);
static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
- struct timeval *);
+ struct bintime *);
static void reset_d(struct bpf_d *);
static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
@@ -171,7 +176,7 @@ SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
-SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
+static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
bpf_stats_sysctl, "bpf statistics portal");
static VNET_DEFINE(int, bpf_optimize_writers) = 0;
@@ -198,8 +203,11 @@ static struct cdevsw bpf_cdevsw = {
.d_kqfilter = bpfkqfilter,
};
-static struct filterops bpfread_filtops =
- { 1, NULL, filt_bpfdetach, filt_bpfread };
+static struct filterops bpfread_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_bpfdetach,
+ .f_event = filt_bpfread,
+};
eventhandler_tag bpf_ifdetach_cookie = NULL;
@@ -813,6 +821,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
* particular buffer method.
*/
bpf_buffer_init(d);
+ d->bd_hbuf_in_use = 0;
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
@@ -866,6 +875,14 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
callout_stop(&d->bd_callout);
timed_out = (d->bd_state == BPF_TIMED_OUT);
d->bd_state = BPF_IDLE;
+ while (d->bd_hbuf_in_use) {
+ error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET|PCATCH, "bd_hbuf", 0);
+ if (error != 0) {
+ BPFD_UNLOCK(d);
+ return (error);
+ }
+ }
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
@@ -934,24 +951,27 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
/*
* At this point, we know we have something in the hold slot.
*/
+ d->bd_hbuf_in_use = 1;
BPFD_UNLOCK(d);
/*
* Move data from hold buffer into user space.
* We know the entire buffer is transferred since
* we checked above that the read buffer is bpf_bufsize bytes.
- *
- * XXXRW: More synchronization needed here: what if a second thread
- * issues a read on the same fd at the same time? Don't want this
- * getting invalidated.
+ *
+ * We do not have to worry about simultaneous reads because
+ * we waited for sole access to the hold buffer above.
*/
error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
BPFD_LOCK(d);
+ KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
bpf_buf_reclaimed(d);
+ d->bd_hbuf_in_use = 0;
+ wakeup(&d->bd_hbuf_in_use);
BPFD_UNLOCK(d);
return (error);
@@ -1105,6 +1125,9 @@ reset_d(struct bpf_d *d)
BPFD_LOCK_ASSERT(d);
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
+ "bd_hbuf", 0);
if ((d->bd_hbuf != NULL) &&
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
/* Free the hold buffer. */
@@ -1145,6 +1168,8 @@ reset_d(struct bpf_d *d)
* BIOCSHDRCMPLT Set "header already complete" flag
* BIOCGDIRECTION Get packet direction flag
* BIOCSDIRECTION Set packet direction flag
+ * BIOCGTSTAMP Get time stamp format and resolution.
+ * BIOCSTSTAMP Set time stamp format and resolution.
* BIOCLOCK Set "locked" flag
* BIOCFEEDBACK Set packet feedback mode.
* BIOCSETZBUF Set current zero-copy buffer locations.
@@ -1193,6 +1218,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
case BIOCVERSION:
case BIOCGRSIG:
case BIOCGHDRCMPLT:
+ case BIOCSTSTAMP:
case BIOCFEEDBACK:
case FIONREAD:
case BIOCLOCK:
@@ -1242,6 +1268,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
BPFD_LOCK(d);
n = d->bd_slen;
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
if (d->bd_hbuf)
n += d->bd_hlen;
BPFD_UNLOCK(d);
@@ -1547,6 +1576,30 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
}
break;
+ /*
+ * Get packet timestamp format and resolution.
+ */
+ case BIOCGTSTAMP:
+ BPFD_LOCK(d);
+ *(u_int *)addr = d->bd_tstamp;
+ BPFD_UNLOCK(d);
+ break;
+
+ /*
+ * Set packet timestamp format and resolution.
+ */
+ case BIOCSTSTAMP:
+ {
+ u_int func;
+
+ func = *(u_int *)addr;
+ if (BPF_T_VALID(func))
+ d->bd_tstamp = func;
+ else
+ error = EINVAL;
+ }
+ break;
+
case BIOCFEEDBACK:
BPFD_LOCK(d);
d->bd_feedback = *(u_int *)addr;
@@ -1931,6 +1984,9 @@ filt_bpfread(struct knote *kn, long hint)
ready = bpf_ready(d);
if (ready) {
kn->kn_data = d->bd_slen;
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
if (d->bd_hbuf)
kn->kn_data += d->bd_hlen;
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
@@ -1942,6 +1998,48 @@ filt_bpfread(struct knote *kn, long hint)
return (ready);
}
+#define BPF_TSTAMP_NONE 0
+#define BPF_TSTAMP_FAST 1
+#define BPF_TSTAMP_NORMAL 2
+#define BPF_TSTAMP_EXTERN 3
+
+static int
+bpf_ts_quality(int tstype)
+{
+
+ if (tstype == BPF_T_NONE)
+ return (BPF_TSTAMP_NONE);
+ if ((tstype & BPF_T_FAST) != 0)
+ return (BPF_TSTAMP_FAST);
+
+ return (BPF_TSTAMP_NORMAL);
+}
+
+static int
+bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
+{
+ struct m_tag *tag;
+ int quality;
+
+ quality = bpf_ts_quality(tstype);
+ if (quality == BPF_TSTAMP_NONE)
+ return (quality);
+
+ if (m != NULL) {
+ tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
+ if (tag != NULL) {
+ *bt = *(struct bintime *)(tag + 1);
+ return (BPF_TSTAMP_EXTERN);
+ }
+ }
+ if (quality == BPF_TSTAMP_NORMAL)
+ binuptime(bt);
+ else
+ getbinuptime(bt);
+
+ return (quality);
+}
+
/*
* Incoming linkage from device drivers. Process the packet pkt, of length
* pktlen, which is stored in a contiguous buffer. The packet is parsed
@@ -1951,15 +2049,15 @@ filt_bpfread(struct knote *kn, long hint)
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
+ struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
bpf_jit_filter *bf;
#endif
u_int slen;
int gottime;
- struct timeval tv;
- gottime = 0;
+ gottime = BPF_TSTAMP_NONE;
BPFIF_RLOCK(bp);
@@ -1994,15 +2092,13 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
BPFD_LOCK(d);
d->bd_fcount++;
- if (!gottime) {
- microtime(&tv);
- gottime = 1;
- }
+ if (gottime < bpf_ts_quality(d->bd_tstamp))
+ gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
catchpacket(d, pkt, pktlen, slen,
- bpf_append_bytes, &tv);
+ bpf_append_bytes, &bt);
BPFD_UNLOCK(d);
}
}
@@ -2020,13 +2116,13 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
+ struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
bpf_jit_filter *bf;
#endif
u_int pktlen, slen;
int gottime;
- struct timeval tv;
/* Skip outgoing duplicate packets. */
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
@@ -2034,9 +2130,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
return;
}
- gottime = 0;
-
pktlen = m_length(m, NULL);
+ gottime = BPF_TSTAMP_NONE;
BPFIF_RLOCK(bp);
@@ -2056,15 +2151,13 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
BPFD_LOCK(d);
d->bd_fcount++;
- if (!gottime) {
- microtime(&tv);
- gottime = 1;
- }
+ if (gottime < bpf_ts_quality(d->bd_tstamp))
+ gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
catchpacket(d, (u_char *)m, pktlen, slen,
- bpf_append_mbuf, &tv);
+ bpf_append_mbuf, &bt);
BPFD_UNLOCK(d);
}
}
@@ -2078,11 +2171,11 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
+ struct bintime bt;
struct mbuf mb;
struct bpf_d *d;
u_int pktlen, slen;
int gottime;
- struct timeval tv;
/* Skip outgoing duplicate packets. */
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
@@ -2090,8 +2183,6 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
return;
}
- gottime = 0;
-
pktlen = m_length(m, NULL);
/*
* Craft on-stack mbuf suitable for passing to bpf_filter.
@@ -2103,6 +2194,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
mb.m_len = dlen;
pktlen += dlen;
+ gottime = BPF_TSTAMP_NONE;
BPFIF_RLOCK(bp);
@@ -2115,15 +2207,13 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
BPFD_LOCK(d);
d->bd_fcount++;
- if (!gottime) {
- microtime(&tv);
- gottime = 1;
- }
+ if (gottime < bpf_ts_quality(d->bd_tstamp))
+ gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
catchpacket(d, (u_char *)&mb, pktlen, slen,
- bpf_append_mbuf, &tv);
+ bpf_append_mbuf, &bt);
BPFD_UNLOCK(d);
}
}
@@ -2132,6 +2222,69 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
#undef BPF_CHECK_DIRECTION
+#undef BPF_TSTAMP_NONE
+#undef BPF_TSTAMP_FAST
+#undef BPF_TSTAMP_NORMAL
+#undef BPF_TSTAMP_EXTERN
+
+static int
+bpf_hdrlen(struct bpf_d *d)
+{
+ int hdrlen;
+
+ hdrlen = d->bd_bif->bif_hdrlen;
+#ifndef BURN_BRIDGES
+ if (d->bd_tstamp == BPF_T_NONE ||
+ BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
+#ifdef COMPAT_FREEBSD32
+ if (d->bd_compat32)
+ hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
+ else
+#endif
+ hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
+ else
+#endif
+ hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
+#ifdef COMPAT_FREEBSD32
+ if (d->bd_compat32)
+ hdrlen = BPF_WORDALIGN32(hdrlen);
+ else
+#endif
+ hdrlen = BPF_WORDALIGN(hdrlen);
+
+ return (hdrlen - d->bd_bif->bif_hdrlen);
+}
+
+static void
+bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
+{
+ struct bintime bt2;
+ struct timeval tsm;
+ struct timespec tsn;
+
+ if ((tstype & BPF_T_MONOTONIC) == 0) {
+ bt2 = *bt;
+ bintime_add(&bt2, &boottimebin);
+ bt = &bt2;
+ }
+ switch (BPF_T_FORMAT(tstype)) {
+ case BPF_T_MICROTIME:
+ bintime2timeval(bt, &tsm);
+ ts->bt_sec = tsm.tv_sec;
+ ts->bt_frac = tsm.tv_usec;
+ break;
+ case BPF_T_NANOTIME:
+ bintime2timespec(bt, &tsn);
+ ts->bt_sec = tsn.tv_sec;
+ ts->bt_frac = tsn.tv_nsec;
+ break;
+ case BPF_T_BINTIME:
+ ts->bt_sec = bt->sec;
+ ts->bt_frac = bt->frac;
+ break;
+ }
+}
+
/*
* Move the packet data from interface memory (pkt) into the
* store buffer. "cpfn" is the routine called to do the actual data
@@ -2142,15 +2295,19 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
- struct timeval *tv)
+ struct bintime *bt)
{
- struct bpf_hdr hdr;
+ struct bpf_xhdr hdr;
+#ifndef BURN_BRIDGES
+ struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
- struct bpf_hdr32 hdr32;
+ struct bpf_hdr32 hdr32_old;
+#endif
#endif
- int totlen, curlen;
- int hdrlen = d->bd_bif->bif_hdrlen;
+ int caplen, curlen, hdrlen, totlen;
int do_wakeup = 0;
+ int do_timestamp;
+ int tstype;
BPFD_LOCK_ASSERT(d);
@@ -2162,6 +2319,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* spot to do it.
*/
if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
@@ -2174,6 +2334,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* much. Otherwise, transfer the whole packet (unless
* we hit the buffer size limit).
*/
+ hdrlen = bpf_hdrlen(d);
totlen = hdrlen + min(snaplen, pktlen);
if (totlen > d->bd_bufsize)
totlen = d->bd_bufsize;
@@ -2203,6 +2364,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
++d->bd_dcount;
return;
}
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
@@ -2213,19 +2377,39 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* reader should be woken up.
*/
do_wakeup = 1;
+ caplen = totlen - hdrlen;
+ tstype = d->bd_tstamp;
+ do_timestamp = tstype != BPF_T_NONE;
+#ifndef BURN_BRIDGES
+ if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
+ struct bpf_ts ts;
+ if (do_timestamp)
+ bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
- /*
- * If this is a 32-bit stream, then stick a 32-bit header at the
- * front and copy the data into the buffer.
- */
- if (d->bd_compat32) {
- bzero(&hdr32, sizeof(hdr32));
- hdr32.bh_tstamp.tv_sec = tv->tv_sec;
- hdr32.bh_tstamp.tv_usec = tv->tv_usec;
- hdr32.bh_datalen = pktlen;
- hdr32.bh_hdrlen = hdrlen;
- hdr.bh_caplen = hdr32.bh_caplen = totlen - hdrlen;
- bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32, sizeof(hdr32));
+ if (d->bd_compat32) {
+ bzero(&hdr32_old, sizeof(hdr32_old));
+ if (do_timestamp) {
+ hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
+ hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
+ }
+ hdr32_old.bh_datalen = pktlen;
+ hdr32_old.bh_hdrlen = hdrlen;
+ hdr32_old.bh_caplen = caplen;
+ bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
+ sizeof(hdr32_old));
+ goto copy;
+ }
+#endif
+ bzero(&hdr_old, sizeof(hdr_old));
+ if (do_timestamp) {
+ hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
+ hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
+ }
+ hdr_old.bh_datalen = pktlen;
+ hdr_old.bh_hdrlen = hdrlen;
+ hdr_old.bh_caplen = caplen;
+ bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
+ sizeof(hdr_old));
goto copy;
}
#endif
@@ -2235,19 +2419,20 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* move forward the length of the header plus padding.
*/
bzero(&hdr, sizeof(hdr));
- hdr.bh_tstamp = *tv;
+ if (do_timestamp)
+ bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
hdr.bh_datalen = pktlen;
hdr.bh_hdrlen = hdrlen;
- hdr.bh_caplen = totlen - hdrlen;
+ hdr.bh_caplen = caplen;
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
/*
* Copy the packet data into the store buffer and update its length.
*/
-#ifdef COMPAT_FREEBSD32
- copy:
+#ifndef BURN_BRIDGES
+copy:
#endif
- (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
+ (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
d->bd_slen = curlen + totlen;
if (do_wakeup)
@@ -2318,13 +2503,7 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
BPF_UNLOCK();
- /*
- * Compute the length of the bpf header. This is not necessarily
- * equal to SIZEOF_BPF_HDR because we want to insert spacing such
- * that the network layer header begins on a longword boundary (for
- * performance reasons and to alleviate alignment restrictions).
- */
- bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+ bp->bif_hdrlen = hdrlen;
if (bootverbose)
if_printf(ifp, "bpf attached\n");
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index 004815ad..e362f161 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -45,6 +45,8 @@
typedef int32_t bpf_int32;
typedef u_int32_t bpf_u_int32;
+typedef int64_t bpf_int64;
+typedef u_int64_t bpf_u_int64;
/*
* Alignment macros. BPF_WORDALIGN rounds up to the next
@@ -113,36 +115,38 @@ struct bpf_zbuf {
size_t bz_buflen; /* Size of zero-copy buffers. */
};
-#define BIOCGBLEN _IOR('B',102, u_int)
-#define BIOCSBLEN _IOWR('B',102, u_int)
-#define BIOCSETF _IOW('B',103, struct bpf_program)
-#define BIOCFLUSH _IO('B',104)
-#define BIOCPROMISC _IO('B',105)
-#define BIOCGDLT _IOR('B',106, u_int)
-#define BIOCGETIF _IOR('B',107, struct ifreq)
-#define BIOCSETIF _IOW('B',108, struct ifreq)
-#define BIOCSRTIMEOUT _IOW('B',109, struct timeval)
-#define BIOCGRTIMEOUT _IOR('B',110, struct timeval)
-#define BIOCGSTATS _IOR('B',111, struct bpf_stat)
-#define BIOCIMMEDIATE _IOW('B',112, u_int)
-#define BIOCVERSION _IOR('B',113, struct bpf_version)
-#define BIOCGRSIG _IOR('B',114, u_int)
-#define BIOCSRSIG _IOW('B',115, u_int)
-#define BIOCGHDRCMPLT _IOR('B',116, u_int)
-#define BIOCSHDRCMPLT _IOW('B',117, u_int)
-#define BIOCGDIRECTION _IOR('B',118, u_int)
-#define BIOCSDIRECTION _IOW('B',119, u_int)
-#define BIOCSDLT _IOW('B',120, u_int)
-#define BIOCGDLTLIST _IOWR('B',121, struct bpf_dltlist)
+#define BIOCGBLEN _IOR('B', 102, u_int)
+#define BIOCSBLEN _IOWR('B', 102, u_int)
+#define BIOCSETF _IOW('B', 103, struct bpf_program)
+#define BIOCFLUSH _IO('B', 104)
+#define BIOCPROMISC _IO('B', 105)
+#define BIOCGDLT _IOR('B', 106, u_int)
+#define BIOCGETIF _IOR('B', 107, struct ifreq)
+#define BIOCSETIF _IOW('B', 108, struct ifreq)
+#define BIOCSRTIMEOUT _IOW('B', 109, struct timeval)
+#define BIOCGRTIMEOUT _IOR('B', 110, struct timeval)
+#define BIOCGSTATS _IOR('B', 111, struct bpf_stat)
+#define BIOCIMMEDIATE _IOW('B', 112, u_int)
+#define BIOCVERSION _IOR('B', 113, struct bpf_version)
+#define BIOCGRSIG _IOR('B', 114, u_int)
+#define BIOCSRSIG _IOW('B', 115, u_int)
+#define BIOCGHDRCMPLT _IOR('B', 116, u_int)
+#define BIOCSHDRCMPLT _IOW('B', 117, u_int)
+#define BIOCGDIRECTION _IOR('B', 118, u_int)
+#define BIOCSDIRECTION _IOW('B', 119, u_int)
+#define BIOCSDLT _IOW('B', 120, u_int)
+#define BIOCGDLTLIST _IOWR('B', 121, struct bpf_dltlist)
#define BIOCLOCK _IO('B', 122)
-#define BIOCSETWF _IOW('B',123, struct bpf_program)
-#define BIOCFEEDBACK _IOW('B',124, u_int)
-#define BIOCGETBUFMODE _IOR('B',125, u_int)
-#define BIOCSETBUFMODE _IOW('B',126, u_int)
-#define BIOCGETZMAX _IOR('B',127, size_t)
-#define BIOCROTZBUF _IOR('B',128, struct bpf_zbuf)
-#define BIOCSETZBUF _IOW('B',129, struct bpf_zbuf)
-#define BIOCSETFNR _IOW('B',130, struct bpf_program)
+#define BIOCSETWF _IOW('B', 123, struct bpf_program)
+#define BIOCFEEDBACK _IOW('B', 124, u_int)
+#define BIOCGETBUFMODE _IOR('B', 125, u_int)
+#define BIOCSETBUFMODE _IOW('B', 126, u_int)
+#define BIOCGETZMAX _IOR('B', 127, size_t)
+#define BIOCROTZBUF _IOR('B', 128, struct bpf_zbuf)
+#define BIOCSETZBUF _IOW('B', 129, struct bpf_zbuf)
+#define BIOCSETFNR _IOW('B', 130, struct bpf_program)
+#define BIOCGTSTAMP _IOR('B', 131, u_int)
+#define BIOCSTSTAMP _IOW('B', 132, u_int)
/* Obsolete */
#define BIOCGSEESENT BIOCGDIRECTION
@@ -155,9 +159,48 @@ enum bpf_direction {
BPF_D_OUT /* See outgoing packets */
};
+/* Time stamping functions */
+#define BPF_T_MICROTIME 0x0000
+#define BPF_T_NANOTIME 0x0001
+#define BPF_T_BINTIME 0x0002
+#define BPF_T_NONE 0x0003
+#define BPF_T_FORMAT_MASK 0x0003
+#define BPF_T_NORMAL 0x0000
+#define BPF_T_FAST 0x0100
+#define BPF_T_MONOTONIC 0x0200
+#define BPF_T_MONOTONIC_FAST (BPF_T_FAST | BPF_T_MONOTONIC)
+#define BPF_T_FLAG_MASK 0x0300
+#define BPF_T_FORMAT(t) ((t) & BPF_T_FORMAT_MASK)
+#define BPF_T_FLAG(t) ((t) & BPF_T_FLAG_MASK)
+#define BPF_T_VALID(t) \
+ ((t) == BPF_T_NONE || (BPF_T_FORMAT(t) != BPF_T_NONE && \
+ ((t) & ~(BPF_T_FORMAT_MASK | BPF_T_FLAG_MASK)) == 0))
+
+#define BPF_T_MICROTIME_FAST (BPF_T_MICROTIME | BPF_T_FAST)
+#define BPF_T_NANOTIME_FAST (BPF_T_NANOTIME | BPF_T_FAST)
+#define BPF_T_BINTIME_FAST (BPF_T_BINTIME | BPF_T_FAST)
+#define BPF_T_MICROTIME_MONOTONIC (BPF_T_MICROTIME | BPF_T_MONOTONIC)
+#define BPF_T_NANOTIME_MONOTONIC (BPF_T_NANOTIME | BPF_T_MONOTONIC)
+#define BPF_T_BINTIME_MONOTONIC (BPF_T_BINTIME | BPF_T_MONOTONIC)
+#define BPF_T_MICROTIME_MONOTONIC_FAST (BPF_T_MICROTIME | BPF_T_MONOTONIC_FAST)
+#define BPF_T_NANOTIME_MONOTONIC_FAST (BPF_T_NANOTIME | BPF_T_MONOTONIC_FAST)
+#define BPF_T_BINTIME_MONOTONIC_FAST (BPF_T_BINTIME | BPF_T_MONOTONIC_FAST)
+
/*
* Structure prepended to each packet.
*/
+struct bpf_ts {
+ bpf_int64 bt_sec; /* seconds */
+ bpf_u_int64 bt_frac; /* fraction */
+};
+struct bpf_xhdr {
+ struct bpf_ts bh_tstamp; /* time stamp */
+ bpf_u_int32 bh_caplen; /* length of captured portion */
+ bpf_u_int32 bh_datalen; /* original length of packet */
+ u_short bh_hdrlen; /* length of bpf header (this struct
+ plus alignment padding) */
+};
+/* Obsolete */
struct bpf_hdr {
struct timeval bh_tstamp; /* time stamp */
bpf_u_int32 bh_caplen; /* length of captured portion */
@@ -165,14 +208,9 @@ struct bpf_hdr {
u_short bh_hdrlen; /* length of bpf header (this struct
plus alignment padding) */
};
-/*
- * Because the structure above is not a multiple of 4 bytes, some compilers
- * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
- * Only the kernel needs to know about it; applications use bh_hdrlen.
- */
#ifdef _KERNEL
-#define SIZEOF_BPF_HDR (sizeof(struct bpf_hdr) <= 20 ? 18 : \
- sizeof(struct bpf_hdr))
+#define MTAG_BPF 0x627066
+#define MTAG_BPF_TIMESTAMP 0
#endif
/*
@@ -241,6 +279,24 @@ struct bpf_zbuf_header {
*/
#define DLT_SYMANTEC_FIREWALL 99
+/*
+ * Values between 100 and 103 are used in capture file headers as
+ * link-layer header type LINKTYPE_ values corresponding to DLT_ types
+ * that differ between platforms; don't use those values for new
+ * DLT_ types.
+ */
+
+/*
+ * Values starting with 104 are used for newly-assigned link-layer
+ * header type values; for those link-layer header types, the DLT_
+ * value returned by pcap_datalink() and passed to pcap_open_dead(),
+ * and the LINKTYPE_ value that appears in capture files, are the
+ * same.
+ *
+ * DLT_MATCHING_MIN is the lowest such value; DLT_MATCHING_MAX is
+ * the highest such value.
+ */
+#define DLT_MATCHING_MIN 104
/*
* This value was defined by libpcap 0.5; platforms that have defined
@@ -806,6 +862,281 @@ struct bpf_zbuf_header {
*/
#define DLT_IEEE802_15_4_NONASK_PHY 215
+/*
+ * David Gibson <david@gibson.dropbear.id.au> requested this for
+ * captures from the Linux kernel /dev/input/eventN devices. This
+ * is used to communicate keystrokes and mouse movements from the
+ * Linux kernel to display systems, such as Xorg.
+ */
+#define DLT_LINUX_EVDEV 216
+
+/*
+ * GSM Um and Abis interfaces, preceded by a "gsmtap" header.
+ *
+ * Requested by Harald Welte <laforge@gnumonks.org>.
+ */
+#define DLT_GSMTAP_UM 217
+#define DLT_GSMTAP_ABIS 218
+
+/*
+ * MPLS, with an MPLS label as the link-layer header.
+ * Requested by Michele Marchetto <michele@openbsd.org> on behalf
+ * of OpenBSD.
+ */
+#define DLT_MPLS 219
+
+/*
+ * USB packets, beginning with a Linux USB header, with the USB header
+ * padded to 64 bytes; required for memory-mapped access.
+ */
+#define DLT_USB_LINUX_MMAPPED 220
+
+/*
+ * DECT packets, with a pseudo-header; requested by
+ * Matthias Wenzel <tcpdump@mazzoo.de>.
+ */
+#define DLT_DECT 221
+/*
+ * From: "Lidwa, Eric (GSFC-582.0)[SGT INC]" <eric.lidwa-1@nasa.gov>
+ * Date: Mon, 11 May 2009 11:18:30 -0500
+ *
+ * DLT_AOS. We need it for AOS Space Data Link Protocol.
+ * I have already written dissectors for but need an OK from
+ * legal before I can submit a patch.
+ *
+ */
+#define DLT_AOS 222
+
+/*
+ * Wireless HART (Highway Addressable Remote Transducer)
+ * From the HART Communication Foundation
+ * IES/PAS 62591
+ *
+ * Requested by Sam Roberts <vieuxtech@gmail.com>.
+ */
+#define DLT_WIHART 223
+
+/*
+ * Fibre Channel FC-2 frames, beginning with a Frame_Header.
+ * Requested by Kahou Lei <kahou82@gmail.com>.
+ */
+#define DLT_FC_2 224
+
+/*
+ * Fibre Channel FC-2 frames, beginning with an encoding of the
+ * SOF, and ending with an encoding of the EOF.
+ *
+ * The encodings represent the frame delimiters as 4-byte sequences
+ * representing the corresponding ordered sets, with K28.5
+ * represented as 0xBC, and the D symbols as the corresponding
+ * byte values; for example, SOFi2, which is K28.5 - D21.5 - D1.2 - D21.2,
+ * is represented as 0xBC 0xB5 0x55 0x55.
+ *
+ * Requested by Kahou Lei <kahou82@gmail.com>.
+ */
+#define DLT_FC_2_WITH_FRAME_DELIMS 225
+/*
+ * Solaris ipnet pseudo-header; requested by Darren Reed <Darren.Reed@Sun.COM>.
+ *
+ * The pseudo-header starts with a one-byte version number; for version 2,
+ * the pseudo-header is:
+ *
+ * struct dl_ipnetinfo {
+ * u_int8_t dli_version;
+ * u_int8_t dli_family;
+ * u_int16_t dli_htype;
+ * u_int32_t dli_pktlen;
+ * u_int32_t dli_ifindex;
+ * u_int32_t dli_grifindex;
+ * u_int32_t dli_zsrc;
+ * u_int32_t dli_zdst;
+ * };
+ *
+ * dli_version is 2 for the current version of the pseudo-header.
+ *
+ * dli_family is a Solaris address family value, so it's 2 for IPv4
+ * and 26 for IPv6.
+ *
+ * dli_htype is a "hook type" - 0 for incoming packets, 1 for outgoing
+ * packets, and 2 for packets arriving from another zone on the same
+ * machine.
+ *
+ * dli_pktlen is the length of the packet data following the pseudo-header
+ * (so the captured length minus dli_pktlen is the length of the
+ * pseudo-header, assuming the entire pseudo-header was captured).
+ *
+ * dli_ifindex is the interface index of the interface on which the
+ * packet arrived.
+ *
+ * dli_grifindex is the group interface index number (for IPMP interfaces).
+ *
+ * dli_zsrc is the zone identifier for the source of the packet.
+ *
+ * dli_zdst is the zone identifier for the destination of the packet.
+ *
+ * A zone number of 0 is the global zone; a zone number of 0xffffffff
+ * means that the packet arrived from another host on the network, not
+ * from another zone on the same machine.
+ *
+ * An IPv4 or IPv6 datagram follows the pseudo-header; dli_family indicates
+ * which of those it is.
+ */
+#define DLT_IPNET 226
+
+/*
+ * CAN (Controller Area Network) frames, with a pseudo-header as supplied
+ * by Linux SocketCAN. See Documentation/networking/can.txt in the Linux
+ * source.
+ *
+ * Requested by Felix Obenhuber <felix@obenhuber.de>.
+ */
+#define DLT_CAN_SOCKETCAN 227
+
+/*
+ * Raw IPv4/IPv6; different from DLT_RAW in that the DLT_ value specifies
+ * whether it's v4 or v6. Requested by Darren Reed <Darren.Reed@Sun.COM>.
+ */
+#define DLT_IPV4 228
+#define DLT_IPV6 229
+
+/*
+ * IEEE 802.15.4, exactly as it appears in the spec (no padding, no
+ * nothing), and with no FCS at the end of the frame; requested by
+ * Jon Smirl <jonsmirl@gmail.com>.
+ */
+#define DLT_IEEE802_15_4_NOFCS 230
+
+/*
+ * Raw D-Bus:
+ *
+ * http://www.freedesktop.org/wiki/Software/dbus
+ *
+ * messages:
+ *
+ * http://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-messages
+ *
+ * starting with the endianness flag, followed by the message type, etc.,
+ * but without the authentication handshake before the message sequence:
+ *
+ * http://dbus.freedesktop.org/doc/dbus-specification.html#auth-protocol
+ *
+ * Requested by Martin Vidner <martin@vidner.net>.
+ */
+#define DLT_DBUS 231
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ */
+#define DLT_JUNIPER_VS 232
+#define DLT_JUNIPER_SRX_E2E 233
+#define DLT_JUNIPER_FIBRECHANNEL 234
+
+/*
+ * DVB-CI (DVB Common Interface for communication between a PC Card
+ * module and a DVB receiver). See
+ *
+ * http://www.kaiser.cx/pcap-dvbci.html
+ *
+ * for the specification.
+ *
+ * Requested by Martin Kaiser <martin@kaiser.cx>.
+ */
+#define DLT_DVB_CI 235
+
+/*
+ * Variant of 3GPP TS 27.010 multiplexing protocol (similar to, but
+ * *not* the same as, 27.010). Requested by Hans-Christoph Schemmel
+ * <hans-christoph.schemmel@cinterion.com>.
+ */
+#define DLT_MUX27010 236
+
+/*
+ * STANAG 5066 D_PDUs. Requested by M. Baris Demiray
+ * <barisdemiray@gmail.com>.
+ */
+#define DLT_STANAG_5066_D_PDU 237
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler <hannes@juniper.net>.
+ */
+#define DLT_JUNIPER_ATM_CEMIC 238
+
+/*
+ * NetFilter LOG messages
+ * (payload of netlink NFNL_SUBSYS_ULOG/NFULNL_MSG_PACKET packets)
+ *
+ * Requested by Jakub Zawadzki <darkjames-ws@darkjames.pl>
+ */
+#define DLT_NFLOG 239
+
+/*
+ * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type
+ * for Ethernet packets with a 4-byte pseudo-header and always
+ * with the payload including the FCS, as supplied by their
+ * netANALYZER hardware and software.
+ *
+ * Requested by Holger P. Frommer <HPfrommer@hilscher.com>
+ */
+#define DLT_NETANALYZER 240
+
+/*
+ * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type
+ * for Ethernet packets with a 4-byte pseudo-header and FCS and
+ * with the Ethernet header preceded by 7 bytes of preamble and
+ * 1 byte of SFD, as supplied by their netANALYZER hardware and
+ * software.
+ *
+ * Requested by Holger P. Frommer <HPfrommer@hilscher.com>
+ */
+#define DLT_NETANALYZER_TRANSPARENT 241
+
+/*
+ * IP-over-Infiniband, as specified by RFC 4391.
+ *
+ * Requested by Petr Sumbera <petr.sumbera@oracle.com>.
+ */
+#define DLT_IPOIB 242
+
+/*
+ * MPEG-2 transport stream (ISO 13818-1/ITU-T H.222.0).
+ *
+ * Requested by Guy Martin <gmsoft@tuxicoman.be>.
+ */
+#define DLT_MPEG_2_TS 243
+
+/*
+ * ng4T GmbH's UMTS Iub/Iur-over-ATM and Iub/Iur-over-IP format as
+ * used by their ng40 protocol tester.
+ *
+ * Requested by Jens Grimmer <jens.grimmer@ng4t.com>.
+ */
+#define DLT_NG40 244
+
+/*
+ * Pseudo-header giving adapter number and flags, followed by an NFC
+ * (Near-Field Communications) Logical Link Control Protocol (LLCP) PDU,
+ * as specified by NFC Forum Logical Link Control Protocol Technical
+ * Specification LLCP 1.1.
+ *
+ * Requested by Mike Wakerly <mikey@google.com>.
+ */
+#define DLT_NFC_LLCP 245
+
+/*
+ * 246 is used as LINKTYPE_PFSYNC; do not use it for any other purpose.
+ *
+ * DLT_PFSYNC has different values on different platforms, and all of
+ * them collide with something used elsewhere. On platforms that
+ * don't already define it, define it as 245.
+ */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) && !defined(__APPLE__)
+#define DLT_PFSYNC 246
+#endif
+
+#define DLT_MATCHING_MAX 246 /* highest value in the "matching" range */
+
/*
* DLT and savefile link type values are split into a class and
* a member of that class. A class value of 0 indicates a regular
@@ -904,7 +1235,8 @@ SYSCTL_DECL(_net_bpf);
/*
* Rotate the packet buffers in descriptor d. Move the store buffer into the
* hold slot, and the free buffer ino the store slot. Zero the length of the
- * new store buffer. Descriptor lock should be held.
+ * new store buffer. Descriptor lock should be held. Hold buffer must
+ * not be marked "in use".
*/
#define ROTATE_BUFFERS(d) do { \
(d)->bd_hbuf = (d)->bd_sbuf; \
@@ -926,7 +1258,7 @@ struct bpf_if {
LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
#ifdef BPF_INTERNAL
u_int bif_dlt; /* link layer type */
- u_int bif_hdrlen; /* length of header (with padding) */
+ u_int bif_hdrlen; /* length of link header */
struct ifnet *bif_ifp; /* corresponding interface */
struct rwlock bif_lock; /* interface lock */
LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index 910bcd0c..ec6aed74 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -81,6 +81,8 @@ __FBSDID("$FreeBSD$");
#include <net/bpf_buffer.h>
#include <net/bpfdesc.h>
+#define PRINET 26 /* interruptible */
+
/*
* Implement historical kernel memory buffering model for BPF: two malloc(9)
* kernel buffers are hung off of the descriptor. The size is fixed prior to
@@ -90,10 +92,10 @@ __FBSDID("$FreeBSD$");
static int bpf_bufsize = 4096;
SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW,
- &bpf_bufsize, 0, "Maximum capture buffer size in bytes");
+ &bpf_bufsize, 0, "Default capture buffer size in bytes");
static int bpf_maxbufsize = BPF_MAXBUFSIZE;
SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
- &bpf_maxbufsize, 0, "Default capture buffer in bytes");
+ &bpf_maxbufsize, 0, "Maximum capture buffer in bytes");
/*
* Simple data copy to the current kernel buffer.
@@ -191,6 +193,9 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
return (EINVAL);
}
+ while (d->bd_hbuf_in_use)
+ mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
+ PRINET, "bd_hbuf", 0);
/* Free old buffers if set */
if (d->bd_fbuf != NULL)
free(d->bd_fbuf, M_BPF);
diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c
index 3e310000..a313f4bd 100644
--- a/freebsd/sys/net/bpf_filter.c
+++ b/freebsd/sys/net/bpf_filter.c
@@ -179,6 +179,8 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
bpf_u_int32 k;
u_int32_t mem[BPF_MEMWORDS];
+ bzero(mem, sizeof(mem));
+
if (pc == NULL)
/*
* No filter means accept all.
diff --git a/freebsd/sys/net/bpf_jitter.c b/freebsd/sys/net/bpf_jitter.c
index cd4d7d2b..e3d91e85 100644
--- a/freebsd/sys/net/bpf_jitter.c
+++ b/freebsd/sys/net/bpf_jitter.c
@@ -2,7 +2,7 @@
/*-
* Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
- * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,14 +44,15 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#else
#include <stdlib.h>
-#include <string.h>
+#include <sys/mman.h>
+#include <rtems/bsd/sys/param.h>
#include <rtems/bsd/sys/types.h>
#endif
#include <net/bpf.h>
#include <net/bpf_jitter.h>
-bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, int *);
+bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
static u_int bpf_jit_accept_all(u_char *, u_int, u_int);
@@ -62,27 +63,36 @@ SYSCTL_NODE(_net, OID_AUTO, bpf_jitter, CTLFLAG_RW, 0, "BPF JIT compiler");
int bpf_jitter_enable = 1;
SYSCTL_INT(_net_bpf_jitter, OID_AUTO, enable, CTLFLAG_RW,
&bpf_jitter_enable, 0, "enable BPF JIT compiler");
+#endif
bpf_jit_filter *
bpf_jitter(struct bpf_insn *fp, int nins)
{
bpf_jit_filter *filter;
- /* Allocate the filter structure */
+ /* Allocate the filter structure. */
+#ifdef _KERNEL
filter = (struct bpf_jit_filter *)malloc(sizeof(*filter),
- M_BPFJIT, M_NOWAIT | M_ZERO);
+ M_BPFJIT, M_NOWAIT);
+#else
+ filter = (struct bpf_jit_filter *)malloc(sizeof(*filter));
+#endif
if (filter == NULL)
return (NULL);
- /* No filter means accept all */
+ /* No filter means accept all. */
if (fp == NULL || nins == 0) {
filter->func = bpf_jit_accept_all;
return (filter);
}
- /* Create the binary */
- if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) {
+ /* Create the binary. */
+ if ((filter->func = bpf_jit_compile(fp, nins, &filter->size)) == NULL) {
+#ifdef _KERNEL
free(filter, M_BPFJIT);
+#else
+ free(filter);
+#endif
return (NULL);
}
@@ -93,46 +103,16 @@ void
bpf_destroy_jit_filter(bpf_jit_filter *filter)
{
+#ifdef _KERNEL
if (filter->func != bpf_jit_accept_all)
free(filter->func, M_BPFJIT);
free(filter, M_BPFJIT);
-}
#else
-bpf_jit_filter *
-bpf_jitter(struct bpf_insn *fp, int nins)
-{
- bpf_jit_filter *filter;
-
- /* Allocate the filter structure */
- filter = (struct bpf_jit_filter *)malloc(sizeof(*filter));
- if (filter == NULL)
- return (NULL);
- memset(filter, 0, sizeof(*filter));
-
- /* No filter means accept all */
- if (fp == NULL || nins == 0) {
- filter->func = bpf_jit_accept_all;
- return (filter);
- }
-
- /* Create the binary */
- if ((filter->func = bpf_jit_compile(fp, nins, filter->mem)) == NULL) {
- free(filter);
- return (NULL);
- }
-
- return (filter);
-}
-
-void
-bpf_destroy_jit_filter(bpf_jit_filter *filter)
-{
-
if (filter->func != bpf_jit_accept_all)
- free(filter->func);
+ munmap(filter->func, filter->size);
free(filter);
-}
#endif
+}
static u_int
bpf_jit_accept_all(__unused u_char *p, __unused u_int wirelen,
diff --git a/freebsd/sys/net/bpf_jitter.h b/freebsd/sys/net/bpf_jitter.h
index 04491b01..90a1ff5f 100644
--- a/freebsd/sys/net/bpf_jitter.h
+++ b/freebsd/sys/net/bpf_jitter.h
@@ -1,6 +1,6 @@
/*-
* Copyright (C) 2002-2003 NetGroup, Politecnico di Torino (Italy)
- * Copyright (C) 2005-2008 Jung-uk Kim <jkim@FreeBSD.org>
+ * Copyright (C) 2005-2009 Jung-uk Kim <jkim@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,8 +53,7 @@ typedef u_int (*bpf_filter_func)(u_char *, u_int, u_int);
typedef struct bpf_jit_filter {
/* The native filtering binary, in the form of a bpf_filter_func. */
bpf_filter_func func;
-
- int mem[BPF_MEMWORDS]; /* Scratch memory */
+ size_t size;
} bpf_jit_filter;
/*
diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h
index c3265ce1..496f0b36 100644
--- a/freebsd/sys/net/bpfdesc.h
+++ b/freebsd/sys/net/bpfdesc.h
@@ -63,6 +63,7 @@ struct bpf_d {
caddr_t bd_sbuf; /* store slot */
caddr_t bd_hbuf; /* hold slot */
caddr_t bd_fbuf; /* free slot */
+ int bd_hbuf_in_use; /* don't rotate buffers */
int bd_slen; /* current length of store buffer */
int bd_hlen; /* current length of hold buffer */
@@ -82,6 +83,7 @@ struct bpf_d {
u_char bd_writer; /* non-zero if d is writer-only */
int bd_hdrcmplt; /* false to fill in src lladdr automatically */
int bd_direction; /* select packet direction */
+ int bd_tstamp; /* select time stamping function */
int bd_feedback; /* true to feed back sent packets */
int bd_async; /* non-zero if packet reception should generate signal */
int bd_sig; /* signal to send upon packet reception */
diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h
index 6e79a3cf..d810fa33 100644
--- a/freebsd/sys/net/flowtable.h
+++ b/freebsd/sys/net/flowtable.h
@@ -37,6 +37,7 @@ $FreeBSD$
#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
#define FL_PCPU (1<<1) /* pcpu cache */
#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
+#define FL_IPV6 (1<<9)
#define FL_TCP (1<<11)
#define FL_SCTP (1<<12)
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 92e705b0..a1c1e49e 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h> /* hz */
#include <sys/socket.h> /* for net/if.h */
#include <sys/sockio.h>
+#include <sys/sysctl.h>
#include <machine/stdarg.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
@@ -170,7 +171,8 @@ static void lacp_enable_distributing(struct lacp_port *);
static int lacp_xmit_lacpdu(struct lacp_port *);
static int lacp_xmit_marker(struct lacp_port *);
-#if defined(LACP_DEBUG)
+/* Debugging */
+
static void lacp_dump_lacpdu(const struct lacpdu *);
static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
size_t);
@@ -186,10 +188,14 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *,
size_t);
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-#define LACP_DPRINTF(a) lacp_dprintf a
-#else
-#define LACP_DPRINTF(a) /* nothing */
-#endif
+
+static int lacp_debug = 0;
+SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
+ &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
+TUNABLE_INT("net.lacp_debug", &lacp_debug);
+
+#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
/*
* partner administration variables.
@@ -292,10 +298,10 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
goto bad;
}
-#if defined(LACP_DEBUG)
- LACP_DPRINTF((lp, "lacpdu receive\n"));
- lacp_dump_lacpdu(du);
-#endif /* defined(LACP_DEBUG) */
+ if (lacp_debug > 0) {
+ lacp_dprintf(lp, "lacpdu receive\n");
+ lacp_dump_lacpdu(du);
+ }
LACP_LOCK(lsc);
lacp_sm_rx(lp, du);
@@ -372,10 +378,10 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
sizeof(du->ldu_collector));
du->ldu_collector.lci_maxdelay = 0;
-#if defined(LACP_DEBUG)
- LACP_DPRINTF((lp, "lacpdu transmit\n"));
- lacp_dump_lacpdu(du);
-#endif /* defined(LACP_DEBUG) */
+ if (lacp_debug > 0) {
+ lacp_dprintf(lp, "lacpdu transmit\n");
+ lacp_dump_lacpdu(du);
+ }
m->m_flags |= M_MCAST;
@@ -649,9 +655,7 @@ lacp_disable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif /* defined(LACP_DEBUG) */
LACP_LOCK_ASSERT(lsc);
@@ -686,9 +690,7 @@ lacp_enable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif /* defined(LACP_DEBUG) */
LACP_LOCK_ASSERT(lsc);
@@ -722,7 +724,8 @@ lacp_transit_expire(void *vp)
LACP_LOCK_ASSERT(lsc);
- LACP_DPRINTF((NULL, "%s\n", __func__));
+ LACP_TRACE(NULL);
+
lsc->lsc_suppress_distributing = FALSE;
}
@@ -840,7 +843,8 @@ lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
return;
}
- LACP_DPRINTF((NULL, "%s\n", __func__));
+ LACP_TRACE(NULL);
+
lsc->lsc_suppress_distributing = TRUE;
/* send a marker frame down each port to verify the queues are empty */
@@ -910,11 +914,9 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
struct lacp_aggregator *la;
struct lacp_aggregator *best_la = NULL;
uint64_t best_speed = 0;
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif /* defined(LACP_DEBUG) */
- LACP_DPRINTF((NULL, "%s:\n", __func__));
+ LACP_TRACE(NULL);
TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
uint64_t speed;
@@ -948,7 +950,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
("invalid aggregator list"));
-#if defined(LACP_DEBUG)
if (lsc->lsc_active_aggregator != best_la) {
LACP_DPRINTF((NULL, "active aggregator changed\n"));
LACP_DPRINTF((NULL, "old %s\n",
@@ -959,7 +960,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
}
LACP_DPRINTF((NULL, "new %s\n",
lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
-#endif /* defined(LACP_DEBUG) */
if (lsc->lsc_active_aggregator != best_la) {
sc->sc_ifp->if_baudrate = best_speed;
@@ -1042,9 +1042,7 @@ lacp_compose_key(struct lacp_port *lp)
static void
lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
{
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif
LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
__func__,
@@ -1060,9 +1058,7 @@ lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
static void
lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
{
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif
LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
__func__,
@@ -1197,9 +1193,7 @@ lacp_select(struct lacp_port *lp)
{
struct lacp_softc *lsc = lp->lp_lsc;
struct lacp_aggregator *la;
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif
if (lp->lp_aggregator) {
return;
@@ -1280,7 +1274,8 @@ lacp_sm_mux(struct lacp_port *lp)
enum lacp_selected selected = lp->lp_selected;
struct lacp_aggregator *la;
- /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
+ if (lacp_debug > 1)
+ lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
re_eval:
la = lp->lp_aggregator;
@@ -1389,9 +1384,7 @@ static void
lacp_sm_mux_timer(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
-#if defined(LACP_DEBUG)
char buf[LACP_LAGIDSTR_MAX+1];
-#endif
KASSERT(la->la_pending > 0, ("no pending event"));
@@ -1539,11 +1532,9 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
{
boolean_t active;
uint8_t oldpstate;
-#if defined(LACP_DEBUG)
char buf[LACP_STATESTR_MAX+1];
-#endif
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+ LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
@@ -1578,7 +1569,8 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
static void
lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
{
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ LACP_TRACE(lp);
if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
!LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
@@ -1593,7 +1585,7 @@ lacp_sm_rx_record_default(struct lacp_port *lp)
{
uint8_t oldpstate;
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+ LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
lp->lp_partner = lacp_partner_admin;
@@ -1605,7 +1597,8 @@ static void
lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
const struct lacp_peerinfo *info)
{
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ LACP_TRACE(lp);
if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
!LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
@@ -1618,7 +1611,8 @@ lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
static void
lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
{
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ LACP_TRACE(lp);
lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
}
@@ -1626,7 +1620,8 @@ lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
static void
lacp_sm_rx_update_default_selected(struct lacp_port *lp)
{
- /* LACP_DPRINTF((lp, "%s\n", __func__)); */
+
+ LACP_TRACE(lp);
lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
}
@@ -1814,7 +1809,7 @@ tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
return (0);
}
-#if defined(LACP_DEBUG)
+/* Debugging */
const char *
lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
{
@@ -1944,4 +1939,3 @@ lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
vprintf(fmt, va);
va_end(va);
}
-#endif
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 7d7869a3..ea4a8a46 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -76,18 +76,18 @@
#include <net/vnet.h>
#if defined(INET) || defined(INET6)
-/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/ip.h>
#include <netinet/ip_carp.h>
+#ifdef INET
+#include <netinet/if_ether.h>
+#endif /* INET */
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
-#endif
-#endif
-#ifdef INET
-#include <netinet/if_ether.h>
-#endif
+#endif /* INET6 */
+#endif /* INET || INET6 */
#include <security/mac/mac_framework.h>
@@ -100,8 +100,6 @@ struct ifindex_entry {
struct ifnet *ife_ifnet;
};
-static int slowtimo_started;
-
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
@@ -122,7 +120,7 @@ SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
&ifdescr_maxlen, 0,
"administrative maximum length for interface description");
-MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
+static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
@@ -161,10 +159,8 @@ static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
static void if_init(void *);
static void if_grow(void);
-static void if_check(void *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
-static void if_slowtimo(void *);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
@@ -221,12 +217,7 @@ struct sx ifnet_sxlock;
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];
-/*
- * System initialization
- */
-SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);
-
-MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
+static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
@@ -427,18 +418,6 @@ if_grow(void)
V_ifindex_table = e;
}
-static void
-if_check(void *dummy __unused)
-{
-
- /*
- * If at least one interface added during boot uses
- * if_watchdog then start the timer.
- */
- if (slowtimo_started)
- if_slowtimo(0);
-}
-
/*
* Allocate a struct ifnet and an index for an interface. A layer 2
* common structure will also be allocated if an allocation routine is
@@ -532,6 +511,7 @@ if_free_type(struct ifnet *ifp, u_char type)
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
+ CURVNET_SET_QUIET(ifp->if_vnet);
IFNET_WLOCK();
KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
("%s: freeing unallocated ifnet", ifp->if_xname));
@@ -539,9 +519,9 @@ if_free_type(struct ifnet *ifp, u_char type)
ifindex_free_locked(ifp->if_index);
IFNET_WUNLOCK();
- if (!refcount_release(&ifp->if_refcount))
- return;
- if_free_internal(ifp);
+ if (refcount_release(&ifp->if_refcount))
+ if_free_internal(ifp);
+ CURVNET_RESTORE();
}
/*
@@ -692,6 +672,15 @@ if_attach_internal(struct ifnet *ifp, int vmove)
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
/* Reliably crash if used uninitialized. */
ifp->if_broadcastaddr = NULL;
+
+#if defined(INET) || defined(INET6)
+ /* Initialize to max value. */
+ if (ifp->if_hw_tsomax == 0)
+ ifp->if_hw_tsomax = IP_MAXPACKET;
+ KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET &&
+ ifp->if_hw_tsomax >= IP_MAXPACKET / 8,
+ ("%s: tsomax outside of range", __func__));
+#endif
}
#ifdef VIMAGE
else {
@@ -725,18 +714,6 @@ if_attach_internal(struct ifnet *ifp, int vmove)
/* Announce the interface. */
rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
-
- if (!vmove && ifp->if_watchdog != NULL) {
- if_printf(ifp,
- "WARNING: using obsoleted if_watchdog interface\n");
-
- /*
- * Note that we need if_slowtimo(). If this happens after
- * boot, then call if_slowtimo() directly.
- */
- if (atomic_cmpset_int(&slowtimo_started, 0, 1) && !cold)
- if_slowtimo(0);
- }
}
static void
@@ -856,7 +833,9 @@ void
if_detach(struct ifnet *ifp)
{
+ CURVNET_SET_QUIET(ifp->if_vnet);
if_detach_internal(ifp, 0);
+ CURVNET_RESTORE();
}
static void
@@ -1414,7 +1393,8 @@ if_rtdel(struct radix_node *rn, void *arg)
return (0);
err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
- rt_mask(rt), rt->rt_flags|RTF_RNH_LOCKED,
+ rt_mask(rt),
+ rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED,
(struct rtentry **) NULL, rt->rt_fibnum);
if (err) {
log(LOG_WARNING, "if_rtdel: error %d\n", err);
@@ -1917,8 +1897,13 @@ if_route(struct ifnet *ifp, int flag, int fam)
#endif
}
-void (*vlan_link_state_p)(struct ifnet *, int); /* XXX: private from if_vlan */
+void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */
void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */
+struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
+struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
+int (*vlan_tag_p)(struct ifnet *, uint16_t *);
+int (*vlan_setcookie_p)(struct ifnet *, void *);
+void *(*vlan_cookie_p)(struct ifnet *);
/*
* Handle a change in the interface link state. To avoid LORs
@@ -1948,7 +1933,7 @@ do_link_state_change(void *arg, int pending)
/* Notify that the link state has changed. */
rt_ifmsg(ifp);
if (ifp->if_vlantrunk != NULL)
- (*vlan_link_state_p)(ifp, 0);
+ (*vlan_link_state_p)(ifp);
if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
IFP2AC(ifp)->ac_netgraph != NULL)
@@ -1969,6 +1954,7 @@ do_link_state_change(void *arg, int pending)
if (log_link_state_change)
log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
(link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
+ EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state);
CURVNET_RESTORE();
}
@@ -2023,39 +2009,6 @@ if_qflush(struct ifnet *ifp)
}
/*
- * Handle interface watchdog timer routines. Called
- * from softclock, we decrement timers (if set) and
- * call the appropriate interface routine on expiration.
- *
- * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
- * holding Giant.
- */
-static void
-if_slowtimo(void *arg)
-{
- VNET_ITERATOR_DECL(vnet_iter);
- struct ifnet *ifp;
- int s = splimp();
-
- VNET_LIST_RLOCK_NOSLEEP();
- IFNET_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- if (ifp->if_timer == 0 || --ifp->if_timer)
- continue;
- if (ifp->if_watchdog)
- (*ifp->if_watchdog)(ifp);
- }
- CURVNET_RESTORE();
- }
- IFNET_RUNLOCK_NOSLEEP();
- VNET_LIST_RUNLOCK_NOSLEEP();
- splx(s);
- timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
-}
-
-/*
* Map interface name to interface structure pointer, with or without
* returning a reference.
*/
@@ -2869,7 +2822,7 @@ again:
max_len += sa->sa_len;
}
- if (!sbuf_overflowed(sb))
+ if (sbuf_error(sb) == 0)
valid_len = sbuf_len(sb);
}
IF_ADDR_RUNLOCK(ifp);
@@ -2878,7 +2831,7 @@ again:
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
- if (!sbuf_overflowed(sb))
+ if (sbuf_error(sb) == 0)
valid_len = sbuf_len(sb);
}
}
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
index 25d43ac3..5795baac 100644
--- a/freebsd/sys/net/if.h
+++ b/freebsd/sys/net/if.h
@@ -43,9 +43,11 @@
/*
* <net/if.h> does not depend on <sys/time.h> on most other systems. This
* helps userland compatibility. (struct timeval ifi_lastchange)
+ * The same holds for <sys/socket.h>. (struct sockaddr ifru_addr)
*/
#ifndef _KERNEL
#include <rtems/bsd/sys/time.h>
+#include <sys/socket.h>
#endif
struct ifnet;
@@ -199,6 +201,13 @@ struct if_data {
* field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be
* more detailed or differenciated than IFCAP_*.
* Hwassist features are defined CSUM_* in sys/mbuf.h
+ *
+ * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl
+ * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE.
+ * This is not strictly necessary because the common code never
+ * changes capabilities, and it is left to the individual driver
+ * to do the right thing. However, having the filter here
+ * avoids replication of the same code in all individual drivers.
*/
#define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */
#define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */
@@ -221,12 +230,18 @@ struct if_data {
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
+#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
+#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
+
+#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
#define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC)
#define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6)
+#define IFCAP_CANTCHANGE (IFCAP_NETMAP)
+
#define IFQ_MAXLEN 50
#define IFNET_SLOWHZ 1 /* granularity is 1 second */
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
index 8ed35f77..fae432ad 100644
--- a/freebsd/sys/net/if_arcsubr.c
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -839,7 +839,7 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
}
}
-MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals");
+static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals");
static void*
arc_alloc(u_char type, struct ifnet *ifp)
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
index 2bb63582..38c64020 100644
--- a/freebsd/sys/net/if_arp.h
+++ b/freebsd/sys/net/if_arp.h
@@ -50,6 +50,7 @@ struct arphdr {
#define ARPHRD_ARCNET 7 /* arcnet hardware format */
#define ARPHRD_FRELAY 15 /* frame relay hardware format */
#define ARPHRD_IEEE1394 24 /* firewire hardware format */
+#define ARPHRD_INFINIBAND 32 /* infiniband hardware format */
u_short ar_pro; /* format of protocol address */
u_char ar_hln; /* length of hardware address */
u_char ar_pln; /* length of protocol address */
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
index 3d6295e6..a4cbeb09 100644
--- a/freebsd/sys/net/if_atmsubr.c
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -100,7 +100,7 @@ void (*atm_harp_event_p)(struct ifnet *, uint32_t, void *);
SYSCTL_NODE(_hw, OID_AUTO, atm, CTLFLAG_RW, 0, "ATM hardware");
-MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
+static MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
#ifndef ETHERTYPE_IPV6
#define ETHERTYPE_IPV6 0x86dd
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 266926a5..c8533f05 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
+#include <netinet6/in6_ifattach.h>
#endif
#if defined(INET) || defined(INET6)
#include <netinet/ip_carp.h>
@@ -134,7 +135,7 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_private.h>
/*
* Size of the route hash table. Must be a power of two.
@@ -350,7 +351,7 @@ static struct bstp_cb_ops bridge_ops = {
};
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
+static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
@@ -385,6 +386,12 @@ SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
&bridge_inherit_mac, 0,
"Inherit MAC address from the first bridge member");
+static VNET_DEFINE(int, allow_llz_overlap) = 0;
+#define V_allow_llz_overlap VNET(allow_llz_overlap)
+SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW,
+ &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope "
+ "zones of a bridge interface and the member interfaces");
+
struct bridge_control {
int (*bc_func)(struct bridge_softc *, void *);
int bc_argsize;
@@ -1043,14 +1050,6 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
if (ifs->if_bridge != NULL)
return (EBUSY);
- bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (bif == NULL)
- return (ENOMEM);
-
- bif->bif_ifp = ifs;
- bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
- bif->bif_savedcaps = ifs->if_capenable;
-
switch (ifs->if_type) {
case IFT_ETHER:
case IFT_L2VLAN:
@@ -1058,20 +1057,70 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
/* permitted interface types */
break;
default:
- error = EINVAL;
- goto out;
+ return (EINVAL);
}
+#ifdef INET6
+ /*
+ * Two valid inet6 addresses with link-local scope must not be
+ * on the parent interface and the member interfaces at the
+ * same time. This restriction is needed to prevent violation
+ * of link-local scope zone. Attempts to add a member
+ * interface which has inet6 addresses when the parent has
+ * inet6 triggers removal of all inet6 addresses on the member
+ * interface.
+ */
+
+ /* Check if the parent interface has a link-local scope addr. */
+ if (V_allow_llz_overlap == 0 &&
+ in6ifa_llaonifp(sc->sc_ifp) != NULL) {
+ /*
+ * If any, remove all inet6 addresses from the member
+ * interfaces.
+ */
+ BRIDGE_XLOCK(sc);
+ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
+ if (in6ifa_llaonifp(bif->bif_ifp)) {
+ BRIDGE_UNLOCK(sc);
+ in6_ifdetach(bif->bif_ifp);
+ BRIDGE_LOCK(sc);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ bif->bif_ifp->if_xname);
+ }
+ }
+ BRIDGE_XDROP(sc);
+ if (in6ifa_llaonifp(ifs)) {
+ BRIDGE_UNLOCK(sc);
+ in6_ifdetach(ifs);
+ BRIDGE_LOCK(sc);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ ifs->if_xname);
+ }
+ }
+#endif
/* Allow the first Ethernet member to define the MTU */
if (LIST_EMPTY(&sc->sc_iflist))
sc->sc_ifp->if_mtu = ifs->if_mtu;
else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
- error = EINVAL;
- goto out;
+ return (EINVAL);
}
+ bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (bif == NULL)
+ return (ENOMEM);
+
+ bif->bif_ifp = ifs;
+ bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+ bif->bif_savedcaps = ifs->if_capenable;
+
/*
* Assign the interface's MAC address to the bridge if it's the first
* member and the MAC address of the bridge has not been changed from
@@ -1106,12 +1155,10 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
BRIDGE_LOCK(sc);
break;
}
- if (error)
- bridge_delete_member(sc, bif, 0);
-out:
+
if (error) {
- if (bif != NULL)
- free(bif, M_DEVBUF);
+ bridge_delete_member(sc, bif, 0);
+ free(bif, M_DEVBUF);
}
return (error);
}
@@ -1702,6 +1749,9 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
struct bridge_softc *sc = ifp->if_bridge;
struct bridge_iflist *bif;
+ if (ifp->if_flags & IFF_RENAMING)
+ return;
+
/* Check if the interface is a bridge member */
if (sc != NULL) {
BRIDGE_LOCK(sc);
@@ -1813,8 +1863,10 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
m->m_flags &= ~M_VLANTAG;
}
- if (err == 0)
- dst_ifp->if_transmit(dst_ifp, m);
+ if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
+ m_freem(m0);
+ break;
+ }
}
if (err == 0) {
@@ -2315,6 +2367,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
BRIDGE_UNLOCK(sc); \
return (NULL); \
} \
+ eh = mtod(m, struct ether_header *); \
} \
} \
if (bif->bif_flags & IFBIF_LEARNING) { \
@@ -3111,6 +3164,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
args.m = *mp;
args.oif = ifp;
args.next_hop = NULL;
+ args.next_hop6 = NULL;
args.eh = &eh2;
args.inp = NULL; /* used by ipfw uid/gid/jail rules */
i = V_ip_fw_chk_ptr(&args);
@@ -3396,14 +3450,14 @@ bridge_ip6_checkbasic(struct mbuf **mp)
if ((m = m_copyup(m, sizeof(struct ip6_hdr),
(max_linkhdr + 3) & ~3)) == NULL) {
/* XXXJRT new stat, please */
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
goto bad;
}
} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
struct ifnet *inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
goto bad;
}
@@ -3412,7 +3466,7 @@ bridge_ip6_checkbasic(struct mbuf **mp)
ip6 = mtod(m, struct ip6_hdr *);
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
- V_ip6stat.ip6s_badvers++;
+ IP6STAT_INC(ip6s_badvers);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
goto bad;
}
@@ -3468,7 +3522,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
continue;
}
bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
- } else
+ } else
m_freem(m);
}
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
index b0231cc9..b85793f8 100644
--- a/freebsd/sys/net/if_dead.c
+++ b/freebsd/sys/net/if_dead.c
@@ -72,12 +72,6 @@ ifdead_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (ENXIO);
}
-static void
-ifdead_watchdog(struct ifnet *ifp)
-{
-
-}
-
static int
ifdead_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
struct sockaddr *sa)
@@ -109,7 +103,6 @@ if_dead(struct ifnet *ifp)
ifp->if_input = ifdead_input;
ifp->if_start = ifdead_start;
ifp->if_ioctl = ifdead_ioctl;
- ifp->if_watchdog = ifdead_watchdog;
ifp->if_resolvemulti = ifdead_resolvemulti;
ifp->if_qflush = ifdead_qflush;
ifp->if_transmit = ifdead_transmit;
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index fdbcb9b8..91d34722 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -29,6 +29,10 @@
* $FreeBSD$
*/
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_enc.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -55,14 +59,12 @@
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>
-#include <rtems/bsd/local/opt_inet6.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
-#include <rtems/bsd/local/opt_enc.h>
#include <netipsec/ipsec.h>
#include <netipsec/xform.h>
@@ -102,22 +104,22 @@ IFC_SIMPLE_DECLARE(enc, 1);
* Before and after are relative to when we are stripping the
* outer IP header.
*/
-SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
+static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
-SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl");
+static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl");
static int ipsec_filter_mask_in = ENC_BEFORE;
-SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
&ipsec_filter_mask_in, 0, "IPsec input firewall filter mask");
static int ipsec_bpf_mask_in = ENC_BEFORE;
-SYSCTL_XINT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
&ipsec_bpf_mask_in, 0, "IPsec input bpf mask");
-SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl");
+static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl");
static int ipsec_filter_mask_out = ENC_BEFORE;
-SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
&ipsec_filter_mask_out, 0, "IPsec output firewall filter mask");
static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER;
-SYSCTL_XINT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
&ipsec_bpf_mask_out, 0, "IPsec output bpf mask");
static void
@@ -179,12 +181,12 @@ enc_modevent(module_t mod, int type, void *data)
}
static moduledata_t enc_mod = {
- "enc",
+ "if_enc",
enc_modevent,
0
};
-DECLARE_MODULE(enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
static int
enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
@@ -245,11 +247,14 @@ ipsec_filter(struct mbuf **mp, int dir, int flags)
}
/* Skip pfil(9) if no filters are loaded */
- if (!(PFIL_HOOKED(&V_inet_pfil_hook)
+ if (1
+#ifdef INET
+ && !PFIL_HOOKED(&V_inet_pfil_hook)
+#endif
#ifdef INET6
- || PFIL_HOOKED(&V_inet6_pfil_hook)
+ && !PFIL_HOOKED(&V_inet6_pfil_hook)
#endif
- )) {
+ ) {
return (0);
}
@@ -265,6 +270,7 @@ ipsec_filter(struct mbuf **mp, int dir, int flags)
error = 0;
ip = mtod(*mp, struct ip *);
switch (ip->ip_v) {
+#ifdef INET
case 4:
/*
* before calling the firewall, swap fields the same as
@@ -284,7 +290,7 @@ ipsec_filter(struct mbuf **mp, int dir, int flags)
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
break;
-
+#endif
#ifdef INET6
case 6:
error = pfil_run_hooks(&V_inet6_pfil_hook, mp,
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index a79a8341..00ab65bd 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$");
#define EPAIRNAME "epair"
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
+static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
#ifdef EPAIR_DEBUG
static int epair_debug = 0;
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 63013b63..5ee2606e 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -74,7 +74,7 @@
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#endif
#ifdef INET6
#include <netinet6/nd6.h>
@@ -136,7 +136,7 @@ static void ether_reassign(struct ifnet *, struct vnet *, char *);
#endif
/* XXX: should be in an arp support file, not here */
-MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
+static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
#define ETHER_IS_BROADCAST(addr) \
(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
@@ -470,7 +470,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
if (mtag == NULL) {
args.rule.slot = 0;
} else {
- /* dummynet packet, already partially processed */
+ /* dummynet packet, already partially processed */
struct ipfw_rule_ref *r;
/* XXX can we free it after use ? */
@@ -501,6 +501,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
args.m = m; /* the packet we are looking at */
args.oif = dst; /* destination, if any */
args.next_hop = NULL; /* we do not support forward yet */
+ args.next_hop6 = NULL; /* we do not support forward yet */
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
args.inp = NULL; /* used by ipfw uid/gid/jail rules */
i = V_ip_fw_chk_ptr(&args);
@@ -563,7 +564,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
* mbuf chain m with the ethernet header at the front.
*/
static void
-ether_input(struct ifnet *ifp, struct mbuf *m)
+ether_input_internal(struct ifnet *ifp, struct mbuf *m)
{
struct ether_header *eh;
u_short etype;
@@ -695,6 +696,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
ETHER_HDR_LEN - ETHER_TYPE_LEN);
m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ eh = mtod(m, struct ether_header *);
}
M_SETFIB(m, ifp->if_fib);
@@ -709,6 +711,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
CURVNET_RESTORE();
return;
}
+ eh = mtod(m, struct ether_header *);
}
/*
@@ -723,6 +726,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
CURVNET_RESTORE();
return;
}
+ eh = mtod(m, struct ether_header *);
}
#if defined(INET) || defined(INET6)
@@ -762,6 +766,46 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
}
/*
+ * Ethernet input dispatch; by default, direct dispatch here regardless of
+ * global configuration.
+ */
+static void
+ether_nh_input(struct mbuf *m)
+{
+
+ ether_input_internal(m->m_pkthdr.rcvif, m);
+}
+
+static struct netisr_handler ether_nh = {
+ .nh_name = "ether",
+ .nh_handler = ether_nh_input,
+ .nh_proto = NETISR_ETHER,
+ .nh_policy = NETISR_POLICY_SOURCE,
+ .nh_dispatch = NETISR_DISPATCH_DIRECT,
+};
+
+static void
+ether_init(__unused void *arg)
+{
+
+ netisr_register(&ether_nh);
+}
+SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
+
+static void
+ether_input(struct ifnet *ifp, struct mbuf *m)
+{
+
+ /*
+ * We will rely on rcvif being set properly in the deferred context,
+ * so assert it is correct here.
+ */
+ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+
+ netisr_dispatch(NETISR_ETHER, m);
+}
+
+/*
* Upper layer processing for a received Ethernet packet.
*/
void
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
index 61cc7039..b022ecae 100644
--- a/freebsd/sys/net/if_fwsubr.c
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -65,7 +65,7 @@
#include <security/mac/mac_framework.h>
-MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
+static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
struct fw_hwaddr firewire_broadcastaddr = {
0xffffffff,
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index e309d73a..316a3958 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -114,7 +114,7 @@ IFC_SIMPLE_DECLARE(gif, 0);
static int gifmodevent(module_t, int, void *);
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
"Generic Tunnel Interface");
#ifndef MAX_GIF_NEST
/*
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index 2eb4433f..b7e0bd15 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -20,13 +20,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -157,7 +150,7 @@ static const struct protosw in_mobile_protosw = {
#endif
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
"Generic Routing Encapsulation");
#ifndef MAX_GRE_NEST
/*
@@ -356,6 +349,12 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
+ if ((ifp->if_flags & IFF_MONITOR) != 0) {
+ m_freem(m);
+ error = ENETDOWN;
+ goto end;
+ }
+
m->m_flags &= ~(M_BCAST|M_MCAST);
if (sc->g_proto == IPPROTO_MOBILE) {
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 13b882c8..74d16b1c 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -16,13 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
index d59e2b2a..660dc7dd 100644
--- a/freebsd/sys/net/if_iso88025subr.c
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -783,7 +783,7 @@ iso88025_resolvemulti (ifp, llsa, sa)
return (0);
}
-MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
+static MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
static void*
iso88025_alloc(u_char type, struct ifnet *ifp)
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index f836f3ae..4dee2afe 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -54,8 +54,10 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/bpf.h>
-#ifdef INET
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
+#endif
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
@@ -63,6 +65,8 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_ifattach.h>
#endif
#include <net/if_vlan_var.h>
@@ -98,7 +102,9 @@ static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int lagg_port_output(struct ifnet *, struct mbuf *,
struct sockaddr *, struct route *);
static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
+#ifdef LAGG_PORT_STACKING
static int lagg_port_checkstacking(struct lagg_softc *);
+#endif
static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void lagg_init(void *);
static void lagg_stop(struct lagg_softc *);
@@ -108,7 +114,8 @@ static int lagg_ether_cmdmulti(struct lagg_port *, int);
static int lagg_setflag(struct lagg_port *, int, int,
int (*func)(struct ifnet *, int));
static int lagg_setflags(struct lagg_port *, int status);
-static void lagg_start(struct ifnet *);
+static int lagg_transmit(struct ifnet *, struct mbuf *);
+static void lagg_qflush(struct ifnet *);
static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
@@ -163,7 +170,8 @@ static const struct {
};
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation");
+static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
+ "Link Aggregation");
static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
@@ -282,6 +290,9 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
"Use flow id for load sharing");
+ SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count,
+ "Total number of ports");
/* Hash all layers by default */
sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
@@ -310,15 +321,12 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if_initname(ifp, ifc->ifc_name, unit);
ifp->if_type = IFT_ETHER;
ifp->if_softc = sc;
- ifp->if_start = lagg_start;
+ ifp->if_transmit = lagg_transmit;
+ ifp->if_qflush = lagg_qflush;
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
- IFQ_SET_READY(&ifp->if_snd);
-
/*
* Attach as an ordinary ethernet device, childs will be attached
* as special device IFT_IEEE8023ADLAG.
@@ -360,7 +368,8 @@ lagg_clone_destroy(struct ifnet *ifp)
while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
lagg_port_destroy(lp, 1);
/* Unhook the aggregation protocol */
- (*sc->sc_detach)(sc);
+ if (sc->sc_detach != NULL)
+ (*sc->sc_detach)(sc);
LAGG_WUNLOCK(sc);
@@ -489,7 +498,9 @@ lagg_port_setlladdr(void *arg, int pending)
ifp = llq->llq_ifp;
/* Set the link layer address */
+ CURVNET_SET(ifp->if_vnet);
error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
+ CURVNET_RESTORE();
if (error)
printf("%s: setlladdr failed on %s\n", __func__,
ifp->if_xname);
@@ -513,13 +524,46 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
return (ENOSPC);
/* Check if port has already been associated to a lagg */
- if (ifp->if_lagg != NULL)
+ if (ifp->if_lagg != NULL) {
+ /* Port is already in the current lagg? */
+ lp = (struct lagg_port *)ifp->if_lagg;
+ if (lp->lp_softc == sc)
+ return (EEXIST);
return (EBUSY);
+ }
/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
if (ifp->if_type != IFT_ETHER)
return (EPROTONOSUPPORT);
+#ifdef INET6
+ /*
+ * The member interface should not have inet6 address because
+ * two interfaces with a valid link-local scope zone must not be
+ * merged in any form. This restriction is needed to
+ * prevent violation of link-local scope zone. Attempts to
+ * add a member interface which has inet6 addresses triggers
+ * removal of all inet6 addresses on the member interface.
+ */
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (in6ifa_llaonifp(lp->lp_ifp)) {
+ in6_ifdetach(lp->lp_ifp);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ lp->lp_ifp->if_xname);
+ }
+ }
+ if (in6ifa_llaonifp(ifp)) {
+ in6_ifdetach(ifp);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ ifp->if_xname);
+ }
+#endif
/* Allow the first Ethernet member to define the MTU */
if (SLIST_EMPTY(&sc->sc_ports))
sc->sc_ifp->if_mtu = ifp->if_mtu;
@@ -540,7 +584,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
mtx_unlock(&lagg_list_mtx);
free(lp, M_DEVBUF);
return (EINVAL);
- /* XXX disable stacking for the moment, its untested
+ /* XXX disable stacking for the moment, its untested */
+#ifdef LAGG_PORT_STACKING
lp->lp_flags |= LAGG_PORT_STACK;
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
@@ -548,7 +593,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
free(lp, M_DEVBUF);
return (E2BIG);
}
- */
+#endif
}
}
mtx_unlock(&lagg_list_mtx);
@@ -599,6 +644,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
return (error);
}
+#ifdef LAGG_PORT_STACKING
static int
lagg_port_checkstacking(struct lagg_softc *sc)
{
@@ -617,6 +663,7 @@ lagg_port_checkstacking(struct lagg_softc *sc)
return (m + 1);
}
+#endif
static int
lagg_port_destroy(struct lagg_port *lp, int runpd)
@@ -1211,35 +1258,45 @@ lagg_setflags(struct lagg_port *lp, int status)
return (0);
}
-static void
-lagg_start(struct ifnet *ifp)
+static int
+lagg_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
- struct mbuf *m;
- int error = 0;
+ int error, len, mcast;
+
+ len = m->m_pkthdr.len;
+ mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
LAGG_RLOCK(sc);
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
- IF_DRAIN(&ifp->if_snd);
LAGG_RUNLOCK(sc);
- return;
+ m_freem(m);
+ ifp->if_oerrors++;
+ return (ENXIO);
}
- for (;; error = 0) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL)
- break;
-
- ETHER_BPF_MTAP(ifp, m);
+ ETHER_BPF_MTAP(ifp, m);
- error = (*sc->sc_start)(sc, m);
- if (error == 0)
- ifp->if_opackets++;
- else
- ifp->if_oerrors++;
- }
+ error = (*sc->sc_start)(sc, m);
LAGG_RUNLOCK(sc);
+
+ if (error == 0) {
+ ifp->if_opackets++;
+ ifp->if_omcasts += mcast;
+ ifp->if_obytes += len;
+ } else
+ ifp->if_oerrors++;
+
+ return (error);
+}
+
+/*
+ * The ifp->if_qflush entry point for lagg(4) is no-op.
+ */
+static void
+lagg_qflush(struct ifnet *ifp __unused)
+{
}
static struct mbuf *
@@ -1572,7 +1629,7 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
*/
if ((lp = lagg_link_active(sc, lp)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1620,7 +1677,7 @@ lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
/* Use the master port if active or the next available port */
if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
@@ -1749,7 +1806,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
*/
if ((lp = lagg_link_active(sc, lp)) == NULL) {
m_freem(m);
- return (ENOENT);
+ return (ENETDOWN);
}
/* Send mbuf */
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index 53077589..55b816a7 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -111,10 +111,13 @@ llentry_free(struct llentry *lle)
size_t pkts_dropped;
struct mbuf *next;
- pkts_dropped = 0;
+ IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
LLE_WLOCK_ASSERT(lle);
+
LIST_REMOVE(lle, lle_next);
+ lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+ pkts_dropped = 0;
while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
next = lle->la_hold->m_nextpkt;
m_freem(lle->la_hold);
@@ -123,53 +126,43 @@ llentry_free(struct llentry *lle)
pkts_dropped++;
}
- KASSERT(lle->la_numheld == 0,
- ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
+ KASSERT(lle->la_numheld == 0,
+ ("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
lle->la_numheld, pkts_dropped));
- lle->la_flags &= ~LLE_VALID;
LLE_FREE_LOCKED(lle);
return (pkts_dropped);
}
/*
- * Update an llentry for address dst (equivalent to rtalloc for new-arp)
- * Caller must pass in a valid struct llentry * (or NULL)
+ * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp).
*
- * if found the llentry * is returned referenced and unlocked
+ * If found the llentry * is returned referenced and unlocked.
*/
-int
-llentry_update(struct llentry **llep, struct lltable *lt,
- struct sockaddr_storage *dst, struct ifnet *ifp)
+struct llentry *
+llentry_alloc(struct ifnet *ifp, struct lltable *lt,
+ struct sockaddr_storage *dst)
{
struct llentry *la;
- IF_AFDATA_RLOCK(ifp);
- la = lla_lookup(lt, LLE_EXCLUSIVE,
- (struct sockaddr *)dst);
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
IF_AFDATA_RUNLOCK(ifp);
- if ((la == NULL) &&
+ if ((la == NULL) &&
(ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
IF_AFDATA_WLOCK(ifp);
- la = lla_lookup(lt,
- (LLE_CREATE | LLE_EXCLUSIVE),
+ la = lla_lookup(lt, (LLE_CREATE | LLE_EXCLUSIVE),
(struct sockaddr *)dst);
- IF_AFDATA_WUNLOCK(ifp);
+ IF_AFDATA_WUNLOCK(ifp);
}
- if (la != NULL && (*llep != la)) {
- if (*llep != NULL)
- LLE_FREE(*llep);
+
+ if (la != NULL) {
LLE_ADDREF(la);
LLE_WUNLOCK(la);
- *llep = la;
- } else if (la != NULL)
- LLE_WUNLOCK(la);
-
- if (la == NULL)
- return (ENOENT);
+ }
- return (0);
+ return (la);
}
/*
@@ -187,17 +180,16 @@ lltable_free(struct lltable *llt)
SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
LLTABLE_WUNLOCK();
- for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- int canceled;
-
- canceled = callout_drain(&lle->la_timer);
LLE_WLOCK(lle);
- if (canceled)
+ if (callout_stop(&lle->la_timer))
LLE_REMREF(lle);
llentry_free(lle);
}
}
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
free(llt, M_LLTABLE);
}
@@ -232,7 +224,7 @@ lltable_drain(int af)
void
lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
- u_int flags)
+ u_int flags)
{
struct lltable *llt;
@@ -302,7 +294,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
if (rtm->rtm_flags & RTF_ANNOUNCE) {
flags |= LLE_PUB;
#ifdef INET
- if (dst->sa_family == AF_INET &&
+ if (dst->sa_family == AF_INET &&
((struct sockaddr_inarp *)dst)->sin_other != 0) {
struct rtentry *rt;
((struct sockaddr_inarp *)dst)->sin_other = 0;
@@ -347,7 +339,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
if (flags & LLE_CREATE)
flags |= LLE_EXCLUSIVE;
-
+
IF_AFDATA_LOCK(ifp);
lle = lla_lookup(llt, flags, dst);
IF_AFDATA_UNLOCK(ifp);
@@ -383,7 +375,7 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
#ifdef INET
/* gratuitous ARP */
if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
- arprequest(ifp,
+ arprequest(ifp,
&((struct sockaddr_in *)dst)->sin_addr,
&((struct sockaddr_in *)dst)->sin_addr,
((laflags & LLE_PROXY) ?
@@ -456,7 +448,7 @@ llatbl_lle_show(struct llentry_sa *la)
sin = (struct sockaddr_in *)&la->l3_addr;
inet_ntoa_r(sin->sin_addr, l3s);
- db_printf(" l3_addr=%s\n", l3s);
+ db_printf(" l3_addr=%s\n", l3s);
break;
}
#endif
@@ -468,7 +460,7 @@ llatbl_lle_show(struct llentry_sa *la)
sin6 = (struct sockaddr_in6 *)&la->l3_addr;
ip6_sprintf(l3s, &sin6->sin6_addr);
- db_printf(" l3_addr=%s\n", l3s);
+ db_printf(" l3_addr=%s\n", l3s);
break;
}
#endif
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index 8b15e5c8..8ac72c4f 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -30,6 +30,8 @@ __FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
+#include <rtems/bsd/local/opt_ofed.h>
+
#include <sys/_rwlock.h>
#include <netinet/in.h>
@@ -57,21 +59,25 @@ struct llentry {
struct rwlock lle_lock;
struct lltable *lle_tbl;
struct llentries *lle_head;
+ void (*lle_free)(struct lltable *, struct llentry *);
struct mbuf *la_hold;
- int la_numheld; /* # of packets currently held */
+ int la_numheld; /* # of packets currently held */
time_t la_expire;
- uint16_t la_flags;
+ uint16_t la_flags;
uint16_t la_asked;
uint16_t la_preempt;
uint16_t ln_byhint;
int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
- uint16_t ln_router;
+ uint16_t ln_router;
time_t ln_ntick;
int lle_refcnt;
-
+
union {
uint64_t mac_aligned;
uint16_t mac16[3];
+#ifdef OFED
+ uint8_t mac8[20]; /* IB needs 20 bytes. */
+#endif
} ll_addr;
/* XXX af-private? */
@@ -97,26 +103,28 @@ struct llentry {
#define LLE_ADDREF(lle) do { \
LLE_WLOCK_ASSERT(lle); \
KASSERT((lle)->lle_refcnt >= 0, \
- ("negative refcnt %d", (lle)->lle_refcnt)); \
+ ("negative refcnt %d on lle %p", \
+ (lle)->lle_refcnt, (lle))); \
(lle)->lle_refcnt++; \
} while (0)
#define LLE_REMREF(lle) do { \
LLE_WLOCK_ASSERT(lle); \
- KASSERT((lle)->lle_refcnt > 1, \
- ("bogus refcnt %d", (lle)->lle_refcnt)); \
+ KASSERT((lle)->lle_refcnt > 0, \
+ ("bogus refcnt %d on lle %p", \
+ (lle)->lle_refcnt, (lle))); \
(lle)->lle_refcnt--; \
} while (0)
#define LLE_FREE_LOCKED(lle) do { \
- if ((lle)->lle_refcnt <= 1) \
- (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
+ if ((lle)->lle_refcnt == 1) \
+ (lle)->lle_free((lle)->lle_tbl, (lle)); \
else { \
- (lle)->lle_refcnt--; \
+ LLE_REMREF(lle); \
LLE_WUNLOCK(lle); \
} \
/* guard against invalid refs */ \
- lle = NULL; \
+ (lle) = NULL; \
} while (0)
#define LLE_FREE(lle) do { \
@@ -146,7 +154,6 @@ struct lltable {
int llt_af;
struct ifnet *llt_ifp;
- void (*llt_free)(struct lltable *, struct llentry *);
void (*llt_prefix_free)(struct lltable *,
const struct sockaddr *prefix,
const struct sockaddr *mask,
@@ -154,7 +161,7 @@ struct lltable {
struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
const struct sockaddr *l3addr);
int (*llt_dump)(struct lltable *,
- struct sysctl_req *);
+ struct sysctl_req *);
};
MALLOC_DECLARE(M_LLTABLE);
@@ -167,25 +174,26 @@ MALLOC_DECLARE(M_LLTABLE);
#define LLE_VALID 0x0008 /* ll_addr is valid */
#define LLE_PROXY 0x0010 /* proxy entry ??? */
#define LLE_PUB 0x0020 /* publish entry ??? */
+#define LLE_LINKED 0x0040 /* linked to lookup structure */
+#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
#define LLE_CREATE 0x8000 /* create on a lookup miss */
-#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
#define LLATBL_HASH(key, mask) \
(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
struct lltable *lltable_init(struct ifnet *, int);
void lltable_free(struct lltable *);
-void lltable_prefix_free(int, struct sockaddr *,
- struct sockaddr *, u_int);
+void lltable_prefix_free(int, struct sockaddr *,
+ struct sockaddr *, u_int);
#if 0
void lltable_drain(int);
#endif
int lltable_sysctl_dumparp(int, struct sysctl_req *);
size_t llentry_free(struct llentry *);
-int llentry_update(struct llentry **, struct lltable *,
- struct sockaddr_storage *, struct ifnet *);
+struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
+ struct sockaddr_storage *);
/*
* Generic link layer address lookup function.
@@ -197,4 +205,14 @@ lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
}
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+#include <sys/eventhandler.h>
+enum {
+ LLENTRY_RESOLVED,
+ LLENTRY_TIMEDOUT,
+ LLENTRY_DELETED,
+ LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
#endif /* _NET_IF_LLATBL_H_ */
diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c
index 958019d9..b40dec8e 100644
--- a/freebsd/sys/net/if_loop.c
+++ b/freebsd/sys/net/if_loop.c
@@ -94,7 +94,9 @@
#endif
#define LO_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
-#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | \
+#define LO_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+#define LO_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
+ CSUM_PSEUDO_HDR | \
CSUM_IP_CHECKED | CSUM_IP_VALID | \
CSUM_SCTP_VALID)
@@ -145,8 +147,9 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_ioctl = loioctl;
ifp->if_output = looutput;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_capabilities = ifp->if_capenable = IFCAP_HWCSUM;
- ifp->if_hwassist = LO_CSUM_FEATURES;
+ ifp->if_capabilities = ifp->if_capenable =
+ IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
+ ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6;
if_attach(ifp);
bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
if (V_loif == NULL)
@@ -254,7 +257,24 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
m->m_pkthdr.csum_flags = LO_CSUM_SET;
}
m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES;
+ break;
case AF_INET6:
+#if 0
+ /*
+ * XXX-BZ for now always claim the checksum is good despite
+ * any interface flags. This is a workaround for 9.1-R and
+ * a proper solution ought to be sought later.
+ */
+ if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = LO_CSUM_SET;
+ }
+#else
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = LO_CSUM_SET;
+#endif
+ m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
+ break;
case AF_IPX:
case AF_APPLETALK:
break;
@@ -438,10 +458,29 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifp->if_capenable ^= IFCAP_RXCSUM;
if ((mask & IFCAP_TXCSUM) != 0)
ifp->if_capenable ^= IFCAP_TXCSUM;
+ if ((mask & IFCAP_RXCSUM_IPV6) != 0) {
+#if 0
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+#else
+ error = EOPNOTSUPP;
+ break;
+#endif
+ }
+ if ((mask & IFCAP_TXCSUM_IPV6) != 0) {
+#if 0
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+#else
+ error = EOPNOTSUPP;
+ break;
+#endif
+ }
+ ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist = LO_CSUM_FEATURES;
- else
- ifp->if_hwassist = 0;
+#if 0
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= LO_CSUM_FEATURES6;
+#endif
break;
default:
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 2c833228..6424d662 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -150,6 +150,9 @@ uint64_t ifmedia_baudrate(int);
#define IFM_10G_LRM 24 /* 10GBase-LRM 850nm Multi-mode */
#define IFM_UNKNOWN 25 /* media types not defined yet */
#define IFM_10G_T 26 /* 10GBase-T - RJ45 */
+#define IFM_40G_CR4 27 /* 40GBase-CR4 */
+#define IFM_40G_SR4 28 /* 40GBase-SR4 */
+#define IFM_40G_LR4 29 /* 40GBase-LR4 */
/* note 31 is the max! */
@@ -366,6 +369,9 @@ struct ifmedia_description {
{ IFM_10G_TWINAX_LONG, "10Gbase-Twinax-Long" }, \
{ IFM_UNKNOWN, "Unknown" }, \
{ IFM_10G_T, "10Gbase-T" }, \
+ { IFM_40G_CR4, "40Gbase-CR4" }, \
+ { IFM_40G_SR4, "40Gbase-SR4" }, \
+ { IFM_40G_LR4, "40Gbase-LR4" }, \
{ 0, NULL }, \
}
@@ -488,6 +494,7 @@ struct ifmedia_description {
{ IFM_IEEE80211_OFDM3, "OFDM/3Mbps" }, \
{ IFM_IEEE80211_OFDM4, "OFDM/4.5Mbps" }, \
{ IFM_IEEE80211_OFDM27, "OFDM/27Mbps" }, \
+ { IFM_IEEE80211_MCS, "MCS" }, \
{ 0, NULL }, \
}
@@ -526,6 +533,7 @@ struct ifmedia_description {
{ IFM_IEEE80211_OFDM3, "OFDM3" }, \
{ IFM_IEEE80211_OFDM4, "OFDM4.5" }, \
{ IFM_IEEE80211_OFDM27, "OFDM27" }, \
+ { IFM_IEEE80211_MCS, "MCS" }, \
{ 0, NULL }, \
}
@@ -662,6 +670,9 @@ struct ifmedia_baudrate {
{ IFM_ETHER | IFM_10G_TWINAX_LONG, IF_Gbps(10ULL) }, \
{ IFM_ETHER | IFM_10G_LRM, IF_Gbps(10ULL) }, \
{ IFM_ETHER | IFM_10G_T, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \
\
{ IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \
{ IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \
diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c
index a7d436ad..ec7a6984 100644
--- a/freebsd/sys/net/if_mib.c
+++ b/freebsd/sys/net/if_mib.c
@@ -65,7 +65,7 @@
*/
SYSCTL_DECL(_net_link_generic);
-SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
"Variables global to all interfaces");
SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
@@ -166,6 +166,6 @@ out:
return error;
}
-SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW,
+static SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW,
sysctl_ifdata, "Interface table");
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index 5619554a..4f2f6d05 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -176,7 +176,7 @@
#define STATE_ACK_SENT 8
#define STATE_OPENED 9
-MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals");
+static MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals");
struct ppp_header {
u_char address;
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index da09de7c..985c5651 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -123,12 +123,17 @@
#include <security/mac/mac_framework.h>
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
+static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
static int stf_route_cache = 1;
SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW,
&stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output");
+static int stf_permit_rfc1918 = 0;
+TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918);
+SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
+ &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
+
#define STFNAME "stf"
#define STFUNIT 0
@@ -586,9 +591,10 @@ isrfc1918addr(in)
* returns 1 if private address range:
* 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16
*/
- if ((ntohl(in->s_addr) & 0xff000000) >> 24 == 10 ||
+ if (stf_permit_rfc1918 == 0 && (
+ (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 ||
(ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 ||
- (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)
+ (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168))
return 1;
return 0;
@@ -799,7 +805,7 @@ stf_rtrequest(cmd, rt, info)
struct rt_addrinfo *info;
{
RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = IPV6_MMTU;
+ rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
}
static int
@@ -812,7 +818,7 @@ stf_ioctl(ifp, cmd, data)
struct ifreq *ifr;
struct sockaddr_in6 *sin6;
struct in_addr addr;
- int error;
+ int error, mtu;
error = 0;
switch (cmd) {
@@ -846,6 +852,18 @@ stf_ioctl(ifp, cmd, data)
error = EAFNOSUPPORT;
break;
+ case SIOCGIFMTU:
+ break;
+
+ case SIOCSIFMTU:
+ ifr = (struct ifreq *)data;
+ mtu = ifr->ifr_mtu;
+ /* RFC 4213 3.2 ideal world MTU */
+ if (mtu < IPV6_MINMTU || mtu > IF_MAXMTU - 20)
+ return (EINVAL);
+ ifp->if_mtu = mtu;
+ break;
+
default:
error = EINVAL;
break;
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
index b34b77de..9c501f16 100644
--- a/freebsd/sys/net/if_tap.c
+++ b/freebsd/sys/net/if_tap.c
@@ -67,6 +67,7 @@
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
+#include <net/if_media.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -165,7 +166,7 @@ MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
"Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
"Allow user to open /dev/tap (based on node permissions)");
@@ -608,7 +609,8 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct tap_softc *tp = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
struct ifstat *ifs = NULL;
- int dummy;
+ struct ifmediareq *ifmr = NULL;
+ int dummy, error = 0;
switch (cmd) {
case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
@@ -616,6 +618,22 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCDELMULTI:
break;
+ case SIOCGIFMEDIA:
+ ifmr = (struct ifmediareq *)data;
+ dummy = ifmr->ifm_count;
+ ifmr->ifm_count = 1;
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER;
+ if (tp->tap_flags & TAP_OPEN)
+ ifmr->ifm_status |= IFM_ACTIVE;
+ ifmr->ifm_current = ifmr->ifm_active;
+ if (dummy >= 1) {
+ int media = IFM_ETHER;
+ error = copyout(&media, ifmr->ifm_ulist,
+ sizeof(int));
+ }
+ break;
+
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
break;
@@ -632,11 +650,11 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
default:
- return (ether_ioctl(ifp, cmd, data));
- /* NOT REACHED */
+ error = ether_ioctl(ifp, cmd, data);
+ break;
}
- return (0);
+ return (error);
} /* tapifioctl */
@@ -921,7 +939,7 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
struct ifnet *ifp = tp->tap_ifp;
struct mbuf *m;
- TAPDEBUG("%s writting, minor = %#x\n",
+ TAPDEBUG("%s writing, minor = %#x\n",
ifp->if_xname, dev2unit(dev));
if (uio->uio_resid == 0)
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index 669c0b1b..25b73294 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -117,7 +117,7 @@ static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
"IP tunnel software network interface.");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
"Enable legacy devfs interface creation.");
diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h
index b2d3a159..c2effacd 100644
--- a/freebsd/sys/net/if_types.h
+++ b/freebsd/sys/net/if_types.h
@@ -238,6 +238,7 @@
#define IFT_ATMVCIENDPT 0xc2 /* ATM VCI End Point */
#define IFT_OPTICALCHANNEL 0xc3 /* Optical Channel */
#define IFT_OPTICALTRANSPORT 0xc4 /* Optical Transport */
+#define IFT_INFINIBAND 0xc7 /* Infiniband */
#define IFT_BRIDGE 0xd1 /* Transparent bridge interface */
#define IFT_STF 0xd7 /* 6to4 interface */
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index c5c489fb..5992fa0b 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -141,7 +141,7 @@ struct ifnet {
struct carp_if *if_carp; /* carp interface structure */
struct bpf_if *if_bpf; /* packet filter structure */
u_short if_index; /* numeric abbreviation for this if */
- short if_timer; /* time 'til if_watchdog called */
+ short if_index_reserved; /* spare space to grow if_index */
struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
int if_flags; /* up/down, broadcast, etc. */
int if_capabilities; /* interface features & capabilities */
@@ -161,8 +161,6 @@ struct ifnet {
(struct ifnet *);
int (*if_ioctl) /* ioctl routine */
(struct ifnet *, u_long, caddr_t);
- void (*if_watchdog) /* timer routine */
- (struct ifnet *);
void (*if_init) /* Init routine */
(void *);
int (*if_resolvemulti) /* validate/resolve multicast */
@@ -197,6 +195,8 @@ struct ifnet {
/* protected by if_addr_mtx */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
+ char *if_description; /* interface description */
+ u_int if_fib; /* interface FIB */
u_char if_alloctype; /* if_type at time of allocation */
/*
@@ -205,10 +205,12 @@ struct ifnet {
* be used with care where binary compatibility is required.
*/
char if_cspare[3];
- char *if_description; /* interface description */
- void *if_pspare[7]; /* 1 netmap, 6 TBD */
+ u_int if_hw_tsomax; /* tso burst length limit, the minmum
+ * is (IP_MAXPACKET / 8).
+ * XXXAO: Have to find a better place
+ * for it eventually. */
int if_ispare[3];
- u_int if_fib; /* interface FIB */
+ void *if_pspare[8]; /* 1 netmap, 7 TDB */
};
typedef void if_init_f_t(void *);
@@ -329,6 +331,18 @@ void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */
IF_UNLOCK(ifq); \
} while (0)
+#define _IF_DEQUEUE_ALL(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \
+ (ifq)->ifq_len = 0; \
+} while (0)
+
+#define IF_DEQUEUE_ALL(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE_ALL(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
@@ -361,6 +375,9 @@ EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
/* interface departure event */
typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
+/* Interface link state change event */
+typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
+EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
/*
* interface groups
@@ -406,6 +423,8 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock)
#define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
+#define IF_AFDATA_RLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED)
+#define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
@@ -574,22 +593,10 @@ do { \
} while (0)
#ifdef _KERNEL
-static __inline void
-drbr_stats_update(struct ifnet *ifp, int len, int mflags)
-{
-#ifndef NO_SLOW_STATS
- ifp->if_obytes += len;
- if (mflags & M_MCAST)
- ifp->if_omcasts++;
-#endif
-}
-
static __inline int
drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
{
int error = 0;
- int len = m->m_pkthdr.len;
- int mflags = m->m_flags;
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
@@ -597,16 +604,53 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
return (error);
}
#endif
- if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) {
- br->br_drops++;
+ error = buf_ring_enqueue(br, m);
+ if (error)
m_freem(m);
- } else
- drbr_stats_update(ifp, len, mflags);
-
+
return (error);
}
static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new)
+{
+ /*
+ * The top of the list needs to be swapped
+ * for this one.
+ */
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Peek in altq case dequeued it
+ * so put it back.
+ */
+ IFQ_DRV_PREPEND(&ifp->if_snd, new);
+ return;
+ }
+#endif
+ buf_ring_putback_sc(br, new);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Pull it off like a dequeue
+ * since drbr_advance() does nothing
+ * for altq and drbr_putback() will
+ * use the old prepend function.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return(buf_ring_peek(br));
+}
+
+static __inline void
drbr_flush(struct ifnet *ifp, struct buf_ring *br)
{
struct mbuf *m;
@@ -633,7 +677,7 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
#ifdef ALTQ
struct mbuf *m;
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
IFQ_DEQUEUE(&ifp->if_snd, m);
return (m);
}
@@ -641,6 +685,18 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
return (buf_ring_dequeue_sc(br));
}
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ /* Nothing to do here since peek dequeues in altq case */
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ return;
+#endif
+ return (buf_ring_advance_sc(br));
+}
+
+
static __inline struct mbuf *
drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
int (*func) (struct mbuf *, void *), void *arg)
@@ -703,6 +759,8 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
#define IF_MINMTU 72
#define IF_MAXMTU 65535
+#define TOEDEV(ifp) ((ifp)->if_llsoftc)
+
#endif /* _KERNEL */
/*
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 95ea1a9e..f31b9be2 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -43,6 +43,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/sys/param.h>
@@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/sx.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@@ -67,6 +69,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_vlan_var.h>
#include <net/vnet.h>
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#endif
+
#define VLANNAME "vlan"
#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
@@ -91,13 +98,14 @@ struct ifvlantrunk {
};
struct vlan_mc_entry {
- struct ether_addr mc_addr;
+ struct sockaddr_dl mc_addr;
SLIST_ENTRY(vlan_mc_entry) mc_entries;
};
struct ifvlan {
struct ifvlantrunk *ifv_trunk;
struct ifnet *ifv_ifp;
+ void *ifv_cookie;
#define TRUNK(ifv) ((ifv)->ifv_trunk)
#define PARENT(ifv) ((ifv)->ifv_trunk->parent)
int ifv_pflags; /* special flags we have set on parent */
@@ -130,8 +138,10 @@ static struct {
};
SYSCTL_DECL(_net_link);
-SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN");
-SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency");
+static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
+ "IEEE 802.1Q VLAN");
+static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
+ "for consistency");
static int soft_pad = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
@@ -154,12 +164,12 @@ static eventhandler_tag iflladdr_tag;
* however on practice it does not. Probably this is because array
* is too big to fit into CPU cache.
*/
-static struct mtx ifv_mtx;
-#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF)
-#define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx)
-#define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED)
-#define VLAN_LOCK() mtx_lock(&ifv_mtx)
-#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx)
+static struct sx ifv_lock;
+#define VLAN_LOCK_INIT() sx_init(&ifv_lock, "vlan_global")
+#define VLAN_LOCK_DESTROY() sx_destroy(&ifv_lock)
+#define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED)
+#define VLAN_LOCK() sx_xlock(&ifv_lock)
+#define VLAN_UNLOCK() sx_xunlock(&ifv_lock)
#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME)
#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw)
@@ -192,7 +202,7 @@ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
static void vlan_unconfig(struct ifnet *ifp);
static void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
-static void vlan_link_state(struct ifnet *ifp, int link);
+static void vlan_link_state(struct ifnet *ifp);
static void vlan_capabilities(struct ifvlan *ifv);
static void vlan_trunk_capabilities(struct ifnet *ifp);
@@ -388,6 +398,47 @@ vlan_dumphash(struct ifvlantrunk *trunk)
}
}
#endif /* 0 */
+#else
+
+static __inline struct ifvlan *
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+{
+
+ return trunk->vlans[tag];
+}
+
+static __inline int
+vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+
+ if (trunk->vlans[ifv->ifv_tag] != NULL)
+ return EEXIST;
+ trunk->vlans[ifv->ifv_tag] = ifv;
+ trunk->refcnt++;
+
+ return (0);
+}
+
+static __inline int
+vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
+{
+
+ trunk->vlans[ifv->ifv_tag] = NULL;
+ trunk->refcnt--;
+
+ return (0);
+}
+
+static __inline void
+vlan_freehash(struct ifvlantrunk *trunk)
+{
+}
+
+static __inline void
+vlan_inithash(struct ifvlantrunk *trunk)
+{
+}
+
#endif /* !VLAN_ARRAY */
static void
@@ -396,9 +447,7 @@ trunk_destroy(struct ifvlantrunk *trunk)
VLAN_LOCK_ASSERT();
TRUNK_LOCK(trunk);
-#ifndef VLAN_ARRAY
vlan_freehash(trunk);
-#endif
trunk->parent->if_vlantrunk = NULL;
TRUNK_UNLOCK(trunk);
TRUNK_LOCK_DESTROY(trunk);
@@ -423,7 +472,6 @@ vlan_setmulti(struct ifnet *ifp)
struct ifmultiaddr *ifma, *rifma = NULL;
struct ifvlan *sc;
struct vlan_mc_entry *mc;
- struct sockaddr_dl sdl;
int error;
/*VLAN_LOCK_ASSERT();*/
@@ -434,17 +482,9 @@ vlan_setmulti(struct ifnet *ifp)
CURVNET_SET_QUIET(ifp_p->if_vnet);
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_index = ifp_p->if_index;
- sdl.sdl_type = IFT_ETHER;
- sdl.sdl_alen = ETHER_ADDR_LEN;
-
/* First, remove any existing filter entries. */
while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
- bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
- error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
+ error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
if (error)
return (error);
SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
@@ -458,12 +498,11 @@ vlan_setmulti(struct ifnet *ifp)
mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
if (mc == NULL)
return (ENOMEM);
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- (char *)&mc->mc_addr, ETHER_ADDR_LEN);
+ bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
+ mc->mc_addr.sdl_index = ifp_p->if_index;
SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- LLADDR(&sdl), ETHER_ADDR_LEN);
- error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma);
+ error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
+ &rifma);
if (error)
return (error);
}
@@ -505,7 +544,8 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
#endif /* VLAN_ARRAY */
VLAN_UNLOCK();
- if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
+ ifp->if_addrlen);
VLAN_LOCK();
}
VLAN_UNLOCK();
@@ -566,6 +606,92 @@ restart:
}
/*
+ * Return the trunk device for a virtual interface.
+ */
+static struct ifnet *
+vlan_trunkdev(struct ifnet *ifp)
+{
+ struct ifvlan *ifv;
+
+ if (ifp->if_type != IFT_L2VLAN)
+ return (NULL);
+ ifv = ifp->if_softc;
+ ifp = NULL;
+ VLAN_LOCK();
+ if (ifv->ifv_trunk)
+ ifp = PARENT(ifv);
+ VLAN_UNLOCK();
+ return (ifp);
+}
+
+/*
+ * Return the 16bit vlan tag for this interface.
+ */
+static int
+vlan_tag(struct ifnet *ifp, uint16_t *tagp)
+{
+ struct ifvlan *ifv;
+
+ if (ifp->if_type != IFT_L2VLAN)
+ return (EINVAL);
+ ifv = ifp->if_softc;
+ *tagp = ifv->ifv_tag;
+ return (0);
+}
+
+/*
+ * Return a driver specific cookie for this interface. Synchronization
+ * with setcookie must be provided by the driver.
+ */
+static void *
+vlan_cookie(struct ifnet *ifp)
+{
+ struct ifvlan *ifv;
+
+ if (ifp->if_type != IFT_L2VLAN)
+ return (NULL);
+ ifv = ifp->if_softc;
+ return (ifv->ifv_cookie);
+}
+
+/*
+ * Store a cookie in our softc that drivers can use to store driver
+ * private per-instance data in.
+ */
+static int
+vlan_setcookie(struct ifnet *ifp, void *cookie)
+{
+ struct ifvlan *ifv;
+
+ if (ifp->if_type != IFT_L2VLAN)
+ return (EINVAL);
+ ifv = ifp->if_softc;
+ ifv->ifv_cookie = cookie;
+ return (0);
+}
+
+/*
+ * Return the vlan device present at the specific tag.
+ */
+static struct ifnet *
+vlan_devat(struct ifnet *ifp, uint16_t tag)
+{
+ struct ifvlantrunk *trunk;
+ struct ifvlan *ifv;
+
+ trunk = ifp->if_vlantrunk;
+ if (trunk == NULL)
+ return (NULL);
+ ifp = NULL;
+ TRUNK_RLOCK(trunk);
+ ifv = vlan_gethash(trunk, tag);
+ if (ifv)
+ ifp = ifv->ifv_ifp;
+ TRUNK_RUNLOCK(trunk);
+ return (ifp);
+}
+
+/*
* VLAN support can be loaded as a module. The only place in the
* system that's intimately aware of this is ether_input. We hook
* into this code through vlan_input_p which is defined there and
@@ -575,7 +701,7 @@ restart:
extern void (*vlan_input_p)(struct ifnet *, struct mbuf *);
/* For if_link_state_change() eyes only... */
-extern void (*vlan_link_state_p)(struct ifnet *, int);
+extern void (*vlan_link_state_p)(struct ifnet *);
static int
vlan_modevent(module_t mod, int type, void *data)
@@ -595,6 +721,11 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_input_p = vlan_input;
vlan_link_state_p = vlan_link_state;
vlan_trunk_cap_p = vlan_trunk_capabilities;
+ vlan_trunkdev_p = vlan_trunkdev;
+ vlan_cookie_p = vlan_cookie;
+ vlan_setcookie_p = vlan_setcookie;
+ vlan_tag_p = vlan_tag;
+ vlan_devat_p = vlan_devat;
#ifndef VIMAGE
if_clone_attach(&vlan_cloner);
#endif
@@ -617,6 +748,11 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_input_p = NULL;
vlan_link_state_p = NULL;
vlan_trunk_cap_p = NULL;
+ vlan_trunkdev_p = NULL;
+ vlan_tag_p = NULL;
+ vlan_cookie_p = NULL;
+ vlan_setcookie_p = NULL;
+ vlan_devat_p = NULL;
VLAN_LOCK_DESTROY();
if (bootverbose)
printf("vlan: unloaded\n");
@@ -667,7 +803,12 @@ vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
/* Check for <etherif>.<vlan> style interface names. */
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- if (ifp->if_type != IFT_ETHER)
+ /*
+ * We can handle non-ethernet hardware types as long as
+ * they handle the tagging and headers themselves.
+ */
+ if (ifp->if_type != IFT_ETHER &&
+ (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
continue;
if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
continue;
@@ -720,6 +861,8 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
struct ifvlan *ifv;
struct ifnet *ifp;
struct ifnet *p;
+ struct ifaddr *ifa;
+ struct sockaddr_dl *sdl;
struct vlanreq vlr;
static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
@@ -818,6 +961,9 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_baudrate = 0;
ifp->if_type = IFT_L2VLAN;
ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
+ ifa = ifp->if_addr;
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_L2VLAN;
if (ethertag) {
error = vlan_config(ifv, p, tag);
@@ -890,7 +1036,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
if (!UP_AND_RUNNING(p)) {
m_freem(m);
ifp->if_oerrors++;
- return (0);
+ return (ENETDOWN);
}
/*
@@ -905,7 +1051,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* devices that just discard such runts instead or mishandle
* them somehow.
*/
- if (soft_pad) {
+ if (soft_pad && p->if_type == IFT_ETHER) {
static char pad[8]; /* just zeros */
int n;
@@ -1017,11 +1163,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
}
TRUNK_RLOCK(trunk);
-#ifdef VLAN_ARRAY
- ifv = trunk->vlans[tag];
-#else
ifv = vlan_gethash(trunk, tag);
-#endif
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
TRUNK_RUNLOCK(trunk);
m_freem(m);
@@ -1047,7 +1189,8 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
/* VID numbers 0x0 and 0xFFF are reserved */
if (tag == 0 || tag == 0xFFF)
return (EINVAL);
- if (p->if_type != IFT_ETHER)
+ if (p->if_type != IFT_ETHER &&
+ (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
return (EPROTONOSUPPORT);
if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
return (EPROTONOSUPPORT);
@@ -1057,15 +1200,11 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
if (p->if_vlantrunk == NULL) {
trunk = malloc(sizeof(struct ifvlantrunk),
M_VLAN, M_WAITOK | M_ZERO);
-#ifndef VLAN_ARRAY
vlan_inithash(trunk);
-#endif
VLAN_LOCK();
if (p->if_vlantrunk != NULL) {
/* A race that that is very unlikely to be hit. */
-#ifndef VLAN_ARRAY
vlan_freehash(trunk);
-#endif
free(trunk, M_VLAN);
goto exists;
}
@@ -1081,18 +1220,9 @@ exists:
}
ifv->ifv_tag = tag; /* must set this before vlan_inshash() */
-#ifdef VLAN_ARRAY
- if (trunk->vlans[tag] != NULL) {
- error = EEXIST;
- goto done;
- }
- trunk->vlans[tag] = ifv;
- trunk->refcnt++;
-#else
error = vlan_inshash(trunk, ifv);
if (error)
goto done;
-#endif
ifv->ifv_proto = ETHERTYPE_VLAN;
ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
ifv->ifv_mintu = ETHERMIN;
@@ -1122,8 +1252,19 @@ exists:
ifv->ifv_trunk = trunk;
ifp = ifv->ifv_ifp;
+ /*
+ * Initialize fields from our parent. This duplicates some
+ * work with ether_ifattach() but allows for non-ethernet
+ * interfaces to also work.
+ */
ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
ifp->if_baudrate = p->if_baudrate;
+ ifp->if_output = p->if_output;
+ ifp->if_input = p->if_input;
+ ifp->if_resolvemulti = p->if_resolvemulti;
+ ifp->if_addrlen = p->if_addrlen;
+ ifp->if_broadcastaddr = p->if_broadcastaddr;
+
/*
* Copy only a selected subset of flags from the parent.
* Other flags are none of our business.
@@ -1138,10 +1279,12 @@ exists:
vlan_capabilities(ifv);
/*
- * Set up our ``Ethernet address'' to reflect the underlying
+ * Set up our interface address to reflect the underlying
* physical interface's.
*/
- bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
+ ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
+ p->if_addrlen;
/*
* Configure multicast addresses that may already be
@@ -1185,7 +1328,6 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
parent = NULL;
if (trunk != NULL) {
- struct sockaddr_dl sdl;
TRUNK_LOCK(trunk);
parent = trunk->parent;
@@ -1195,17 +1337,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* empty the list of multicast groups that we may have joined
* while we were alive from the parent's list.
*/
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_index = parent->if_index;
- sdl.sdl_type = IFT_ETHER;
- sdl.sdl_alen = ETHER_ADDR_LEN;
-
while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
- bcopy((char *)&mc->mc_addr, LLADDR(&sdl),
- ETHER_ADDR_LEN);
-
/*
* If the parent interface is being detached,
* all its multicast addresses have already
@@ -1216,7 +1348,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
*/
if (!departing) {
error = if_delmulti(parent,
- (struct sockaddr *)&sdl);
+ (struct sockaddr *)&mc->mc_addr);
if (error)
if_printf(ifp,
"Failed to delete multicast address from parent: %d\n",
@@ -1227,12 +1359,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
}
vlan_setflags(ifp, 0); /* clear special flags on parent */
-#ifdef VLAN_ARRAY
- trunk->vlans[ifv->ifv_tag] = NULL;
- trunk->refcnt--;
-#else
vlan_remhash(trunk, ifv);
-#endif
ifv->ifv_trunk = NULL;
/*
@@ -1323,7 +1450,7 @@ vlan_setflags(struct ifnet *ifp, int status)
/* Inform all vlans that their parent has changed link state */
static void
-vlan_link_state(struct ifnet *ifp, int link)
+vlan_link_state(struct ifnet *ifp)
{
struct ifvlantrunk *trunk = ifp->if_vlantrunk;
struct ifvlan *ifv;
@@ -1366,7 +1493,7 @@ vlan_capabilities(struct ifvlan *ifv)
p->if_capenable & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
- CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT);
+ CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT);
} else {
ifp->if_capenable = 0;
ifp->if_hwassist = 0;
@@ -1385,6 +1512,22 @@ vlan_capabilities(struct ifvlan *ifv)
ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
}
+
+ /*
+ * If the parent interface can offload TCP connections over VLANs then
+ * propagate its TOE capability to the VLAN interface.
+ *
+ * All TOE drivers in the tree today can deal with VLANs. If this
+ * changes then IFCAP_VLAN_TOE should be promoted to a full capability
+ * with its own bit.
+ */
+#define IFCAP_VLAN_TOE IFCAP_TOE
+ if (p->if_capabilities & IFCAP_VLAN_TOE)
+ ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
+ if (p->if_capenable & IFCAP_VLAN_TOE) {
+ TOEDEV(ifp) = TOEDEV(p);
+ ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
+ }
}
static void
@@ -1413,14 +1556,31 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifnet *p;
struct ifreq *ifr;
+ struct ifaddr *ifa;
struct ifvlan *ifv;
struct vlanreq vlr;
int error = 0;
ifr = (struct ifreq *)data;
+ ifa = (struct ifaddr *) data;
ifv = ifp->if_softc;
switch (cmd) {
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+#ifdef INET
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(ifp, ifa);
+#endif
+ break;
+ case SIOCGIFADDR:
+ {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&ifr->ifr_data;
+ bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
+ }
+ break;
case SIOCGIFMEDIA:
VLAN_LOCK();
if (TRUNK(ifv) != NULL) {
@@ -1482,7 +1642,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
p = ifunit(vlr.vlr_parent);
- if (p == 0) {
+ if (p == NULL) {
error = ENOENT;
break;
}
@@ -1540,7 +1700,8 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
default:
- error = ether_ioctl(ifp, cmd, data);
+ error = EINVAL;
+ break;
}
return (error);
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index ec71df1a..fd3fc4f3 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -131,7 +131,25 @@ struct vlanreq {
(*vlan_trunk_cap_p)(_ifp); \
} while (0)
+#define VLAN_TRUNKDEV(_ifp) \
+ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL
+#define VLAN_TAG(_ifp, _tag) \
+ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL
+#define VLAN_COOKIE(_ifp) \
+ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL
+#define VLAN_SETCOOKIE(_ifp, _cookie) \
+ (_ifp)->if_type == IFT_L2VLAN ? \
+ (*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL
+#define VLAN_DEVAT(_ifp, _tag) \
+ (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL
+
extern void (*vlan_trunk_cap_p)(struct ifnet *);
+extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
+extern struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
+extern int (*vlan_tag_p)(struct ifnet *, uint16_t *);
+extern int (*vlan_setcookie_p)(struct ifnet *, void *);
+extern void *(*vlan_cookie_p)(struct ifnet *);
+
#endif /* _KERNEL */
#endif /* _NET_IF_VLAN_VAR_H_ */
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index 13e12147..c8a4d7b5 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -2,7 +2,7 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010 Juniper Networks, Inc.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
@@ -126,35 +126,47 @@ static struct rmlock netisr_rmlock;
#define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock)
/* #define NETISR_LOCKING */
-SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
+static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
/*-
- * Three direct dispatch policies are supported:
+ * Three global direct dispatch policies are supported:
*
- * - Always defer: all work is scheduled for a netisr, regardless of context.
- * (!direct)
+ * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of
+ * context (may be overriden by protocols).
*
- * - Hybrid: if the executing context allows direct dispatch, and we're
- * running on the CPU the work would be done on, then direct dispatch if it
- * wouldn't violate ordering constraints on the workstream.
- * (direct && !direct_force)
+ * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
+ * and we're running on the CPU the work would be performed on, then direct
+ * dispatch it if it wouldn't violate ordering constraints on the workstream.
*
- * - Always direct: if the executing context allows direct dispatch, always
- * direct dispatch. (direct && direct_force)
+ * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
+ * always direct dispatch. (The default.)
*
* Notice that changing the global policy could lead to short periods of
* misordered processing, but this is considered acceptable as compared to
- * the complexity of enforcing ordering during policy changes.
+ * the complexity of enforcing ordering during policy changes. Protocols can
+ * override the global policy (when they're not doing that, they select
+ * NETISR_DISPATCH_DEFAULT).
*/
-static int netisr_direct_force = 1; /* Always direct dispatch. */
-TUNABLE_INT("net.isr.direct_force", &netisr_direct_force);
-SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW,
- &netisr_direct_force, 0, "Force direct dispatch");
+#define NETISR_DISPATCH_POLICY_DEFAULT NETISR_DISPATCH_DIRECT
+#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */
+static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
+static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW |
+ CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A",
+ "netisr dispatch policy");
-static int netisr_direct = 1; /* Enable direct dispatch. */
-TUNABLE_INT("net.isr.direct", &netisr_direct);
-SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
- &netisr_direct, 0, "Enable direct dispatch");
+/*
+ * These sysctls were used in previous versions to control and export
+ * dispatch policy state. Now, we provide read-only export via them so that
+ * older netstat binaries work. At some point they can be garbage collected.
+ */
+static int netisr_direct_force;
+SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
+ &netisr_direct_force, 0, "compat: force direct dispatch");
+
+static int netisr_direct;
+SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
+ "compat: enable direct dispatch");
/*
* Allow the administrator to limit the number of threads (CPUs) to use for
@@ -284,6 +296,106 @@ netisr_default_flow2cpu(u_int flowid)
#endif /* __rtems__ */
/*
+ * Dispatch tunable and sysctl configuration.
+ */
+struct netisr_dispatch_table_entry {
+ u_int ndte_policy;
+ const char *ndte_policy_str;
+};
+static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
+ { NETISR_DISPATCH_DEFAULT, "default" },
+ { NETISR_DISPATCH_DEFERRED, "deferred" },
+ { NETISR_DISPATCH_HYBRID, "hybrid" },
+ { NETISR_DISPATCH_DIRECT, "direct" },
+};
+static const u_int netisr_dispatch_table_len =
+ (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));
+
+static void
+netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
+ u_int buflen)
+{
+ const struct netisr_dispatch_table_entry *ndtep;
+ const char *str;
+ u_int i;
+
+ str = "unknown";
+ for (i = 0; i < netisr_dispatch_table_len; i++) {
+ ndtep = &netisr_dispatch_table[i];
+ if (ndtep->ndte_policy == dispatch_policy) {
+ str = ndtep->ndte_policy_str;
+ break;
+ }
+ }
+ snprintf(buffer, buflen, "%s", str);
+}
+
+static int
+netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
+{
+ const struct netisr_dispatch_table_entry *ndtep;
+ u_int i;
+
+ for (i = 0; i < netisr_dispatch_table_len; i++) {
+ ndtep = &netisr_dispatch_table[i];
+ if (strcmp(ndtep->ndte_policy_str, str) == 0) {
+ *dispatch_policyp = ndtep->ndte_policy;
+ return (0);
+ }
+ }
+ return (EINVAL);
+}
+
+static void
+netisr_dispatch_policy_compat(void)
+{
+
+ switch (netisr_dispatch_policy) {
+ case NETISR_DISPATCH_DEFERRED:
+ netisr_direct_force = 0;
+ netisr_direct = 0;
+ break;
+
+ case NETISR_DISPATCH_HYBRID:
+ netisr_direct_force = 0;
+ netisr_direct = 1;
+ break;
+
+ case NETISR_DISPATCH_DIRECT:
+ netisr_direct_force = 1;
+ netisr_direct = 1;
+ break;
+
+ default:
+ panic("%s: unknown policy %u", __func__,
+ netisr_dispatch_policy);
+ }
+}
+
+static int
+sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
+{
+ char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
+ u_int dispatch_policy;
+ int error;
+
+ netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
+ sizeof(tmp));
+ error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
+ if (error == 0 && req->newptr != NULL) {
+ error = netisr_dispatch_policy_from_str(tmp,
+ &dispatch_policy);
+ if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
+ error = EINVAL;
+ if (error == 0) {
+ netisr_dispatch_policy = dispatch_policy;
+ netisr_dispatch_policy_compat();
+ }
+ }
+ return (error);
+}
+
+/*
* Register a new netisr handler, which requires initializing per-protocol
* fields for each workstream. All netisr work is briefly suspended while
* the protocol is installed.
@@ -320,6 +432,12 @@ netisr_register(const struct netisr_handler *nhp)
KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
name));
+ KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
+ nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
+ nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
+ nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
+ ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));
+
KASSERT(proto < NETISR_MAXPROT,
("%s(%u, %s): protocol too big", __func__, proto, name));
@@ -347,6 +465,7 @@ netisr_register(const struct netisr_handler *nhp)
} else
netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
netisr_proto[proto].np_policy = nhp->nh_policy;
+ netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
CPU_FOREACH(i) {
#ifndef __rtems__
npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
@@ -569,15 +688,32 @@ netisr_unregister(const struct netisr_handler *nhp)
}
/*
+ * Compose the global and per-protocol policies on dispatch, and return the
+ * dispatch policy to use.
+ */
+static u_int
+netisr_get_dispatch(struct netisr_proto *npp)
+{
+
+ /*
+ * Protocol-specific configuration overrides the global default.
+ */
+ if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
+ return (npp->np_dispatch);
+ return (netisr_dispatch_policy);
+}
+
+/*
* Look up the workstream given a packet and source identifier. Do this by
* checking the protocol's policy, and optionally call out to the protocol
* for assistance if required.
*/
static struct mbuf *
-netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
- struct mbuf *m, u_int *cpuidp)
+netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
+ uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
struct ifnet *ifp;
+ u_int policy;
NETISR_LOCK_ASSERT();
@@ -596,11 +732,30 @@ netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
* If we want to support per-interface policies, we should do that
* here first.
*/
- switch (npp->np_policy) {
- case NETISR_POLICY_CPU:
- return (npp->np_m2cpuid(m, source, cpuidp));
+ policy = npp->np_policy;
+ if (policy == NETISR_POLICY_CPU) {
+ m = npp->np_m2cpuid(m, source, cpuidp);
+ if (m == NULL)
+ return (NULL);
+
+ /*
+ * It's possible for a protocol not to have a good idea about
+ * where to process a packet, in which case we fall back on
+ * the netisr code to decide. In the hybrid case, return the
+ * current CPU ID, which will force an immediate direct
+ * dispatch. In the queued case, fall back on the SOURCE
+ * policy.
+ */
+ if (*cpuidp != NETISR_CPUID_NONE)
+ return (m);
+ if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
+ *cpuidp = curcpu;
+ return (m);
+ }
+ policy = NETISR_POLICY_SOURCE;
+ }
- case NETISR_POLICY_FLOW:
+ if (policy == NETISR_POLICY_FLOW) {
if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
m = npp->np_m2flow(m, source);
if (m == NULL)
@@ -611,25 +766,22 @@ netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
netisr_default_flow2cpu(m->m_pkthdr.flowid);
return (m);
}
- /* FALLTHROUGH */
-
- case NETISR_POLICY_SOURCE:
- ifp = m->m_pkthdr.rcvif;
- if (ifp != NULL)
- *cpuidp = nws_array[(ifp->if_index + source) %
- nws_count];
- else
- *cpuidp = nws_array[source % nws_count];
- return (m);
-
- default:
- panic("%s: invalid policy %u for %s", __func__,
- npp->np_policy, npp->np_name);
+ policy = NETISR_POLICY_SOURCE;
}
+
+ KASSERT(policy == NETISR_POLICY_SOURCE,
+ ("%s: invalid policy %u for %s", __func__, npp->np_policy,
+ npp->np_name));
+
+ ifp = m->m_pkthdr.rcvif;
+ if (ifp != NULL)
+ *cpuidp = nws_array[(ifp->if_index + source) % nws_count];
+ else
+ *cpuidp = nws_array[source % nws_count];
#else /* __rtems__ */
*cpuidp = 0;
- return (m);
#endif /* __rtems__ */
+ return (m);
}
/*
@@ -832,7 +984,8 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
- m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
+ m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
+ source, m, &cpuid);
if (m != NULL) {
KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
cpuid));
@@ -863,23 +1016,23 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
struct rm_priotracker tracker;
#endif
struct netisr_workstream *nwsp;
+ struct netisr_proto *npp;
struct netisr_work *npwp;
int dosignal, error;
- u_int cpuid;
-
- /*
- * If direct dispatch is entirely disabled, fall back on queueing.
- */
- if (!netisr_direct)
- return (netisr_queue_src(proto, source, m));
+ u_int cpuid, dispatch_policy;
KASSERT(proto < NETISR_MAXPROT,
("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
NETISR_RLOCK(&tracker);
#endif
- KASSERT(netisr_proto[proto].np_handler != NULL,
- ("%s: invalid proto %u", __func__, proto));
+ npp = &netisr_proto[proto];
+ KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
+ proto));
+
+ dispatch_policy = netisr_get_dispatch(npp);
+ if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
+ return (netisr_queue_src(proto, source, m));
/*
* If direct dispatch is forced, then unconditionally dispatch
@@ -888,7 +1041,7 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
* nws_flags because all netisr processing will be source ordered due
* to always being forced to directly dispatch.
*/
- if (netisr_direct_force) {
+ if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
nwsp = DPCPU_PTR(nws);
npwp = &nwsp->nws_work[proto];
npwp->nw_dispatched++;
@@ -898,20 +1051,24 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
goto out_unlock;
}
+ KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
+ ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));
+
/*
* Otherwise, we execute in a hybrid mode where we will try to direct
* dispatch if we're on the right CPU and the netisr worker isn't
* already running.
*/
- m = netisr_select_cpuid(&netisr_proto[proto], source, m, &cpuid);
+#ifndef __rtems__
+ sched_pin();
+#endif /* __rtems__ */
+ m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
+ source, m, &cpuid);
if (m == NULL) {
error = ENOBUFS;
- goto out_unlock;
+ goto out_unpin;
}
KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
-#ifndef __rtems__
- sched_pin();
-#endif /* __rtems__ */
if (cpuid != curcpu)
goto queue_fallback;
nwsp = DPCPU_PTR(nws);
@@ -1050,6 +1207,9 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc)
static void
netisr_init(void *arg)
{
+ char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
+ u_int dispatch_policy;
+ int error;
KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
@@ -1081,6 +1241,22 @@ netisr_init(void *arg)
#endif
#ifndef __rtems__
+ if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
+ error = netisr_dispatch_policy_from_str(tmp,
+ &dispatch_policy);
+ if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
+ error = EINVAL;
+ if (error == 0) {
+ netisr_dispatch_policy = dispatch_policy;
+ netisr_dispatch_policy_compat();
+ } else
+ printf(
+ "%s: invalid dispatch policy %s, using default\n",
+ __func__, tmp);
+ }
+#endif /* __rtems__ */
+
+#ifndef __rtems__
netisr_start_swi(curcpu, pcpu_find(curcpu));
#else /* __rtems__ */
netisr_start_swi(0, NULL);
@@ -1098,7 +1274,7 @@ netisr_start(void *arg)
{
struct pcpu *pc;
- SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+ STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
if (nws_count >= netisr_maxthreads)
break;
/* XXXRW: Is skipping absent CPUs still required here? */
@@ -1141,6 +1317,7 @@ sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
snpp->snp_proto = proto;
snpp->snp_qlimit = npp->np_qlimit;
snpp->snp_policy = npp->np_policy;
+ snpp->snp_dispatch = npp->np_dispatch;
if (npp->np_m2flow != NULL)
snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
if (npp->np_m2cpuid != NULL)
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
index cd692f6d..83bf9ce5 100644
--- a/freebsd/sys/net/netisr.h
+++ b/freebsd/sys/net/netisr.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010 Juniper Networks, Inc.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
@@ -71,6 +71,15 @@
#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */
/*
+ * Protocol dispatch policy constants; selects whether and when direct
+ * dispatch is permitted.
+ */
+#define NETISR_DISPATCH_DEFAULT 0 /* Use global default. */
+#define NETISR_DISPATCH_DEFERRED 1 /* Always defer dispatch. */
+#define NETISR_DISPATCH_HYBRID 2 /* Allow hybrid dispatch. */
+#define NETISR_DISPATCH_DIRECT 3 /* Always direct dispatch. */
+
+/*
* Monitoring data structures, exported by sysctl(2).
*
* Three sysctls are defined. First, a per-protocol structure exported by
@@ -84,7 +93,8 @@ struct sysctl_netisr_proto {
u_int snp_qlimit; /* nh_qlimit */
u_int snp_policy; /* nh_policy */
u_int snp_flags; /* Various flags. */
- u_int _snp_ispare[7];
+ u_int snp_dispatch; /* Dispatch policy. */
+ u_int _snp_ispare[6];
};
/*
@@ -173,6 +183,8 @@ typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source);
typedef void netisr_drainedcpu_t(u_int cpuid);
+#define NETISR_CPUID_NONE ((u_int)-1) /* No affinity returned. */
+
/*
* Data structure describing a protocol handler.
*/
@@ -185,7 +197,8 @@ struct netisr_handler {
u_int nh_proto; /* Integer protocol ID. */
u_int nh_qlimit; /* Maximum per-CPU queue depth. */
u_int nh_policy; /* Work placement policy. */
- u_int nh_ispare[5]; /* For future use. */
+ u_int nh_dispatch; /* Dispatch policy. */
+ u_int nh_ispare[4]; /* For future use. */
void *nh_pspare[4]; /* For future use. */
};
diff --git a/freebsd/sys/net/netisr_internal.h b/freebsd/sys/net/netisr_internal.h
index 40afaf16..ac3ed0f2 100644
--- a/freebsd/sys/net/netisr_internal.h
+++ b/freebsd/sys/net/netisr_internal.h
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010 Juniper Networks, Inc.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
@@ -64,6 +64,7 @@ struct netisr_proto {
netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
u_int np_qlimit; /* Maximum per-CPU queue depth. */
u_int np_policy; /* Work placement policy. */
+ u_int np_dispatch; /* Work dispatch policy. */
};
#define NETISR_MAXPROT 16 /* Compile-time limit. */
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
index 7fceea35..123d03c4 100644
--- a/freebsd/sys/net/pfil.c
+++ b/freebsd/sys/net/pfil.c
@@ -288,25 +288,27 @@ pfil_list_remove(pfil_list_t *list,
return (ENOENT);
}
-/****************
- * Stuff that must be initialized for every instance
- * (including the first of course).
+/*
+ * Stuff that must be initialized for every instance (including the first of
+ * course).
*/
static int
vnet_pfil_init(const void *unused)
{
+
LIST_INIT(&V_pfil_head_list);
return (0);
}
-/***********************
+/*
* Called for the removal of each instance.
*/
static int
vnet_pfil_uninit(const void *unused)
{
+
/* XXX should panic if list is not empty */
- return 0;
+ return (0);
}
/* Define startup order. */
@@ -315,17 +317,17 @@ vnet_pfil_uninit(const void *unused)
#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */
/*
- * Starting up.
+ * Starting up.
+ *
* VNET_SYSINIT is called for each existing vnet and each new vnet.
*/
VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
- vnet_pfil_init, NULL);
+ vnet_pfil_init, NULL);
/*
- * Closing up shop. These are done in REVERSE ORDER,
- * Not called on reboot.
+ * Closing up shop. These are done in REVERSE ORDER. Not called on reboot.
+ *
* VNET_SYSUNINIT is called for each exiting vnet as it exits.
*/
VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
- vnet_pfil_uninit, NULL);
-
+ vnet_pfil_uninit, NULL);
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
index 142da67d..da06dedf 100644
--- a/freebsd/sys/net/pfil.h
+++ b/freebsd/sys/net/pfil.h
@@ -69,7 +69,11 @@ struct pfil_head {
pfil_list_t ph_out;
int ph_type;
int ph_nhooks;
+#if defined( __linux__ ) || defined( _WIN32 )
+ rwlock_t ph_mtx;
+#else
struct rmlock ph_lock;
+#endif
union {
u_long phu_val;
void *phu_ptr;
diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c
index d31a5b3c..875a482c 100644
--- a/freebsd/sys/net/radix.c
+++ b/freebsd/sys/net/radix.c
@@ -52,8 +52,8 @@
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
-#define log(x, arg...) fprintf(stderr, ## arg)
-#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1)
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1)
#define min(a, b) ((a) < (b) ? (a) : (b) )
#include <net/radix.h>
#endif /* !_KERNEL */
diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h
index 29659b54..5bacaa3a 100644
--- a/freebsd/sys/net/radix.h
+++ b/freebsd/sys/net/radix.h
@@ -105,6 +105,8 @@ typedef int walktree_f_t(struct radix_node *, void *);
struct radix_node_head {
struct radix_node *rnh_treetop;
+ u_int rnh_gen; /* generation counter */
+ int rnh_multipath; /* multipath capable ? */
int rnh_addrsize; /* permit, but not require fixed keys */
int rnh_pktsize; /* permit, but not require fixed keys */
struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
@@ -131,7 +133,6 @@ struct radix_node_head {
void (*rnh_close) /* do something when the last ref drops */
(struct radix_node *rn, struct radix_node_head *head);
struct radix_node rnh_nodes[3]; /* empty tree for common case */
- int rnh_multipath; /* multipath capable ? */
#ifdef _KERNEL
struct rwlock rnh_lock; /* locks entire radix tree */
#endif
diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c
index 68b8bd26..10db8bba 100644
--- a/freebsd/sys/net/raw_cb.c
+++ b/freebsd/sys/net/raw_cb.c
@@ -61,7 +61,8 @@
struct mtx rawcb_mtx;
VNET_DEFINE(struct rawcb_list_head, rawcb_list);
-SYSCTL_NODE(_net, OID_AUTO, raw, CTLFLAG_RW, 0, "Raw socket infrastructure");
+static SYSCTL_NODE(_net, OID_AUTO, raw, CTLFLAG_RW, 0,
+ "Raw socket infrastructure");
static u_long raw_sendspace = RAWSNDQ;
SYSCTL_ULONG(_net_raw, OID_AUTO, sendspace, CTLFLAG_RW, &raw_sendspace, 0,
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index a0589947..fdd8a12c 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -69,6 +69,10 @@
#include <netinet/ip_mroute.h>
#include <vm/uma.h>
+#ifdef __rtems__
+#include <machine/rtems-bsd-syscall-api.h>
+#include <sys/file.h>
+#endif /* __rtems__ */
/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
#define RT_MAXFIBS 16
@@ -144,7 +148,6 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
#define V_rtzone VNET(rtzone)
-#ifndef __rtems__
/*
* handler for net.my_fibnum
*/
@@ -154,14 +157,17 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
int fibnum;
int error;
+#ifndef __rtems__
fibnum = curthread->td_proc->p_fibnum;
+#else /* __rtems__ */
+ fibnum = BSD_DEFAULT_FIB;
+#endif /* __rtems__ */
error = sysctl_handle_int(oidp, &fibnum, 0, req);
return (error);
}
SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
-#endif /* __rtems__ */
static __inline struct radix_node_head **
rt_tables_get_rnh_ptr(int table, int fam)
@@ -279,20 +285,40 @@ VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
vnet_route_uninit, 0);
#endif
-#ifndef __rtems__
#ifndef _SYS_SYSPROTO_H_
struct setfib_args {
int fibnum;
};
#endif
+#ifdef __rtems__
+static
+#endif /* __rtems__ */
int
-setfib(struct thread *td, struct setfib_args *uap)
+sys_setfib(struct thread *td, struct setfib_args *uap)
{
if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
return EINVAL;
+#ifndef __rtems__
td->td_proc->p_fibnum = uap->fibnum;
+#else /* __rtems__ */
+ if (uap->fibnum != BSD_DEFAULT_FIB)
+ return EINVAL;
+#endif /* __rtems__ */
return (0);
}
+#ifdef __rtems__
+int
+setfib(int fibnum)
+{
+ struct setfib_args ua = {
+ .fibnum = fibnum
+ };
+ int error;
+
+ error = sys_setfib(NULL, &ua);
+
+ return rtems_bsd_error_to_status_and_errno(error);
+}
#endif /* __rtems__ */
/*
@@ -1118,6 +1144,14 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
error = 0;
}
#endif
+ if ((flags & RTF_PINNED) == 0) {
+ /* Check if target route can be deleted */
+ rt = (struct rtentry *)rnh->rnh_lookup(dst,
+ netmask, rnh);
+ if ((rt != NULL) && (rt->rt_flags & RTF_PINNED))
+ senderr(EADDRINUSE);
+ }
+
/*
* Remove the item from the tree and return it.
* Complain if it is not there and do no more processing.
@@ -1237,11 +1271,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt0 = NULL;
/* "flow-table" only supports IPv6 and IPv4 at the moment. */
switch (dst->sa_family) {
-#ifdef notyet
#ifdef INET6
case AF_INET6:
#endif
-#endif
#ifdef INET
case AF_INET:
#endif
@@ -1309,13 +1341,11 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
#ifdef FLOWTABLE
else if (rt0 != NULL) {
switch (dst->sa_family) {
-#ifdef notyet
#ifdef INET6
case AF_INET6:
flowtable_route_flush(V_ip6_ft, rt0);
break;
#endif
-#endif
#ifdef INET
case AF_INET:
flowtable_route_flush(V_ip_ft, rt0);
@@ -1445,6 +1475,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
int didwork = 0;
int a_failure = 0;
static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+ struct radix_node_head *rnh;
if (flags & RTF_HOST) {
dst = ifa->ifa_dstaddr;
@@ -1507,7 +1538,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
*/
for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
if (cmd == RTM_DELETE) {
- struct radix_node_head *rnh;
struct radix_node *rn;
/*
* Look up an rtentry that is in the routing tree and
@@ -1517,7 +1547,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
if (rnh == NULL)
/* this table doesn't exist but others might */
continue;
- RADIX_NODE_HEAD_LOCK(rnh);
+ RADIX_NODE_HEAD_RLOCK(rnh);
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh)) {
@@ -1546,7 +1576,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
(rn->rn_flags & RNF_ROOT) ||
RNTORT(rn)->rt_ifa != ifa ||
!sa_equal((struct sockaddr *)rn->rn_key, dst));
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RADIX_NODE_HEAD_RUNLOCK(rnh);
if (error) {
/* this is only an error if bad on ALL tables */
continue;
@@ -1557,7 +1587,8 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
*/
bzero((caddr_t)&info, sizeof(info));
info.rti_ifa = ifa;
- info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF);
+ info.rti_flags = flags |
+ (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
info.rti_info[RTAX_DST] = dst;
/*
* doing this for compatibility reasons
@@ -1569,6 +1600,33 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
+
+ if ((error == EEXIST) && (cmd == RTM_ADD)) {
+ /*
+ * Interface route addition failed.
+ * Atomically delete current prefix generating
+ * RTM_DELETE message, and retry adding
+ * interface prefix.
+ */
+ rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ RADIX_NODE_HEAD_LOCK(rnh);
+
+ /* Delete old prefix */
+ info.rti_ifa = NULL;
+ info.rti_flags = RTF_RNH_LOCKED;
+
+ error = rtrequest1_fib(RTM_DELETE, &info, &rt, fibnum);
+ if (error == 0) {
+ info.rti_ifa = ifa;
+ info.rti_flags = flags | RTF_RNH_LOCKED |
+ (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
+ error = rtrequest1_fib(cmd, &info, &rt, fibnum);
+ }
+
+ RADIX_NODE_HEAD_UNLOCK(rnh);
+ }
+
+
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index b26ac441..997f3cd6 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -49,9 +49,14 @@
struct route {
struct rtentry *ro_rt;
struct llentry *ro_lle;
+ struct in_ifaddr *ro_ia;
+ int ro_flags;
struct sockaddr ro_dst;
};
+#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
+#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
+
/*
* These numbers are used by reliable protocols for determining
* retransmission behavior and are included in the routing structure.
@@ -171,7 +176,7 @@ struct ortentry {
/* 0x20000 unused, was RTF_WASCLONED */
#define RTF_PROTO3 0x40000 /* protocol specific routing flag */
/* 0x80000 unused */
-#define RTF_PINNED 0x100000 /* future use */
+#define RTF_PINNED 0x100000 /* route is immutable */
#define RTF_LOCAL 0x200000 /* route represents a local address */
#define RTF_BROADCAST 0x400000 /* route represents a bcast address */
#define RTF_MULTICAST 0x800000 /* route represents a mcast address */
@@ -337,6 +342,18 @@ struct rt_addrinfo {
RTFREE_LOCKED(_rt); \
} while (0)
+#define RO_RTFREE(_ro) do { \
+ if ((_ro)->ro_rt) { \
+ if ((_ro)->ro_flags & RT_NORTREF) { \
+ (_ro)->ro_flags &= ~RT_NORTREF; \
+ (_ro)->ro_rt = NULL; \
+ } else { \
+ RT_LOCK((_ro)->ro_rt); \
+ RTFREE_LOCKED((_ro)->ro_rt); \
+ } \
+ } \
+} while (0)
+
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;
@@ -404,6 +421,7 @@ int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
#include <sys/eventhandler.h>
typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *);
typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
+/* route_arp_update_event is no longer generated; see arp_update_event */
EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn);
EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn);
#endif
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index a421fd61..1eebe095 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -172,7 +172,7 @@ MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
-SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
+static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
struct walkarg {
int w_tmemsize;
@@ -956,7 +956,6 @@ flush:
m = NULL;
} else if (m->m_pkthdr.len > rtm->rtm_msglen)
m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
- Free(rtm);
}
if (m) {
M_SETFIB(m, so->so_fibnum);
@@ -973,6 +972,9 @@ flush:
} else
rt_dispatch(m, saf);
}
+ /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */
+ if (rtm)
+ Free(rtm);
}
return (error);
#undef sa_equal
@@ -1821,6 +1823,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
u_int namelen = arg2;
struct radix_node_head *rnh = NULL; /* silence compiler. */
int i, lim, error = EINVAL;
+ int fib = 0;
u_char af;
struct walkarg w;
@@ -1828,7 +1831,25 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
namelen--;
if (req->newptr)
return (EPERM);
- if (namelen != 3)
+ if (name[1] == NET_RT_DUMP) {
+ if (namelen == 3)
+#ifndef __rtems__
+ fib = req->td->td_proc->p_fibnum;
+#else /* __rtems__ */
+ fib = BSD_DEFAULT_FIB;
+#endif /* __rtems__ */
+ else if (namelen == 4)
+ fib = (name[3] == -1) ?
+#ifndef __rtems__
+ req->td->td_proc->p_fibnum : name[3];
+#else /* __rtems__ */
+ BSD_DEFAULT_FIB : name[3];
+#endif /* __rtems__ */
+ else
+ return ((namelen < 3) ? EISDIR : ENOTDIR);
+ if (fib < 0 || fib >= rt_numfibs)
+ return (EINVAL);
+ } else if (namelen != 3)
return ((namelen < 3) ? EISDIR : ENOTDIR);
af = name[0];
if (af > AF_MAX)
@@ -1867,11 +1888,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
* take care of routing entries
*/
for (error = 0; error == 0 && i <= lim; i++) {
-#ifndef __rtems__
- rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
-#else /* __rtems__ */
- rnh = rt_tables_get_rnh(BSD_DEFAULT_FIB, i);
-#endif /* __rtems__ */
+ rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
RADIX_NODE_HEAD_RLOCK(rnh);
error = rnh->rnh_walktree(rnh,
@@ -1896,7 +1913,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
+static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
/*
* Definitions of protocols supported in the ROUTE domain.
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index 8ef1c00d..01e26cdb 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -95,7 +95,9 @@ struct vnet {
* Location of the kernel's 'set_vnet' linker set.
*/
extern uintptr_t *__start_set_vnet;
+__GLOBL(__start_set_vnet);
extern uintptr_t *__stop_set_vnet;
+__GLOBL(__stop_set_vnet);
#define VNET_START (uintptr_t)&__start_set_vnet
#define VNET_STOP (uintptr_t)&__stop_set_vnet
@@ -249,6 +251,7 @@ int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
ptr, val, vnet_sysctl_handle_int, "I", descr)
#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
fmt, descr) \
+ CTASSERT(((access) & CTLTYPE) != 0); \
SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \
handler, fmt, descr)
#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
diff --git a/freebsd/sys/net80211/_ieee80211.h b/freebsd/sys/net80211/_ieee80211.h
index c488c006..3793c661 100644
--- a/freebsd/sys/net80211/_ieee80211.h
+++ b/freebsd/sys/net80211/_ieee80211.h
@@ -133,7 +133,7 @@ enum ieee80211_roamingmode {
*/
struct ieee80211_channel {
uint32_t ic_flags; /* see below */
- uint16_t ic_freq; /* setting in Mhz */
+ uint16_t ic_freq; /* setting in MHz */
uint8_t ic_ieee; /* IEEE channel number */
int8_t ic_maxregpower; /* maximum regulatory tx power in dBm */
int8_t ic_maxpower; /* maximum tx power in .5 dBm */
@@ -335,7 +335,7 @@ struct ieee80211_rateset {
* the structure such that it can be used interchangeably
* with an ieee80211_rateset (modulo structure size).
*/
-#define IEEE80211_HTRATE_MAXSIZE 127
+#define IEEE80211_HTRATE_MAXSIZE 77
struct ieee80211_htrateset {
uint8_t rs_nrates;
@@ -387,9 +387,16 @@ struct ieee80211_regdomain {
/*
* MIMO antenna/radio state.
*/
+
+#define IEEE80211_MAX_CHAINS 3
+#define IEEE80211_MAX_EVM_PILOTS 6
+
+/*
+ * XXX This doesn't yet export both ctl/ext chain details
+ */
struct ieee80211_mimo_info {
- int8_t rssi[3]; /* per-antenna rssi */
- int8_t noise[3]; /* per-antenna noise floor */
+ int8_t rssi[IEEE80211_MAX_CHAINS]; /* per-antenna rssi */
+ int8_t noise[IEEE80211_MAX_CHAINS]; /* per-antenna noise floor */
uint8_t pad[2];
uint32_t evm[3]; /* EVM data */
};
diff --git a/freebsd/sys/net80211/ieee80211.c b/freebsd/sys/net80211/ieee80211.c
index 3d121987..7021ff31 100644
--- a/freebsd/sys/net80211/ieee80211.c
+++ b/freebsd/sys/net80211/ieee80211.c
@@ -208,6 +208,15 @@ ieee80211_chan_init(struct ieee80211com *ic)
DEFAULTRATES(IEEE80211_MODE_11NG, ieee80211_rateset_11g);
/*
+ * Setup required information to fill the mcsset field, if driver did
+ * not. Assume a 2T2R setup for historic reasons.
+ */
+ if (ic->ic_rxstream == 0)
+ ic->ic_rxstream = 2;
+ if (ic->ic_txstream == 0)
+ ic->ic_txstream = 2;
+
+ /*
* Set auto mode to reset active channel state and any desired channel.
*/
(void) ieee80211_setmode(ic, IEEE80211_MODE_AUTO);
@@ -377,9 +386,9 @@ default_reset(struct ieee80211vap *vap, u_long cmd)
*/
int
ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap,
- const char name[IFNAMSIZ], int unit, int opmode, int flags,
- const uint8_t bssid[IEEE80211_ADDR_LEN],
- const uint8_t macaddr[IEEE80211_ADDR_LEN])
+ const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode,
+ int flags, const uint8_t bssid[IEEE80211_ADDR_LEN],
+ const uint8_t macaddr[IEEE80211_ADDR_LEN])
{
struct ifnet *ifp;
@@ -407,6 +416,7 @@ ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap,
vap->iv_flags_ven = ic->ic_flags_ven;
vap->iv_caps = ic->ic_caps &~ IEEE80211_C_OPMODE;
vap->iv_htcaps = ic->ic_htcaps;
+ vap->iv_htextcaps = ic->ic_htextcaps;
vap->iv_opmode = opmode;
vap->iv_caps |= ieee80211_opcap[opmode];
switch (opmode) {
@@ -439,6 +449,8 @@ ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap,
}
break;
#endif
+ default:
+ break;
}
/* auto-enable s/w beacon miss support */
if (flags & IEEE80211_CLONE_NOBEACONS)
@@ -1000,7 +1012,8 @@ ieee80211_media_setup(struct ieee80211com *ic,
struct ifmedia *media, int caps, int addsta,
ifm_change_cb_t media_change, ifm_stat_cb_t media_stat)
{
- int i, j, mode, rate, maxrate, mword, r;
+ int i, j, rate, maxrate, mword, r;
+ enum ieee80211_phymode mode;
const struct ieee80211_rateset *rs;
struct ieee80211_rateset allrates;
@@ -1068,10 +1081,18 @@ ieee80211_media_setup(struct ieee80211com *ic,
isset(ic->ic_modecaps, IEEE80211_MODE_11NG)) {
addmedia(media, caps, addsta,
IEEE80211_MODE_AUTO, IFM_IEEE80211_MCS);
- /* XXX could walk htrates */
- /* XXX known array size */
- if (ieee80211_htrates[15].ht40_rate_400ns > maxrate)
- maxrate = ieee80211_htrates[15].ht40_rate_400ns;
+ i = ic->ic_txstream * 8 - 1;
+ if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
+ (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40))
+ rate = ieee80211_htrates[i].ht40_rate_400ns;
+ else if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40))
+ rate = ieee80211_htrates[i].ht40_rate_800ns;
+ else if ((ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20))
+ rate = ieee80211_htrates[i].ht20_rate_400ns;
+ else
+ rate = ieee80211_htrates[i].ht20_rate_800ns;
+ if (rate > maxrate)
+ maxrate = rate;
}
return maxrate;
}
@@ -1121,7 +1142,8 @@ void
ieee80211_announce(struct ieee80211com *ic)
{
struct ifnet *ifp = ic->ic_ifp;
- int i, mode, rate, mword;
+ int i, rate, mword;
+ enum ieee80211_phymode mode;
const struct ieee80211_rateset *rs;
/* NB: skip AUTO since it has no rates */
@@ -1514,6 +1536,67 @@ ieee80211_rate2media(struct ieee80211com *ic, int rate, enum ieee80211_phymode m
{ 13, IFM_IEEE80211_MCS },
{ 14, IFM_IEEE80211_MCS },
{ 15, IFM_IEEE80211_MCS },
+ { 16, IFM_IEEE80211_MCS },
+ { 17, IFM_IEEE80211_MCS },
+ { 18, IFM_IEEE80211_MCS },
+ { 19, IFM_IEEE80211_MCS },
+ { 20, IFM_IEEE80211_MCS },
+ { 21, IFM_IEEE80211_MCS },
+ { 22, IFM_IEEE80211_MCS },
+ { 23, IFM_IEEE80211_MCS },
+ { 24, IFM_IEEE80211_MCS },
+ { 25, IFM_IEEE80211_MCS },
+ { 26, IFM_IEEE80211_MCS },
+ { 27, IFM_IEEE80211_MCS },
+ { 28, IFM_IEEE80211_MCS },
+ { 29, IFM_IEEE80211_MCS },
+ { 30, IFM_IEEE80211_MCS },
+ { 31, IFM_IEEE80211_MCS },
+ { 32, IFM_IEEE80211_MCS },
+ { 33, IFM_IEEE80211_MCS },
+ { 34, IFM_IEEE80211_MCS },
+ { 35, IFM_IEEE80211_MCS },
+ { 36, IFM_IEEE80211_MCS },
+ { 37, IFM_IEEE80211_MCS },
+ { 38, IFM_IEEE80211_MCS },
+ { 39, IFM_IEEE80211_MCS },
+ { 40, IFM_IEEE80211_MCS },
+ { 41, IFM_IEEE80211_MCS },
+ { 42, IFM_IEEE80211_MCS },
+ { 43, IFM_IEEE80211_MCS },
+ { 44, IFM_IEEE80211_MCS },
+ { 45, IFM_IEEE80211_MCS },
+ { 46, IFM_IEEE80211_MCS },
+ { 47, IFM_IEEE80211_MCS },
+ { 48, IFM_IEEE80211_MCS },
+ { 49, IFM_IEEE80211_MCS },
+ { 50, IFM_IEEE80211_MCS },
+ { 51, IFM_IEEE80211_MCS },
+ { 52, IFM_IEEE80211_MCS },
+ { 53, IFM_IEEE80211_MCS },
+ { 54, IFM_IEEE80211_MCS },
+ { 55, IFM_IEEE80211_MCS },
+ { 56, IFM_IEEE80211_MCS },
+ { 57, IFM_IEEE80211_MCS },
+ { 58, IFM_IEEE80211_MCS },
+ { 59, IFM_IEEE80211_MCS },
+ { 60, IFM_IEEE80211_MCS },
+ { 61, IFM_IEEE80211_MCS },
+ { 62, IFM_IEEE80211_MCS },
+ { 63, IFM_IEEE80211_MCS },
+ { 64, IFM_IEEE80211_MCS },
+ { 65, IFM_IEEE80211_MCS },
+ { 66, IFM_IEEE80211_MCS },
+ { 67, IFM_IEEE80211_MCS },
+ { 68, IFM_IEEE80211_MCS },
+ { 69, IFM_IEEE80211_MCS },
+ { 70, IFM_IEEE80211_MCS },
+ { 71, IFM_IEEE80211_MCS },
+ { 72, IFM_IEEE80211_MCS },
+ { 73, IFM_IEEE80211_MCS },
+ { 74, IFM_IEEE80211_MCS },
+ { 75, IFM_IEEE80211_MCS },
+ { 76, IFM_IEEE80211_MCS },
};
int m;
diff --git a/freebsd/sys/net80211/ieee80211.h b/freebsd/sys/net80211/ieee80211.h
index e567d9de..9c12ef0c 100644
--- a/freebsd/sys/net80211/ieee80211.h
+++ b/freebsd/sys/net80211/ieee80211.h
@@ -131,6 +131,7 @@ struct ieee80211_qosframe_addr4 {
#define IEEE80211_FC0_SUBTYPE_AUTH 0xb0
#define IEEE80211_FC0_SUBTYPE_DEAUTH 0xc0
#define IEEE80211_FC0_SUBTYPE_ACTION 0xd0
+#define IEEE80211_FC0_SUBTYPE_ACTION_NOACK 0xe0
/* for TYPE_CTL */
#define IEEE80211_FC0_SUBTYPE_BAR 0x80
#define IEEE80211_FC0_SUBTYPE_BA 0x90
diff --git a/freebsd/sys/net80211/ieee80211_acl.c b/freebsd/sys/net80211/ieee80211_acl.c
index 0d26a4e2..3bfb0a30 100644
--- a/freebsd/sys/net80211/ieee80211_acl.c
+++ b/freebsd/sys/net80211/ieee80211_acl.c
@@ -79,7 +79,7 @@ struct acl {
struct aclstate {
acl_lock_t as_lock;
int as_policy;
- int as_nacls;
+ uint32_t as_nacls;
TAILQ_HEAD(, acl) as_list; /* list of all ACL's */
LIST_HEAD(, acl) as_hash[ACL_HASHSIZE];
struct ieee80211vap *as_vap;
@@ -89,7 +89,7 @@ struct aclstate {
#define ACL_HASH(addr) \
(((const uint8_t *)(addr))[IEEE80211_ADDR_LEN - 1] % ACL_HASHSIZE)
-MALLOC_DEFINE(M_80211_ACL, "acl", "802.11 station acl");
+static MALLOC_DEFINE(M_80211_ACL, "acl", "802.11 station acl");
static int acl_free_all(struct ieee80211vap *);
@@ -291,7 +291,8 @@ acl_getioctl(struct ieee80211vap *vap, struct ieee80211req *ireq)
struct aclstate *as = vap->iv_as;
struct acl *acl;
struct ieee80211req_maclist *ap;
- int error, space, i;
+ int error;
+ uint32_t i, space;
switch (ireq->i_val) {
case IEEE80211_MACCMD_POLICY:
diff --git a/freebsd/sys/net80211/ieee80211_adhoc.c b/freebsd/sys/net80211/ieee80211_adhoc.c
index b7933a17..4c330976 100644
--- a/freebsd/sys/net80211/ieee80211_adhoc.c
+++ b/freebsd/sys/net80211/ieee80211_adhoc.c
@@ -287,7 +287,6 @@ doprint(struct ieee80211vap *vap, int subtype)
static int
adhoc_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
{
-#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define HAS_SEQ(type) ((type & 0x4) == 0)
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
@@ -414,9 +413,7 @@ adhoc_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
TID_TO_WME_AC(tid) >= WME_AC_VI)
ic->ic_wme.wme_hipri_traffic++;
rxseq = le16toh(*(uint16_t *)wh->i_seq);
- if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 &&
- (wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
- SEQ_LEQ(rxseq, ni->ni_rxseqs[tid])) {
+ if (! ieee80211_check_rxseq(ni, wh)) {
/* duplicate, discard */
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT,
bssid, "duplicate",
@@ -662,7 +659,6 @@ out:
m_freem(m);
}
return type;
-#undef SEQ_LEQ
}
static int
@@ -825,80 +821,44 @@ adhoc_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
is11bclient(rates, xrates) ? IEEE80211_SEND_LEGACY_11B : 0);
break;
- case IEEE80211_FC0_SUBTYPE_ACTION: {
- const struct ieee80211_action *ia;
-
- if (vap->iv_state != IEEE80211_S_RUN) {
+ case IEEE80211_FC0_SUBTYPE_ACTION:
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
+ if (ni == vap->iv_bss) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "unknown node");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) &&
+ !IEEE80211_IS_MULTICAST(wh->i_addr1)) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not for us");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (vap->iv_state != IEEE80211_S_RUN) {
IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
wh, NULL, "wrong state %s",
ieee80211_state_name[vap->iv_state]);
vap->iv_stats.is_rx_mgtdiscard++;
- return;
- }
- /*
- * action frame format:
- * [1] category
- * [1] action
- * [tlv] parameters
- */
- IEEE80211_VERIFY_LENGTH(efrm - frm,
- sizeof(struct ieee80211_action), return);
- ia = (const struct ieee80211_action *) frm;
-
- vap->iv_stats.is_rx_action++;
- IEEE80211_NODE_STAT(ni, rx_action);
-
- /* verify frame payloads but defer processing */
- /* XXX maybe push this to method */
- switch (ia->ia_category) {
- case IEEE80211_ACTION_CAT_BA:
- switch (ia->ia_action) {
- case IEEE80211_ACTION_BA_ADDBA_REQUEST:
- IEEE80211_VERIFY_LENGTH(efrm - frm,
- sizeof(struct ieee80211_action_ba_addbarequest),
- return);
- break;
- case IEEE80211_ACTION_BA_ADDBA_RESPONSE:
- IEEE80211_VERIFY_LENGTH(efrm - frm,
- sizeof(struct ieee80211_action_ba_addbaresponse),
- return);
- break;
- case IEEE80211_ACTION_BA_DELBA:
- IEEE80211_VERIFY_LENGTH(efrm - frm,
- sizeof(struct ieee80211_action_ba_delba),
- return);
- break;
- }
- break;
- case IEEE80211_ACTION_CAT_HT:
- switch (ia->ia_action) {
- case IEEE80211_ACTION_HT_TXCHWIDTH:
- IEEE80211_VERIFY_LENGTH(efrm - frm,
- sizeof(struct ieee80211_action_ht_txchwidth),
- return);
- break;
- }
- break;
+ } else {
+ if (ieee80211_parse_action(ni, m0) == 0)
+ (void)ic->ic_recv_action(ni, wh, frm, efrm);
}
- ic->ic_recv_action(ni, wh, frm, efrm);
break;
- }
- case IEEE80211_FC0_SUBTYPE_AUTH:
case IEEE80211_FC0_SUBTYPE_ASSOC_REQ:
- case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_ASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_REASSOC_RESP:
- case IEEE80211_FC0_SUBTYPE_DEAUTH:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
case IEEE80211_FC0_SUBTYPE_DISASSOC:
+ case IEEE80211_FC0_SUBTYPE_AUTH:
+ case IEEE80211_FC0_SUBTYPE_DEAUTH:
IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
- wh, NULL, "%s", "not handled");
+ wh, NULL, "%s", "not handled");
vap->iv_stats.is_rx_mgtdiscard++;
- return;
+ break;
default:
IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
- wh, "mgt", "subtype 0x%x not handled", subtype);
+ wh, "mgt", "subtype 0x%x not handled", subtype);
vap->iv_stats.is_rx_badsubtype++;
break;
}
@@ -912,6 +872,7 @@ ahdemo_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
{
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
+ struct ieee80211_frame *wh;
/*
* Process management frames when scanning; useful for doing
@@ -919,11 +880,42 @@ ahdemo_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
*/
if (ic->ic_flags & IEEE80211_F_SCAN)
adhoc_recv_mgmt(ni, m0, subtype, rssi, nf);
- else
- vap->iv_stats.is_rx_mgtdiscard++;
+ else {
+ wh = mtod(m0, struct ieee80211_frame *);
+ switch (subtype) {
+ case IEEE80211_FC0_SUBTYPE_ASSOC_REQ:
+ case IEEE80211_FC0_SUBTYPE_ASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
+ case IEEE80211_FC0_SUBTYPE_REASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_PROBE_REQ:
+ case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
+ case IEEE80211_FC0_SUBTYPE_BEACON:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
+ case IEEE80211_FC0_SUBTYPE_DISASSOC:
+ case IEEE80211_FC0_SUBTYPE_AUTH:
+ case IEEE80211_FC0_SUBTYPE_DEAUTH:
+ case IEEE80211_FC0_SUBTYPE_ACTION:
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not handled");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ break;
+ default:
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
+ wh, "mgt", "subtype 0x%x not handled", subtype);
+ vap->iv_stats.is_rx_badsubtype++;
+ break;
+ }
+ }
}
static void
-adhoc_recv_ctl(struct ieee80211_node *ni, struct mbuf *m0, int subtype)
+adhoc_recv_ctl(struct ieee80211_node *ni, struct mbuf *m, int subtype)
{
+
+ switch (subtype) {
+ case IEEE80211_FC0_SUBTYPE_BAR:
+ ieee80211_recv_bar(ni, m);
+ break;
+ }
}
diff --git a/freebsd/sys/net80211/ieee80211_ageq.c b/freebsd/sys/net80211/ieee80211_ageq.c
index 349345cb..2c6f0475 100644
--- a/freebsd/sys/net80211/ieee80211_ageq.c
+++ b/freebsd/sys/net80211/ieee80211_ageq.c
@@ -51,7 +51,7 @@ __FBSDID("$FreeBSD$");
void
ieee80211_ageq_init(struct ieee80211_ageq *aq, int maxlen, const char *name)
{
- memset(aq, 0, sizeof(aq));
+ memset(aq, 0, sizeof(*aq));
aq->aq_maxlen = maxlen;
IEEE80211_AGEQ_INIT(aq, name); /* OS-dependent setup */
}
diff --git a/freebsd/sys/net80211/ieee80211_crypto.h b/freebsd/sys/net80211/ieee80211_crypto.h
index 57d05ad7..d7ac436f 100644
--- a/freebsd/sys/net80211/ieee80211_crypto.h
+++ b/freebsd/sys/net80211/ieee80211_crypto.h
@@ -78,6 +78,7 @@ struct ieee80211_key {
#define IEEE80211_KEY_XMIT 0x0001 /* key used for xmit */
#define IEEE80211_KEY_RECV 0x0002 /* key used for recv */
#define IEEE80211_KEY_GROUP 0x0004 /* key used for WPA group operation */
+#define IEEE80211_KEY_NOREPLAY 0x0008 /* ignore replay failures */
#define IEEE80211_KEY_SWENCRYPT 0x0010 /* host-based encrypt */
#define IEEE80211_KEY_SWDECRYPT 0x0020 /* host-based decrypt */
#define IEEE80211_KEY_SWENMIC 0x0040 /* host-based enmic */
@@ -98,7 +99,8 @@ struct ieee80211_key {
uint8_t wk_macaddr[IEEE80211_ADDR_LEN];
};
#define IEEE80211_KEY_COMMON /* common flags passed in by apps */\
- (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV | IEEE80211_KEY_GROUP)
+ (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV | IEEE80211_KEY_GROUP | \
+ IEEE80211_KEY_NOREPLAY)
#define IEEE80211_KEY_DEVICE /* flags owned by device driver */\
(IEEE80211_KEY_DEVKEY|IEEE80211_KEY_CIPHER0|IEEE80211_KEY_CIPHER1)
diff --git a/freebsd/sys/net80211/ieee80211_crypto_ccmp.c b/freebsd/sys/net80211/ieee80211_crypto_ccmp.c
index 81ce4df4..13843744 100644
--- a/freebsd/sys/net80211/ieee80211_crypto_ccmp.c
+++ b/freebsd/sys/net80211/ieee80211_crypto_ccmp.c
@@ -228,7 +228,8 @@ ccmp_decap(struct ieee80211_key *k, struct mbuf *m, int hdrlen)
}
tid = ieee80211_gettid(wh);
pn = READ_6(ivp[0], ivp[1], ivp[4], ivp[5], ivp[6], ivp[7]);
- if (pn <= k->wk_keyrsc[tid]) {
+ if (pn <= k->wk_keyrsc[tid] &&
+ (k->wk_flags & IEEE80211_KEY_NOREPLAY) == 0) {
/*
* Replay violation.
*/
diff --git a/freebsd/sys/net80211/ieee80211_crypto_tkip.c b/freebsd/sys/net80211/ieee80211_crypto_tkip.c
index d75baeda..9bc51743 100644
--- a/freebsd/sys/net80211/ieee80211_crypto_tkip.c
+++ b/freebsd/sys/net80211/ieee80211_crypto_tkip.c
@@ -283,7 +283,8 @@ tkip_decap(struct ieee80211_key *k, struct mbuf *m, int hdrlen)
tid = ieee80211_gettid(wh);
ctx->rx_rsc = READ_6(ivp[2], ivp[0], ivp[4], ivp[5], ivp[6], ivp[7]);
- if (ctx->rx_rsc <= k->wk_keyrsc[tid]) {
+ if (ctx->rx_rsc <= k->wk_keyrsc[tid] &&
+ (k->wk_flags & IEEE80211_KEY_NOREPLAY) == 0) {
/*
* Replay violation; notify upper layer.
*/
diff --git a/freebsd/sys/net80211/ieee80211_dfs.c b/freebsd/sys/net80211/ieee80211_dfs.c
index 948a4dc5..708cfc90 100644
--- a/freebsd/sys/net80211/ieee80211_dfs.c
+++ b/freebsd/sys/net80211/ieee80211_dfs.c
@@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$");
#include <net80211/ieee80211_var.h>
-MALLOC_DEFINE(M_80211_DFS, "80211dfs", "802.11 DFS state");
+static MALLOC_DEFINE(M_80211_DFS, "80211dfs", "802.11 DFS state");
static int ieee80211_nol_timeout = 30*60; /* 30 minutes */
SYSCTL_INT(_net_wlan, OID_AUTO, nol_timeout, CTLFLAG_RW,
@@ -322,6 +322,8 @@ ieee80211_dfs_notify_radar(struct ieee80211com *ic, struct ieee80211_channel *ch
* on the NOL to expire.
*/
/*XXX*/
+ if_printf(ic->ic_ifp, "%s: No free channels; waiting for entry "
+ "on NOL to expire\n", __func__);
}
} else {
/*
diff --git a/freebsd/sys/net80211/ieee80211_freebsd.c b/freebsd/sys/net80211/ieee80211_freebsd.c
index 7cfa4a81..5e2abf8a 100644
--- a/freebsd/sys/net80211/ieee80211_freebsd.c
+++ b/freebsd/sys/net80211/ieee80211_freebsd.c
@@ -65,7 +65,7 @@ SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug,
0, "debugging printfs");
#endif
-MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state");
+static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state");
/*
* Allocate/free com structure in conjunction with ifnet;
diff --git a/freebsd/sys/net80211/ieee80211_hostap.c b/freebsd/sys/net80211/ieee80211_hostap.c
index 12970428..63809ea3 100644
--- a/freebsd/sys/net80211/ieee80211_hostap.c
+++ b/freebsd/sys/net80211/ieee80211_hostap.c
@@ -474,7 +474,6 @@ doprint(struct ieee80211vap *vap, int subtype)
static int
hostap_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
{
-#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define HAS_SEQ(type) ((type & 0x4) == 0)
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
@@ -574,9 +573,7 @@ hostap_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
TID_TO_WME_AC(tid) >= WME_AC_VI)
ic->ic_wme.wme_hipri_traffic++;
rxseq = le16toh(*(uint16_t *)wh->i_seq);
- if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 &&
- (wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
- SEQ_LEQ(rxseq, ni->ni_rxseqs[tid])) {
+ if (! ieee80211_check_rxseq(ni, wh)) {
/* duplicate, discard */
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT,
bssid, "duplicate",
@@ -916,7 +913,6 @@ out:
m_freem(m);
}
return type;
-#undef SEQ_LEQ
}
static void
@@ -2196,18 +2192,38 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
}
case IEEE80211_FC0_SUBTYPE_ACTION:
- if (vap->iv_state == IEEE80211_S_RUN) {
- if (ieee80211_parse_action(ni, m0) == 0)
- ic->ic_recv_action(ni, wh, frm, efrm);
- } else
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
+ if (ni == vap->iv_bss) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "unknown node");
vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) &&
+ !IEEE80211_IS_MULTICAST(wh->i_addr1)) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not for us");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (vap->iv_state != IEEE80211_S_RUN) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "wrong state %s",
+ ieee80211_state_name[vap->iv_state]);
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else {
+ if (ieee80211_parse_action(ni, m0) == 0)
+ (void)ic->ic_recv_action(ni, wh, frm, efrm);
+ }
break;
case IEEE80211_FC0_SUBTYPE_ASSOC_RESP:
case IEEE80211_FC0_SUBTYPE_REASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not handled");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ break;
+
default:
IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
- wh, "mgt", "subtype 0x%x not handled", subtype);
+ wh, "mgt", "subtype 0x%x not handled", subtype);
vap->iv_stats.is_rx_badsubtype++;
break;
}
diff --git a/freebsd/sys/net80211/ieee80211_ht.c b/freebsd/sys/net80211/ieee80211_ht.c
index da1bb26b..69e64a02 100644
--- a/freebsd/sys/net80211/ieee80211_ht.c
+++ b/freebsd/sys/net80211/ieee80211_ht.c
@@ -56,31 +56,86 @@ __FBSDID("$FreeBSD$");
#define MS(_v, _f) (((_v) & _f) >> _f##_S)
#define SM(_v, _f) (((_v) << _f##_S) & _f)
-const struct ieee80211_mcs_rates ieee80211_htrates[16] = {
- { 13, 14, 27, 30 }, /* MCS 0 */
- { 26, 29, 54, 60 }, /* MCS 1 */
- { 39, 43, 81, 90 }, /* MCS 2 */
- { 52, 58, 108, 120 }, /* MCS 3 */
- { 78, 87, 162, 180 }, /* MCS 4 */
- { 104, 116, 216, 240 }, /* MCS 5 */
- { 117, 130, 243, 270 }, /* MCS 6 */
- { 130, 144, 270, 300 }, /* MCS 7 */
- { 26, 29, 54, 60 }, /* MCS 8 */
- { 52, 58, 108, 120 }, /* MCS 9 */
- { 78, 87, 162, 180 }, /* MCS 10 */
- { 104, 116, 216, 240 }, /* MCS 11 */
- { 156, 173, 324, 360 }, /* MCS 12 */
- { 208, 231, 432, 480 }, /* MCS 13 */
- { 234, 260, 486, 540 }, /* MCS 14 */
- { 260, 289, 540, 600 } /* MCS 15 */
+const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = {
+ { 13, 14, 27, 30 }, /* MCS 0 */
+ { 26, 29, 54, 60 }, /* MCS 1 */
+ { 39, 43, 81, 90 }, /* MCS 2 */
+ { 52, 58, 108, 120 }, /* MCS 3 */
+ { 78, 87, 162, 180 }, /* MCS 4 */
+ { 104, 116, 216, 240 }, /* MCS 5 */
+ { 117, 130, 243, 270 }, /* MCS 6 */
+ { 130, 144, 270, 300 }, /* MCS 7 */
+ { 26, 29, 54, 60 }, /* MCS 8 */
+ { 52, 58, 108, 120 }, /* MCS 9 */
+ { 78, 87, 162, 180 }, /* MCS 10 */
+ { 104, 116, 216, 240 }, /* MCS 11 */
+ { 156, 173, 324, 360 }, /* MCS 12 */
+ { 208, 231, 432, 480 }, /* MCS 13 */
+ { 234, 260, 486, 540 }, /* MCS 14 */
+ { 260, 289, 540, 600 }, /* MCS 15 */
+ { 39, 43, 81, 90 }, /* MCS 16 */
+ { 78, 87, 162, 180 }, /* MCS 17 */
+ { 117, 130, 243, 270 }, /* MCS 18 */
+ { 156, 173, 324, 360 }, /* MCS 19 */
+ { 234, 260, 486, 540 }, /* MCS 20 */
+ { 312, 347, 648, 720 }, /* MCS 21 */
+ { 351, 390, 729, 810 }, /* MCS 22 */
+ { 390, 433, 810, 900 }, /* MCS 23 */
+ { 52, 58, 108, 120 }, /* MCS 24 */
+ { 104, 116, 216, 240 }, /* MCS 25 */
+ { 156, 173, 324, 360 }, /* MCS 26 */
+ { 208, 231, 432, 480 }, /* MCS 27 */
+ { 312, 347, 648, 720 }, /* MCS 28 */
+ { 416, 462, 864, 960 }, /* MCS 29 */
+ { 468, 520, 972, 1080 }, /* MCS 30 */
+ { 520, 578, 1080, 1200 }, /* MCS 31 */
+ { 0, 0, 12, 13 }, /* MCS 32 */
+ { 78, 87, 162, 180 }, /* MCS 33 */
+ { 104, 116, 216, 240 }, /* MCS 34 */
+ { 130, 144, 270, 300 }, /* MCS 35 */
+ { 117, 130, 243, 270 }, /* MCS 36 */
+ { 156, 173, 324, 360 }, /* MCS 37 */
+ { 195, 217, 405, 450 }, /* MCS 38 */
+ { 104, 116, 216, 240 }, /* MCS 39 */
+ { 130, 144, 270, 300 }, /* MCS 40 */
+ { 130, 144, 270, 300 }, /* MCS 41 */
+ { 156, 173, 324, 360 }, /* MCS 42 */
+ { 182, 202, 378, 420 }, /* MCS 43 */
+ { 182, 202, 378, 420 }, /* MCS 44 */
+ { 208, 231, 432, 480 }, /* MCS 45 */
+ { 156, 173, 324, 360 }, /* MCS 46 */
+ { 195, 217, 405, 450 }, /* MCS 47 */
+ { 195, 217, 405, 450 }, /* MCS 48 */
+ { 234, 260, 486, 540 }, /* MCS 49 */
+ { 273, 303, 567, 630 }, /* MCS 50 */
+ { 273, 303, 567, 630 }, /* MCS 51 */
+ { 312, 347, 648, 720 }, /* MCS 52 */
+ { 130, 144, 270, 300 }, /* MCS 53 */
+ { 156, 173, 324, 360 }, /* MCS 54 */
+ { 182, 202, 378, 420 }, /* MCS 55 */
+ { 156, 173, 324, 360 }, /* MCS 56 */
+ { 182, 202, 378, 420 }, /* MCS 57 */
+ { 208, 231, 432, 480 }, /* MCS 58 */
+ { 234, 260, 486, 540 }, /* MCS 59 */
+ { 208, 231, 432, 480 }, /* MCS 60 */
+ { 234, 260, 486, 540 }, /* MCS 61 */
+ { 260, 289, 540, 600 }, /* MCS 62 */
+ { 260, 289, 540, 600 }, /* MCS 63 */
+ { 286, 318, 594, 660 }, /* MCS 64 */
+ { 195, 217, 405, 450 }, /* MCS 65 */
+ { 234, 260, 486, 540 }, /* MCS 66 */
+ { 273, 303, 567, 630 }, /* MCS 67 */
+ { 234, 260, 486, 540 }, /* MCS 68 */
+ { 273, 303, 567, 630 }, /* MCS 69 */
+ { 312, 347, 648, 720 }, /* MCS 70 */
+ { 351, 390, 729, 810 }, /* MCS 71 */
+ { 312, 347, 648, 720 }, /* MCS 72 */
+ { 351, 390, 729, 810 }, /* MCS 73 */
+ { 390, 433, 810, 900 }, /* MCS 74 */
+ { 390, 433, 810, 900 }, /* MCS 75 */
+ { 429, 477, 891, 990 }, /* MCS 76 */
};
-static const struct ieee80211_htrateset ieee80211_rateset_11n =
- { 16, {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15 }
- };
-
#ifdef IEEE80211_AMPDU_AGE
static int ieee80211_ampdu_age = -1; /* threshold for ampdu reorder q (ms) */
SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW,
@@ -164,6 +219,9 @@ static int ieee80211_addba_response(struct ieee80211_node *ni,
int code, int baparamset, int batimeout);
static void ieee80211_addba_stop(struct ieee80211_node *ni,
struct ieee80211_tx_ampdu *tap);
+static void null_addba_response_timeout(struct ieee80211_node *ni,
+ struct ieee80211_tx_ampdu *tap);
+
static void ieee80211_bar_response(struct ieee80211_node *ni,
struct ieee80211_tx_ampdu *tap, int status);
static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap);
@@ -181,6 +239,7 @@ ieee80211_ht_attach(struct ieee80211com *ic)
ic->ic_ampdu_enable = ieee80211_ampdu_enable;
ic->ic_addba_request = ieee80211_addba_request;
ic->ic_addba_response = ieee80211_addba_response;
+ ic->ic_addba_response_timeout = null_addba_response_timeout;
ic->ic_addba_stop = ieee80211_addba_stop;
ic->ic_bar_response = ieee80211_bar_response;
ic->ic_ampdu_rx_start = ampdu_rx_start;
@@ -249,40 +308,150 @@ ieee80211_ht_vdetach(struct ieee80211vap *vap)
{
}
+static int
+ht_getrate(struct ieee80211com *ic, int index, enum ieee80211_phymode mode,
+ int ratetype)
+{
+ int mword, rate;
+
+ mword = ieee80211_rate2media(ic, index | IEEE80211_RATE_MCS, mode);
+ if (IFM_SUBTYPE(mword) != IFM_IEEE80211_MCS)
+ return (0);
+ switch (ratetype) {
+ case 0:
+ rate = ieee80211_htrates[index].ht20_rate_800ns;
+ break;
+ case 1:
+ rate = ieee80211_htrates[index].ht20_rate_400ns;
+ break;
+ case 2:
+ rate = ieee80211_htrates[index].ht40_rate_800ns;
+ break;
+ default:
+ rate = ieee80211_htrates[index].ht40_rate_400ns;
+ break;
+ }
+ return (rate);
+}
+
+static struct printranges {
+ int minmcs;
+ int maxmcs;
+ int txstream;
+ int ratetype;
+ int htcapflags;
+} ranges[] = {
+ { 0, 7, 1, 0, 0 },
+ { 8, 15, 2, 0, 0 },
+ { 16, 23, 3, 0, 0 },
+ { 24, 31, 4, 0, 0 },
+ { 32, 0, 1, 2, IEEE80211_HTC_TXMCS32 },
+ { 33, 38, 2, 0, IEEE80211_HTC_TXUNEQUAL },
+ { 39, 52, 3, 0, IEEE80211_HTC_TXUNEQUAL },
+ { 53, 76, 4, 0, IEEE80211_HTC_TXUNEQUAL },
+ { 0, 0, 0, 0, 0 },
+};
+
static void
-ht_announce(struct ieee80211com *ic, int mode,
- const struct ieee80211_htrateset *rs)
+ht_rateprint(struct ieee80211com *ic, enum ieee80211_phymode mode, int ratetype)
{
struct ifnet *ifp = ic->ic_ifp;
- int i, rate, mword;
+ int minrate, maxrate;
+ struct printranges *range;
- if_printf(ifp, "%s MCS: ", ieee80211_phymode_name[mode]);
- for (i = 0; i < rs->rs_nrates; i++) {
- mword = ieee80211_rate2media(ic,
- rs->rs_rates[i] | IEEE80211_RATE_MCS, mode);
- if (IFM_SUBTYPE(mword) != IFM_IEEE80211_MCS)
+ for (range = ranges; range->txstream != 0; range++) {
+ if (ic->ic_txstream < range->txstream)
+ continue;
+ if (range->htcapflags &&
+ (ic->ic_htcaps & range->htcapflags) == 0)
continue;
- rate = ieee80211_htrates[rs->rs_rates[i]].ht40_rate_400ns;
- printf("%s%d%sMbps", (i != 0 ? " " : ""),
- rate / 2, ((rate & 0x1) != 0 ? ".5" : ""));
+ if (ratetype < range->ratetype)
+ continue;
+ minrate = ht_getrate(ic, range->minmcs, mode, ratetype);
+ maxrate = ht_getrate(ic, range->maxmcs, mode, ratetype);
+ if (range->maxmcs) {
+ if_printf(ifp, "MCS %d-%d: %d%sMbps - %d%sMbps\n",
+ range->minmcs, range->maxmcs,
+ minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""),
+ maxrate/2, ((maxrate & 0x1) != 0 ? ".5" : ""));
+ } else {
+ if_printf(ifp, "MCS %d: %d%sMbps\n", range->minmcs,
+ minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""));
+ }
+ }
+}
+
+static void
+ht_announce(struct ieee80211com *ic, enum ieee80211_phymode mode)
+{
+ struct ifnet *ifp = ic->ic_ifp;
+ const char *modestr = ieee80211_phymode_name[mode];
+
+ if_printf(ifp, "%s MCS 20MHz\n", modestr);
+ ht_rateprint(ic, mode, 0);
+ if (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20) {
+ if_printf(ifp, "%s MCS 20MHz SGI\n", modestr);
+ ht_rateprint(ic, mode, 1);
+ }
+ if (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
+ if_printf(ifp, "%s MCS 40MHz:\n", modestr);
+ ht_rateprint(ic, mode, 2);
+ }
+ if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
+ (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40)) {
+ if_printf(ifp, "%s MCS 40MHz SGI:\n", modestr);
+ ht_rateprint(ic, mode, 3);
}
- printf("\n");
}
void
ieee80211_ht_announce(struct ieee80211com *ic)
{
+ struct ifnet *ifp = ic->ic_ifp;
+
+ if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA) ||
+ isset(ic->ic_modecaps, IEEE80211_MODE_11NG))
+ if_printf(ifp, "%dT%dR\n", ic->ic_txstream, ic->ic_rxstream);
if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA))
- ht_announce(ic, IEEE80211_MODE_11NA, &ieee80211_rateset_11n);
+ ht_announce(ic, IEEE80211_MODE_11NA);
if (isset(ic->ic_modecaps, IEEE80211_MODE_11NG))
- ht_announce(ic, IEEE80211_MODE_11NG, &ieee80211_rateset_11n);
+ ht_announce(ic, IEEE80211_MODE_11NG);
}
+static struct ieee80211_htrateset htrateset;
+
const struct ieee80211_htrateset *
ieee80211_get_suphtrates(struct ieee80211com *ic,
- const struct ieee80211_channel *c)
+ const struct ieee80211_channel *c)
{
- return &ieee80211_rateset_11n;
+#define ADDRATE(x) do { \
+ htrateset.rs_rates[htrateset.rs_nrates] = x; \
+ htrateset.rs_nrates++; \
+} while (0)
+ int i;
+
+ memset(&htrateset, 0, sizeof(struct ieee80211_htrateset));
+ for (i = 0; i < ic->ic_txstream * 8; i++)
+ ADDRATE(i);
+ if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
+ (ic->ic_htcaps & IEEE80211_HTC_TXMCS32))
+ ADDRATE(32);
+ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) {
+ if (ic->ic_txstream >= 2) {
+ for (i = 33; i <= 38; i++)
+ ADDRATE(i);
+ }
+ if (ic->ic_txstream >= 3) {
+ for (i = 39; i <= 52; i++)
+ ADDRATE(i);
+ }
+ if (ic->ic_txstream == 4) {
+ for (i = 53; i <= 76; i++)
+ ADDRATE(i);
+ }
+ }
+ return &htrateset;
+#undef ADDRATE
}
/*
@@ -398,6 +567,7 @@ ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap,
static void
ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap)
{
+
ampdu_rx_purge(rap);
rap->rxa_flags &= ~(IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND);
}
@@ -658,7 +828,7 @@ again:
if (off < rap->rxa_wnd) {
/*
* Common case (hopefully): in the BA window.
- * Sec 9.10.7.6 a) (D2.04 p.118 line 47)
+ * Sec 9.10.7.6.2 a) (p.137)
*/
#ifdef IEEE80211_AMPDU_AGE
/*
@@ -723,7 +893,7 @@ again:
/*
* Outside the BA window, but within range;
* flush the reorder q and move the window.
- * Sec 9.10.7.6 b) (D2.04 p.118 line 60)
+ * Sec 9.10.7.6.2 b) (p.138)
*/
IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni,
"move BA win <%u:%u> (%u frames) rxseq %u tid %u",
@@ -747,7 +917,7 @@ again:
} else {
/*
* Outside the BA window and out of range; toss.
- * Sec 9.10.7.6 c) (D2.04 p.119 line 16)
+ * Sec 9.10.7.6.2 c) (p.138)
*/
IEEE80211_DISCARD_MAC(vap,
IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr,
@@ -811,7 +981,7 @@ ieee80211_recv_bar(struct ieee80211_node *ni, struct mbuf *m0)
if (off < IEEE80211_SEQ_BA_RANGE) {
/*
* Flush the reorder q up to rxseq and move the window.
- * Sec 9.10.7.6 a) (D2.04 p.119 line 22)
+ * Sec 9.10.7.6.3 a) (p.138)
*/
IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni,
"BAR moves BA win <%u:%u> (%u frames) rxseq %u tid %u",
@@ -832,7 +1002,7 @@ ieee80211_recv_bar(struct ieee80211_node *ni, struct mbuf *m0)
} else {
/*
* Out of range; toss.
- * Sec 9.10.7.6 b) (D2.04 p.119 line 41)
+ * Sec 9.10.7.6.3 b) (p.138)
*/
IEEE80211_DISCARD_MAC(vap,
IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr,
@@ -1400,7 +1570,7 @@ ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie)
htcap_update_shortgi(ni);
/* NB: honor operating mode constraint */
- /* XXX 40 MHZ intolerant */
+ /* XXX 40 MHz intolerant */
htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ?
IEEE80211_CHAN_HT20 : 0;
if ((ni->ni_htcap & IEEE80211_HTCAP_CHWIDTH40) &&
@@ -1419,10 +1589,22 @@ ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie)
int
ieee80211_setup_htrates(struct ieee80211_node *ni, const uint8_t *ie, int flags)
{
+ struct ieee80211com *ic = ni->ni_ic;
struct ieee80211vap *vap = ni->ni_vap;
const struct ieee80211_ie_htcap *htcap;
struct ieee80211_htrateset *rs;
- int i;
+ int i, maxequalmcs, maxunequalmcs;
+
+ maxequalmcs = ic->ic_txstream * 8 - 1;
+ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) {
+ if (ic->ic_txstream >= 2)
+ maxunequalmcs = 38;
+ if (ic->ic_txstream >= 3)
+ maxunequalmcs = 52;
+ if (ic->ic_txstream >= 4)
+ maxunequalmcs = 76;
+ } else
+ maxunequalmcs = 0;
rs = &ni->ni_htrates;
memset(rs, 0, sizeof(*rs));
@@ -1441,6 +1623,13 @@ ieee80211_setup_htrates(struct ieee80211_node *ni, const uint8_t *ie, int flags)
vap->iv_stats.is_rx_rstoobig++;
break;
}
+ if (i <= 31 && i > maxequalmcs)
+ continue;
+ if (i == 32 &&
+ (ic->ic_htcaps & IEEE80211_HTC_TXMCS32) == 0)
+ continue;
+ if (i > 32 && i > maxunequalmcs)
+ continue;
rs->rs_rates[rs->rs_nrates++] = i;
}
}
@@ -1509,14 +1698,23 @@ ampdu_tx_stop(struct ieee80211_tx_ampdu *tap)
tap->txa_flags &= ~(IEEE80211_AGGR_SETUP | IEEE80211_AGGR_NAK);
}
+/*
+ * ADDBA response timeout.
+ *
+ * If software aggregation and per-TID queue management was done here,
+ * that queue would be unpaused after the ADDBA timeout occurs.
+ */
static void
addba_timeout(void *arg)
{
struct ieee80211_tx_ampdu *tap = arg;
+ struct ieee80211_node *ni = tap->txa_ni;
+ struct ieee80211com *ic = ni->ni_ic;
/* XXX ? */
tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND;
tap->txa_attempts++;
+ ic->ic_addba_response_timeout(ni, tap);
}
static void
@@ -1539,6 +1737,12 @@ addba_stop_timeout(struct ieee80211_tx_ampdu *tap)
}
}
+static void
+null_addba_response_timeout(struct ieee80211_node *ni,
+ struct ieee80211_tx_ampdu *tap)
+{
+}
+
/*
* Default method for requesting A-MPDU tx aggregation.
* We setup the specified state block and start a timer
@@ -1623,7 +1827,7 @@ ht_recv_action_ba_addba_request(struct ieee80211_node *ni,
struct ieee80211_rx_ampdu *rap;
uint8_t dialogtoken;
uint16_t baparamset, batimeout, baseqctl;
- uint16_t args[4];
+ uint16_t args[5];
int tid;
dialogtoken = frm[2];
@@ -1673,6 +1877,7 @@ ht_recv_action_ba_addba_request(struct ieee80211_node *ni,
| SM(rap->rxa_wnd, IEEE80211_BAPS_BUFSIZ)
;
args[3] = 0;
+ args[4] = 0;
ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA,
IEEE80211_ACTION_BA_ADDBA_RESPONSE, args);
return 0;
@@ -1876,7 +2081,7 @@ ieee80211_ampdu_request(struct ieee80211_node *ni,
struct ieee80211_tx_ampdu *tap)
{
struct ieee80211com *ic = ni->ni_ic;
- uint16_t args[4];
+ uint16_t args[5];
int tid, dialogtoken;
static int tokens = 0; /* XXX */
@@ -1893,13 +2098,14 @@ ieee80211_ampdu_request(struct ieee80211_node *ni,
tap->txa_start = ni->ni_txseqs[tid];
args[0] = dialogtoken;
- args[1] = IEEE80211_BAPS_POLICY_IMMEDIATE
+ args[1] = 0; /* NB: status code not used */
+ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE
| SM(tid, IEEE80211_BAPS_TID)
| SM(IEEE80211_AGGR_BAWMAX, IEEE80211_BAPS_BUFSIZ)
;
- args[2] = 0; /* batimeout */
+ args[3] = 0; /* batimeout */
/* NB: do first so there's no race against reply */
- if (!ic->ic_addba_request(ni, tap, dialogtoken, args[1], args[2])) {
+ if (!ic->ic_addba_request(ni, tap, dialogtoken, args[2], args[3])) {
/* unable to setup state, don't make request */
IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N,
ni, "%s: could not setup BA stream for AC %d",
@@ -1913,7 +2119,7 @@ ieee80211_ampdu_request(struct ieee80211_node *ni,
}
tokens = dialogtoken; /* allocate token */
/* NB: after calling ic_addba_request so driver can set txa_start */
- args[3] = SM(tap->txa_start, IEEE80211_BASEQ_START)
+ args[4] = SM(tap->txa_start, IEEE80211_BASEQ_START)
| SM(0, IEEE80211_BASEQ_FRAG)
;
return ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA,
@@ -2004,7 +2210,7 @@ bar_tx_complete(struct ieee80211_node *ni, void *arg, int status)
callout_pending(&tap->txa_timer)) {
struct ieee80211com *ic = ni->ni_ic;
- if (status) /* ACK'd */
+ if (status == 0) /* ACK'd */
bar_stop_timer(tap);
ic->ic_bar_response(ni, tap, status);
/* NB: just let timer expire so we pace requests */
@@ -2016,7 +2222,7 @@ ieee80211_bar_response(struct ieee80211_node *ni,
struct ieee80211_tx_ampdu *tap, int status)
{
- if (status != 0) { /* got ACK */
+ if (status == 0) { /* got ACK */
IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N,
ni, "BAR moves BA win <%u:%u> (%u frames) txseq %u tid %u",
tap->txa_start,
@@ -2106,11 +2312,15 @@ ieee80211_send_bar(struct ieee80211_node *ni,
ni, "send BAR: tid %u ctl 0x%x start %u (attempt %d)",
tid, barctl, seq, tap->txa_attempts);
+ /*
+ * ic_raw_xmit will free the node reference
+ * regardless of queue/TX success or failure.
+ */
ret = ic->ic_raw_xmit(ni, m, NULL);
if (ret != 0) {
/* xmit failed, clear state flag */
tap->txa_flags &= ~IEEE80211_AGGR_BARPEND;
- goto bad;
+ return ret;
}
/* XXX hack against tx complete happening before timer is started */
if (tap->txa_flags & IEEE80211_AGGR_BARPEND)
@@ -2159,12 +2369,12 @@ ht_send_action_ba_addba(struct ieee80211_node *ni,
uint8_t *frm;
IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni,
- "send ADDBA %s: dialogtoken %d "
+ "send ADDBA %s: dialogtoken %d status %d "
"baparamset 0x%x (tid %d) batimeout 0x%x baseqctl 0x%x",
(action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ?
"request" : "response",
- args[0], args[1], MS(args[1], IEEE80211_BAPS_TID),
- args[2], args[3]);
+ args[0], args[1], args[2], MS(args[2], IEEE80211_BAPS_TID),
+ args[3], args[4]);
IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE,
"ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__,
@@ -2181,10 +2391,12 @@ ht_send_action_ba_addba(struct ieee80211_node *ni,
*frm++ = category;
*frm++ = action;
*frm++ = args[0]; /* dialog token */
- ADDSHORT(frm, args[1]); /* baparamset */
- ADDSHORT(frm, args[2]); /* batimeout */
+ if (action == IEEE80211_ACTION_BA_ADDBA_RESPONSE)
+ ADDSHORT(frm, args[1]); /* status code */
+ ADDSHORT(frm, args[2]); /* baparamset */
+ ADDSHORT(frm, args[3]); /* batimeout */
if (action == IEEE80211_ACTION_BA_ADDBA_REQUEST)
- ADDSHORT(frm, args[3]); /* baseqctl */
+ ADDSHORT(frm, args[4]); /* baseqctl */
m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *);
return ht_action_output(ni, m);
} else {
@@ -2278,21 +2490,49 @@ ht_send_action_ht_txchwidth(struct ieee80211_node *ni,
#undef ADDSHORT
/*
- * Construct the MCS bit mask for inclusion
- * in an HT information element.
+ * Construct the MCS bit mask for inclusion in an HT capabilities
+ * information element.
*/
-static void
-ieee80211_set_htrates(uint8_t *frm, const struct ieee80211_htrateset *rs)
+static void
+ieee80211_set_mcsset(struct ieee80211com *ic, uint8_t *frm)
{
int i;
-
- for (i = 0; i < rs->rs_nrates; i++) {
- int r = rs->rs_rates[i] & IEEE80211_RATE_VAL;
- if (r < IEEE80211_HTRATE_MAXSIZE) { /* XXX? */
- /* NB: this assumes a particular implementation */
- setbit(frm, r);
+ uint8_t txparams;
+
+ KASSERT((ic->ic_rxstream > 0 && ic->ic_rxstream <= 4),
+ ("ic_rxstream %d out of range", ic->ic_rxstream));
+ KASSERT((ic->ic_txstream > 0 && ic->ic_txstream <= 4),
+ ("ic_txstream %d out of range", ic->ic_txstream));
+
+ for (i = 0; i < ic->ic_rxstream * 8; i++)
+ setbit(frm, i);
+ if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
+ (ic->ic_htcaps & IEEE80211_HTC_RXMCS32))
+ setbit(frm, 32);
+ if (ic->ic_htcaps & IEEE80211_HTC_RXUNEQUAL) {
+ if (ic->ic_rxstream >= 2) {
+ for (i = 33; i <= 38; i++)
+ setbit(frm, i);
+ }
+ if (ic->ic_rxstream >= 3) {
+ for (i = 39; i <= 52; i++)
+ setbit(frm, i);
+ }
+ if (ic->ic_txstream >= 4) {
+ for (i = 53; i <= 76; i++)
+ setbit(frm, i);
}
}
+
+ if (ic->ic_rxstream != ic->ic_txstream) {
+ txparams = 0x1; /* TX MCS set defined */
+ txparams |= 0x2; /* TX RX MCS not equal */
+ txparams |= (ic->ic_txstream - 1) << 2; /* num TX streams */
+ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL)
+ txparams |= 0x16; /* TX unequal modulation sup */
+ } else
+ txparams = 0;
+ frm[12] = txparams;
}
/*
@@ -2306,8 +2546,9 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni)
frm[1] = (v) >> 8; \
frm += 2; \
} while (0)
+ struct ieee80211com *ic = ni->ni_ic;
struct ieee80211vap *vap = ni->ni_vap;
- uint16_t caps;
+ uint16_t caps, extcaps;
int rxmax, density;
/* HT capabilities */
@@ -2329,6 +2570,17 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni)
/* use advertised setting (XXX locally constraint) */
rxmax = MS(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU);
density = MS(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY);
+
+ /*
+ * NB: Hardware might support HT40 on some but not all
+ * channels. We can't determine this earlier because only
+ * after association the channel is upgraded to HT based
+ * on the negotiated capabilities.
+ */
+ if (ni->ni_chan != IEEE80211_CHAN_ANYC &&
+ findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40U) == NULL &&
+ findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40D) == NULL)
+ caps &= ~IEEE80211_HTCAP_CHWIDTH40;
} else {
/* override 20/40 use based on current channel */
if (IEEE80211_IS_CHAN_HT40(ni->ni_chan))
@@ -2358,15 +2610,24 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni)
/* supported MCS set */
/*
- * XXX it would better to get the rate set from ni_htrates
- * so we can restrict it but for sta mode ni_htrates isn't
- * setup when we're called to form an AssocReq frame so for
- * now we're restricted to the default HT rate set.
+ * XXX: For sta mode the rate set should be restricted based
+ * on the AP's capabilities, but ni_htrates isn't setup when
+ * we're called to form an AssocReq frame so for now we're
+ * restricted to the device capabilities.
*/
- ieee80211_set_htrates(frm, &ieee80211_rateset_11n);
+ ieee80211_set_mcsset(ni->ni_ic, frm);
- frm += sizeof(struct ieee80211_ie_htcap) -
+ frm += __offsetof(struct ieee80211_ie_htcap, hc_extcap) -
__offsetof(struct ieee80211_ie_htcap, hc_mcsset);
+
+ /* HT extended capabilities */
+ extcaps = vap->iv_htextcaps & 0xffff;
+
+ ADDSHORT(frm, extcaps);
+
+ frm += sizeof(struct ieee80211_ie_htcap) -
+ __offsetof(struct ieee80211_ie_htcap, hc_txbf);
+
return frm;
#undef ADDSHORT
}
diff --git a/freebsd/sys/net80211/ieee80211_ht.h b/freebsd/sys/net80211/ieee80211_ht.h
index 7b0eab7a..249ddd2c 100644
--- a/freebsd/sys/net80211/ieee80211_ht.h
+++ b/freebsd/sys/net80211/ieee80211_ht.h
@@ -142,7 +142,8 @@ struct ieee80211_rx_ampdu {
int rxa_age; /* age of oldest frame in window */
int rxa_nframes; /* frames since ADDBA */
struct mbuf *rxa_m[IEEE80211_AGGR_BAWMAX];
- uint64_t rxa_pad[4];
+ void *rxa_private;
+ uint64_t rxa_pad[3];
};
void ieee80211_ht_attach(struct ieee80211com *);
@@ -158,7 +159,7 @@ struct ieee80211_mcs_rates {
uint16_t ht40_rate_800ns;
uint16_t ht40_rate_400ns;
};
-extern const struct ieee80211_mcs_rates ieee80211_htrates[16];
+extern const struct ieee80211_mcs_rates ieee80211_htrates[];
const struct ieee80211_htrateset *ieee80211_get_suphtrates(
struct ieee80211com *, const struct ieee80211_channel *);
diff --git a/freebsd/sys/net80211/ieee80211_hwmp.c b/freebsd/sys/net80211/ieee80211_hwmp.c
index 12a15637..36cdc313 100644
--- a/freebsd/sys/net80211/ieee80211_hwmp.c
+++ b/freebsd/sys/net80211/ieee80211_hwmp.c
@@ -163,7 +163,7 @@ struct ieee80211_hwmp_state {
uint8_t hs_maxhops; /* max hop count */
};
-SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0,
"IEEE 802.11s HWMP parameters");
static int ieee80211_hwmp_targetonly = 0;
SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLTYPE_INT | CTLFLAG_RW,
diff --git a/freebsd/sys/net80211/ieee80211_input.c b/freebsd/sys/net80211/ieee80211_input.c
index 97b5c756..043d1887 100644
--- a/freebsd/sys/net80211/ieee80211_input.c
+++ b/freebsd/sys/net80211/ieee80211_input.c
@@ -59,9 +59,54 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#endif
+static void
+ieee80211_process_mimo(struct ieee80211_node *ni, struct ieee80211_rx_stats *rx)
+{
+ int i;
+
+ /* Verify the required MIMO bits are set */
+ if ((rx->r_flags & (IEEE80211_R_C_CHAIN | IEEE80211_R_C_NF | IEEE80211_R_C_RSSI)) !=
+ (IEEE80211_R_C_CHAIN | IEEE80211_R_C_NF | IEEE80211_R_C_RSSI))
+ return;
+
+ /* XXX This assumes the MIMO radios have both ctl and ext chains */
+ for (i = 0; i < MIN(rx->c_chain, IEEE80211_MAX_CHAINS); i++) {
+ IEEE80211_RSSI_LPF(ni->ni_mimo_rssi_ctl[i], rx->c_rssi_ctl[i]);
+ IEEE80211_RSSI_LPF(ni->ni_mimo_rssi_ext[i], rx->c_rssi_ext[i]);
+ }
+
+ /* XXX This also assumes the MIMO radios have both ctl and ext chains */
+ for(i = 0; i < MIN(rx->c_chain, IEEE80211_MAX_CHAINS); i++) {
+ ni->ni_mimo_noise_ctl[i] = rx->c_nf_ctl[i];
+ ni->ni_mimo_noise_ext[i] = rx->c_nf_ext[i];
+ }
+ ni->ni_mimo_chains = rx->c_chain;
+}
+
+int
+ieee80211_input_mimo(struct ieee80211_node *ni, struct mbuf *m,
+ struct ieee80211_rx_stats *rx)
+{
+ /* XXX should assert IEEE80211_R_NF and IEEE80211_R_RSSI are set */
+ ieee80211_process_mimo(ni, rx);
+ return ieee80211_input(ni, m, rx->rssi, rx->nf);
+}
+
int
ieee80211_input_all(struct ieee80211com *ic, struct mbuf *m, int rssi, int nf)
{
+ struct ieee80211_rx_stats rx;
+
+ rx.r_flags = IEEE80211_R_NF | IEEE80211_R_RSSI;
+ rx.nf = nf;
+ rx.rssi = rssi;
+ return ieee80211_input_mimo_all(ic, m, &rx);
+}
+
+int
+ieee80211_input_mimo_all(struct ieee80211com *ic, struct mbuf *m,
+ struct ieee80211_rx_stats *rx)
+{
struct ieee80211vap *vap;
int type = -1;
@@ -98,7 +143,7 @@ ieee80211_input_all(struct ieee80211com *ic, struct mbuf *m, int rssi, int nf)
m = NULL;
}
ni = ieee80211_ref_node(vap->iv_bss);
- type = ieee80211_input(ni, mcopy, rssi, nf);
+ type = ieee80211_input_mimo(ni, mcopy, rx);
ieee80211_free_node(ni);
}
if (m != NULL) /* no vaps, reclaim mbuf */
@@ -679,7 +724,6 @@ ieee80211_parse_action(struct ieee80211_node *ni, struct mbuf *m)
IEEE80211_NODE_STAT(ni, rx_action);
/* verify frame payloads but defer processing */
- /* XXX maybe push this to method */
switch (ia->ia_category) {
case IEEE80211_ACTION_CAT_BA:
switch (ia->ia_action) {
diff --git a/freebsd/sys/net80211/ieee80211_input.h b/freebsd/sys/net80211/ieee80211_input.h
index 5b38dddc..b90f46a1 100644
--- a/freebsd/sys/net80211/ieee80211_input.h
+++ b/freebsd/sys/net80211/ieee80211_input.h
@@ -142,6 +142,104 @@ ishtinfooui(const uint8_t *frm)
return frm[1] > 3 && LE_READ_4(frm+2) == ((BCM_OUI_HTINFO<<24)|BCM_OUI);
}
+#include <sys/endian.h> /* For le16toh() */
+
+/*
+ * Check the current frame sequence number against the current TID
+ * state and return whether it's in sequence or should be dropped.
+ *
+ * Since out of order packet and duplicate packet eliminations should
+ * be done by the AMPDU RX code, this routine blindly accepts all
+ * frames from a HT station w/ a TID that is currently doing AMPDU-RX.
+ * HT stations without WME or where the TID is not doing AMPDU-RX
+ * are checked like non-HT stations.
+ *
+ * The routine only eliminates packets whose sequence/fragment
+ * match or are less than the last seen sequence/fragment number
+ * AND are retransmits. It doesn't try to eliminate out-of-order packets.
+ *
+ * Since all frames after sequence number 4095 will be less than 4095
+ * (as the seqnum wraps), handle that special case so packets aren't
+ * incorrectly dropped - ie, if the next packet is sequence number 0
+ * but a retransmit since the initial packet didn't make it.
+ */
+static __inline int
+ieee80211_check_rxseq(struct ieee80211_node *ni, struct ieee80211_frame *wh)
+{
+#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
+#define SEQ_EQ(a,b) ((int)((a)-(b)) == 0)
+#define HAS_SEQ(type) ((type & 0x4) == 0)
+#define SEQNO(a) ((a) >> IEEE80211_SEQ_SEQ_SHIFT)
+#define FRAGNO(a) ((a) & IEEE80211_SEQ_FRAG_MASK)
+ uint16_t rxseq;
+ uint8_t type;
+ uint8_t tid;
+ struct ieee80211_rx_ampdu *rap;
+
+ rxseq = le16toh(*(uint16_t *)wh->i_seq);
+ type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
+
+ /* Types with no sequence number are always treated valid */
+ if (! HAS_SEQ(type))
+ return 1;
+
+ tid = ieee80211_gettid(wh);
+
+ /*
+ * Only do the HT AMPDU check for WME stations; non-WME HT stations
+ * shouldn't exist outside of debugging. We should at least
+ * handle that.
+ */
+ if (tid < WME_NUM_TID) {
+ rap = &ni->ni_rx_ampdu[tid];
+ /* HT nodes currently doing RX AMPDU are always valid */
+ if ((ni->ni_flags & IEEE80211_NODE_HT) &&
+ (rap->rxa_flags & IEEE80211_AGGR_RUNNING))
+ return 1;
+ }
+
+ /*
+ * Otherwise, retries for packets below or equal to the last
+ * seen sequence number should be dropped.
+ */
+
+ /*
+ * Treat frame seqnum 4095 as special due to boundary
+ * wrapping conditions.
+ */
+ if (SEQNO(ni->ni_rxseqs[tid]) == 4095) {
+ /*
+ * Drop retransmits on seqnum 4095/current fragment for itself.
+ */
+ if (SEQ_EQ(rxseq, ni->ni_rxseqs[tid]) &&
+ (wh->i_fc[1] & IEEE80211_FC1_RETRY))
+ return 0;
+ /*
+ * Treat any subsequent frame as fine if the last seen frame
+ * is 4095 and it's not a retransmit for the same sequence
+ * number. However, this doesn't capture incorrectly ordered
+ * fragments w/ sequence number 4095. It shouldn't be seen
+ * in practice, but see the comment above for further info.
+ */
+ return 1;
+ }
+
+ /*
+ * At this point we assume that retransmitted seq/frag numbers below
+ * the current can simply be eliminated.
+ */
+ if ((wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
+ SEQ_LEQ(rxseq, ni->ni_rxseqs[tid]))
+ return 0;
+
+ return 1;
+#undef SEQ_LEQ
+#undef SEQ_EQ
+#undef HAS_SEQ
+#undef SEQNO
+#undef FRAGNO
+}
+
void ieee80211_deliver_data(struct ieee80211vap *,
struct ieee80211_node *, struct mbuf *);
struct mbuf *ieee80211_defrag(struct ieee80211_node *,
diff --git a/freebsd/sys/net80211/ieee80211_ioctl.c b/freebsd/sys/net80211/ieee80211_ioctl.c
index af27ef10..78a6b50f 100644
--- a/freebsd/sys/net80211/ieee80211_ioctl.c
+++ b/freebsd/sys/net80211/ieee80211_ioctl.c
@@ -72,6 +72,8 @@ __FBSDID("$FreeBSD$");
static const uint8_t zerobssid[IEEE80211_ADDR_LEN];
static struct ieee80211_channel *findchannel(struct ieee80211com *,
int ieee, int mode);
+static int ieee80211_scanreq(struct ieee80211vap *,
+ struct ieee80211_scan_req *);
static __noinline int
ieee80211_ioctl_getkey(struct ieee80211vap *vap, struct ieee80211req *ireq)
@@ -143,7 +145,7 @@ static __noinline int
ieee80211_ioctl_getchaninfo(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
struct ieee80211com *ic = vap->iv_ic;
- int space;
+ uint32_t space;
space = __offsetof(struct ieee80211req_chaninfo,
ic_chans[ic->ic_nchans]);
@@ -207,7 +209,7 @@ ieee80211_ioctl_getstastats(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
struct ieee80211_node *ni;
uint8_t macaddr[IEEE80211_ADDR_LEN];
- const int off = __offsetof(struct ieee80211req_sta_stats, is_stats);
+ const size_t off = __offsetof(struct ieee80211req_sta_stats, is_stats);
int error;
if (ireq->i_len < off)
@@ -323,7 +325,7 @@ ieee80211_ioctl_getscanresults(struct ieee80211vap *vap,
if (req.space > ireq->i_len)
req.space = ireq->i_len;
if (req.space > 0) {
- size_t space;
+ uint32_t space;
void *p;
space = req.space;
@@ -458,7 +460,7 @@ get_sta_info(void *arg, struct ieee80211_node *ni)
static __noinline int
getstainfo_common(struct ieee80211vap *vap, struct ieee80211req *ireq,
- struct ieee80211_node *ni, int off)
+ struct ieee80211_node *ni, size_t off)
{
struct ieee80211com *ic = vap->iv_ic;
struct stainforeq req;
@@ -503,7 +505,7 @@ static __noinline int
ieee80211_ioctl_getstainfo(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
uint8_t macaddr[IEEE80211_ADDR_LEN];
- const int off = __offsetof(struct ieee80211req_sta_req, info);
+ const size_t off = __offsetof(struct ieee80211req_sta_req, info);
struct ieee80211_node *ni;
int error;
@@ -1473,14 +1475,15 @@ mlmelookup(void *arg, const struct ieee80211_scan_entry *se)
}
static __noinline int
-setmlme_assoc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN],
- int ssid_len, const uint8_t ssid[IEEE80211_NWID_LEN])
+setmlme_assoc_sta(struct ieee80211vap *vap,
+ const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len,
+ const uint8_t ssid[IEEE80211_NWID_LEN])
{
struct scanlookup lookup;
- /* XXX ibss/ahdemo */
- if (vap->iv_opmode != IEEE80211_M_STA)
- return EINVAL;
+ KASSERT(vap->iv_opmode == IEEE80211_M_STA,
+ ("expected opmode STA not %s",
+ ieee80211_opmode_name[vap->iv_opmode]));
/* NB: this is racey if roaming is !manual */
lookup.se = NULL;
@@ -1497,6 +1500,37 @@ setmlme_assoc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN],
}
static __noinline int
+setmlme_assoc_adhoc(struct ieee80211vap *vap,
+ const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len,
+ const uint8_t ssid[IEEE80211_NWID_LEN])
+{
+ struct ieee80211_scan_req sr;
+
+ KASSERT(vap->iv_opmode == IEEE80211_M_IBSS ||
+ vap->iv_opmode == IEEE80211_M_AHDEMO,
+ ("expected opmode IBSS or AHDEMO not %s",
+ ieee80211_opmode_name[vap->iv_opmode]));
+
+ if (ssid_len == 0)
+ return EINVAL;
+
+ /* NB: IEEE80211_IOC_SSID call missing for ap_scan=2. */
+ memset(vap->iv_des_ssid[0].ssid, 0, IEEE80211_NWID_LEN);
+ vap->iv_des_ssid[0].len = ssid_len;
+ memcpy(vap->iv_des_ssid[0].ssid, ssid, ssid_len);
+ vap->iv_des_nssid = 1;
+
+ memset(&sr, 0, sizeof(sr));
+ sr.sr_flags = IEEE80211_IOC_SCAN_ACTIVE | IEEE80211_IOC_SCAN_ONCE;
+ sr.sr_duration = IEEE80211_IOC_SCAN_FOREVER;
+ memcpy(sr.sr_ssid[0].ssid, ssid, ssid_len);
+ sr.sr_ssid[0].len = ssid_len;
+ sr.sr_nssid = 1;
+
+ return ieee80211_scanreq(vap, &sr);
+}
+
+static __noinline int
ieee80211_ioctl_setmlme(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
struct ieee80211req_mlme mlme;
@@ -1507,9 +1541,13 @@ ieee80211_ioctl_setmlme(struct ieee80211vap *vap, struct ieee80211req *ireq)
error = copyin(ireq->i_data, &mlme, sizeof(mlme));
if (error)
return error;
- if (mlme.im_op == IEEE80211_MLME_ASSOC)
- return setmlme_assoc(vap, mlme.im_macaddr,
+ if (vap->iv_opmode == IEEE80211_M_STA &&
+ mlme.im_op == IEEE80211_MLME_ASSOC)
+ return setmlme_assoc_sta(vap, mlme.im_macaddr,
vap->iv_des_ssid[0].len, vap->iv_des_ssid[0].ssid);
+ else if (mlme.im_op == IEEE80211_MLME_ASSOC)
+ return setmlme_assoc_adhoc(vap, mlme.im_macaddr,
+ mlme.im_ssid_len, mlme.im_ssid);
else
return setmlme_common(vap, mlme.im_op,
mlme.im_macaddr, mlme.im_reason);
@@ -2338,8 +2376,8 @@ ieee80211_ioctl_chanswitch(struct ieee80211vap *vap, struct ieee80211req *ireq)
return error;
}
-static __noinline int
-ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
+static int
+ieee80211_scanreq(struct ieee80211vap *vap, struct ieee80211_scan_req *sr)
{
#define IEEE80211_IOC_SCAN_FLAGS \
(IEEE80211_IOC_SCAN_NOPICK | IEEE80211_IOC_SCAN_ACTIVE | \
@@ -2348,48 +2386,38 @@ ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
IEEE80211_IOC_SCAN_NOJOIN | IEEE80211_IOC_SCAN_FLUSH | \
IEEE80211_IOC_SCAN_CHECK)
struct ieee80211com *ic = vap->iv_ic;
- struct ieee80211_scan_req sr; /* XXX off stack? */
int error, i;
- /* NB: parent must be running */
- if ((ic->ic_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
- return ENXIO;
-
- if (ireq->i_len != sizeof(sr))
- return EINVAL;
- error = copyin(ireq->i_data, &sr, sizeof(sr));
- if (error != 0)
- return error;
/* convert duration */
- if (sr.sr_duration == IEEE80211_IOC_SCAN_FOREVER)
- sr.sr_duration = IEEE80211_SCAN_FOREVER;
+ if (sr->sr_duration == IEEE80211_IOC_SCAN_FOREVER)
+ sr->sr_duration = IEEE80211_SCAN_FOREVER;
else {
- if (sr.sr_duration < IEEE80211_IOC_SCAN_DURATION_MIN ||
- sr.sr_duration > IEEE80211_IOC_SCAN_DURATION_MAX)
+ if (sr->sr_duration < IEEE80211_IOC_SCAN_DURATION_MIN ||
+ sr->sr_duration > IEEE80211_IOC_SCAN_DURATION_MAX)
return EINVAL;
- sr.sr_duration = msecs_to_ticks(sr.sr_duration);
- if (sr.sr_duration < 1)
- sr.sr_duration = 1;
+ sr->sr_duration = msecs_to_ticks(sr->sr_duration);
+ if (sr->sr_duration < 1)
+ sr->sr_duration = 1;
}
/* convert min/max channel dwell */
- if (sr.sr_mindwell != 0) {
- sr.sr_mindwell = msecs_to_ticks(sr.sr_mindwell);
- if (sr.sr_mindwell < 1)
- sr.sr_mindwell = 1;
+ if (sr->sr_mindwell != 0) {
+ sr->sr_mindwell = msecs_to_ticks(sr->sr_mindwell);
+ if (sr->sr_mindwell < 1)
+ sr->sr_mindwell = 1;
}
- if (sr.sr_maxdwell != 0) {
- sr.sr_maxdwell = msecs_to_ticks(sr.sr_maxdwell);
- if (sr.sr_maxdwell < 1)
- sr.sr_maxdwell = 1;
+ if (sr->sr_maxdwell != 0) {
+ sr->sr_maxdwell = msecs_to_ticks(sr->sr_maxdwell);
+ if (sr->sr_maxdwell < 1)
+ sr->sr_maxdwell = 1;
}
/* NB: silently reduce ssid count to what is supported */
- if (sr.sr_nssid > IEEE80211_SCAN_MAX_SSID)
- sr.sr_nssid = IEEE80211_SCAN_MAX_SSID;
- for (i = 0; i < sr.sr_nssid; i++)
- if (sr.sr_ssid[i].len > IEEE80211_NWID_LEN)
+ if (sr->sr_nssid > IEEE80211_SCAN_MAX_SSID)
+ sr->sr_nssid = IEEE80211_SCAN_MAX_SSID;
+ for (i = 0; i < sr->sr_nssid; i++)
+ if (sr->sr_ssid[i].len > IEEE80211_NWID_LEN)
return EINVAL;
/* cleanse flags just in case, could reject if invalid flags */
- sr.sr_flags &= IEEE80211_IOC_SCAN_FLAGS;
+ sr->sr_flags &= IEEE80211_IOC_SCAN_FLAGS;
/*
* Add an implicit NOPICK if the vap is not marked UP. This
* allows applications to scan without joining a bss (or picking
@@ -2397,13 +2425,13 @@ ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
* roaming mode--you just need to mark the parent device UP.
*/
if ((vap->iv_ifp->if_flags & IFF_UP) == 0)
- sr.sr_flags |= IEEE80211_IOC_SCAN_NOPICK;
+ sr->sr_flags |= IEEE80211_IOC_SCAN_NOPICK;
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN,
"%s: flags 0x%x%s duration 0x%x mindwell %u maxdwell %u nssid %d\n",
- __func__, sr.sr_flags,
+ __func__, sr->sr_flags,
(vap->iv_ifp->if_flags & IFF_UP) == 0 ? " (!IFF_UP)" : "",
- sr.sr_duration, sr.sr_mindwell, sr.sr_maxdwell, sr.sr_nssid);
+ sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid);
/*
* If we are in INIT state then the driver has never had a chance
* to setup hardware state to do a scan; we must use the state
@@ -2418,13 +2446,13 @@ ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
IEEE80211_LOCK(ic);
if (vap->iv_state == IEEE80211_S_INIT) {
/* NB: clobbers previous settings */
- vap->iv_scanreq_flags = sr.sr_flags;
- vap->iv_scanreq_duration = sr.sr_duration;
- vap->iv_scanreq_nssid = sr.sr_nssid;
- for (i = 0; i < sr.sr_nssid; i++) {
- vap->iv_scanreq_ssid[i].len = sr.sr_ssid[i].len;
- memcpy(vap->iv_scanreq_ssid[i].ssid, sr.sr_ssid[i].ssid,
- sr.sr_ssid[i].len);
+ vap->iv_scanreq_flags = sr->sr_flags;
+ vap->iv_scanreq_duration = sr->sr_duration;
+ vap->iv_scanreq_nssid = sr->sr_nssid;
+ for (i = 0; i < sr->sr_nssid; i++) {
+ vap->iv_scanreq_ssid[i].len = sr->sr_ssid[i].len;
+ memcpy(vap->iv_scanreq_ssid[i].ssid,
+ sr->sr_ssid[i].ssid, sr->sr_ssid[i].len);
}
vap->iv_flags_ext |= IEEE80211_FEXT_SCANREQ;
IEEE80211_UNLOCK(ic);
@@ -2432,26 +2460,46 @@ ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
} else {
vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANREQ;
IEEE80211_UNLOCK(ic);
- /* XXX neeed error return codes */
- if (sr.sr_flags & IEEE80211_IOC_SCAN_CHECK) {
- (void) ieee80211_check_scan(vap, sr.sr_flags,
- sr.sr_duration, sr.sr_mindwell, sr.sr_maxdwell,
- sr.sr_nssid,
+ if (sr->sr_flags & IEEE80211_IOC_SCAN_CHECK) {
+ error = ieee80211_check_scan(vap, sr->sr_flags,
+ sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell,
+ sr->sr_nssid,
/* NB: cheat, we assume structures are compatible */
- (const struct ieee80211_scan_ssid *) &sr.sr_ssid[0]);
+ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]);
} else {
- (void) ieee80211_start_scan(vap, sr.sr_flags,
- sr.sr_duration, sr.sr_mindwell, sr.sr_maxdwell,
- sr.sr_nssid,
+ error = ieee80211_start_scan(vap, sr->sr_flags,
+ sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell,
+ sr->sr_nssid,
/* NB: cheat, we assume structures are compatible */
- (const struct ieee80211_scan_ssid *) &sr.sr_ssid[0]);
+ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]);
}
+ if (error == 0)
+ return EINPROGRESS;
}
- return error;
+ return 0;
#undef IEEE80211_IOC_SCAN_FLAGS
}
static __noinline int
+ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq)
+{
+ struct ieee80211com *ic = vap->iv_ic;
+ struct ieee80211_scan_req sr; /* XXX off stack? */
+ int error;
+
+ /* NB: parent must be running */
+ if ((ic->ic_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ return ENXIO;
+
+ if (ireq->i_len != sizeof(sr))
+ return EINVAL;
+ error = copyin(ireq->i_data, &sr, sizeof(sr));
+ if (error != 0)
+ return error;
+ return ieee80211_scanreq(vap, &sr);
+}
+
+static __noinline int
ieee80211_ioctl_setstavlan(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
struct ieee80211_node *ni;
@@ -2676,7 +2724,7 @@ ieee80211_ioctl_set80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211r
case IEEE80211_IOC_PROTMODE:
if (ireq->i_val > IEEE80211_PROT_RTSCTS)
return EINVAL;
- ic->ic_protmode = ireq->i_val;
+ ic->ic_protmode = (enum ieee80211_protmode)ireq->i_val;
/* NB: if not operating in 11g this can wait */
if (ic->ic_bsschan != IEEE80211_CHAN_ANYC &&
IEEE80211_IS_CHAN_ANYG(ic->ic_bsschan))
@@ -2695,7 +2743,7 @@ ieee80211_ioctl_set80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211r
if (!(IEEE80211_ROAMING_DEVICE <= ireq->i_val &&
ireq->i_val <= IEEE80211_ROAMING_MANUAL))
return EINVAL;
- vap->iv_roaming = ireq->i_val;
+ vap->iv_roaming = (enum ieee80211_roamingmode)ireq->i_val;
/* XXXX reset? */
break;
case IEEE80211_IOC_PRIVACY:
diff --git a/freebsd/sys/net80211/ieee80211_ioctl.h b/freebsd/sys/net80211/ieee80211_ioctl.h
index 89d8fe53..cad55760 100644
--- a/freebsd/sys/net80211/ieee80211_ioctl.h
+++ b/freebsd/sys/net80211/ieee80211_ioctl.h
@@ -578,7 +578,7 @@ struct ieee80211req {
char i_name[IFNAMSIZ]; /* if_name, e.g. "wi0" */
uint16_t i_type; /* req type */
int16_t i_val; /* Index or simple value */
- int16_t i_len; /* Index or simple value */
+ uint16_t i_len; /* Index or simple value */
void *i_data; /* Extra data */
};
#define SIOCS80211 _IOW('i', 234, struct ieee80211req)
@@ -790,7 +790,7 @@ struct ieee80211req_scan_result {
uint16_t isr_flags; /* channel flags */
int8_t isr_noise;
int8_t isr_rssi;
- uint8_t isr_intval; /* beacon interval */
+ uint16_t isr_intval; /* beacon interval */
uint8_t isr_capinfo; /* capabilities */
uint8_t isr_erp; /* ERP element */
uint8_t isr_bssid[IEEE80211_ADDR_LEN];
diff --git a/freebsd/sys/net80211/ieee80211_mesh.c b/freebsd/sys/net80211/ieee80211_mesh.c
index cd2ddb94..303d430c 100644
--- a/freebsd/sys/net80211/ieee80211_mesh.c
+++ b/freebsd/sys/net80211/ieee80211_mesh.c
@@ -84,6 +84,7 @@ static void mesh_forward(struct ieee80211vap *, struct mbuf *,
static int mesh_input(struct ieee80211_node *, struct mbuf *, int, int);
static void mesh_recv_mgmt(struct ieee80211_node *, struct mbuf *, int,
int, int);
+static void mesh_recv_ctl(struct ieee80211_node *, struct mbuf *, int);
static void mesh_peer_timeout_setup(struct ieee80211_node *);
static void mesh_peer_timeout_backoff(struct ieee80211_node *);
static void mesh_peer_timeout_cb(void *);
@@ -98,7 +99,7 @@ uint32_t mesh_airtime_calc(struct ieee80211_node *);
/*
* Timeout values come from the specification and are in milliseconds.
*/
-SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0,
"IEEE 802.11s parameters");
static int ieee80211_mesh_retrytimeout = -1;
SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, CTLTYPE_INT | CTLFLAG_RW,
@@ -522,6 +523,7 @@ mesh_vattach(struct ieee80211vap *vap)
vap->iv_input = mesh_input;
vap->iv_opdetach = mesh_vdetach;
vap->iv_recv_mgmt = mesh_recv_mgmt;
+ vap->iv_recv_ctl = mesh_recv_ctl;
ms = malloc(sizeof(struct ieee80211_mesh_state), M_80211_VAP,
M_NOWAIT | M_ZERO);
if (ms == NULL) {
@@ -1040,7 +1042,6 @@ mesh_isucastforme(struct ieee80211vap *vap, const struct ieee80211_frame *wh,
static int
mesh_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
{
-#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define HAS_SEQ(type) ((type & 0x4) == 0)
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
@@ -1094,9 +1095,7 @@ mesh_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
TID_TO_WME_AC(tid) >= WME_AC_VI)
ic->ic_wme.wme_hipri_traffic++;
rxseq = le16toh(*(uint16_t *)wh->i_seq);
- if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 &&
- (wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
- SEQ_LEQ(rxseq, ni->ni_rxseqs[tid])) {
+ if (! ieee80211_check_rxseq(ni, wh)) {
/* duplicate, discard */
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT,
wh->i_addr1, "duplicate",
@@ -1470,11 +1469,12 @@ mesh_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype,
if (xrates != NULL)
IEEE80211_VERIFY_ELEMENT(xrates,
IEEE80211_RATE_MAXSIZE - rates[1], return);
- if (meshid != NULL)
+ if (meshid != NULL) {
IEEE80211_VERIFY_ELEMENT(meshid,
IEEE80211_MESHID_LEN, return);
- /* NB: meshid, not ssid */
- IEEE80211_VERIFY_SSID(vap->iv_bss, meshid, return);
+ /* NB: meshid, not ssid */
+ IEEE80211_VERIFY_SSID(vap->iv_bss, meshid, return);
+ }
/* XXX find a better class or define it's own */
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_INPUT, wh->i_addr2,
@@ -1488,46 +1488,42 @@ mesh_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype,
ieee80211_send_proberesp(vap, wh->i_addr2, 0);
break;
}
+
case IEEE80211_FC0_SUBTYPE_ACTION:
- if (vap->iv_state != IEEE80211_S_RUN) {
- vap->iv_stats.is_rx_mgtdiscard++;
- break;
- }
- /*
- * We received an action for an unknown neighbor.
- * XXX: wait for it to beacon or create ieee80211_node?
- */
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
if (ni == vap->iv_bss) {
- IEEE80211_DISCARD(vap, IEEE80211_MSG_MESH,
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
wh, NULL, "%s", "unknown node");
vap->iv_stats.is_rx_mgtdiscard++;
- break;
- }
- /*
- * Discard if not for us.
- */
- if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) &&
+ } else if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) &&
!IEEE80211_IS_MULTICAST(wh->i_addr1)) {
- IEEE80211_DISCARD(vap, IEEE80211_MSG_MESH,
- wh, NULL, "%s", "not for me");
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not for us");
vap->iv_stats.is_rx_mgtdiscard++;
- break;
+ } else if (vap->iv_state != IEEE80211_S_RUN) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "wrong state %s",
+ ieee80211_state_name[vap->iv_state]);
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else {
+ if (ieee80211_parse_action(ni, m0) == 0)
+ (void)ic->ic_recv_action(ni, wh, frm, efrm);
}
- /* XXX parse_action is a bit useless now */
- if (ieee80211_parse_action(ni, m0) == 0)
- ic->ic_recv_action(ni, wh, frm, efrm);
break;
- case IEEE80211_FC0_SUBTYPE_AUTH:
+
case IEEE80211_FC0_SUBTYPE_ASSOC_REQ:
- case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_ASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_REASSOC_RESP:
- case IEEE80211_FC0_SUBTYPE_DEAUTH:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
case IEEE80211_FC0_SUBTYPE_DISASSOC:
+ case IEEE80211_FC0_SUBTYPE_AUTH:
+ case IEEE80211_FC0_SUBTYPE_DEAUTH:
IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
wh, NULL, "%s", "not handled");
vap->iv_stats.is_rx_mgtdiscard++;
- return;
+ break;
+
default:
IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
wh, "mgt", "subtype 0x%x not handled", subtype);
@@ -1536,6 +1532,17 @@ mesh_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype,
}
}
+static void
+mesh_recv_ctl(struct ieee80211_node *ni, struct mbuf *m, int subtype)
+{
+
+ switch (subtype) {
+ case IEEE80211_FC0_SUBTYPE_BAR:
+ ieee80211_recv_bar(ni, m);
+ break;
+ }
+}
+
/*
* Parse meshpeering action ie's for open+confirm frames; the
* important bits are returned in the supplied structure.
@@ -2285,6 +2292,7 @@ mesh_verify_meshconf(struct ieee80211vap *vap, const uint8_t *ie)
const struct ieee80211_meshconf_ie *meshconf =
(const struct ieee80211_meshconf_ie *) ie;
const struct ieee80211_mesh_state *ms = vap->iv_mesh;
+ uint16_t cap;
if (meshconf == NULL)
return 1;
@@ -2318,8 +2326,10 @@ mesh_verify_meshconf(struct ieee80211vap *vap, const uint8_t *ie)
meshconf->conf_pselid);
return 1;
}
+ /* NB: conf_cap is only read correctly here */
+ cap = LE_READ_2(&meshconf->conf_cap);
/* Not accepting peers */
- if (!(meshconf->conf_cap & IEEE80211_MESHCONF_CAP_AP)) {
+ if (!(cap & IEEE80211_MESHCONF_CAP_AP)) {
IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH,
"not accepting peers: 0x%x\n", meshconf->conf_cap);
return 1;
@@ -2383,6 +2393,7 @@ uint8_t *
ieee80211_add_meshconf(uint8_t *frm, struct ieee80211vap *vap)
{
const struct ieee80211_mesh_state *ms = vap->iv_mesh;
+ uint16_t caps;
KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("not a MBSS vap"));
@@ -2398,11 +2409,12 @@ ieee80211_add_meshconf(uint8_t *frm, struct ieee80211vap *vap)
if (ms->ms_flags & IEEE80211_MESHFLAGS_PORTAL)
*frm |= IEEE80211_MESHCONF_FORM_MP;
frm += 1;
+ caps = 0;
if (ms->ms_flags & IEEE80211_MESHFLAGS_AP)
- *frm |= IEEE80211_MESHCONF_CAP_AP;
+ caps |= IEEE80211_MESHCONF_CAP_AP;
if (ms->ms_flags & IEEE80211_MESHFLAGS_FWD)
- *frm |= IEEE80211_MESHCONF_CAP_FWRD;
- frm += 1;
+ caps |= IEEE80211_MESHCONF_CAP_FWRD;
+ ADDSHORT(frm, caps);
return frm;
}
diff --git a/freebsd/sys/net80211/ieee80211_mesh.h b/freebsd/sys/net80211/ieee80211_mesh.h
index de9b5c2a..ad1b02af 100644
--- a/freebsd/sys/net80211/ieee80211_mesh.h
+++ b/freebsd/sys/net80211/ieee80211_mesh.h
@@ -49,7 +49,7 @@ struct ieee80211_meshconf_ie {
uint8_t conf_syncid; /* Sync. Protocol ID */
uint8_t conf_authid; /* Auth. Protocol ID */
uint8_t conf_form; /* Formation Information */
- uint8_t conf_cap;
+ uint16_t conf_cap;
} __packed;
/* Hybrid Wireless Mesh Protocol */
@@ -72,7 +72,8 @@ struct ieee80211_meshconf_ie {
#define IEEE80211_MESHCONF_CAP_FWRD 0x08 /* forwarding enabled */
#define IEEE80211_MESHCONF_CAP_BTR 0x10 /* Beacon Timing Report Enab */
#define IEEE80211_MESHCONF_CAP_TBTTA 0x20 /* TBTT Adj. Enabled */
-#define IEEE80211_MESHCONF_CAP_PSL 0x40 /* Power Save Level */
+#define IEEE80211_MESHCONF_CAP_TBTT 0x40 /* TBTT Adjusting */
+#define IEEE80211_MESHCONF_CAP_PSL 0x80 /* Power Save Level */
/* Mesh Identifier */
struct ieee80211_meshid_ie {
diff --git a/freebsd/sys/net80211/ieee80211_node.c b/freebsd/sys/net80211/ieee80211_node.c
index e1166707..5bf33549 100644
--- a/freebsd/sys/net80211/ieee80211_node.c
+++ b/freebsd/sys/net80211/ieee80211_node.c
@@ -287,10 +287,7 @@ ieee80211_node_set_chan(struct ieee80211_node *ni,
mode = ieee80211_chan2mode(chan);
if (IEEE80211_IS_CHAN_HT(chan)) {
/*
- * XXX Gotta be careful here; the rate set returned by
- * ieee80211_get_suprates is actually any HT rate
- * set so blindly copying it will be bad. We must
- * install the legacy rate est in ni_rates and the
+ * We must install the legacy rate est in ni_rates and the
* HT rate set in ni_htrates.
*/
ni->ni_htrates = *ieee80211_get_suphtrates(ic, chan);
@@ -1090,7 +1087,26 @@ static void
node_getmimoinfo(const struct ieee80211_node *ni,
struct ieee80211_mimo_info *info)
{
- /* XXX zero data? */
+ int i;
+ uint32_t avgrssi;
+ int32_t rssi;
+
+ bzero(info, sizeof(*info));
+
+ for (i = 0; i < ni->ni_mimo_chains; i++) {
+ avgrssi = ni->ni_mimo_rssi_ctl[i];
+ if (avgrssi == IEEE80211_RSSI_DUMMY_MARKER) {
+ info->rssi[i] = 0;
+ } else {
+ rssi = IEEE80211_RSSI_GET(avgrssi);
+ info->rssi[i] = rssi < 0 ? 0 : rssi > 127 ? 127 : rssi;
+ }
+ info->noise[i] = ni->ni_mimo_noise_ctl[i];
+ }
+
+ /* XXX ext radios? */
+
+ /* XXX EVM? */
}
struct ieee80211_node *
diff --git a/freebsd/sys/net80211/ieee80211_node.h b/freebsd/sys/net80211/ieee80211_node.h
index 01bb2cf1..c1fc0069 100644
--- a/freebsd/sys/net80211/ieee80211_node.h
+++ b/freebsd/sys/net80211/ieee80211_node.h
@@ -166,6 +166,13 @@ struct ieee80211_node {
uint32_t ni_avgrssi; /* recv ssi state */
int8_t ni_noise; /* noise floor */
+ /* mimo statistics */
+ uint32_t ni_mimo_rssi_ctl[IEEE80211_MAX_CHAINS];
+ uint32_t ni_mimo_rssi_ext[IEEE80211_MAX_CHAINS];
+ uint8_t ni_mimo_noise_ctl[IEEE80211_MAX_CHAINS];
+ uint8_t ni_mimo_noise_ext[IEEE80211_MAX_CHAINS];
+ uint8_t ni_mimo_chains;
+
/* header */
uint8_t ni_macaddr[IEEE80211_ADDR_LEN];
uint8_t ni_bssid[IEEE80211_ADDR_LEN];
diff --git a/freebsd/sys/net80211/ieee80211_output.c b/freebsd/sys/net80211/ieee80211_output.c
index 81aa7fb6..229c87ea 100644
--- a/freebsd/sys/net80211/ieee80211_output.c
+++ b/freebsd/sys/net80211/ieee80211_output.c
@@ -59,8 +59,11 @@ __FBSDID("$FreeBSD$");
#include <net80211/ieee80211_wds.h>
#include <net80211/ieee80211_mesh.h>
-#ifdef INET
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
+#endif
+
+#ifdef INET
#include <netinet/if_ether.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
@@ -421,7 +424,8 @@ ieee80211_output(struct ifnet *ifp, struct mbuf *m,
"block %s frame in CAC state\n", "raw data");
vap->iv_stats.is_tx_badstate++;
senderr(EIO); /* XXX */
- }
+ } else if (vap->iv_state == IEEE80211_S_SCAN)
+ senderr(EIO);
/* XXX bypass bridge, pfil, carp, etc. */
if (m->m_pkthdr.len < sizeof(struct ieee80211_frame_ack))
@@ -514,6 +518,7 @@ ieee80211_send_setup(
{
#define WH4(wh) ((struct ieee80211_frame_addr4 *)wh)
struct ieee80211vap *vap = ni->ni_vap;
+ struct ieee80211_tx_ampdu *tap;
struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
ieee80211_seq seqno;
@@ -581,9 +586,15 @@ ieee80211_send_setup(
}
*(uint16_t *)&wh->i_dur[0] = 0;
- seqno = ni->ni_txseqs[tid]++;
- *(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
- M_SEQNO_SET(m, seqno);
+ tap = &ni->ni_tx_ampdu[TID_TO_WME_AC(tid)];
+ if (tid != IEEE80211_NONQOS_TID && IEEE80211_AMPDU_RUNNING(tap))
+ m->m_flags |= M_AMPDU_MPDU;
+ else {
+ seqno = ni->ni_txseqs[tid]++;
+ *(uint16_t *)&wh->i_seq[0] =
+ htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
+ M_SEQNO_SET(m, seqno);
+ }
if (IEEE80211_IS_MULTICAST(wh->i_addr1))
m->m_flags |= M_MCAST;
@@ -2783,6 +2794,8 @@ ieee80211_beacon_update(struct ieee80211_node *ni,
struct ieee80211com *ic = ni->ni_ic;
int len_changed = 0;
uint16_t capinfo;
+ struct ieee80211_frame *wh;
+ ieee80211_seq seqno;
IEEE80211_LOCK(ic);
/*
@@ -2814,6 +2827,12 @@ ieee80211_beacon_update(struct ieee80211_node *ni,
return 1; /* just assume length changed */
}
+ wh = mtod(m, struct ieee80211_frame *);
+ seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++;
+ *(uint16_t *)&wh->i_seq[0] =
+ htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
+ M_SEQNO_SET(m, seqno);
+
/* XXX faster to recalculate entirely or just changes? */
capinfo = ieee80211_getcapinfo(vap, ni->ni_chan);
*bo->bo_caps = htole16(capinfo);
@@ -2924,13 +2943,13 @@ ieee80211_beacon_update(struct ieee80211_node *ni,
bo->bo_tim_trailer += adjust;
bo->bo_erp += adjust;
bo->bo_htinfo += adjust;
-#ifdef IEEE80211_SUPERG_SUPPORT
+#ifdef IEEE80211_SUPPORT_SUPERG
bo->bo_ath += adjust;
#endif
-#ifdef IEEE80211_TDMA_SUPPORT
+#ifdef IEEE80211_SUPPORT_TDMA
bo->bo_tdma += adjust;
#endif
-#ifdef IEEE80211_MESH_SUPPORT
+#ifdef IEEE80211_SUPPORT_MESH
bo->bo_meshconf += adjust;
#endif
bo->bo_appie += adjust;
@@ -2978,13 +2997,13 @@ ieee80211_beacon_update(struct ieee80211_node *ni,
bo->bo_erp += sizeof(*csa);
bo->bo_htinfo += sizeof(*csa);
bo->bo_wme += sizeof(*csa);
-#ifdef IEEE80211_SUPERG_SUPPORT
+#ifdef IEEE80211_SUPPORT_SUPERG
bo->bo_ath += sizeof(*csa);
#endif
-#ifdef IEEE80211_TDMA_SUPPORT
+#ifdef IEEE80211_SUPPORT_TDMA
bo->bo_tdma += sizeof(*csa);
#endif
-#ifdef IEEE80211_MESH_SUPPORT
+#ifdef IEEE80211_SUPPORT_MESH
bo->bo_meshconf += sizeof(*csa);
#endif
bo->bo_appie += sizeof(*csa);
diff --git a/freebsd/sys/net80211/ieee80211_power.c b/freebsd/sys/net80211/ieee80211_power.c
index b39e7156..dde58269 100644
--- a/freebsd/sys/net80211/ieee80211_power.c
+++ b/freebsd/sys/net80211/ieee80211_power.c
@@ -50,7 +50,7 @@ __FBSDID("$FreeBSD$");
static void ieee80211_update_ps(struct ieee80211vap *, int);
static int ieee80211_set_tim(struct ieee80211_node *, int);
-MALLOC_DEFINE(M_80211_POWER, "80211power", "802.11 power save state");
+static MALLOC_DEFINE(M_80211_POWER, "80211power", "802.11 power save state");
void
ieee80211_power_attach(struct ieee80211com *ic)
@@ -104,7 +104,7 @@ ieee80211_power_vdetach(struct ieee80211vap *vap)
void
ieee80211_psq_init(struct ieee80211_psq *psq, const char *name)
{
- memset(psq, 0, sizeof(psq));
+ memset(psq, 0, sizeof(*psq));
psq->psq_maxlen = IEEE80211_PS_MAX_QUEUE;
IEEE80211_PSQ_INIT(psq, name); /* OS-dependent setup */
}
diff --git a/freebsd/sys/net80211/ieee80211_proto.c b/freebsd/sys/net80211/ieee80211_proto.c
index a0a6a0fa..2f9e60b8 100644
--- a/freebsd/sys/net80211/ieee80211_proto.c
+++ b/freebsd/sys/net80211/ieee80211_proto.c
@@ -66,7 +66,7 @@ const char *ieee80211_mgt_subtype_name[] = {
"assoc_req", "assoc_resp", "reassoc_req", "reassoc_resp",
"probe_req", "probe_resp", "reserved#6", "reserved#7",
"beacon", "atim", "disassoc", "auth",
- "deauth", "action", "reserved#14", "reserved#15"
+ "deauth", "action", "action_noack", "reserved#15"
};
const char *ieee80211_ctl_subtype_name[] = {
"reserved#0", "reserved#1", "reserved#2", "reserved#3",
@@ -209,6 +209,21 @@ ieee80211_proto_vattach(struct ieee80211vap *vap)
const struct ieee80211_rateset *rs = &ic->ic_sup_rates[i];
vap->iv_txparms[i].ucastrate = IEEE80211_FIXED_RATE_NONE;
+
+ /*
+ * Setting the management rate to MCS 0 assumes that the
+ * BSS Basic rate set is empty and the BSS Basic MCS set
+ * is not.
+ *
+ * Since we're not checking this, default to the lowest
+ * defined rate for this mode.
+ *
+ * At least one 11n AP (DLINK DIR-825) is reported to drop
+ * some MCS management traffic (eg BA response frames.)
+ *
+ * See also: 9.6.0 of the 802.11n-2009 specification.
+ */
+#ifdef NOTYET
if (i == IEEE80211_MODE_11NA || i == IEEE80211_MODE_11NG) {
vap->iv_txparms[i].mgmtrate = 0 | IEEE80211_RATE_MCS;
vap->iv_txparms[i].mcastrate = 0 | IEEE80211_RATE_MCS;
@@ -218,6 +233,9 @@ ieee80211_proto_vattach(struct ieee80211vap *vap)
vap->iv_txparms[i].mcastrate =
rs->rs_rates[0] & IEEE80211_RATE_VAL;
}
+#endif
+ vap->iv_txparms[i].mgmtrate = rs->rs_rates[0] & IEEE80211_RATE_VAL;
+ vap->iv_txparms[i].mcastrate = rs->rs_rates[0] & IEEE80211_RATE_VAL;
vap->iv_txparms[i].maxretry = IEEE80211_TXMAX_DEFAULT;
}
vap->iv_roaming = IEEE80211_ROAMING_AUTO;
@@ -880,6 +898,15 @@ ieee80211_wme_initparams_locked(struct ieee80211vap *vap)
return;
/*
+ * Clear the wme cap_info field so a qoscount from a previous
+ * vap doesn't confuse later code which only parses the beacon
+ * field and updates hardware when said field changes.
+ * Otherwise the hardware is programmed with defaults, not what
+ * the beacon actually announces.
+ */
+ wme->wme_wmeChanParams.cap_info = 0;
+
+ /*
* Select mode; we can be called early in which case we
* always use auto mode. We know we'll be called when
* entering the RUN state with bsschan setup properly
@@ -1476,6 +1503,11 @@ ieee80211_csa_startswitch(struct ieee80211com *ic,
ieee80211_notify_csa(ic, c, mode, count);
}
+/*
+ * Complete the channel switch by transitioning all CSA VAPs to RUN.
+ * This is called by both the completion and cancellation functions
+ * so each VAP is placed back in the RUN state and can thus transmit.
+ */
static void
csa_completeswitch(struct ieee80211com *ic)
{
@@ -1493,15 +1525,27 @@ csa_completeswitch(struct ieee80211com *ic)
* Complete an 802.11h channel switch started by ieee80211_csa_startswitch.
* We clear state and move all vap's in CSA state to RUN state
* so they can again transmit.
+ *
+ * Although this may not be completely correct, update the BSS channel
+ * for each VAP to the newly configured channel. The setcurchan sets
+ * the current operating channel for the interface (so the radio does
+ * switch over) but the VAP BSS isn't updated, leading to incorrectly
+ * reported information via ioctl.
*/
void
ieee80211_csa_completeswitch(struct ieee80211com *ic)
{
+ struct ieee80211vap *vap;
+
IEEE80211_LOCK_ASSERT(ic);
KASSERT(ic->ic_flags & IEEE80211_F_CSAPENDING, ("csa not pending"));
ieee80211_setcurchan(ic, ic->ic_csa_newchan);
+ TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next)
+ if (vap->iv_state == IEEE80211_S_CSA)
+ vap->iv_bss->ni_chan = ic->ic_curchan;
+
csa_completeswitch(ic);
}
diff --git a/freebsd/sys/net80211/ieee80211_proto.h b/freebsd/sys/net80211/ieee80211_proto.h
index c2808477..7e882161 100644
--- a/freebsd/sys/net80211/ieee80211_proto.h
+++ b/freebsd/sys/net80211/ieee80211_proto.h
@@ -61,9 +61,36 @@ void ieee80211_syncflag(struct ieee80211vap *, int flag);
void ieee80211_syncflag_ht(struct ieee80211vap *, int flag);
void ieee80211_syncflag_ext(struct ieee80211vap *, int flag);
+#define IEEE80211_R_NF 0x0000001 /* global NF value valid */
+#define IEEE80211_R_RSSI 0x0000002 /* global RSSI value valid */
+#define IEEE80211_R_C_CHAIN 0x0000004 /* RX chain count valid */
+#define IEEE80211_R_C_NF 0x0000008 /* per-chain NF value valid */
+#define IEEE80211_R_C_RSSI 0x0000010 /* per-chain RSSI value valid */
+#define IEEE80211_R_C_EVM 0x0000020 /* per-chain EVM valid */
+#define IEEE80211_R_C_HT40 0x0000040 /* RX'ed packet is 40mhz, pilots 4,5 valid */
+
+struct ieee80211_rx_stats {
+ uint32_t r_flags; /* IEEE80211_R_* flags */
+ uint8_t c_chain; /* number of RX chains involved */
+ int16_t c_nf_ctl[IEEE80211_MAX_CHAINS]; /* per-chain NF */
+ int16_t c_nf_ext[IEEE80211_MAX_CHAINS]; /* per-chain NF */
+ int16_t c_rssi_ctl[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */
+ int16_t c_rssi_ext[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */
+ uint8_t nf; /* global NF */
+ uint8_t rssi; /* global RSSI */
+ uint8_t evm[IEEE80211_MAX_CHAINS][IEEE80211_MAX_EVM_PILOTS];
+ /* per-chain, per-pilot EVM values */
+};
+
#define ieee80211_input(ni, m, rssi, nf) \
((ni)->ni_vap->iv_input(ni, m, rssi, nf))
int ieee80211_input_all(struct ieee80211com *, struct mbuf *, int, int);
+
+int ieee80211_input_mimo(struct ieee80211_node *, struct mbuf *,
+ struct ieee80211_rx_stats *);
+int ieee80211_input_mimo_all(struct ieee80211com *, struct mbuf *,
+ struct ieee80211_rx_stats *);
+
struct ieee80211_bpf_params;
int ieee80211_mgmt_output(struct ieee80211_node *, struct mbuf *, int,
struct ieee80211_bpf_params *);
diff --git a/freebsd/sys/net80211/ieee80211_scan.c b/freebsd/sys/net80211/ieee80211_scan.c
index 16eeae7d..19becf8f 100644
--- a/freebsd/sys/net80211/ieee80211_scan.c
+++ b/freebsd/sys/net80211/ieee80211_scan.c
@@ -418,6 +418,8 @@ start_scan_locked(const struct ieee80211_scanner *scan,
vap->iv_stats.is_scan_passive++;
if (flags & IEEE80211_SCAN_FLUSH)
ss->ss_ops->scan_flush(ss);
+ if (flags & IEEE80211_SCAN_BGSCAN)
+ ic->ic_flags_ext |= IEEE80211_FEXT_BGSCAN;
/* NB: flush frames rx'd before 1st channel change */
SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_DISCARD;
@@ -434,12 +436,13 @@ start_scan_locked(const struct ieee80211_scanner *scan,
ic->ic_flags |= IEEE80211_F_SCAN;
ieee80211_runtask(ic, &SCAN_PRIVATE(ss)->ss_scan_task);
}
+ return 1;
} else {
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN,
"%s: %s scan already in progress\n", __func__,
ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive");
}
- return (ic->ic_flags & IEEE80211_F_SCAN);
+ return 0;
}
/*
diff --git a/freebsd/sys/net80211/ieee80211_scan_sta.c b/freebsd/sys/net80211/ieee80211_scan_sta.c
index eb5bcd7c..7cf485c8 100644
--- a/freebsd/sys/net80211/ieee80211_scan_sta.c
+++ b/freebsd/sys/net80211/ieee80211_scan_sta.c
@@ -240,6 +240,7 @@ sta_add(struct ieee80211_scan_state *ss,
const uint8_t *macaddr = wh->i_addr2;
struct ieee80211vap *vap = ss->ss_vap;
struct ieee80211com *ic = vap->iv_ic;
+ struct ieee80211_channel *c;
struct sta_entry *se;
struct ieee80211_scan_entry *ise;
int hash;
@@ -302,7 +303,6 @@ found:
* association on the wrong channel.
*/
if (sp->status & IEEE80211_BPARSE_OFFCHAN) {
- struct ieee80211_channel *c;
/*
* Off-channel, locate the home/bss channel for the sta
* using the value broadcast in the DSPARMS ie. We know
@@ -319,6 +319,14 @@ found:
}
} else
ise->se_chan = ic->ic_curchan;
+ if (IEEE80211_IS_CHAN_HT(ise->se_chan) && sp->htcap == NULL) {
+ /* Demote legacy networks to a non-HT channel. */
+ c = ieee80211_find_channel(ic, ise->se_chan->ic_freq,
+ ise->se_chan->ic_flags & ~IEEE80211_CHAN_HT);
+ KASSERT(c != NULL,
+ ("no legacy channel %u", ise->se_chan->ic_ieee));
+ ise->se_chan = c;
+ }
ise->se_fhdwell = sp->fhdwell;
ise->se_fhindex = sp->fhindex;
ise->se_erp = sp->erp;
@@ -758,26 +766,38 @@ maxrate(const struct ieee80211_scan_entry *se)
{
const struct ieee80211_ie_htcap *htcap =
(const struct ieee80211_ie_htcap *) se->se_ies.htcap_ie;
- int rmax, r, i;
+ int rmax, r, i, txstream;
uint16_t caps;
+ uint8_t txparams;
rmax = 0;
if (htcap != NULL) {
/*
* HT station; inspect supported MCS and then adjust
- * rate by channel width. Could also include short GI
- * in this if we want to be extra accurate.
+ * rate by channel width.
*/
- /* XXX assumes MCS15 is max */
- for (i = 15; i >= 0 && isclr(htcap->hc_mcsset, i); i--)
- ;
+ txparams = htcap->hc_mcsset[12];
+ if (txparams & 0x3) {
+ /*
+ * TX MCS parameters defined and not equal to RX,
+ * extract the number of spatial streams and
+ * map it to the highest MCS rate.
+ */
+ txstream = ((txparams & 0xc) >> 2) + 1;
+ i = txstream * 8 - 1;
+ } else
+ for (i = 31; i >= 0 && isclr(htcap->hc_mcsset, i); i--);
if (i >= 0) {
caps = LE_READ_2(&htcap->hc_cap);
- /* XXX short/long GI */
- if (caps & IEEE80211_HTCAP_CHWIDTH40)
+ if ((caps & IEEE80211_HTCAP_CHWIDTH40) &&
+ (caps & IEEE80211_HTCAP_SHORTGI40))
rmax = ieee80211_htrates[i].ht40_rate_400ns;
- else
+ else if (caps & IEEE80211_HTCAP_CHWIDTH40)
rmax = ieee80211_htrates[i].ht40_rate_800ns;
+ else if (caps & IEEE80211_HTCAP_SHORTGI20)
+ rmax = ieee80211_htrates[i].ht20_rate_400ns;
+ else
+ rmax = ieee80211_htrates[i].ht20_rate_800ns;
}
}
for (i = 0; i < se->se_rates[1]; i++) {
diff --git a/freebsd/sys/net80211/ieee80211_sta.c b/freebsd/sys/net80211/ieee80211_sta.c
index f709d51e..c96eb0b5 100644
--- a/freebsd/sys/net80211/ieee80211_sta.c
+++ b/freebsd/sys/net80211/ieee80211_sta.c
@@ -436,7 +436,7 @@ sta_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
goto invalid;
break;
case IEEE80211_S_SLEEP:
- ieee80211_sta_pwrsave(vap, 0);
+ ieee80211_sta_pwrsave(vap, 1);
break;
default:
invalid:
@@ -514,7 +514,6 @@ doprint(struct ieee80211vap *vap, int subtype)
static int
sta_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
{
-#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define HAS_SEQ(type) ((type & 0x4) == 0)
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
@@ -593,9 +592,7 @@ sta_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
TID_TO_WME_AC(tid) >= WME_AC_VI)
ic->ic_wme.wme_hipri_traffic++;
rxseq = le16toh(*(uint16_t *)wh->i_seq);
- if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 &&
- (wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
- SEQ_LEQ(rxseq, ni->ni_rxseqs[tid])) {
+ if (! ieee80211_check_rxseq(ni, wh)) {
/* duplicate, discard */
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT,
bssid, "duplicate",
@@ -912,7 +909,6 @@ out:
m_freem(m);
}
return type;
-#undef SEQ_LEQ
}
static void
@@ -1546,7 +1542,7 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
htcap = frm;
} else if (ishtinfooui(frm)) {
if (htinfo == NULL)
- htcap = frm;
+ htinfo = frm;
}
}
/* XXX Atheros OUI support */
@@ -1720,21 +1716,35 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
}
case IEEE80211_FC0_SUBTYPE_ACTION:
- if (vap->iv_state == IEEE80211_S_RUN) {
- if (ieee80211_parse_action(ni, m0) == 0)
- ic->ic_recv_action(ni, wh, frm, efrm);
- } else
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
+ if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) &&
+ !IEEE80211_IS_MULTICAST(wh->i_addr1)) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not for us");
vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (vap->iv_state != IEEE80211_S_RUN) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "wrong state %s",
+ ieee80211_state_name[vap->iv_state]);
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else {
+ if (ieee80211_parse_action(ni, m0) == 0)
+ (void)ic->ic_recv_action(ni, wh, frm, efrm);
+ }
break;
- case IEEE80211_FC0_SUBTYPE_PROBE_REQ:
case IEEE80211_FC0_SUBTYPE_ASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
+ case IEEE80211_FC0_SUBTYPE_PROBE_REQ:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not handled");
vap->iv_stats.is_rx_mgtdiscard++;
- return;
+ break;
+
default:
IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
- wh, "mgt", "subtype 0x%x not handled", subtype);
+ wh, "mgt", "subtype 0x%x not handled", subtype);
vap->iv_stats.is_rx_badsubtype++;
break;
}
@@ -1743,6 +1753,11 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
}
static void
-sta_recv_ctl(struct ieee80211_node *ni, struct mbuf *m0, int subtype)
+sta_recv_ctl(struct ieee80211_node *ni, struct mbuf *m, int subtype)
{
+ switch (subtype) {
+ case IEEE80211_FC0_SUBTYPE_BAR:
+ ieee80211_recv_bar(ni, m);
+ break;
+ }
}
diff --git a/freebsd/sys/net80211/ieee80211_var.h b/freebsd/sys/net80211/ieee80211_var.h
index d790bfc2..e4b00099 100644
--- a/freebsd/sys/net80211/ieee80211_var.h
+++ b/freebsd/sys/net80211/ieee80211_var.h
@@ -137,6 +137,7 @@ struct ieee80211com {
uint32_t ic_flags_ven; /* vendor state flags */
uint32_t ic_caps; /* capabilities */
uint32_t ic_htcaps; /* HT capabilities */
+ uint32_t ic_htextcaps; /* HT extended capabilities */
uint32_t ic_cryptocaps; /* crypto capabilities */
uint8_t ic_modecaps[2]; /* set of mode capabilities */
uint8_t ic_promisc; /* vap's needing promisc mode */
@@ -212,6 +213,8 @@ struct ieee80211com {
enum ieee80211_protmode ic_htprotmode; /* HT protection mode */
int ic_lastnonerp; /* last time non-ERP sta noted*/
int ic_lastnonht; /* last time non-HT sta noted */
+ uint8_t ic_rxstream; /* # RX streams */
+ uint8_t ic_txstream; /* # TX streams */
/* optional state for Atheros SuperG protocol extensions */
struct ieee80211_superg *ic_superg;
@@ -225,10 +228,10 @@ struct ieee80211com {
/* virtual ap create/delete */
struct ieee80211vap* (*ic_vap_create)(struct ieee80211com *,
- const char name[IFNAMSIZ], int unit,
- int opmode, int flags,
- const uint8_t bssid[IEEE80211_ADDR_LEN],
- const uint8_t macaddr[IEEE80211_ADDR_LEN]);
+ const char [IFNAMSIZ], int,
+ enum ieee80211_opmode, int,
+ const uint8_t [IEEE80211_ADDR_LEN],
+ const uint8_t [IEEE80211_ADDR_LEN]);
void (*ic_vap_delete)(struct ieee80211vap *);
/* operating mode attachment */
ieee80211vap_attach ic_vattach[IEEE80211_OPMODE_MAX];
@@ -304,6 +307,8 @@ struct ieee80211com {
int status, int baparamset, int batimeout);
void (*ic_addba_stop)(struct ieee80211_node *,
struct ieee80211_tx_ampdu *);
+ void (*ic_addba_response_timeout)(struct ieee80211_node *,
+ struct ieee80211_tx_ampdu *);
/* BAR response received */
void (*ic_bar_response)(struct ieee80211_node *,
struct ieee80211_tx_ampdu *, int status);
@@ -313,7 +318,7 @@ struct ieee80211com {
int batimeout, int baseqctl);
void (*ic_ampdu_rx_stop)(struct ieee80211_node *,
struct ieee80211_rx_ampdu *);
- uint64_t ic_spare[8];
+ uint64_t ic_spare[7];
};
struct ieee80211_aclator;
@@ -340,6 +345,7 @@ struct ieee80211vap {
uint32_t iv_flags_ven; /* vendor state flags */
uint32_t iv_caps; /* capabilities */
uint32_t iv_htcaps; /* HT capabilities */
+ uint32_t iv_htextcaps; /* HT extended capabilities */
enum ieee80211_opmode iv_opmode; /* operation mode */
enum ieee80211_state iv_state; /* state machine state */
enum ieee80211_state iv_nstate; /* pending state */
@@ -574,7 +580,7 @@ MALLOC_DECLARE(M_80211_VAP);
#define IEEE80211_FHT_BITS \
"\20\1NONHT_PR" \
- "\23GF\24HT\25AMDPU_TX\26AMPDU_TX" \
+ "\23GF\24HT\25AMPDU_TX\26AMPDU_TX" \
"\27AMSDU_TX\30AMSDU_RX\31USEHT40\32PUREN\33SHORTGI20\34SHORTGI40" \
"\35HTCOMPAT\36RIFS\37STBC_TX\40STBC_RX"
@@ -633,6 +639,10 @@ MALLOC_DECLARE(M_80211_VAP);
#define IEEE80211_HTC_HT 0x00040000 /* CAPABILITY: HT operation */
#define IEEE80211_HTC_SMPS 0x00080000 /* CAPABILITY: MIMO power save*/
#define IEEE80211_HTC_RIFS 0x00100000 /* CAPABILITY: RIFS support */
+#define IEEE80211_HTC_RXUNEQUAL 0x00200000 /* CAPABILITY: RX unequal MCS */
+#define IEEE80211_HTC_RXMCS32 0x00400000 /* CAPABILITY: MCS32 support */
+#define IEEE80211_HTC_TXUNEQUAL 0x00800000 /* CAPABILITY: TX unequal MCS */
+#define IEEE80211_HTC_TXMCS32 0x01000000 /* CAPABILITY: MCS32 support */
#define IEEE80211_C_HTCAP_BITS \
"\20\1LDPC\2CHWIDTH40\5GREENFIELD\6SHORTGI20\7SHORTGI40\10TXSTBC" \
@@ -642,7 +652,8 @@ void ieee80211_ifattach(struct ieee80211com *,
const uint8_t macaddr[IEEE80211_ADDR_LEN]);
void ieee80211_ifdetach(struct ieee80211com *);
int ieee80211_vap_setup(struct ieee80211com *, struct ieee80211vap *,
- const char name[IFNAMSIZ], int unit, int opmode, int flags,
+ const char name[IFNAMSIZ], int unit,
+ enum ieee80211_opmode opmode, int flags,
const uint8_t bssid[IEEE80211_ADDR_LEN],
const uint8_t macaddr[IEEE80211_ADDR_LEN]);
int ieee80211_vap_attach(struct ieee80211vap *,
diff --git a/freebsd/sys/net80211/ieee80211_wds.c b/freebsd/sys/net80211/ieee80211_wds.c
index 55c2833b..0c5ea68b 100644
--- a/freebsd/sys/net80211/ieee80211_wds.c
+++ b/freebsd/sys/net80211/ieee80211_wds.c
@@ -408,7 +408,6 @@ wds_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
static int
wds_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
{
-#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define HAS_SEQ(type) ((type & 0x4) == 0)
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
@@ -456,6 +455,9 @@ wds_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
*/
wh = mtod(m, struct ieee80211_frame *);
+ if (!IEEE80211_IS_MULTICAST(wh->i_addr1))
+ ni->ni_inact = ni->ni_inact_reload;
+
if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) !=
IEEE80211_FC0_VERSION_0) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
@@ -494,9 +496,7 @@ wds_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
TID_TO_WME_AC(tid) >= WME_AC_VI)
ic->ic_wme.wme_hipri_traffic++;
rxseq = le16toh(*(uint16_t *)wh->i_seq);
- if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 &&
- (wh->i_fc[1] & IEEE80211_FC1_RETRY) &&
- SEQ_LEQ(rxseq, ni->ni_rxseqs[tid])) {
+ if (! ieee80211_check_rxseq(ni, wh)) {
/* duplicate, discard */
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT,
wh->i_addr1, "duplicate",
@@ -538,8 +538,6 @@ wds_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf)
vap->iv_stats.is_rx_wrongdir++;/*XXX*/
goto out;
}
- if (!IEEE80211_IS_MULTICAST(wh->i_addr1))
- ni->ni_inact = ni->ni_inact_reload;
/*
* Handle A-MPDU re-ordering. If the frame is to be
* processed directly then ieee80211_ampdu_reorder
@@ -742,7 +740,6 @@ out:
m_freem(m);
}
return type;
-#undef SEQ_LEQ
}
static void
@@ -758,31 +755,47 @@ wds_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0,
frm = (u_int8_t *)&wh[1];
efrm = mtod(m0, u_int8_t *) + m0->m_len;
switch (subtype) {
- case IEEE80211_FC0_SUBTYPE_DEAUTH:
- case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
- case IEEE80211_FC0_SUBTYPE_BEACON:
- case IEEE80211_FC0_SUBTYPE_PROBE_REQ:
- case IEEE80211_FC0_SUBTYPE_AUTH:
+ case IEEE80211_FC0_SUBTYPE_ACTION:
+ case IEEE80211_FC0_SUBTYPE_ACTION_NOACK:
+ if (ni == vap->iv_bss) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "unknown node");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1)) {
+ /* NB: not interested in multicast frames. */
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not for us");
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else if (vap->iv_state != IEEE80211_S_RUN) {
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "wrong state %s",
+ ieee80211_state_name[vap->iv_state]);
+ vap->iv_stats.is_rx_mgtdiscard++;
+ } else {
+ if (ieee80211_parse_action(ni, m0) == 0)
+ (void)ic->ic_recv_action(ni, wh, frm, efrm);
+ }
+ break;
+
case IEEE80211_FC0_SUBTYPE_ASSOC_REQ:
- case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_ASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_REASSOC_REQ:
case IEEE80211_FC0_SUBTYPE_REASSOC_RESP:
+ case IEEE80211_FC0_SUBTYPE_PROBE_REQ:
+ case IEEE80211_FC0_SUBTYPE_PROBE_RESP:
+ case IEEE80211_FC0_SUBTYPE_BEACON:
+ case IEEE80211_FC0_SUBTYPE_ATIM:
case IEEE80211_FC0_SUBTYPE_DISASSOC:
+ case IEEE80211_FC0_SUBTYPE_AUTH:
+ case IEEE80211_FC0_SUBTYPE_DEAUTH:
+ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT,
+ wh, NULL, "%s", "not handled");
vap->iv_stats.is_rx_mgtdiscard++;
break;
- case IEEE80211_FC0_SUBTYPE_ACTION:
- if (vap->iv_state != IEEE80211_S_RUN ||
- IEEE80211_IS_MULTICAST(wh->i_addr1)) {
- vap->iv_stats.is_rx_mgtdiscard++;
- break;
- }
- ni->ni_inact = ni->ni_inact_reload;
- if (ieee80211_parse_action(ni, m0) == 0)
- ic->ic_recv_action(ni, wh, frm, efrm);
- break;
+
default:
IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
- wh, "mgt", "subtype 0x%x not handled", subtype);
+ wh, "mgt", "subtype 0x%x not handled", subtype);
vap->iv_stats.is_rx_badsubtype++;
break;
}
diff --git a/freebsd/sys/netinet/accf_http.c b/freebsd/sys/netinet/accf_http.c
index 97344a2c..3af867b0 100644
--- a/freebsd/sys/netinet/accf_http.c
+++ b/freebsd/sys/netinet/accf_http.c
@@ -71,7 +71,7 @@ DECLARE_MODULE(accf_http, accf_http_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
static int parse_http_version = 1;
-SYSCTL_NODE(_net_inet_accf, OID_AUTO, http, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_inet_accf, OID_AUTO, http, CTLFLAG_RW, 0,
"HTTP accept filter");
SYSCTL_INT(_net_inet_accf_http, OID_AUTO, parsehttpversion, CTLFLAG_RW,
&parse_http_version, 1,
diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h
index a6e68864..5483721d 100644
--- a/freebsd/sys/netinet/icmp6.h
+++ b/freebsd/sys/netinet/icmp6.h
@@ -659,7 +659,8 @@ void kmod_icmp6stat_inc(int statnum);
#define ICMPV6CTL_MLD_SOMAXSRC 22
#define ICMPV6CTL_MLD_VERSION 23
#define ICMPV6CTL_ND6_MAXQLEN 24
-#define ICMPV6CTL_MAXID 25
+#define ICMPV6CTL_NODEINFO_OLDMCPREFIX 25
+#define ICMPV6CTL_MAXID 26
#define RTF_PROBEMTU RTF_PROTO1
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
index 6b98161f..98ed0b36 100644
--- a/freebsd/sys/netinet/if_ether.c
+++ b/freebsd/sys/netinet/if_ether.c
@@ -79,8 +79,8 @@ __FBSDID("$FreeBSD$");
#define SDL(s) ((struct sockaddr_dl *)s)
SYSCTL_DECL(_net_link_ether);
-SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
-SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
+static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
+static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
/* timer values */
static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20
@@ -89,8 +89,8 @@ static VNET_DEFINE(int, arp_maxtries) = 5;
VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for
* local traffic */
static VNET_DEFINE(int, arp_proxyall) = 0;
-static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
- * 20 seconds */
+static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
+ * 20 seconds */
VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
static VNET_DEFINE(int, arp_maxhold) = 1;
@@ -121,7 +121,7 @@ SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
&VNET_NAME(arpstat), arpstat,
"ARP statistics (struct arpstat, net/if_arp.h)");
SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
- &VNET_NAME(arp_maxhold), 0,
+ &VNET_NAME(arp_maxhold), 0,
"Number of packets to hold per ARP entry");
static void arp_init(void);
@@ -169,38 +169,48 @@ arp_ifscrub(struct ifnet *ifp, uint32_t addr)
static void
arptimer(void *arg)
{
+ struct llentry *lle = (struct llentry *)arg;
struct ifnet *ifp;
- struct llentry *lle;
- int pkts_dropped;
- KASSERT(arg != NULL, ("%s: arg NULL", __func__));
- lle = (struct llentry *)arg;
+ if (lle->la_flags & LLE_STATIC) {
+ LLE_WUNLOCK(lle);
+ return;
+ }
+
ifp = lle->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);
+
+ if ((lle->la_flags & LLE_DELETED) == 0) {
+ int evt;
+
+ if (lle->la_flags & LLE_VALID)
+ evt = LLENTRY_EXPIRED;
+ else
+ evt = LLENTRY_TIMEDOUT;
+ EVENTHANDLER_INVOKE(lle_event, lle, evt);
+ }
+
+ callout_stop(&lle->la_timer);
+
+ /* XXX: LOR avoidance. We still have ref on lle. */
+ LLE_WUNLOCK(lle);
IF_AFDATA_LOCK(ifp);
LLE_WLOCK(lle);
- if (lle->la_flags & LLE_STATIC)
- LLE_WUNLOCK(lle);
- else {
- if (!callout_pending(&lle->la_timer) &&
- callout_active(&lle->la_timer)) {
- callout_stop(&lle->la_timer);
- LLE_REMREF(lle);
- pkts_dropped = llentry_free(lle);
- ARPSTAT_ADD(dropped, pkts_dropped);
- ARPSTAT_INC(timeouts);
- } else {
-#ifdef DIAGNOSTIC
- struct sockaddr *l3addr = L3_ADDR(lle);
- log(LOG_INFO,
- "arptimer issue: %p, IPv4 address: \"%s\"\n", lle,
- inet_ntoa(
- ((const struct sockaddr_in *)l3addr)->sin_addr));
-#endif
- LLE_WUNLOCK(lle);
- }
- }
+
+ /* Guard against race with other llentry_free(). */
+ if (lle->la_flags & LLE_LINKED) {
+ size_t pkts_dropped;
+
+ LLE_REMREF(lle);
+ pkts_dropped = llentry_free(lle);
+ ARPSTAT_ADD(dropped, pkts_dropped);
+ } else
+ LLE_FREE_LOCKED(lle);
+
IF_AFDATA_UNLOCK(ifp);
+
+ ARPSTAT_INC(timeouts);
+
CURVNET_RESTORE();
}
@@ -235,7 +245,7 @@ arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
break; /* found it. */
}
- if (sip == NULL) {
+ if (sip == NULL) {
printf("%s: cannot find matching address\n", __func__);
return;
}
@@ -304,18 +314,16 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
return (0);
}
}
- /* XXXXX
- */
retry:
- IF_AFDATA_RLOCK(ifp);
+ IF_AFDATA_RLOCK(ifp);
la = lla_lookup(LLTABLE(ifp), flags, dst);
- IF_AFDATA_RUNLOCK(ifp);
+ IF_AFDATA_RUNLOCK(ifp);
if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0)
- && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) {
+ && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) {
flags |= (LLE_CREATE | LLE_EXCLUSIVE);
- IF_AFDATA_WLOCK(ifp);
+ IF_AFDATA_WLOCK(ifp);
la = lla_lookup(LLTABLE(ifp), flags, dst);
- IF_AFDATA_WUNLOCK(ifp);
+ IF_AFDATA_WUNLOCK(ifp);
}
if (la == NULL) {
if (flags & LLE_CREATE)
@@ -324,10 +332,10 @@ retry:
inet_ntoa(SIN(dst)->sin_addr));
m_freem(m);
return (EINVAL);
- }
+ }
if ((la->la_flags & LLE_VALID) &&
- ((la->la_flags & LLE_STATIC) || la->la_expire > time_second)) {
+ ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
bcopy(&la->ll_addr, desten, ifp->if_addrlen);
/*
* If entry has an expiry time and it is approaching,
@@ -335,18 +343,18 @@ retry:
* arpt_down interval.
*/
if (!(la->la_flags & LLE_STATIC) &&
- time_second + la->la_preempt > la->la_expire) {
+ time_uptime + la->la_preempt > la->la_expire) {
arprequest(ifp, NULL,
&SIN(dst)->sin_addr, IF_LLADDR(ifp));
la->la_preempt--;
}
-
+
*lle = la;
error = 0;
goto done;
- }
-
+ }
+
if (la->la_flags & LLE_STATIC) { /* should not happen! */
log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
inet_ntoa(SIN(dst)->sin_addr));
@@ -355,7 +363,7 @@ retry:
goto done;
}
- renew = (la->la_asked == 0 || la->la_expire != time_second);
+ renew = (la->la_asked == 0 || la->la_expire != time_uptime);
if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) {
flags |= LLE_EXCLUSIVE;
LLE_RUNLOCK(la);
@@ -376,20 +384,20 @@ retry:
la->la_numheld--;
ARPSTAT_INC(dropped);
}
- }
+ }
if (la->la_hold != NULL) {
curr = la->la_hold;
while (curr->m_nextpkt != NULL)
curr = curr->m_nextpkt;
curr->m_nextpkt = m;
- } else
+ } else
la->la_hold = m;
la->la_numheld++;
if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
flags &= ~LLE_EXCLUSIVE;
LLE_DOWNGRADE(la);
}
-
+
}
/*
* Return EWOULDBLOCK if we have tried less than arp_maxtries. It
@@ -407,7 +415,7 @@ retry:
int canceled;
LLE_ADDREF(la);
- la->la_expire = time_second;
+ la->la_expire = time_uptime;
canceled = callout_reset(&la->la_timer, hz * V_arpt_down,
arptimer, la);
if (canceled)
@@ -437,7 +445,7 @@ arpintr(struct mbuf *m)
if (m->m_len < sizeof(struct arphdr) &&
((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
- log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
+ log(LOG_NOTICE, "arp: runt packet -- m_pullup failed\n");
return;
}
ar = mtod(m, struct arphdr *);
@@ -445,16 +453,19 @@ arpintr(struct mbuf *m)
if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
- ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
- log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
- (unsigned char *)&ar->ar_hrd, "");
+ ntohs(ar->ar_hrd) != ARPHRD_IEEE1394 &&
+ ntohs(ar->ar_hrd) != ARPHRD_INFINIBAND) {
+ log(LOG_NOTICE, "arp: unknown hardware address format (0x%2D)"
+ " (from %*D to %*D)\n", (unsigned char *)&ar->ar_hrd, "",
+ ETHER_ADDR_LEN, (u_char *)ar_sha(ar), ":",
+ ETHER_ADDR_LEN, (u_char *)ar_tha(ar), ":");
m_freem(m);
return;
}
if (m->m_len < arphdr_len(ar)) {
if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
- log(LOG_ERR, "arp: runt packet\n");
+ log(LOG_NOTICE, "arp: runt packet\n");
m_freem(m);
return;
}
@@ -490,17 +501,19 @@ arpintr(struct mbuf *m)
static int log_arp_wrong_iface = 1;
static int log_arp_movements = 1;
static int log_arp_permanent_modify = 1;
+static int allow_multicast = 0;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
&log_arp_wrong_iface, 0,
"log arp packets arriving on the wrong interface");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
- &log_arp_movements, 0,
- "log arp replies from MACs different than the one in the cache");
+ &log_arp_movements, 0,
+ "log arp replies from MACs different than the one in the cache");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
- &log_arp_permanent_modify, 0,
- "log arp replies from MACs different than the one in the permanent arp entry");
-
+ &log_arp_permanent_modify, 0,
+ "log arp replies from MACs different than the one in the permanent arp entry");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
+ &allow_multicast, 0, "accept multicast addresses");
static void
in_arpinput(struct mbuf *m)
@@ -530,11 +543,27 @@ in_arpinput(struct mbuf *m)
req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
- log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
+ log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n");
return;
}
ah = mtod(m, struct arphdr *);
+ /*
+ * ARP is only for IPv4 so we can reject packets with
+ * a protocol length not equal to an IPv4 address.
+ */
+ if (ah->ar_pln != sizeof(struct in_addr)) {
+ log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n",
+ sizeof(struct in_addr));
+ goto drop;
+ }
+
+ if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
+ log(LOG_NOTICE, "arp: %*D is multicast\n",
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
+ goto drop;
+ }
+
op = ntohs(ah->ar_op);
(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
@@ -553,7 +582,7 @@ in_arpinput(struct mbuf *m)
*/
IN_IFADDR_RLOCK();
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
- if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+ if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
ia->ia_ifp == ifp) &&
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
ifa_ref(&ia->ia_ifa);
@@ -570,7 +599,7 @@ in_arpinput(struct mbuf *m)
}
}
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
- if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
+ if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
ia->ia_ifp == ifp) &&
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
ifa_ref(&ia->ia_ifa);
@@ -633,7 +662,7 @@ match:
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
goto drop; /* it's from me, ignore it. */
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
- log(LOG_ERR,
+ log(LOG_NOTICE,
"arp: link address is broadcast for IP address %s!\n",
inet_ntoa(isaddr));
goto drop;
@@ -662,14 +691,14 @@ match:
sin.sin_addr = isaddr;
flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
flags |= LLE_EXCLUSIVE;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_LOCK(ifp);
la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
IF_AFDATA_UNLOCK(ifp);
if (la != NULL) {
/* the following is not an error when doing bridging */
if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
if (log_arp_wrong_iface)
- log(LOG_ERR, "arp: %s is on %s "
+ log(LOG_WARNING, "arp: %s is on %s "
"but got reply from %*D on %s\n",
inet_ntoa(isaddr),
la->lle_tbl->llt_ifp->if_xname,
@@ -692,7 +721,7 @@ match:
goto reply;
}
if (log_arp_movements) {
- log(LOG_INFO, "arp: %s moved from %*D "
+ log(LOG_INFO, "arp: %s moved from %*D "
"to %*D on %s\n",
inet_ntoa(isaddr),
ifp->if_addrlen,
@@ -701,23 +730,25 @@ match:
ifp->if_xname);
}
}
-
+
if (ifp->if_addrlen != ah->ar_hln) {
LLE_WUNLOCK(la);
- log(LOG_WARNING,
- "arp from %*D: addr len: new %d, i/f %d (ignored)",
- ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
- ah->ar_hln, ifp->if_addrlen);
- goto reply;
+ log(LOG_WARNING, "arp from %*D: addr len: new %d, "
+ "i/f %d (ignored)\n", ifp->if_addrlen,
+ (u_char *) ar_sha(ah), ":", ah->ar_hln,
+ ifp->if_addrlen);
+ goto drop;
}
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
+
if (!(la->la_flags & LLE_STATIC)) {
int canceled;
LLE_ADDREF(la);
- la->la_expire = time_second + V_arpt_keep;
+ la->la_expire = time_uptime + V_arpt_keep;
canceled = callout_reset(&la->la_timer,
hz * V_arpt_keep, arptimer, la);
if (canceled)
@@ -725,7 +756,7 @@ match:
}
la->la_asked = 0;
la->la_preempt = V_arp_maxtries;
- /*
+ /*
* The packets are all freed within the call to the output
* routine.
*
@@ -747,7 +778,7 @@ match:
}
} else
LLE_WUNLOCK(la);
- } /* end of FIB loop */
+ }
reply:
if (op != ARPOP_REQUEST)
goto drop;
@@ -761,7 +792,7 @@ reply:
struct llentry *lle = NULL;
sin.sin_addr = itaddr;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_LOCK(ifp);
lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
IF_AFDATA_UNLOCK(ifp);
@@ -776,7 +807,7 @@ reply:
if (!V_arp_proxyall)
goto drop;
-
+
sin.sin_addr = itaddr;
/* XXX MRT use table 0 for arp reply */
rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
@@ -804,7 +835,7 @@ reply:
* wrong network.
*/
sin.sin_addr = isaddr;
-
+
/* XXX MRT use table 0 for arp checks */
rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
if (!rt)
@@ -820,8 +851,7 @@ reply:
RTFREE_LOCKED(rt);
#ifdef DEBUG_PROXY
- printf("arp: proxying for %s\n",
- inet_ntoa(itaddr));
+ printf("arp: proxying for %s\n", inet_ntoa(itaddr));
#endif
}
}
@@ -843,8 +873,8 @@ reply:
(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
ah->ar_op = htons(ARPOP_REPLY);
ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
- m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
- m->m_pkthdr.len = m->m_len;
+ m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
+ m->m_pkthdr.len = m->m_len;
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
@@ -865,7 +895,7 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
- /*
+ /*
* interface address is considered static entry
* because the output of the arp utility shows
* that L2 entry as permanent
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
index f2949c14..3056fa3a 100644
--- a/freebsd/sys/netinet/igmp.c
+++ b/freebsd/sys/netinet/igmp.c
@@ -187,7 +187,7 @@ static const struct netisr_handler igmp_nh = {
struct mtx igmp_mtx;
struct mbuf *m_raopt; /* Router Alert option */
-MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
/*
* VIMAGE-wide globals.
@@ -282,8 +282,9 @@ SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
/*
* Non-virtualized sysctls.
*/
-SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
- sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
+static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo,
+ "Per-interface IGMPv3 state");
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index 7bf52c6b..0c3f72bc 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -78,11 +78,6 @@ static int in_ifinit(struct ifnet *,
struct in_ifaddr *, struct sockaddr_in *, int);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, subnetsarelocal);
-#define V_subnetsarelocal VNET(subnetsarelocal)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
- &VNET_NAME(subnetsarelocal), 0,
- "Treat all subnets as directly connected");
static VNET_DEFINE(int, sameprefixcarponly);
#define V_sameprefixcarponly VNET(sameprefixcarponly)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
@@ -97,9 +92,7 @@ VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
/*
* Return 1 if an internet address is for a ``local'' host
- * (one to which we have a connection). If subnetsarelocal
- * is true, this includes other subnets of the local net.
- * Otherwise, it includes only the directly-connected (sub)nets.
+ * (one to which we have a connection).
*/
int
in_localaddr(struct in_addr in)
@@ -108,19 +101,10 @@ in_localaddr(struct in_addr in)
register struct in_ifaddr *ia;
IN_IFADDR_RLOCK();
- if (V_subnetsarelocal) {
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- if ((i & ia->ia_netmask) == ia->ia_net) {
- IN_IFADDR_RUNLOCK();
- return (1);
- }
- }
- } else {
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
- IN_IFADDR_RUNLOCK();
- return (1);
- }
+ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
+ IN_IFADDR_RUNLOCK();
+ return (1);
}
}
IN_IFADDR_RUNLOCK();
@@ -541,20 +525,20 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
hostIsNew = 0;
}
if (ifra->ifra_mask.sin_len) {
- /*
+ /*
* QL: XXX
* Need to scrub the prefix here in case
* the issued command is SIOCAIFADDR with
* the same address, but with a different
* prefix length. And if the prefix length
- * is the same as before, then the call is
+ * is the same as before, then the call is
* un-necessarily executed here.
*/
in_ifscrub(ifp, ia, LLE_STATIC);
ia->ia_sockmask = ifra->ifra_mask;
ia->ia_sockmask.sin_family = AF_INET;
ia->ia_subnetmask =
- ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ ntohl(ia->ia_sockmask.sin_addr.s_addr);
maskIsNew = 1;
}
if ((ifp->if_flags & IFF_POINTOPOINT) &&
@@ -567,7 +551,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
(hostIsNew || maskIsNew))
error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
if (error != 0 && iaIsNew)
- goto out;
+ break;
if ((ifp->if_flags & IFF_BROADCAST) &&
(ifra->ifra_broadaddr.sin_family == AF_INET))
@@ -898,23 +882,19 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
in_ifscrub(ifp, ia, LLE_STATIC);
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
}
- if (IN_CLASSA(i))
- ia->ia_netmask = IN_CLASSA_NET;
- else if (IN_CLASSB(i))
- ia->ia_netmask = IN_CLASSB_NET;
- else
- ia->ia_netmask = IN_CLASSC_NET;
/*
- * The subnet mask usually includes at least the standard network part,
- * but may may be smaller in the case of supernetting.
- * If it is set, we believe it.
+ * Be compatible with network classes, if netmask isn't supplied,
+ * guess it based on classes.
*/
if (ia->ia_subnetmask == 0) {
- ia->ia_subnetmask = ia->ia_netmask;
+ if (IN_CLASSA(i))
+ ia->ia_subnetmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ ia->ia_subnetmask = IN_CLASSB_NET;
+ else
+ ia->ia_subnetmask = IN_CLASSC_NET;
ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
- } else
- ia->ia_netmask &= ia->ia_subnetmask;
- ia->ia_net = i & ia->ia_netmask;
+ }
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
/*
@@ -927,10 +907,11 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
*/
ia->ia_ifa.ifa_metric = ifp->if_metric;
if (ifp->if_flags & IFF_BROADCAST) {
- ia->ia_broadaddr.sin_addr.s_addr =
- htonl(ia->ia_subnet | ~ia->ia_subnetmask);
- ia->ia_netbroadcast.s_addr =
- htonl(ia->ia_net | ~ ia->ia_netmask);
+ if (ia->ia_subnetmask == IN_RFC3021_MASK)
+ ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
+ else
+ ia->ia_broadaddr.sin_addr.s_addr =
+ htonl(ia->ia_subnet | ~ia->ia_subnetmask);
} else if (ifp->if_flags & IFF_LOOPBACK) {
ia->ia_dstaddr = ia->ia_addr;
flags |= RTF_HOST;
@@ -966,8 +947,8 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
RT_ADDREF(ia_ro.ro_rt);
RTFREE_LOCKED(ia_ro.ro_rt);
} else
- error = ifa_add_loopback_route((struct ifaddr *)ia,
- (struct sockaddr *)&ia->ia_addr);
+ error = ifa_add_loopback_route((struct ifaddr *)ia,
+ (struct sockaddr *)&ia->ia_addr);
if (error == 0)
ia->ia_flags |= IFA_RTSELF;
if (ia_ro.ro_rt != NULL)
@@ -982,10 +963,10 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
? RTF_HOST : 0)
/*
- * Generate a routing message when inserting or deleting
+ * Generate a routing message when inserting or deleting
* an interface address alias.
*/
-static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
+static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
struct in_ifaddr *target)
{
struct route pfx_ro;
@@ -1008,16 +989,13 @@ static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
/* QL: XXX
* Point the gateway to the new interface
- * address as if a new prefix route entry has
- * been added through the new address alias.
- * All other parts of the rtentry is accurate,
+ * address as if a new prefix route entry has
+ * been added through the new address alias.
+ * All other parts of the rtentry is accurate,
* e.g., rt_key, rt_mask, rt_ifp etc.
*/
- msg_rt.rt_gateway =
- (struct sockaddr *)&target->ia_addr;
- rt_newaddrmsg(cmd,
- (struct ifaddr *)target,
- 0, &msg_rt);
+ msg_rt.rt_gateway = (struct sockaddr *)&target->ia_addr;
+ rt_newaddrmsg(cmd, (struct ifaddr *)target, 0, &msg_rt);
RTFREE(pfx_ro.ro_rt);
}
return;
@@ -1065,7 +1043,7 @@ in_addprefix(struct in_ifaddr *target, int flags)
*/
if (ia->ia_flags & IFA_ROUTE) {
#ifdef RADIX_MPATH
- if (ia->ia_addr.sin_addr.s_addr ==
+ if (ia->ia_addr.sin_addr.s_addr ==
target->ia_addr.sin_addr.s_addr) {
IN_IFADDR_RUNLOCK();
return (EEXIST);
@@ -1142,7 +1120,7 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
}
if (freeit && (flags & LLE_STATIC)) {
error = ifa_del_loopback_route((struct ifaddr *)target,
- (struct sockaddr *)&target->ia_addr);
+ (struct sockaddr *)&target->ia_addr);
if (error == 0)
target->ia_flags &= ~IFA_RTSELF;
}
@@ -1222,8 +1200,8 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
mask0.sin_len = sizeof(mask0);
mask0.sin_family = AF_INET;
mask0.sin_addr.s_addr = target->ia_subnetmask;
- lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
- (struct sockaddr *)&mask0, flags);
+ lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
+ (struct sockaddr *)&mask0, flags);
/*
* As no-one seem to have this prefix, we can remove the route.
@@ -1261,17 +1239,18 @@ in_broadcast(struct in_addr in, struct ifnet *ifp)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET &&
(in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
- in.s_addr == ia->ia_netbroadcast.s_addr ||
/*
- * Check for old-style (host 0) broadcast.
+ * Check for old-style (host 0) broadcast, but
+ * taking into account that RFC 3021 obsoletes it.
*/
- t == ia->ia_subnet || t == ia->ia_net) &&
+ (ia->ia_subnetmask != IN_RFC3021_MASK &&
+ t == ia->ia_subnet)) &&
/*
* Check for an all one subnetmask. These
* only exist when an interface gets a secondary
* address.
*/
- ia->ia_subnetmask != (u_long)0xffffffff)
+ ia->ia_subnetmask != (u_long)0xffffffff)
return (1);
return (0);
#undef ia
@@ -1343,6 +1322,20 @@ struct in_llentry {
struct sockaddr_in l3_addr4;
};
+/*
+ * Deletes an address from the address table.
+ * This function is called by the timer functions
+ * such as arptimer() and nd6_llinfo_timer(), and
+ * the caller does the locking.
+ */
+static void
+in_lltable_free(struct lltable *llt, struct llentry *lle)
+{
+ LLE_WUNLOCK(lle);
+ LLE_LOCK_DESTROY(lle);
+ free(lle, M_LLTABLE);
+}
+
static struct llentry *
in_lltable_new(const struct sockaddr *l3addr, u_int flags)
{
@@ -1352,69 +1345,53 @@ in_lltable_new(const struct sockaddr *l3addr, u_int flags)
if (lle == NULL) /* NB: caller generates msg */
return NULL;
- callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
/*
* For IPv4 this will trigger "arpresolve" to generate
* an ARP request.
*/
- lle->base.la_expire = time_second; /* mark expired */
+ lle->base.la_expire = time_uptime; /* mark expired */
lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
lle->base.lle_refcnt = 1;
+ lle->base.lle_free = in_lltable_free;
LLE_LOCK_INIT(&lle->base);
- return &lle->base;
-}
+ callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
+ CALLOUT_RETURNUNLOCKED);
-/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
- */
-static void
-in_lltable_free(struct lltable *llt, struct llentry *lle)
-{
- LLE_WUNLOCK(lle);
- LLE_LOCK_DESTROY(lle);
- free(lle, M_LLTABLE);
+ return (&lle->base);
}
-
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
(((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
static void
-in_lltable_prefix_free(struct lltable *llt,
- const struct sockaddr *prefix,
- const struct sockaddr *mask,
- u_int flags)
+in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
+ const struct sockaddr *mask, u_int flags)
{
const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
struct llentry *lle, *next;
- register int i;
+ int i;
size_t pkts_dropped;
- for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
-
- /*
+ /*
* (flags & LLE_STATIC) means deleting all entries
- * including static ARP entries
+ * including static ARP entries.
*/
- if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
- pfx, msk) &&
- ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
- int canceled;
-
- canceled = callout_drain(&lle->la_timer);
+ if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)),
+ pfx, msk) && ((flags & LLE_STATIC) ||
+ !(lle->la_flags & LLE_STATIC))) {
LLE_WLOCK(lle);
- if (canceled)
+ if (callout_stop(&lle->la_timer))
LLE_REMREF(lle);
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
}
}
}
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
}
@@ -1440,19 +1417,18 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr
*/
if (rt->rt_flags & RTF_GATEWAY) {
if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
- rt->rt_ifp->if_type != IFT_ETHER ||
- (rt->rt_ifp->if_flags &
- (IFF_NOARP | IFF_STATICARP)) != 0 ||
- memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
- sizeof(in_addr_t)) != 0) {
+ rt->rt_ifp->if_type != IFT_ETHER ||
+ (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
+ memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
+ sizeof(in_addr_t)) != 0) {
RTFREE_LOCKED(rt);
return (EINVAL);
}
}
/*
- * Make sure that at least the destination address is covered
- * by the route. This is for handling the case where 2 or more
+ * Make sure that at least the destination address is covered
+ * by the route. This is for handling the case where 2 or more
* interfaces have the same prefix. An incoming packet arrives
* on one interface and the corresponding outgoing packet leaves
* another interface.
@@ -1512,7 +1488,7 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
hashkey = sin->sin_addr.s_addr;
lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
LIST_FOREACH(lle, lleh, lle_next) {
- struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
+ struct sockaddr_in *sa2 = satosin(L3_ADDR(lle));
if (lle->la_flags & LLE_DELETED)
continue;
if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
@@ -1521,7 +1497,7 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
if (lle == NULL) {
#ifdef DIAGNOSTIC
if (flags & LLE_DELETE)
- log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
+ log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
#endif
if (!(flags & LLE_CREATE))
return (NULL);
@@ -1547,18 +1523,24 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
lle->lle_tbl = llt;
lle->lle_head = lleh;
+ lle->la_flags |= LLE_LINKED;
LIST_INSERT_HEAD(lleh, lle, lle_next);
} else if (flags & LLE_DELETE) {
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
- lle->la_flags = LLE_DELETED;
- LLE_WUNLOCK(lle);
+ lle->la_flags |= LLE_DELETED;
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
- log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
+ if ((lle->la_flags &
+ (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
+ llentry_free(lle);
+ else
+ LLE_WUNLOCK(lle);
}
lle = (void *)-1;
-
+
}
if (LLE_IS_VALID(lle)) {
if (flags & LLE_EXCLUSIVE)
@@ -1590,7 +1572,7 @@ in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
struct sockaddr_dl *sdl;
-
+
/* skip deleted entries */
if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
continue;
@@ -1659,7 +1641,6 @@ in_domifattach(struct ifnet *ifp)
llt = lltable_init(ifp, AF_INET);
if (llt != NULL) {
- llt->llt_free = in_lltable_free;
llt->llt_prefix_free = in_lltable_prefix_free;
llt->llt_lookup = in_lltable_lookup;
llt->llt_dump = in_lltable_dump;
diff --git a/freebsd/sys/netinet/in.h b/freebsd/sys/netinet/in.h
index 16df5f24..92ba45a6 100644
--- a/freebsd/sys/netinet/in.h
+++ b/freebsd/sys/netinet/in.h
@@ -93,27 +93,7 @@ typedef __socklen_t socklen_t;
#define _SOCKLEN_T_DECLARED
#endif
-/* Avoid collision with original definition in sys/socket.h. */
-#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED
-/*
- * RFC 2553: protocol-independent placeholder for socket addresses
- */
-#define _SS_MAXSIZE 128U
-#define _SS_ALIGNSIZE (sizeof(__int64_t))
-#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \
- sizeof(sa_family_t))
-#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \
- sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
-
-struct sockaddr_storage {
- unsigned char ss_len; /* address length */
- sa_family_t ss_family; /* address family */
- char __ss_pad1[_SS_PAD1SIZE];
- __int64_t __ss_align; /* force desired struct alignment */
- char __ss_pad2[_SS_PAD2SIZE];
-};
-#define _STRUCT_SOCKADDR_STORAGE_DECLARED
-#endif
+#include <sys/_sockaddr_storage.h>
/* Socket address, internet style. */
struct sockaddr_in {
@@ -147,6 +127,7 @@ __END_DECLS
#endif /* !_KERNEL && __BSD_VISIBLE */
#if __POSIX_VISIBLE >= 200112
+#define IPPROTO_IPV6 41 /* IP6 header */
#define IPPROTO_RAW 255 /* raw IP packet */
#define INET_ADDRSTRLEN 16
#endif
@@ -198,7 +179,6 @@ __END_DECLS
#define IPPROTO_CMTP 38 /* Control Message Transport */
#define IPPROTO_TPXX 39 /* TP++ Transport */
#define IPPROTO_IL 40 /* IL transport protocol */
-#define IPPROTO_IPV6 41 /* IP6 header */
#define IPPROTO_SDRP 42 /* Source Demand Routing */
#define IPPROTO_ROUTING 43 /* IP6 routing header */
#define IPPROTO_FRAGMENT 44 /* IP6 fragmentation header */
@@ -260,10 +240,12 @@ __END_DECLS
#define IPPROTO_GMTP 100 /* GMTP*/
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
#define IPPROTO_SCTP 132 /* SCTP */
+#define IPPROTO_MH 135 /* IPv6 Mobility Header */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_PGM 113 /* PGM */
+#define IPPROTO_MPLS 137 /* MPLS-in-IP */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
/* 255: Reserved */
/* BSD Private, local use, namespace incursion, no longer used */
@@ -275,6 +257,7 @@ __END_DECLS
/* Only used internally, so can be outside the range of valid IP protocols. */
#define IPPROTO_DIVERT 258 /* divert pseudo-protocol */
+#define IPPROTO_SEND 259 /* SeND pseudo-protocol */
/*
* Defined to avoid confusion. The master value is defined by
@@ -414,6 +397,8 @@ __END_DECLS
#define IN_LOOPBACKNET 127 /* official! */
+#define IN_RFC3021_MASK (u_int32_t)0xfffffffe
+
/*
* Options for use with [gs]etsockopt at the IP level.
* First word of comment is data type; bool is stored in int.
diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c
index 5461334b..332d7ff4 100644
--- a/freebsd/sys/netinet/in_gif.c
+++ b/freebsd/sys/netinet/in_gif.c
@@ -258,6 +258,8 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
#endif
}
+ m_addr_changed(m);
+
error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index e4b31968..6d748f1f 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -157,7 +157,8 @@ static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int inp_set_source_filters(struct inpcb *, struct sockopt *);
static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
+ "IPv4 multicast");
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
@@ -176,7 +177,7 @@ SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
&in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
-SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
+static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
@@ -1861,6 +1862,7 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ifp = NULL;
imf = NULL;
+ lims = NULL;
error = 0;
is_new = 0;
@@ -1978,34 +1980,47 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
error = EINVAL;
goto out_inp_locked;
}
- /* Throw out duplicates. */
+ /*
+ * Throw out duplicates.
+ *
+ * XXX FIXME: This makes a naive assumption that
+ * even if entries exist for *ssa in this imf,
+ * they will be rejected as dupes, even if they
+ * are not valid in the current mode (in-mode).
+ *
+ * in_msource is transactioned just as for anything
+ * else in SSM -- but note naive use of inm_graft()
+ * below for allocating new filter entries.
+ *
+ * This is only an issue if someone mixes the
+ * full-state SSM API with the delta-based API,
+ * which is discouraged in the relevant RFCs.
+ */
lims = imo_match_source(imo, idx, &ssa->sa);
- if (lims != NULL) {
+ if (lims != NULL /*&&
+ lims->imsl_st[1] == MCAST_INCLUDE*/) {
error = EADDRNOTAVAIL;
goto out_inp_locked;
}
} else {
/*
- * MCAST_JOIN_GROUP on an existing inclusive
- * membership is an error; if you want to change
- * filter mode, you must use the userland API
- * setsourcefilter().
- */
- if (imf->imf_st[1] == MCAST_INCLUDE) {
- error = EINVAL;
- goto out_inp_locked;
- }
- /*
* MCAST_JOIN_GROUP on an existing exclusive
* membership is an error; return EADDRINUSE
* to preserve 4.4BSD API idempotence, and
* avoid tedious detour to code below.
* NOTE: This is bending RFC 3678 a bit.
+ *
+ * On an existing inclusive membership, this is also
+ * an error; if you want to change filter mode,
+ * you must use the userland API setsourcefilter().
+ * XXX We don't reject this for imf in UNDEFINED
+ * state at t1, because allocation of a filter
+ * is atomic with allocation of a membership.
*/
- if (imf->imf_st[1] == MCAST_EXCLUDE) {
+ error = EINVAL;
+ if (imf->imf_st[1] == MCAST_EXCLUDE)
error = EADDRINUSE;
- goto out_inp_locked;
- }
+ goto out_inp_locked;
}
}
@@ -2040,6 +2055,11 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
* membership of the group. The in_multi may not have
* been allocated yet if this is a new membership, however,
* the in_mfilter slot will be allocated and must be initialized.
+ *
+ * Note: Grafting of exclusive mode filters doesn't happen
+ * in this path.
+ * XXX: Should check for non-NULL lims (node exists but may
+ * not be in-mode) for interop with full-state API.
*/
if (ssa->ss.ss_family != AF_UNSPEC) {
/* Membership starts in IN mode */
@@ -2424,8 +2444,10 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
if (error)
return (error);
- if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
- (msfr.msfr_fmode != MCAST_EXCLUDE &&
+ if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
+ return (ENOBUFS);
+
+ if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
msfr.msfr_fmode != MCAST_INCLUDE))
return (EINVAL);
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index 2b50ae8f..5100ac9b 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -4,8 +4,12 @@
* Copyright (c) 1982, 1986, 1991, 1993, 1995
* The Regents of the University of California.
* Copyright (c) 2007-2009 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -44,17 +48,20 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_pcbgroup.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/callout.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/refcount.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -70,17 +77,22 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/vnet.h>
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_pcb.h>
-#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#endif
+#ifdef INET
+#include <netinet/in_var.h>
+#endif
#ifdef INET6
#include <netinet/ip6.h>
-#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
#endif /* INET6 */
@@ -91,6 +103,8 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
+static struct callout ipport_tick_callout;
+
/*
* These configure the range of local port addresses assigned to
* "unspecified" outgoing connections/packets/whatever.
@@ -120,12 +134,17 @@ static VNET_DEFINE(int, ipport_tcplastcount);
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
+static void in_pcbremlists(struct inpcb *inp);
+#ifdef INET
+static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
+ struct in_addr faddr, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg,
+ int lookupflags, struct ifnet *ifp);
+
#define RANGECHK(var, min, max) \
if ((var) < (min)) { (var) = (min); } \
else if ((var) > (max)) { (var) = (max); }
-static void in_pcbremlists(struct inpcb *inp);
-
static int
sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
{
@@ -149,7 +168,8 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
#undef RANGECHK
-SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
+static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0,
+ "IP Ports");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
@@ -182,6 +202,7 @@ SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
&VNET_NAME(ipport_randomtime), 0,
"Minimum time to keep sequental port "
"allocation before switching to a random one");
+#endif
/*
* in_pcb.c: manage the Protocol Control Blocks.
@@ -192,6 +213,59 @@ SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
*/
/*
+ * Initialize an inpcbinfo -- we should be able to reduce the number of
+ * arguments in time.
+ */
+void
+in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
+ struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
+ char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
+ uint32_t inpcbzone_flags, u_int hashfields)
+{
+
+ INP_INFO_LOCK_INIT(pcbinfo, name);
+ INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
+#ifdef VIMAGE
+ pcbinfo->ipi_vnet = curvnet;
+#endif
+ pcbinfo->ipi_listhead = listhead;
+ LIST_INIT(pcbinfo->ipi_listhead);
+ pcbinfo->ipi_count = 0;
+ pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_hashmask);
+ pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
+ &pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
+ pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
+ NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
+ inpcbzone_flags);
+ uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
+}
+
+/*
+ * Destroy an inpcbinfo.
+ */
+void
+in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
+{
+
+ KASSERT(pcbinfo->ipi_count == 0,
+ ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
+
+ hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
+ hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
+ pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_destroy(pcbinfo);
+#endif
+ uma_zdestroy(pcbinfo->ipi_zone);
+ INP_HASH_LOCK_DESTROY(pcbinfo);
+ INP_INFO_LOCK_DESTROY(pcbinfo);
+}
+
+/*
* Allocate a PCB and associate it with the socket.
* On success return with the PCB locked.
*/
@@ -242,7 +316,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
#endif
INP_WLOCK(inp);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
- inp->inp_refcount = 1; /* Reference from the inpcbinfo */
+ refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
#if defined(IPSEC) || defined(MAC)
out:
if (error != 0) {
@@ -253,13 +327,14 @@ out:
return (error);
}
+#ifdef INET
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
{
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
@@ -278,11 +353,12 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
inp->inp_flags |= INP_ANONPORT;
return (0);
}
+#endif
#if defined(INET) || defined(INET6)
int
in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
- struct ucred *cred, int wild)
+ struct ucred *cred, int lookupflags)
{
struct inpcbinfo *pcbinfo;
struct inpcb *tmpinp;
@@ -299,8 +375,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
* Because no actual state changes occur here, a global write lock on
* the pcbinfo isn't required.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (inp->inp_flags & INP_HIGHPORT) {
first = V_ipport_hifirstauto; /* sysctl */
@@ -358,6 +434,7 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
laddr = *laddrp;
}
#endif
+ tmpinp = NULL; /* Make compiler happy. */
lport = *lportp;
if (dorandom)
@@ -376,14 +453,14 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
#ifdef INET6
if ((inp->inp_vflag & INP_IPV6) != 0)
tmpinp = in6_pcblookup_local(pcbinfo,
- &inp->in6p_laddr, lport, wild, cred);
+ &inp->in6p_laddr, lport, lookupflags, cred);
#endif
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
tmpinp = in_pcblookup_local(pcbinfo, laddr,
- lport, wild, cred);
+ lport, lookupflags, cred);
#endif
} while (tmpinp != NULL);
@@ -395,8 +472,26 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
return (0);
}
+
+/*
+ * Return cached socket options.
+ */
+short
+inp_so_options(const struct inpcb *inp)
+{
+ short so_options;
+
+ so_options = 0;
+
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
+}
#endif /* INET || INET6 */
+#ifdef INET
/*
* Set up a bind operation on a PCB, performing port allocation
* as required, but do not actually modify the PCB. Callers can
@@ -415,15 +510,14 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct in_addr laddr;
u_short lport = 0;
- int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+ int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
int error;
/*
- * Because no actual state changes occur here, a global write lock on
- * the pcbinfo isn't required.
+ * No state changes, so read locks are sufficient here.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
@@ -431,7 +525,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = INPLOOKUP_WILDCARD;
+ lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip4(cred, &laddr)) != 0)
return (error);
@@ -505,8 +599,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_socket->so_options &
- SO_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) == 0) &&
#ifndef __rtems__
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
@@ -516,7 +609,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
return (EADDRINUSE);
}
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
- lport, wild, cred);
+ lport, lookupflags, cred);
if (t && (t->inp_flags & INP_TIMEWAIT)) {
/*
* XXXRW: If an incpb has had its timewait
@@ -524,19 +617,18 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
* being in use (for now). This is better
* than a panic, but not desirable.
*/
- tw = intotw(inp);
+ tw = intotw(t);
if (tw == NULL ||
(reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else if (t &&
- (reuseport & t->inp_socket->so_options) == 0) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY ||
- INP_SOCKAF(so) ==
- INP_SOCKAF(t->inp_socket))
+ (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
+ (t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
return (EADDRINUSE);
}
@@ -545,7 +637,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
if (*lportp != 0)
lport = *lportp;
if (lport == 0) {
- error = in_pcb_lport(inp, &laddr, &lport, cred, wild);
+ error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
if (error != 0)
return (error);
@@ -562,14 +654,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
* then pick one.
*/
int
-in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred, struct mbuf *m)
{
u_short lport, fport;
in_addr_t laddr, faddr;
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
lport = inp->inp_lport;
laddr = inp->inp_laddr.s_addr;
@@ -595,13 +688,20 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
inp->inp_laddr.s_addr = laddr;
inp->inp_faddr.s_addr = faddr;
inp->inp_fport = fport;
- in_pcbrehash(inp);
+ in_pcbrehash_mbuf(inp, m);
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
}
+int
+in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+{
+
+ return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
+}
+
/*
* Do proper source address selection on an unbound socket in case
* of connect. Take jails into account as well.
@@ -857,8 +957,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
* Because a global state change doesn't actually occur here, a read
* lock is sufficient.
*/
- INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
if (oinpp != NULL)
*oinpp = NULL;
@@ -933,8 +1033,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
if (error)
return (error);
}
- oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
- 0, NULL);
+ oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
+ laddr, lport, 0, NULL);
if (oinp != NULL) {
if (oinpp != NULL)
*oinpp = oinp;
@@ -957,13 +1057,14 @@ void
in_pcbdisconnect(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
in_pcbrehash(inp);
}
+#endif
/*
* in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
@@ -982,53 +1083,18 @@ in_pcbdetach(struct inpcb *inp)
}
/*
- * in_pcbfree_internal() frees an inpcb that has been detached from its
- * socket, and whose reference count has reached 0. It will also remove the
- * inpcb from any global lists it might remain on.
- */
-static void
-in_pcbfree_internal(struct inpcb *inp)
-{
- struct inpcbinfo *ipi = inp->inp_pcbinfo;
-
- KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__));
-
- INP_INFO_WLOCK_ASSERT(ipi);
- INP_WLOCK_ASSERT(inp);
-
-#ifdef IPSEC
- if (inp->inp_sp != NULL)
- ipsec_delete_pcbpolicy(inp);
-#endif /* IPSEC */
- inp->inp_gencnt = ++ipi->ipi_gencnt;
- in_pcbremlists(inp);
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO) {
- ip6_freepcbopts(inp->in6p_outputopts);
- if (inp->in6p_moptions != NULL)
- ip6_freemoptions(inp->in6p_moptions);
- }
-#endif
- if (inp->inp_options)
- (void)m_free(inp->inp_options);
- if (inp->inp_moptions != NULL)
- inp_freemoptions(inp->inp_moptions);
- inp->inp_vflag = 0;
- crfree(inp->inp_cred);
-
-#ifdef MAC
- mac_inpcb_destroy(inp);
-#endif
- INP_WUNLOCK(inp);
- uma_zfree(ipi->ipi_zone, inp);
-}
-
-/*
* in_pcbref() bumps the reference count on an inpcb in order to maintain
* stability of an inpcb pointer despite the inpcb lock being released. This
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already held, or when acquiring a reference
+ * via a pcbgroup.
+ *
+ * in_pcbref() should be used only to provide brief memory stability, and
+ * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
+ * garbage collect the inpcb if it has been in_pcbfree()'d from another
+ * context. Until in_pcbrele() has returned that the inpcb is still valid,
+ * lock and rele are the *only* safe operations that may be performed on the
+ * inpcb.
*
* While the inpcb will not be freed, releasing the inpcb lock means that the
* connection's state may change, so the caller should be careful to
@@ -1039,11 +1105,9 @@ void
in_pcbref(struct inpcb *inp)
{
- INP_WLOCK_ASSERT(inp);
-
KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
- inp->inp_refcount++;
+ refcount_acquire(&inp->inp_refcount);
}
/*
@@ -1051,47 +1115,118 @@ in_pcbref(struct inpcb *inp)
* in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
* return a flag indicating whether or not the inpcb remains valid. If it is
* valid, we return with the inpcb lock held.
+ *
+ * Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a
+ * reference on an inpcb. Historically more work was done here (actually, in
+ * in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the
+ * need for the pcbinfo lock in in_pcbrele(). Deferring the free is entirely
+ * about memory stability (and continued use of the write lock).
*/
int
-in_pcbrele(struct inpcb *inp)
+in_pcbrele_rlocked(struct inpcb *inp)
{
-#ifdef INVARIANTS
- struct inpcbinfo *ipi = inp->inp_pcbinfo;
-#endif
+ struct inpcbinfo *pcbinfo;
+
+ KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
+
+ INP_RLOCK_ASSERT(inp);
+
+ if (refcount_release(&inp->inp_refcount) == 0) {
+ /*
+ * If the inpcb has been freed, let the caller know, even if
+ * this isn't the last reference.
+ */
+ if (inp->inp_flags2 & INP_FREED) {
+ INP_RUNLOCK(inp);
+ return (1);
+ }
+ return (0);
+ }
+
+ KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+
+ INP_RUNLOCK(inp);
+ pcbinfo = inp->inp_pcbinfo;
+ uma_zfree(pcbinfo->ipi_zone, inp);
+ return (1);
+}
+
+int
+in_pcbrele_wlocked(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
- INP_INFO_WLOCK_ASSERT(ipi);
INP_WLOCK_ASSERT(inp);
- inp->inp_refcount--;
- if (inp->inp_refcount > 0)
+ if (refcount_release(&inp->inp_refcount) == 0)
return (0);
- in_pcbfree_internal(inp);
+
+ KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
+
+ INP_WUNLOCK(inp);
+ pcbinfo = inp->inp_pcbinfo;
+ uma_zfree(pcbinfo->ipi_zone, inp);
return (1);
}
/*
+ * Temporary wrapper.
+ */
+int
+in_pcbrele(struct inpcb *inp)
+{
+
+ return (in_pcbrele_wlocked(inp));
+}
+
+/*
* Unconditionally schedule an inpcb to be freed by decrementing its
* reference count, which should occur only after the inpcb has been detached
* from its socket. If another thread holds a temporary reference (acquired
* using in_pcbref()) then the free is deferred until that reference is
- * released using in_pcbrele(), but the inpcb is still unlocked.
+ * released using in_pcbrele(), but the inpcb is still unlocked. Almost all
+ * work, including removal from global lists, is done in this context, where
+ * the pcbinfo lock is held.
*/
void
in_pcbfree(struct inpcb *inp)
{
-#ifdef INVARIANTS
- struct inpcbinfo *ipi = inp->inp_pcbinfo;
-#endif
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL",
- __func__));
+ KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- INP_INFO_WLOCK_ASSERT(ipi);
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
- if (!in_pcbrele(inp))
+ /* XXXRW: Do as much as possible here. */
+#ifdef IPSEC
+ if (inp->inp_sp != NULL)
+ ipsec_delete_pcbpolicy(inp);
+#endif /* IPSEC */
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ in_pcbremlists(inp);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO) {
+ ip6_freepcbopts(inp->in6p_outputopts);
+ if (inp->in6p_moptions != NULL)
+ ip6_freemoptions(inp->in6p_moptions);
+ }
+#endif
+ if (inp->inp_options)
+ (void)m_free(inp->inp_options);
+#ifdef INET
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
+#endif
+ inp->inp_vflag = 0;
+ inp->inp_flags2 |= INP_FREED;
+ crfree(inp->inp_cred);
+#ifdef MAC
+ mac_inpcb_destroy(inp);
+#endif
+ if (!in_pcbrele_wlocked(inp))
INP_WUNLOCK(inp);
}
@@ -1106,12 +1241,6 @@ in_pcbfree(struct inpcb *inp)
* maintaining the invariant that so_pcb always points to a valid inpcb until
* in_pcbdetach().
*
- * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
- * lists, but can lead to confusing netstat output, as open sockets with
- * closed TCP connections will no longer appear to have their bound port
- * number. An explicit flag would be better, as it would allow us to leave
- * the port number intact after the connection is dropped.
- *
* XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
* in_pcbnotifyall() and in_pcbpurgeif0()?
*/
@@ -1119,23 +1248,32 @@ void
in_pcbdrop(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ /*
+ * XXXRW: Possibly we should protect the setting of INP_DROPPED with
+ * the hash lock...?
+ */
inp->inp_flags |= INP_DROPPED;
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(inp->inp_pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
}
+#ifdef INET
/*
* Common routines to return the socket addresses associated with inpcbs.
*/
@@ -1259,12 +1397,13 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
/*
- * Lookup a PCB based on the local address and port.
+ * Lookup a PCB based on the local address and port. Caller must hold the
+ * hash lock. No inpcb locks or references are acquired.
*/
#define INP_LOOKUP_MAPPED_PCB_COST 3
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
- u_short lport, int wild_okay, struct ucred *cred)
+ u_short lport, int lookupflags, struct ucred *cred)
{
struct inpcb *inp;
#ifdef INET6
@@ -1274,9 +1413,12 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
#endif
int wildcard;
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
- if (!wild_okay) {
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+
+ if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
/*
* Look for an unconnected (wildcard foreign addr) PCB that
@@ -1377,19 +1519,166 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+#ifdef PCBGROUP
/*
- * Lookup PCB in hash list.
+ * Lookup PCB in hash list, using pcbgroup tables.
*/
-struct inpcb *
-in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
- u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif /* defined(INET6) */
+ if (inp != NULL)
+ goto found;
+ } /* if (lookupflags & INPLOOKUP_WILDCARD) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation assumes
+ * that the caller has locked the hash list, and will not perform any further
+ * locking or reference operations on either the hash list or the connection.
+ */
+static struct inpcb *
+in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
struct ifnet *ifp)
{
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+
+ INP_HASH_LOCK_ASSERT(pcbinfo);
/*
* First look for an exact match.
@@ -1424,7 +1713,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Then look for a wildcard match, if requested.
*/
- if (wildcard == INPLOOKUP_WILDCARD) {
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
struct inpcb *local_wild = NULL, *local_exact = NULL;
#ifdef INET6
struct inpcb *local_wild_mapped = NULL;
@@ -1495,16 +1784,112 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
if (local_wild_mapped != NULL)
return (local_wild_mapped);
#endif /* defined(INET6) */
- } /* if (wildcard == INPLOOKUP_WILDCARD) */
+ } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
return (NULL);
}
/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
+ * hash list lock, and will return the inpcb locked (i.e., requires
+ * INPLOOKUP_LOCKPCB).
+ */
+static struct inpcb *
+in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ INP_HASH_RLOCK(pcbinfo);
+ inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+ (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+ if (inp != NULL) {
+ in_pcbref(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ } else
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
+ * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
+ */
+struct inpcb *
+in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
+ struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
+{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+
+struct inpcb *
+in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp, struct mbuf *m)
+{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+#endif /* INET */
+
+/*
* Insert PCB onto various hash lists.
*/
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@@ -1512,8 +1897,9 @@ in_pcbinshash(struct inpcb *inp)
struct inpcbport *phd;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
("in_pcbinshash: INP_INHASHLIST"));
@@ -1553,24 +1939,54 @@ in_pcbinshash(struct inpcb *inp)
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+ if (do_pcbgroup_update)
+ in_pcbgroup_update(inp);
+#endif
return (0);
}
/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't. The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature. in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
* not change after in_pcbinshash() has been called.
*/
void
-in_pcbrehash(struct inpcb *inp)
+in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct inpcbhead *head;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT(inp->inp_flags & INP_INHASHLIST,
("in_pcbrehash: !INP_INHASHLIST"));
@@ -1586,6 +2002,20 @@ in_pcbrehash(struct inpcb *inp)
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+ if (m != NULL)
+ in_pcbgroup_update_mbuf(inp, m);
+ else
+ in_pcbgroup_update(inp);
+#endif
+}
+
+void
+in_pcbrehash(struct inpcb *inp)
+{
+
+ in_pcbrehash_mbuf(inp, NULL);
}
/*
@@ -1603,16 +2033,21 @@ in_pcbremlists(struct inpcb *inp)
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
/*
@@ -1643,7 +2078,7 @@ in_pcbsosetlabel(struct socket *so)
* allocation. We return to random allocation only once we drop below
* ipport_randomcps for at least ipport_randomtime seconds.
*/
-void
+static void
ipport_tick(void *xtp)
{
VNET_ITERATOR_DECL(vnet_iter);
@@ -1664,6 +2099,30 @@ ipport_tick(void *xtp)
callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
+static void
+ip_fini(void *xtp)
+{
+
+ callout_stop(&ipport_tick_callout);
+}
+
+/*
+ * The ipport_callout should start running at about the time we attach the
+ * inet or inet6 domains.
+ */
+static void
+ipport_tick_init(const void *unused __unused)
+{
+
+ /* Start ipport_tick. */
+ callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+ callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+ SHUTDOWN_PRI_DEFAULT);
+}
+SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
+ ipport_tick_init, NULL);
+
void
inp_wlock(struct inpcb *inp)
{
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index 9f602ce2..a78c6ab6 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -1,8 +1,12 @@
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -40,8 +44,10 @@
#include <sys/_rwlock.h>
#ifdef _KERNEL
+#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
+#include <vm/uma.h>
#endif
#define in6pcb inpcb /* for KAME src sync over BSD*'s */
@@ -136,6 +142,7 @@ struct icmp6_filter;
*
* Key:
* (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (s) - Protected by another subsystem's locks
@@ -155,9 +162,12 @@ struct icmp6_filter;
*/
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -169,8 +179,9 @@ struct inpcb {
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
- void *inp_pspare[4]; /* (x) rtentry / general use */
- u_int inp_ispare[4]; /* general use */
+ void *inp_pspare[5]; /* (x) route caching / general use */
+ u_int inp_ispare[6]; /* (x) route caching / user cookie /
+ * general use */
/* Local and foreign ports, local and foreign addr. */
struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */
@@ -259,53 +270,93 @@ struct inpcbport {
u_short phd_port;
};
-/*
+/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
+ *
+ * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
+ * the former covering mutable global fields (such as the global pcb list),
+ * and the latter covering the hashed lookup tables. The lock order is:
+ *
+ * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ *
+ * Locking key:
+ *
+ * (c) Constant or nearly constant after initialisation
+ * (g) Locked by ipi_lock
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
+ * (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global list of inpcbs on the protocol.
+ * Global lock protecting global inpcb list, inpcb count, etc.
*/
- struct inpcbhead *ipi_listhead;
- u_int ipi_count;
+ struct rwlock ipi_lock;
/*
- * Global hash of inpcbs, hashed by local and foreign addresses and
- * port numbers.
+ * Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_hashbase;
- u_long ipi_hashmask;
+ struct inpcbhead *ipi_listhead; /* (g) */
+ u_int ipi_count; /* (g) */
/*
- * Global hash of inpcbs, hashed by only local port number.
+ * Generation count -- incremented each time a connection is allocated
+ * or freed.
*/
- struct inpcbporthead *ipi_porthashbase;
- u_long ipi_porthashmask;
+ u_quad_t ipi_gencnt; /* (g) */
/*
* Fields associated with port lookup and allocation.
*/
- u_short ipi_lastport;
- u_short ipi_lastlow;
- u_short ipi_lasthi;
+ u_short ipi_lastport; /* (x) */
+ u_short ipi_lastlow; /* (x) */
+ u_short ipi_lasthi; /* (x) */
/*
* UMA zone from which inpcbs are allocated for this protocol.
*/
- struct uma_zone *ipi_zone;
+ struct uma_zone *ipi_zone; /* (c) */
/*
- * Generation count--incremented each time a connection is allocated
- * or freed.
+ * Connection groups associated with this protocol. These fields are
+ * constant, but pcbgroup structures themselves are protected by
+ * per-pcbgroup locks.
*/
- u_quad_t ipi_gencnt;
- struct rwlock ipi_lock;
+ struct inpcbgroup *ipi_pcbgroups; /* (c) */
+ u_int ipi_npcbgroups; /* (c) */
+ u_int ipi_hashfields; /* (c) */
+
+ /*
+ * Global lock protecting non-pcbgroup hash lookup tables.
+ */
+ struct rwlock ipi_hash_lock;
+
+ /*
+ * Global hash of inpcbs, hashed by local and foreign addresses and
+ * port numbers.
+ */
+ struct inpcbhead *ipi_hashbase; /* (h) */
+ u_long ipi_hashmask; /* (h) */
+
+ /*
+ * Global hash of inpcbs, hashed by only local port number.
+ */
+ struct inpcbporthead *ipi_porthashbase; /* (h) */
+ u_long ipi_porthashmask; /* (h) */
+
+ /*
+ * List of wildcard inpcbs for use with pcbgroups. In the past, was
+ * per-pcbgroup but is now global. All pcbgroup locks must be held
+ * to modify the list, so any is sufficient to read it.
+ */
+ struct inpcbhead *ipi_wildbase; /* (p) */
+ u_long ipi_wildmask; /* (p) */
/*
* Pointer to network stack instance
*/
- struct vnet *ipi_vnet;
+ struct vnet *ipi_vnet; /* (c) */
/*
* general use 2
@@ -313,6 +364,32 @@ struct inpcbinfo {
void *ipi_pspare[2];
};
+#ifdef _KERNEL
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity. Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+ /*
+ * Per-connection group hash of inpcbs, hashed by local and foreign
+ * addresses and port numbers.
+ */
+ struct inpcbhead *ipg_hashbase; /* (c) */
+ u_long ipg_hashmask; /* (c) */
+
+ /*
+ * Notional affinity of this pcbgroup.
+ */
+ u_int ipg_cpu; /* (p) */
+
+ /*
+ * Per-connection group lock, not to be confused with ipi_lock.
+ * Protects the hash table hung off the group, but also the global
+ * wildcard list in inpcbinfo.
+ */
+ struct mtx ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -330,7 +407,6 @@ struct inpcbinfo {
#define INP_WLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_WLOCKED)
#define INP_UNLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
-#ifdef _KERNEL
/*
* These locking functions are for inpcb consumers outside of sys/netinet,
* more specifically, they were added for the benefit of TOE drivers. The
@@ -366,6 +442,7 @@ struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
+short inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
@@ -384,6 +461,26 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
+#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_LOCKED)
+#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_WLOCKED)
+
+#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+ MTX_DEF | MTX_DUPOK)
+#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
+
+#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
+#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -444,8 +541,21 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
+#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
+#define INP_FREED 0x00000010 /* inp itself is not valid */
+#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
+
+/*
+ * Flags passed to in_pcblookup*() functions.
+ */
+#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
+#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
+#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
+
+#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
+ INPLOOKUP_WLOCKPCB)
-#define INPLOOKUP_WILDCARD 1
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
@@ -453,6 +563,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define IPI_HASHFIELDS_NONE 0
+#define IPI_HASHFIELDS_2TUPLE 1
+#define IPI_HASHFIELDS_4TUPLE 2
+
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
@@ -482,7 +599,23 @@ VNET_DECLARE(int, ipport_tcpallocs);
#define V_ipport_stoprandom VNET(ipport_stoprandom)
#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
-extern struct callout ipport_tick_callout;
+void in_pcbinfo_destroy(struct inpcbinfo *);
+void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
+ int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+ in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+ struct in_addr, u_short);
+void in_pcbgroup_destroy(struct inpcbinfo *);
+int in_pcbgroup_enabled(struct inpcbinfo *);
+void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void in_pcbgroup_remove(struct inpcb *);
+void in_pcbgroup_update(struct inpcb *);
+void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -492,6 +625,8 @@ int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
+int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
+ struct mbuf *);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, in_addr_t *, u_short *, struct inpcb **,
struct ucred *);
@@ -500,24 +635,30 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
+int in_pcbinshash_nopcbgroup(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
struct inpcb *
- in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
+ in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
+struct inpcb *
+ in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
+ struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbref(struct inpcb *);
void in_pcbrehash(struct inpcb *);
+void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
+int in_pcbrele_rlocked(struct inpcb *);
+int in_pcbrele_wlocked(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
int in_getsockaddr(struct socket *so, struct sockaddr **nam);
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
-void ipport_tick(void *xtp);
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
diff --git a/freebsd/sys/netinet/in_proto.c b/freebsd/sys/netinet/in_proto.c
index b479e09e..1eef2c72 100644
--- a/freebsd/sys/netinet/in_proto.c
+++ b/freebsd/sys/netinet/in_proto.c
@@ -37,8 +37,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_pf.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_mpath.h>
@@ -52,14 +52,26 @@ __FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <sys/sysctl.h>
+/*
+ * While this file provides the domain and protocol switch tables for IPv4, it
+ * also provides the sysctl node declarations for net.inet.* often shared with
+ * IPv6 for common features or by upper layer protocols. In case of no IPv4
+ * support compile out everything but these sysctl nodes.
+ */
+#ifdef INET
#include <net/if.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <net/vnet.h>
+#endif /* INET */
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
+#endif
+
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -90,11 +102,6 @@ static struct pr_usrreqs nousrreqs;
#include <netinet/sctp_var.h>
#endif /* SCTP */
-#ifdef DEV_PFSYNC
-#include <net/pfvar.h>
-#include <net/if_pfsync.h>
-#endif
-
FEATURE(inet, "Internet Protocol version 4");
extern struct domain inetdomain;
@@ -306,17 +313,6 @@ struct protosw inetsw[] = {
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
},
-#ifdef DEV_PFSYNC
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_PFSYNC,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = pfsync_input,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-},
-#endif /* DEV_PFSYNC */
/* Spacer n-times for loadable protocols. */
IPPROTOSPACER,
IPPROTOSPACER,
@@ -364,6 +360,7 @@ struct domain inetdomain = {
};
VNET_DOMAIN_SET(inet);
+#endif /* INET */
SYSCTL_NODE(_net, PF_INET, inet, CTLFLAG_RW, 0,
"Internet Family");
@@ -385,6 +382,3 @@ SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP");
SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP");
#endif /* IPSEC */
SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
-#ifdef DEV_PFSYNC
-SYSCTL_NODE(_net_inet, IPPROTO_PFSYNC, pfsync, CTLFLAG_RW, 0, "PFSYNC");
-#endif
diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h
index c04d45b9..b8477309 100644
--- a/freebsd/sys/netinet/in_var.h
+++ b/freebsd/sys/netinet/in_var.h
@@ -60,12 +60,9 @@ struct in_ifaddr {
struct ifaddr ia_ifa; /* protocol-independent info */
#define ia_ifp ia_ifa.ifa_ifp
#define ia_flags ia_ifa.ifa_flags
- /* ia_{,sub}net{,mask} in host order */
- u_long ia_net; /* network number of interface */
- u_long ia_netmask; /* mask of net part */
- u_long ia_subnet; /* subnet number, including net */
- u_long ia_subnetmask; /* mask of subnet part */
- struct in_addr ia_netbroadcast; /* to recognize net broadcasts */
+ /* ia_subnet{,mask} in host order */
+ u_long ia_subnet; /* subnet address */
+ u_long ia_subnetmask; /* mask of subnet */
LIST_ENTRY(in_ifaddr) ia_hash; /* entry in bucket of inet addresses */
TAILQ_ENTRY(in_ifaddr) ia_link; /* list of internet addresses */
struct sockaddr_in ia_addr; /* reserve space for interface name */
@@ -162,14 +159,16 @@ do { \
#define IFP_TO_IA(ifp, ia) \
/* struct ifnet *ifp; */ \
/* struct in_ifaddr *ia; */ \
-{ \
+do { \
+ IN_IFADDR_RLOCK(); \
for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \
(ia) != NULL && (ia)->ia_ifp != (ifp); \
(ia) = TAILQ_NEXT((ia), ia_link)) \
continue; \
if ((ia) != NULL) \
ifa_ref(&(ia)->ia_ifa); \
-}
+ IN_IFADDR_RUNLOCK(); \
+} while (0)
#endif
/*
diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h
index 6c9482f9..79afeb8f 100644
--- a/freebsd/sys/netinet/ip.h
+++ b/freebsd/sys/netinet/ip.h
@@ -48,11 +48,11 @@
*/
struct ip {
#if BYTE_ORDER == LITTLE_ENDIAN
- u_int ip_hl:4, /* header length */
+ u_char ip_hl:4, /* header length */
ip_v:4; /* version */
#endif
#if BYTE_ORDER == BIG_ENDIAN
- u_int ip_v:4, /* version */
+ u_char ip_v:4, /* version */
ip_hl:4; /* header length */
#endif
u_char ip_tos; /* type of service */
@@ -167,11 +167,11 @@ struct ip_timestamp {
u_char ipt_len; /* size of structure (variable) */
u_char ipt_ptr; /* index of current entry */
#if BYTE_ORDER == LITTLE_ENDIAN
- u_int ipt_flg:4, /* flags, see below */
+ u_char ipt_flg:4, /* flags, see below */
ipt_oflw:4; /* overflow counter */
#endif
#if BYTE_ORDER == BIG_ENDIAN
- u_int ipt_oflw:4, /* overflow counter */
+ u_char ipt_oflw:4, /* overflow counter */
ipt_flg:4; /* flags, see below */
#endif
union ipt_timestamp {
diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h
index 3fb08a78..8f498410 100644
--- a/freebsd/sys/netinet/ip6.h
+++ b/freebsd/sys/netinet/ip6.h
@@ -263,7 +263,7 @@ struct ip6_frag {
/*
* IP6_EXTHDR_CHECK ensures that region between the IP6 header and the
* target header (including IPv6 itself, extension headers and
- * TCP/UDP/ICMP6 headers) are continuous. KAME requires drivers
+ * TCP/UDP/ICMP6 headers) are contiguous. KAME requires drivers
* to store incoming data into one internal mbuf or one or more external
* mbufs(never into two or more internal mbufs). Thus, the third case is
* supposed to never be matched but is prepared just in case.
@@ -275,24 +275,24 @@ do { \
if (((m)->m_flags & M_LOOP) && \
((m)->m_len < (off) + (hlen)) && \
(((m) = m_pullup((m), (off) + (hlen))) == NULL)) { \
- V_ip6stat.ip6s_exthdrtoolong++; \
+ IP6STAT_INC(ip6s_exthdrtoolong); \
return ret; \
} else if ((m)->m_flags & M_EXT) { \
if ((m)->m_len < (off) + (hlen)) { \
- V_ip6stat.ip6s_exthdrtoolong++; \
+ IP6STAT_INC(ip6s_exthdrtoolong); \
m_freem(m); \
return ret; \
} \
} else { \
if ((m)->m_len < (off) + (hlen)) { \
- V_ip6stat.ip6s_exthdrtoolong++; \
+ IP6STAT_INC(ip6s_exthdrtoolong); \
m_freem(m); \
return ret; \
} \
} \
} else { \
if ((m)->m_len < (off) + (hlen)) { \
- V_ip6stat.ip6s_tooshort++; \
+ IP6STAT_INC(ip6s_tooshort); \
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); \
m_freem(m); \
return ret; \
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index a08c3fb8..a34c10c3 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -68,14 +68,19 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/vnet.h>
-#ifdef INET
+#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
-#include <netinet/in_systm.h>
+#include <netinet/ip_carp.h>
#include <netinet/ip.h>
+
+#include <machine/in_cksum.h>
+#endif
+
+#ifdef INET
+#include <netinet/in_systm.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
-#include <machine/in_cksum.h>
#endif
#ifdef INET6
@@ -84,11 +89,11 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
+#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif
#include <crypto/sha1.h>
-#include <netinet/ip_carp.h>
#define CARP_IFNAME "carp"
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
@@ -98,7 +103,9 @@ struct carp_softc {
struct ifnet *sc_ifp; /* Interface clue */
struct ifnet *sc_carpdev; /* Pointer to parent interface */
struct in_ifaddr *sc_ia; /* primary iface address */
+#ifdef INET
struct ip_moptions sc_imo;
+#endif
#ifdef INET6
struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
struct ip6_moptions sc_im6o;
@@ -208,7 +215,9 @@ static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
static void carp_send_ad_all(void);
static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
+#ifdef INET
static void carp_send_arp(struct carp_softc *);
+#endif
static void carp_master_down(void *);
static void carp_master_down_locked(struct carp_softc *);
static int carp_ioctl(struct ifnet *, u_long, caddr_t);
@@ -217,12 +226,16 @@ static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
static void carp_start(struct ifnet *);
static void carp_setrun(struct carp_softc *, sa_family_t);
static void carp_set_state(struct carp_softc *, int);
+#ifdef INET
static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
+#endif
enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+#ifdef INET
static void carp_multicast_cleanup(struct carp_softc *, int dofree);
static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
+#endif
static void carp_carpdev_state_locked(struct carp_if *);
static void carp_sc_state_locked(struct carp_softc *);
#ifdef INET6
@@ -371,6 +384,7 @@ carp_setroute(struct carp_softc *sc, int cmd)
s = splnet();
TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+#ifdef INET
if (ifa->ifa_addr->sa_family == AF_INET &&
sc->sc_carpdev != NULL) {
int count = carp_addrcount(
@@ -381,6 +395,7 @@ carp_setroute(struct carp_softc *sc, int cmd)
(cmd == RTM_DELETE && count == 0))
rtinit(ifa, cmd, RTF_UP | RTF_HOST);
}
+#endif
}
splx(s);
}
@@ -406,12 +421,14 @@ carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc->sc_advskew = 0;
sc->sc_init_counter = 1;
sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
+#ifdef INET
sc->sc_imo.imo_membership = (struct in_multi **)malloc(
(sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
M_WAITOK);
sc->sc_imo.imo_mfilters = NULL;
sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
sc->sc_imo.imo_multicast_vif = -1;
+#endif
#ifdef INET6
sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
(sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
@@ -458,7 +475,9 @@ carp_clone_destroy(struct ifnet *ifp)
bpfdetach(ifp);
if_detach(ifp);
if_free_type(ifp, IFT_ETHER);
+#ifdef INET
free(sc->sc_imo.imo_membership, M_CARP);
+#endif
#ifdef INET6
free(sc->sc_im6o.im6o_membership, M_CARP);
#endif
@@ -497,7 +516,9 @@ carpdetach(struct carp_softc *sc, int unlock)
carp_set_state(sc, INIT);
SC2IFP(sc)->if_flags &= ~IFF_UP;
carp_setrun(sc, 0);
+#ifdef INET
carp_multicast_cleanup(sc, unlock);
+#endif
#ifdef INET6
carp_multicast6_cleanup(sc, unlock);
#endif
@@ -542,6 +563,7 @@ carp_ifdetach(void *arg __unused, struct ifnet *ifp)
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
*/
+#ifdef INET
void
carp_input(struct mbuf *m, int hlen)
{
@@ -632,6 +654,7 @@ carp_input(struct mbuf *m, int hlen)
carp_input_c(m, ch, AF_INET);
}
+#endif
#ifdef INET6
int
@@ -722,12 +745,16 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
- struct ip *ip = mtod(m, struct ip *);
uint32_t af1 = af;
+#ifdef INET
+ struct ip *ip = mtod(m, struct ip *);
/* BPF wants net byte order */
- ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
- ip->ip_off = htons(ip->ip_off);
+ if (af == AF_INET) {
+ ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
+ ip->ip_off = htons(ip->ip_off);
+ }
+#endif
bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
}
@@ -1083,6 +1110,7 @@ carp_send_ad_locked(struct carp_softc *sc)
}
+#ifdef INET
/*
* Broadcast a gratuitous ARP request containing
* the virtual router MAC address for each IP address
@@ -1104,6 +1132,7 @@ carp_send_arp(struct carp_softc *sc)
DELAY(1000); /* XXX */
}
}
+#endif
#ifdef INET6
static void
@@ -1126,6 +1155,7 @@ carp_send_na(struct carp_softc *sc)
}
#endif /* INET6 */
+#ifdef INET
static int
carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
{
@@ -1229,6 +1259,7 @@ carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
CARP_UNLOCK(cif);
return (0);
}
+#endif
#ifdef INET6
struct ifaddr *
@@ -1355,7 +1386,9 @@ carp_master_down_locked(struct carp_softc *sc)
case BACKUP:
carp_set_state(sc, MASTER);
carp_send_ad_locked(sc);
+#ifdef INET
carp_send_arp(sc);
+#endif
#ifdef INET6
carp_send_na(sc);
#endif /* INET6 */
@@ -1434,6 +1467,7 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
}
}
+#ifdef INET
static void
carp_multicast_cleanup(struct carp_softc *sc, int dofree)
{
@@ -1453,6 +1487,7 @@ carp_multicast_cleanup(struct carp_softc *sc, int dofree)
imo->imo_num_memberships = 0;
imo->imo_multicast_ifp = NULL;
}
+#endif
#ifdef INET6
static void
@@ -1475,6 +1510,7 @@ carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
}
#endif
+#ifdef INET
static int
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
@@ -1651,6 +1687,7 @@ carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
return (error);
}
+#endif
#ifdef INET6
static int
@@ -2351,13 +2388,13 @@ carp_mod_load(void)
printf("carp: error %d attaching to PF_INET6\n",
proto_reg[CARP_INET6]);
carp_mod_cleanup();
- return (EINVAL);
+ return (proto_reg[CARP_INET6]);
}
err = ip6proto_register(IPPROTO_CARP);
if (err) {
printf("carp: error %d registering with INET6\n", err);
carp_mod_cleanup();
- return (EINVAL);
+ return (err);
}
#endif
#ifdef INET
@@ -2367,13 +2404,13 @@ carp_mod_load(void)
printf("carp: error %d attaching to PF_INET\n",
proto_reg[CARP_INET]);
carp_mod_cleanup();
- return (EINVAL);
+ return (proto_reg[CARP_INET]);
}
err = ipproto_register(IPPROTO_CARP);
if (err) {
printf("carp: error %d registering with INET\n", err);
carp_mod_cleanup();
- return (EINVAL);
+ return (err);
}
#endif
return 0;
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 5fb32ba6..879f411f 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#error "IPDIVERT requires INET."
#endif
#endif
+#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -50,20 +51,13 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
-#include <sys/rwlock.h>
-#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/sx.h>
#include <sys/sysctl.h>
-#include <sys/systm.h>
-
-#include <vm/uma.h>
+#include <net/vnet.h>
#include <net/if.h>
#include <net/netisr.h>
-#include <net/route.h>
-#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
@@ -71,6 +65,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
#ifdef SCTP
#include <netinet/sctp_crc32.h>
#endif
@@ -156,35 +154,21 @@ static void
div_init(void)
{
- INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
- LIST_INIT(&V_divcb);
- V_divcbinfo.ipi_listhead = &V_divcb;
-#ifdef VIMAGE
- V_divcbinfo.ipi_vnet = curvnet;
-#endif
/*
- * XXX We don't use the hash list for divert IP, but it's easier
- * to allocate a one entry hash list than it is to check all
- * over the place for hashbase == NULL.
+ * XXX We don't use the hash list for divert IP, but it's easier to
+ * allocate one-entry hash lists than it is to check all over the
+ * place for hashbase == NULL.
*/
- V_divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
- V_divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
- &V_divcbinfo.ipi_porthashmask);
- V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
- NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
- UMA_ZONE_NOFREE);
- uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
+ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
+ div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
}
static void
div_destroy(void)
{
- INP_INFO_LOCK_DESTROY(&V_divcbinfo);
- uma_zdestroy(V_divcbinfo.ipi_zone);
- hashdestroy(V_divcbinfo.ipi_hashbase, M_PCB, V_divcbinfo.ipi_hashmask);
- hashdestroy(V_divcbinfo.ipi_porthashbase, M_PCB,
- V_divcbinfo.ipi_porthashmask);
+ in_pcbinfo_destroy(&V_divcbinfo);
}
/*
@@ -335,10 +319,10 @@ static int
div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
struct mbuf *control)
{
+ struct ip *const ip = mtod(m, struct ip *);
struct m_tag *mtag;
struct ipfw_rule_ref *dt;
int error = 0;
- struct mbuf *options;
/*
* An mbuf may hasn't come from userland, but we pretend
@@ -390,71 +374,104 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
/* Reinject packet into the system as incoming or outgoing */
if (!sin || sin->sin_addr.s_addr == 0) {
- struct ip *const ip = mtod(m, struct ip *);
+ struct mbuf *options = NULL;
struct inpcb *inp;
dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT;
inp = sotoinpcb(so);
INP_RLOCK(inp);
- /*
- * Don't allow both user specified and setsockopt options,
- * and don't allow packet length sizes that will crash
- */
- if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
- ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
- error = EINVAL;
- INP_RUNLOCK(inp);
- m_freem(m);
- } else {
+ switch (ip->ip_v) {
+ case IPVERSION:
+ /*
+ * Don't allow both user specified and setsockopt
+ * options, and don't allow packet length sizes that
+ * will crash.
+ */
+ if ((((ip->ip_hl << 2) != sizeof(struct ip)) &&
+ inp->inp_options != NULL) ||
+ ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
+ error = EINVAL;
+ INP_RUNLOCK(inp);
+ goto cantsend;
+ }
+
/* Convert fields to host order for ip_output() */
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ struct ip6_hdr *const ip6 = mtod(m, struct ip6_hdr *);
+
+ /* Don't allow packet length sizes that will crash */
+ if (((u_short)ntohs(ip6->ip6_plen) > m->m_pkthdr.len)) {
+ error = EINVAL;
+ INP_RUNLOCK(inp);
+ goto cantsend;
+ }
- /* Send packet to output processing */
- KMOD_IPSTAT_INC(ips_rawout); /* XXX */
+ ip6->ip6_plen = ntohs(ip6->ip6_plen);
+ break;
+ }
+#endif
+ default:
+ error = EINVAL;
+ INP_RUNLOCK(inp);
+ goto cantsend;
+ }
+
+ /* Send packet to output processing */
+ KMOD_IPSTAT_INC(ips_rawout); /* XXX */
#ifdef MAC
- mac_inpcb_create_mbuf(inp, m);
+ mac_inpcb_create_mbuf(inp, m);
#endif
- /*
- * Get ready to inject the packet into ip_output().
- * Just in case socket options were specified on the
- * divert socket, we duplicate them. This is done
- * to avoid having to hold the PCB locks over the call
- * to ip_output(), as doing this results in a number of
- * lock ordering complexities.
- *
- * Note that we set the multicast options argument for
- * ip_output() to NULL since it should be invariant that
- * they are not present.
- */
- KASSERT(inp->inp_moptions == NULL,
- ("multicast options set on a divert socket"));
- options = NULL;
- /*
- * XXXCSJP: It is unclear to me whether or not it makes
- * sense for divert sockets to have options. However,
- * for now we will duplicate them with the INP locks
- * held so we can use them in ip_output() without
- * requring a reference to the pcb.
- */
- if (inp->inp_options != NULL) {
- options = m_dup(inp->inp_options, M_DONTWAIT);
- if (options == NULL)
- error = ENOBUFS;
- }
- INP_RUNLOCK(inp);
- if (error == ENOBUFS) {
- m_freem(m);
- return (error);
+ /*
+ * Get ready to inject the packet into ip_output().
+ * Just in case socket options were specified on the
+ * divert socket, we duplicate them. This is done
+ * to avoid having to hold the PCB locks over the call
+ * to ip_output(), as doing this results in a number of
+ * lock ordering complexities.
+ *
+ * Note that we set the multicast options argument for
+ * ip_output() to NULL since it should be invariant that
+ * they are not present.
+ */
+ KASSERT(inp->inp_moptions == NULL,
+ ("multicast options set on a divert socket"));
+ /*
+ * XXXCSJP: It is unclear to me whether or not it makes
+ * sense for divert sockets to have options. However,
+ * for now we will duplicate them with the INP locks
+ * held so we can use them in ip_output() without
+ * requring a reference to the pcb.
+ */
+ if (inp->inp_options != NULL) {
+ options = m_dup(inp->inp_options, M_NOWAIT);
+ if (options == NULL) {
+ INP_RUNLOCK(inp);
+ error = ENOBUFS;
+ goto cantsend;
}
+ }
+ INP_RUNLOCK(inp);
+
+ switch (ip->ip_v) {
+ case IPVERSION:
error = ip_output(m, options, NULL,
- ((so->so_options & SO_DONTROUTE) ?
- IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
- IP_RAWOUTPUT, NULL, NULL);
- if (options != NULL)
- m_freem(options);
+ ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0)
+ | IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
+ break;
+#endif
}
+ if (options != NULL)
+ m_freem(options);
} else {
dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN;
if (m->m_pkthdr.rcvif == NULL) {
@@ -479,14 +496,26 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
mac_socket_create_mbuf(so, m);
#endif
/* Send packet to input processing via netisr */
- netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
+ switch (ip->ip_v) {
+ case IPVERSION:
+ netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ netisr_queue_src(NETISR_IPV6, (uintptr_t)so, m);
+ break;
+#endif
+ default:
+ error = EINVAL;
+ goto cantsend;
+ }
}
- return error;
+ return (error);
cantsend:
m_freem(m);
- return error;
+ return (error);
}
static int
@@ -554,7 +583,9 @@ div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
INP_INFO_WLOCK(&V_divcbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_divcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_divcbinfo);
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_divcbinfo);
return error;
@@ -683,9 +714,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_divcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_divcbinfo);
@@ -709,7 +740,8 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
}
#ifdef SYSCTL_NODE
-SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT");
+static SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0,
+ "IPDIVERT");
SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
NULL, 0, div_pcblist, "S,xinpcb", "List of active divert sockets");
#endif
diff --git a/freebsd/sys/netinet/ip_dummynet.h b/freebsd/sys/netinet/ip_dummynet.h
index 0bbc3263..dc2c3412 100644
--- a/freebsd/sys/netinet/ip_dummynet.h
+++ b/freebsd/sys/netinet/ip_dummynet.h
@@ -87,14 +87,14 @@ enum {
DN_SYSCTL_SET,
DN_LAST,
-} ;
+};
enum { /* subtype for schedulers, flowset and the like */
DN_SCHED_UNKNOWN = 0,
DN_SCHED_FIFO = 1,
DN_SCHED_WF2QP = 2,
/* others are in individual modules */
-} ;
+};
enum { /* user flags */
DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */
@@ -113,16 +113,16 @@ enum { /* user flags */
struct dn_link {
struct dn_id oid;
- /*
+ /*
* Userland sets bw and delay in bits/s and milliseconds.
* The kernel converts this back and forth to bits/tick and ticks.
* XXX what about burst ?
- */
+ */
int32_t link_nr;
int bandwidth; /* bit/s or bits/tick. */
int delay; /* ms and ticks */
uint64_t burst; /* scaled. bits*Hz XXX */
-} ;
+};
/*
* A flowset, which is a template for flows. Contains parameters
@@ -132,13 +132,13 @@ struct dn_link {
*/
struct dn_fs {
struct dn_id oid;
- uint32_t fs_nr; /* the flowset number */
- uint32_t flags; /* userland flags */
- int qsize ; /* queue size in slots or bytes */
- int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
+ uint32_t fs_nr; /* the flowset number */
+ uint32_t flags; /* userland flags */
+ int qsize; /* queue size in slots or bytes */
+ int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
uint32_t buckets; /* buckets used for the queue hash table */
- struct ipfw_flow_id flow_mask ;
+ struct ipfw_flow_id flow_mask;
uint32_t sched_nr; /* the scheduler we attach to */
/* generic scheduler parameters. Leave them at -1 if unset.
* Now we use 0: weight, 1: lmax, 2: priority
@@ -149,14 +149,14 @@ struct dn_fs {
* weight and probabilities are in the range 0..1 represented
* in fixed point arithmetic with SCALE_RED decimal bits.
*/
-#define SCALE_RED 16
-#define SCALE(x) ( (x) << SCALE_RED )
-#define SCALE_VAL(x) ( (x) >> SCALE_RED )
-#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
- int w_q ; /* queue weight (scaled) */
- int max_th ; /* maximum threshold for queue (scaled) */
- int min_th ; /* minimum threshold for queue (scaled) */
- int max_p ; /* maximum value for p_b (scaled) */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
};
@@ -177,10 +177,10 @@ struct dn_flow {
};
- /*
+/*
* Scheduler template, mostly indicating the name, number,
* sched_mask and buckets.
- */
+ */
struct dn_sch {
struct dn_id oid;
uint32_t sched_nr; /* N, scheduler number */
@@ -199,14 +199,14 @@ struct dn_sch {
#define ED_MAX_SAMPLES_NO 1024
struct dn_profile {
struct dn_id oid;
- /* fields to simulate a delay profile */
+ /* fields to simulate a delay profile */
#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int link_nr;
- int loss_level;
- int bandwidth; // XXX use link bandwidth?
- int samples_no; /* actual length of samples[] */
- int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
+ char name[ED_MAX_NAME_LEN];
+ int link_nr;
+ int loss_level;
+ int bandwidth; // XXX use link bandwidth?
+ int samples_no; /* actual len of samples[] */
+ int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
};
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
index 43f10ef9..863b9a16 100644
--- a/freebsd/sys/netinet/ip_fastfwd.c
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -153,8 +153,8 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
/*
* Try to forward a packet based on the destination address.
* This is a fast path optimized for the plain forwarding case.
- * If the packet is handled (and consumed) here then we return 1;
- * otherwise 0 is returned and the packet should be delivered
+ * If the packet is handled (and consumed) here then we return NULL;
+ * otherwise mbuf is returned and the packet should be delivered
* to ip_input for full processing.
*/
struct mbuf *
@@ -169,9 +169,7 @@ ip_fastforward(struct mbuf *m)
u_short sum, ip_len;
int error = 0;
int hlen, mtu;
-#ifdef IPFIREWALL_FORWARD
- struct m_tag *fwd_tag;
-#endif
+ struct m_tag *fwd_tag = NULL;
/*
* Are we active and forwarding packets?
@@ -380,14 +378,13 @@ ip_fastforward(struct mbuf *m)
* Go on with new destination address
*/
}
-#ifdef IPFIREWALL_FORWARD
+
if (m->m_flags & M_FASTFWD_OURS) {
/*
* ipfw changed it for a local address on this host.
*/
goto forwardlocal;
}
-#endif /* IPFIREWALL_FORWARD */
passin:
/*
@@ -457,20 +454,13 @@ passin:
/*
* Destination address changed?
*/
-#ifndef IPFIREWALL_FORWARD
- if (odest.s_addr != dest.s_addr) {
-#else
- fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (m->m_flags & M_IP_NEXTHOP)
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
-#endif /* IPFIREWALL_FORWARD */
/*
* Is it now for a local address on this host?
*/
-#ifndef IPFIREWALL_FORWARD
- if (in_localip(dest)) {
-#else
if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) {
-#endif /* IPFIREWALL_FORWARD */
forwardlocal:
/*
* Return packet for processing by ip_input().
@@ -485,13 +475,12 @@ forwardlocal:
/*
* Redo route lookup with new destination address
*/
-#ifdef IPFIREWALL_FORWARD
if (fwd_tag) {
dest.s_addr = ((struct sockaddr_in *)
(fwd_tag + 1))->sin_addr.s_addr;
m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP_NEXTHOP;
}
-#endif /* IPFIREWALL_FORWARD */
RTFREE(ro.ro_rt);
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
return NULL; /* icmp unreach already sent */
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
index 69311a79..14b08f5e 100644
--- a/freebsd/sys/netinet/ip_fw.h
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -211,12 +211,20 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_SETFIB, /* arg1=FIB number */
O_FIB, /* arg1=FIB desired fib number */
+
+ O_SOCKARG, /* socket argument */
O_CALLRETURN, /* arg1=called rule number */
+ O_FORWARD_IP6, /* fwd sockaddr_in6 */
+
+ O_DSCP, /* 2 u32 = DSCP mask */
+ O_SETDSCP, /* arg1=DSCP value */
+
O_LAST_OPCODE /* not an opcode! */
};
+
/*
* The extension header are filtered only for presence using a bit
* vector with a flag for each header.
@@ -309,6 +317,14 @@ typedef struct _ipfw_insn_sa {
} ipfw_insn_sa;
/*
+ * This is used to forward to a given address (ipv6).
+ */
+typedef struct _ipfw_insn_sa6 {
+ ipfw_insn o;
+ struct sockaddr_in6 sa;
+} ipfw_insn_sa6;
+
+/*
* This is used for MAC addr-mask pairs.
*/
typedef struct _ipfw_insn_mac {
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
index 0fc1770f..25c9698e 100644
--- a/freebsd/sys/netinet/ip_gre.c
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -19,13 +19,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -214,6 +207,11 @@ gre_input2(struct mbuf *m ,int hlen, u_char proto)
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
+ if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
+ m_freem(m);
+ return(NULL);
+ }
+
m->m_pkthdr.rcvif = GRE2IFP(sc);
netisr_queue(isr, m);
@@ -298,6 +296,11 @@ gre_mobile_input(struct mbuf *m, int hlen)
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
+ if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
+ m_freem(m);
+ return;
+ }
+
m->m_pkthdr.rcvif = GRE2IFP(sc);
netisr_queue(NETISR_IP, m);
diff --git a/freebsd/sys/netinet/ip_gre.h b/freebsd/sys/netinet/ip_gre.h
index 1fb67d93..d2f3866a 100644
--- a/freebsd/sys/netinet/ip_gre.h
+++ b/freebsd/sys/netinet/ip_gre.h
@@ -16,13 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index 728e57ec..b003d03f 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -34,6 +34,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/sys/param.h>
@@ -64,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcpip.h>
#include <netinet/icmp_var.h>
+#ifdef INET
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
@@ -72,12 +74,26 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
+#endif /* INET */
/*
* ICMP routines: error generation, receive packet processing, and
* routines to turnaround packets back to the originator, and
* host table maintenance routines.
*/
+static VNET_DEFINE(int, icmplim) = 200;
+#define V_icmplim VNET(icmplim)
+SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+ &VNET_NAME(icmplim), 0,
+ "Maximum number of ICMP responses per second");
+
+static VNET_DEFINE(int, icmplim_output) = 1;
+#define V_icmplim_output VNET(icmplim_output)
+SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
+ &VNET_NAME(icmplim_output), 0,
+ "Enable rate limiting of ICMP responses");
+
+#ifdef INET
VNET_DEFINE(struct icmpstat, icmpstat);
SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
&VNET_NAME(icmpstat), icmpstat, "");
@@ -102,18 +118,6 @@ SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
&VNET_NAME(log_redirect), 0,
"Log ICMP redirects to the console");
-static VNET_DEFINE(int, icmplim) = 200;
-#define V_icmplim VNET(icmplim)
-SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
- &VNET_NAME(icmplim), 0,
- "Maximum number of ICMP responses per second");
-
-static VNET_DEFINE(int, icmplim_output) = 1;
-#define V_icmplim_output VNET(icmplim_output)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
- &VNET_NAME(icmplim_output), 0,
- "Enable rate limiting of ICMP responses");
-
static VNET_DEFINE(char, reply_src[IFNAMSIZ]);
#define V_reply_src VNET(reply_src)
SYSCTL_VNET_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
@@ -702,6 +706,8 @@ icmp_reflect(struct mbuf *m)
goto done; /* Ip_output() will check for broadcast */
}
+ m_addr_changed(m);
+
t = ip->ip_dst;
ip->ip_dst = ip->ip_src;
@@ -953,6 +959,7 @@ ip_next_mtu(int mtu, int dir)
}
return 0;
}
+#endif /* INET */
/*
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index b1154c79..2dbb2a7a 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
@@ -104,11 +103,6 @@ SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
&VNET_NAME(ipsendredirects), 0,
"Enable sending IP redirects");
-VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
- &VNET_NAME(ip_defttl), 0,
- "Maximum TTL on IP packets");
-
static VNET_DEFINE(int, ip_keepfaith);
#define V_ip_keepfaith VNET(ip_keepfaith)
SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
@@ -196,8 +190,6 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
&VNET_NAME(maxfragsperpacket), 0,
"Maximum number of IPv4 fragments allowed per packet");
-struct callout ipport_tick_callout;
-
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
&ip_mtu, 0, "Default MTU");
@@ -220,8 +212,6 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
"number of entries in the per-cpu output flow caches");
#endif
-VNET_DEFINE(int, fw_one_pass) = 1;
-
static void ip_freef(struct ipqhead *, struct ipq *);
/*
@@ -356,11 +346,6 @@ ip_init(void)
ip_protox[pr->pr_protocol] = pr - inetsw;
}
- /* Start ipport_tick. */
- callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
- callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
- EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
- SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
@@ -385,13 +370,6 @@ ip_destroy(void)
}
#endif
-void
-ip_fini(void *xtp)
-{
-
- callout_stop(&ipport_tick_callout);
-}
-
/*
* Ip input routine. Checksum and byte swap header. If fragmented
* try to reassemble. Process options. Pass to next level.
@@ -540,22 +518,22 @@ tooshort:
dchg = (odst.s_addr != ip->ip_dst.s_addr);
ifp = m->m_pkthdr.rcvif;
-#ifdef IPFIREWALL_FORWARD
if (m->m_flags & M_FASTFWD_OURS) {
m->m_flags &= ~M_FASTFWD_OURS;
goto ours;
}
- if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
- /*
- * Directly ship the packet on. This allows forwarding
- * packets originally destined to us to some other directly
- * connected host.
- */
- ip_forward(m, dchg);
- return;
+ if (m->m_flags & M_IP_NEXTHOP) {
+ dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
+ if (dchg != 0) {
+ /*
+ * Directly ship the packet on. This allows
+ * forwarding packets originally destined to us
+ * to some other directly connected host.
+ */
+ ip_forward(m, 1);
+ return;
+ }
}
-#endif /* IPFIREWALL_FORWARD */
-
passin:
/*
* Process options and, if not destined for us,
@@ -646,11 +624,6 @@ passin:
IF_ADDR_RUNLOCK(ifp);
goto ours;
}
- if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) {
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
- goto ours;
- }
#ifdef BOOTP_COMPAT
if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
ifa_ref(ifa);
@@ -1524,8 +1497,7 @@ ip_forward(struct mbuf *m, int srcrt)
if (error == EMSGSIZE && ro.ro_rt)
mtu = ro.ro_rt->rt_rmx.rmx_mtu;
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
+ RO_RTFREE(&ro);
if (error)
IPSTAT_INC(ips_cantforward);
diff --git a/freebsd/sys/netinet/ip_ipsec.c b/freebsd/sys/netinet/ip_ipsec.c
index 35ea9cd5..f3516f1c 100644
--- a/freebsd/sys/netinet/ip_ipsec.c
+++ b/freebsd/sys/netinet/ip_ipsec.c
@@ -262,8 +262,7 @@ ip_ipsec_mtu(struct mbuf *m, int mtu)
* -1 = packet was reinjected and stop processing packet
*/
int
-ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
- struct ifnet **ifp)
+ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error)
{
#ifdef IPSEC
struct secpolicy *sp = NULL;
@@ -392,20 +391,6 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
} else {
/* No IPsec processing for this packet. */
}
-#ifdef notyet
- /*
- * If deferred crypto processing is needed, check that
- * the interface supports it.
- */
- mtag = m_tag_find(*m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
- if (mtag != NULL && ifp != NULL &&
- ((*ifp)->if_capenable & IFCAP_IPSEC) == 0) {
- /* notify IPsec to do its own crypto */
- ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
- *error = EHOSTUNREACH;
- goto bad;
- }
-#endif
}
done:
if (sp != NULL)
diff --git a/freebsd/sys/netinet/ip_ipsec.h b/freebsd/sys/netinet/ip_ipsec.h
index 31bc86a1..2870c114 100644
--- a/freebsd/sys/netinet/ip_ipsec.h
+++ b/freebsd/sys/netinet/ip_ipsec.h
@@ -36,6 +36,5 @@ int ip_ipsec_filtertunnel(struct mbuf *);
int ip_ipsec_fwd(struct mbuf *);
int ip_ipsec_input(struct mbuf *);
int ip_ipsec_mtu(struct mbuf *, int);
-int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
- struct ifnet **);
+int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *);
#endif
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index 18419a74..6fc5cc68 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -116,8 +116,6 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
-#include <security/mac/mac_framework.h>
-
#ifndef KTR_IPMF
#define KTR_IPMF KTR_INET
#endif
@@ -928,7 +926,6 @@ add_vif(struct vifctl *vifcp)
vifp->v_pkt_out = 0;
vifp->v_bytes_in = 0;
vifp->v_bytes_out = 0;
- bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
if (V_numvifs <= vifcp->vifc_vifi)
@@ -1036,6 +1033,8 @@ expire_mfc(struct mfc *rt)
{
struct rtdetq *rte, *nrte;
+ MFC_LOCK_ASSERT();
+
free_bw_list(rt->mfc_bw_meter);
TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
@@ -1704,7 +1703,7 @@ send_packet(struct vif *vifp, struct mbuf *m)
* should get rejected because they appear to come from
* the loopback interface, thus preventing looping.
*/
- error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
+ error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
(ptrdiff_t)(vifp - V_viftable), error);
}
@@ -2809,9 +2808,9 @@ out_locked:
return (error);
}
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable,
- "IPv4 Multicast Forwarding Table (struct *mfc[mfchashsize], "
- "netinet/ip_mroute.h)");
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
+ sysctl_mfctable, "IPv4 Multicast Forwarding Table "
+ "(struct *mfc[mfchashsize], netinet/ip_mroute.h)");
static void
vnet_mroute_init(const void *unused __unused)
diff --git a/freebsd/sys/netinet/ip_mroute.h b/freebsd/sys/netinet/ip_mroute.h
index c54c75aa..e945b92c 100644
--- a/freebsd/sys/netinet/ip_mroute.h
+++ b/freebsd/sys/netinet/ip_mroute.h
@@ -262,7 +262,6 @@ struct vif {
u_long v_pkt_out; /* # pkts out on interface */
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
- struct route v_route; /* cached route */
};
#ifdef _KERNEL
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
index 7b190bfd..98a8a2df 100644
--- a/freebsd/sys/netinet/ip_options.c
+++ b/freebsd/sys/netinet/ip_options.c
@@ -67,8 +67,6 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
-#include <security/mac/mac_framework.h>
-
static int ip_dosourceroute = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
&ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index 02dc7bdb..a70d3142 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -86,12 +86,6 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
- x, (ntohl(a.s_addr)>>24)&0xFF,\
- (ntohl(a.s_addr)>>16)&0xFF,\
- (ntohl(a.s_addr)>>8)&0xFF,\
- (ntohl(a.s_addr))&0xFF, y);
-
VNET_DEFINE(u_short, ip_id);
#ifdef MBUF_STRESS_TEST
@@ -110,8 +104,13 @@ extern struct protosw inetsw[];
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
+ * ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
+ * If route ro is present and has ro_rt initialized, route lookup would be
+ * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
+ * then result of route lookup is stored in ro->ro_rt.
+ *
* In the IP forwarding case, the packet will arrive with options already
* inserted, so must have a NULL opt pointer.
*/
@@ -124,17 +123,15 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
struct mbuf *m0;
int hlen = sizeof (struct ip);
int mtu;
- int len, error = 0;
- int nortfree = 0;
- struct sockaddr_in *dst = NULL; /* keep compiler happy */
- struct in_ifaddr *ia = NULL;
+ int n; /* scratchpad */
+ int error = 0;
+ struct sockaddr_in *dst;
+ struct in_ifaddr *ia;
int isbroadcast, sw_csum;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
struct in_addr odst;
-#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag = NULL;
-#endif
#ifdef IPSEC
int no_route_but_check_spd = 0;
#endif
@@ -152,30 +149,29 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
+ }
#ifdef FLOWTABLE
- {
- struct flentry *fle;
+ if (ro->ro_rt == NULL) {
+ struct flentry *fle;
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
- flow_to_route(fle, ro);
- nortfree = 1;
- }
- }
-#endif
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that long for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
+ if (fle != NULL)
+ flow_to_route(fle, ro);
}
+#endif
if (opt) {
- len = 0;
+ int len = 0;
m = ip_insertoptions(m, opt, &len);
if (len != 0)
- hlen = len;
+ hlen = len; /* ip->ip_hl is updated above */
}
ip = mtod(m, struct ip *);
@@ -196,11 +192,13 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
ip->ip_id = ip_newid();
IPSTAT_INC(ips_localout);
} else {
+ /* Header already set, fetch hlen from there */
hlen = ip->ip_hl << 2;
}
again:
dst = (struct sockaddr_in *)&ro->ro_dst;
+ ia = NULL;
/*
* If there is a cached route,
* check that it is to the same destination
@@ -214,16 +212,11 @@ again:
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- if (!nortfree)
- RTFREE(rte);
- rte = ro->ro_rt = (struct rtentry *)NULL;
- ro->ro_lle = (struct llentry *)NULL;
+ RO_RTFREE(ro);
+ ro->ro_lle = NULL;
+ rte = NULL;
}
-#ifdef IPFIREWALL_FORWARD
if (rte == NULL && fwd_tag == NULL) {
-#else
- if (rte == NULL) {
-#endif
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
@@ -328,6 +321,9 @@ again:
} else {
mtu = ifp->if_mtu;
}
+ /* Catch a possible divide by zero later. */
+ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
+ __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
/*
@@ -441,18 +437,15 @@ again:
* packet or packet fragments, unless ALTQ is enabled on the given
* interface in which case packetdrop should be done by queueing.
*/
+ n = ip->ip_len / mtu + 1; /* how many fragments ? */
+ if (
#ifdef ALTQ
- if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
- ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
- ifp->if_snd.ifq_maxlen))
-#else
- if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
- ifp->if_snd.ifq_maxlen)
+ (!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
#endif /* ALTQ */
- {
+ (ifp->if_snd.ifq_len + n) >= ifp->if_snd.ifq_maxlen ) {
error = ENOBUFS;
IPSTAT_INC(ips_odropped);
- ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
+ ifp->if_snd.ifq_drops += n;
goto bad;
}
@@ -482,7 +475,7 @@ again:
sendit:
#ifdef IPSEC
- switch(ip_ipsec_output(&m, inp, &flags, &error, &ifp)) {
+ switch(ip_ipsec_output(&m, inp, &flags, &error)) {
case 1:
goto bad;
case -1:
@@ -537,11 +530,13 @@ sendit:
#endif
error = netisr_queue(NETISR_IP, m);
goto done;
- } else
+ } else {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
goto again; /* Redo the routing table lookup. */
+ }
}
-#ifdef IPFIREWALL_FORWARD
/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
if (m->m_flags & M_FASTFWD_OURS) {
if (m->m_pkthdr.rcvif == NULL)
@@ -562,15 +557,17 @@ sendit:
goto done;
}
/* Or forward to some other address? */
- fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
- if (fwd_tag) {
+ if ((m->m_flags & M_IP_NEXTHOP) &&
+ (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
dst = (struct sockaddr_in *)&ro->ro_dst;
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
m->m_flags |= M_SKIP_FIREWALL;
+ m->m_flags &= ~M_IP_NEXTHOP;
m_tag_delete(m, fwd_tag);
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
goto again;
}
-#endif /* IPFIREWALL_FORWARD */
passout:
/* 127/8 must not appear on wire - RFC1122. */
@@ -677,9 +674,8 @@ passout:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute && ro->ro_rt && !nortfree) {
- RTFREE(ro->ro_rt);
- }
+ if (ro == &iproute)
+ RO_RTFREE(ro);
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
@@ -725,14 +721,12 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
* If the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
- if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
- (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#ifdef SCTP
- if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
- (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
sctp_delayed_cksum(m0, hlen);
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
@@ -900,12 +894,40 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
error = optval = 0;
if (sopt->sopt_level != IPPROTO_IP) {
- if ((sopt->sopt_level == SOL_SOCKET) &&
- (sopt->sopt_name == SO_SETFIB)) {
- inp->inp_inc.inc_fibnum = so->so_fibnum;
- return (0);
+ error = EINVAL;
+
+ if (sopt->sopt_level == SOL_SOCKET &&
+ sopt->sopt_dir == SOPT_SET) {
+ switch (sopt->sopt_name) {
+ case SO_REUSEADDR:
+ INP_WLOCK(inp);
+ if ((so->so_options & SO_REUSEADDR) != 0)
+ inp->inp_flags2 |= INP_REUSEADDR;
+ else
+ inp->inp_flags2 &= ~INP_REUSEADDR;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
+ case SO_REUSEPORT:
+ INP_WLOCK(inp);
+ if ((so->so_options & SO_REUSEPORT) != 0)
+ inp->inp_flags2 |= INP_REUSEPORT;
+ else
+ inp->inp_flags2 &= ~INP_REUSEPORT;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
+ case SO_SETFIB:
+ INP_WLOCK(inp);
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ INP_WUNLOCK(inp);
+ error = 0;
+ break;
+ default:
+ break;
+ }
}
- return (EINVAL);
+ return (error);
}
switch (sopt->sopt_dir) {
diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h
index d196fd04..b07ef162 100644
--- a/freebsd/sys/netinet/ip_var.h
+++ b/freebsd/sys/netinet/ip_var.h
@@ -162,6 +162,7 @@ void kmod_ipstat_dec(int statnum);
* mbuf flag used by ip_fastfwd
*/
#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
+#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
#ifdef __NO_STRICT_ALIGNMENT
#define IP_HDR_ALIGNED_P(ip) 1
@@ -208,7 +209,6 @@ int inp_setmoptions(struct inpcb *, struct sockopt *);
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
-void ip_fini(void *xtp);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
void ip_forward(struct mbuf *m, int srcrt);
diff --git a/freebsd/sys/netinet/libalias/alias.h b/freebsd/sys/netinet/libalias/alias.h
index b2615c90..b12b353a 100644
--- a/freebsd/sys/netinet/libalias/alias.h
+++ b/freebsd/sys/netinet/libalias/alias.h
@@ -197,17 +197,6 @@ struct mbuf *m_megapullup(struct mbuf *, int);
*/
#define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20
-#ifndef NO_FW_PUNCH
-/*
- * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will
- * create a 'hole' in the firewall to allow the transfers to work. The
- * ipfw rule number that the hole is created with is controlled by
- * PacketAliasSetFWBase(). The hole will be attached to that
- * particular alias_link, so when the link goes away the hole is deleted.
- */
-#define PKT_ALIAS_PUNCH_FW 0x100
-#endif
-
/*
* If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only
* transparent proxying is performed.
@@ -220,6 +209,17 @@ struct mbuf *m_megapullup(struct mbuf *, int);
*/
#define PKT_ALIAS_REVERSE 0x80
+#ifndef NO_FW_PUNCH
+/*
+ * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will
+ * create a 'hole' in the firewall to allow the transfers to work. The
+ * ipfw rule number that the hole is created with is controlled by
+ * PacketAliasSetFWBase(). The hole will be attached to that
+ * particular alias_link, so when the link goes away the hole is deleted.
+ */
+#define PKT_ALIAS_PUNCH_FW 0x100
+#endif
+
/*
* If PKT_ALIAS_SKIP_GLOBAL is set, nat instance is not checked for matching
* states in 'ipfw nat global' rule.
diff --git a/freebsd/sys/netinet/libalias/alias_db.c b/freebsd/sys/netinet/libalias/alias_db.c
index 7385027c..28be85da 100644
--- a/freebsd/sys/netinet/libalias/alias_db.c
+++ b/freebsd/sys/netinet/libalias/alias_db.c
@@ -2171,7 +2171,6 @@ HouseKeeping(struct libalias *la)
int i, n;
#ifndef _KERNEL
struct timeval tv;
- struct timezone tz;
#endif
LIBALIAS_LOCK_ASSERT(la);
@@ -2183,7 +2182,7 @@ HouseKeeping(struct libalias *la)
#ifdef _KERNEL
la->timeStamp = time_uptime;
#else
- gettimeofday(&tv, &tz);
+ gettimeofday(&tv, NULL);
la->timeStamp = tv.tv_sec;
#endif
@@ -2478,7 +2477,6 @@ LibAliasInit(struct libalias *la)
int i;
#ifndef _KERNEL
struct timeval tv;
- struct timezone tz;
#endif
if (la == NULL) {
@@ -2505,7 +2503,7 @@ LibAliasInit(struct libalias *la)
la->timeStamp = time_uptime;
la->lastCleanupTime = time_uptime;
#else
- gettimeofday(&tv, &tz);
+ gettimeofday(&tv, NULL);
la->timeStamp = tv.tv_sec;
la->lastCleanupTime = tv.tv_sec;
#endif
@@ -2737,7 +2735,6 @@ static void
InitPunchFW(struct libalias *la)
{
- LIBALIAS_LOCK_ASSERT(la);
la->fireWallField = malloc(la->fireWallNumNums);
if (la->fireWallField) {
memset(la->fireWallField, 0, la->fireWallNumNums);
@@ -2753,7 +2750,6 @@ static void
UninitPunchFW(struct libalias *la)
{
- LIBALIAS_LOCK_ASSERT(la);
ClearAllFWHoles(la);
if (la->fireWallFD >= 0)
close(la->fireWallFD);
@@ -2773,7 +2769,6 @@ PunchFWHole(struct alias_link *lnk)
struct ip_fw rule; /* On-the-fly built rule */
int fwhole; /* Where to punch hole */
- LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
/* Don't do anything unless we are asked to */
@@ -2847,7 +2842,6 @@ ClearFWHole(struct alias_link *lnk)
{
struct libalias *la;
- LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
if (lnk->link_type == LINK_TCP) {
int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall
@@ -2872,7 +2866,6 @@ ClearAllFWHoles(struct libalias *la)
struct ip_fw rule; /* On-the-fly built rule */
int i;
- LIBALIAS_LOCK_ASSERT(la);
if (la->fireWallFD < 0)
return;
@@ -2886,7 +2879,7 @@ ClearAllFWHoles(struct libalias *la)
memset(la->fireWallField, 0, la->fireWallNumNums);
}
-#endif
+#endif /* !NO_FW_PUNCH */
void
LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.c b/freebsd/sys/netinet/libalias/alias_sctp.c
index c8d83878..6158149a 100644
--- a/freebsd/sys/netinet/libalias/alias_sctp.c
+++ b/freebsd/sys/netinet/libalias/alias_sctp.c
@@ -183,7 +183,7 @@ void SctpShowAliasStats(struct libalias *la);
#ifdef _KERNEL
-MALLOC_DEFINE(M_SCTPNAT, "sctpnat", "sctp nat dbs");
+static MALLOC_DEFINE(M_SCTPNAT, "sctpnat", "sctp nat dbs");
/* Use kernel allocator. */
#ifdef _SYS_MALLOC_H_
#define sn_malloc(x) malloc(x, M_SCTPNAT, M_NOWAIT|M_ZERO)
@@ -366,8 +366,8 @@ SYSCTL_DECL(_net_inet);
SYSCTL_DECL(_net_inet_ip);
SYSCTL_DECL(_net_inet_ip_alias);
-SYSCTL_NODE(_net_inet_ip_alias, OID_AUTO, sctp, CTLFLAG_RW, NULL, "SCTP NAT");
-
+static SYSCTL_NODE(_net_inet_ip_alias, OID_AUTO, sctp, CTLFLAG_RW, NULL,
+ "SCTP NAT");
SYSCTL_PROC(_net_inet_ip_alias_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW,
&sysctl_log_level, 0, sysctl_chg_loglevel, "IU",
"Level of detail (0 - default, 1 - event, 2 - info, 3 - detail, 4 - debug, 5 - max debug)");
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.h b/freebsd/sys/netinet/libalias/alias_sctp.h
index 8c244b45..f538d942 100644
--- a/freebsd/sys/netinet/libalias/alias_sctp.h
+++ b/freebsd/sys/netinet/libalias/alias_sctp.h
@@ -76,7 +76,6 @@
*
*/
#include <machine/cpufunc.h>
-#include <machine/cpu.h>
/* The packed define for 64 bit platforms */
#ifndef SCTP_PACKED
#define SCTP_PACKED __attribute__((packed))
@@ -136,13 +135,13 @@ struct sctp_nat_assoc {
struct in_addr a_addr; /**< alias ip address */
int state; /**< current state of NAT association */
int TableRegister; /**< stores which look up tables association is registered in */
- int exp; /**< timer expiration in seconds from uptime */
+ int exp; /**< timer expiration in seconds from uptime */
int exp_loc; /**< current location in timer_Q */
int num_Gaddr; /**< number of global IP addresses in the list */
LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */
- LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
- LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
- LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
+ LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
+ LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
+ LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
//Using libalias locking
};
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index aa6abae9..827eca6e 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -35,6 +35,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
@@ -76,6 +77,11 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
+VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+ &VNET_NAME(ip_defttl), 0,
+ "Maximum TTL on IP packets");
+
VNET_DEFINE(struct inpcbhead, ripcb);
VNET_DEFINE(struct inpcbinfo, ripcbinfo);
@@ -96,6 +102,10 @@ void (*ip_divert_ptr)(struct mbuf *, int);
int (*ng_ipfw_input_p)(struct mbuf **, int,
struct ip_fw_args *, int);
+/* Hook for telling pf that the destination address changed */
+void (*m_addr_chg_pf_p)(struct mbuf *m);
+
+#ifdef INET
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
* initialized and rely on BSS being set to 0.
@@ -121,6 +131,15 @@ u_long (*ip_mcast_src)(int);
void (*rsvp_input_p)(struct mbuf *m, int off);
int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
void (*ip_rsvp_force_done)(struct socket *);
+#endif /* INET */
+
+u_long rip_sendspace = 9216;
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+ &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
+
+u_long rip_recvspace = 9216;
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+ &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
/*
* Hash functions
@@ -130,6 +149,7 @@ void (*ip_rsvp_force_done)(struct socket *);
#define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
(((proto) + (laddr) + (faddr)) % (mask) + 1)
+#ifdef INET
static void
rip_inshash(struct inpcb *inp)
{
@@ -160,6 +180,7 @@ rip_delhash(struct inpcb *inp)
LIST_REMOVE(inp, inp_hash);
}
+#endif /* INET */
/*
* Raw interface to IP protocol.
@@ -188,19 +209,9 @@ void
rip_init(void)
{
- INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip");
- LIST_INIT(&V_ripcb);
-#ifdef VIMAGE
- V_ripcbinfo.ipi_vnet = curvnet;
-#endif
- V_ripcbinfo.ipi_listhead = &V_ripcb;
- V_ripcbinfo.ipi_hashbase =
- hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask);
- V_ripcbinfo.ipi_porthashbase =
- hashinit(1, M_PCB, &V_ripcbinfo.ipi_porthashmask);
- V_ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
- NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
+ in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
+ 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
@@ -210,20 +221,18 @@ void
rip_destroy(void)
{
- hashdestroy(V_ripcbinfo.ipi_hashbase, M_PCB,
- V_ripcbinfo.ipi_hashmask);
- hashdestroy(V_ripcbinfo.ipi_porthashbase, M_PCB,
- V_ripcbinfo.ipi_porthashmask);
+ in_pcbinfo_destroy(&V_ripcbinfo);
}
#endif
+#ifdef INET
static int
rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
struct sockaddr_in *ripsrc)
{
int policyfail = 0;
- INP_RLOCK_ASSERT(last);
+ INP_LOCK_ASSERT(last);
#ifdef IPSEC
/* check AH/ESP integrity. */
@@ -771,14 +780,6 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
}
}
-u_long rip_sendspace = 9216;
-u_long rip_recvspace = 9216;
-
-SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
- &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
-SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
- &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
-
static int
rip_attach(struct socket *so, int proto, struct thread *td)
{
@@ -839,16 +840,19 @@ rip_detach(struct socket *so)
static void
rip_dodisconnect(struct socket *so, struct inpcb *inp)
{
+ struct inpcbinfo *pcbinfo;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
- INP_WLOCK_ASSERT(inp);
-
+ pcbinfo = inp->inp_pcbinfo;
+ INP_INFO_WLOCK(pcbinfo);
+ INP_WLOCK(inp);
rip_delhash(inp);
inp->inp_faddr.s_addr = INADDR_ANY;
rip_inshash(inp);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED;
SOCK_UNLOCK(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(pcbinfo);
}
static void
@@ -859,11 +863,7 @@ rip_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static void
@@ -874,11 +874,7 @@ rip_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_close: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static int
@@ -892,11 +888,7 @@ rip_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
return (0);
}
@@ -1003,6 +995,7 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
}
return (rip_output(m, so, dst));
}
+#endif /* INET */
static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
@@ -1081,9 +1074,9 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_ripcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_ripcbinfo);
@@ -1109,6 +1102,7 @@ SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
+#ifdef INET
struct pr_usrreqs rip_usrreqs = {
.pru_abort = rip_abort,
.pru_attach = rip_attach,
@@ -1124,3 +1118,4 @@ struct pr_usrreqs rip_usrreqs = {
.pru_sosetlabel = in_pcbsosetlabel,
.pru_close = rip_close,
};
+#endif /* INET */
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
index 18057009..58ca808e 100644
--- a/freebsd/sys/netinet/sctp_constants.h
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -521,9 +521,6 @@ __FBSDID("$FreeBSD$");
/* How long a cookie lives in milli-seconds */
#define SCTP_DEFAULT_COOKIE_LIFE 60000
-/* resource limit of streams */
-#define MAX_SCTP_STREAMS 2048
-
/* Maximum the mapping array will grow to (TSN mapping array) */
#define SCTP_MAPPING_ARRAY 512
@@ -658,6 +655,7 @@ __FBSDID("$FreeBSD$");
/* How many streams I request initally by default */
#define SCTP_OSTREAM_INITIAL 10
+#define SCTP_ISTREAM_INITIAL 2048
/*
* How many smallest_mtu's need to increase before a window update sack is
@@ -997,6 +995,10 @@ __FBSDID("$FreeBSD$");
(((uint8_t *)&(a)->s_addr)[2] == 0) && \
(((uint8_t *)&(a)->s_addr)[3] == 1))
+#define IN4_ISLINKLOCAL_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 169) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 254))
+
#if defined(_KERNEL)
#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index 273ad6bc..e00a470d 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -1731,7 +1731,6 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
sctp_alloc_a_readq(stcb, control);
sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
protocol_id,
- stcb->asoc.context,
strmno, strmseq,
chunk_flags,
dmbuf);
@@ -1859,7 +1858,6 @@ failed_pdapi_express_del:
sctp_alloc_a_readq(stcb, control);
sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
protocol_id,
- stcb->asoc.context,
strmno, strmseq,
chunk_flags,
dmbuf);
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
index 5eaa1f4b..79a86e2a 100644
--- a/freebsd/sys/netinet/sctp_indata.h
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -47,14 +47,14 @@ sctp_build_readq_entry(struct sctp_tcb *stcb,
struct mbuf *dm);
-#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+#define sctp_build_readq_entry_mac(_ctl, in_it, context, net, tsn, ppid, stream_no, stream_seq, flags, dm) do { \
if (_ctl) { \
atomic_add_int(&((net)->ref_count), 1); \
(_ctl)->sinfo_stream = stream_no; \
(_ctl)->sinfo_ssn = stream_seq; \
(_ctl)->sinfo_flags = (flags << 8); \
(_ctl)->sinfo_ppid = ppid; \
- (_ctl)->sinfo_context = a; \
+ (_ctl)->sinfo_context = context; \
(_ctl)->sinfo_timetolive = 0; \
(_ctl)->sinfo_tsn = tsn; \
(_ctl)->sinfo_cumtsn = tsn; \
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index 645c807e..7cdb5b09 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -391,9 +391,10 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
}
SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
}
- asoc->streamincnt = ntohs(init->num_outbound_streams);
- if (asoc->streamincnt > MAX_SCTP_STREAMS) {
- asoc->streamincnt = MAX_SCTP_STREAMS;
+ if (asoc->max_inbound_streams > ntohs(init->num_outbound_streams)) {
+ asoc->streamincnt = ntohs(init->num_outbound_streams);
+ } else {
+ asoc->streamincnt = asoc->max_inbound_streams;
}
SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
sizeof(struct sctp_stream_in), SCTP_M_STRMI);
@@ -405,11 +406,6 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
for (i = 0; i < asoc->streamincnt; i++) {
asoc->strmin[i].stream_no = i;
asoc->strmin[i].last_sequence_delivered = 0xffff;
- /*
- * U-stream ranges will be set when the cookie is unpacked.
- * Or for the INIT sender they are un set (if pr-sctp not
- * supported) when the INIT-ACK arrives.
- */
TAILQ_INIT(&asoc->strmin[i].inqueue);
asoc->strmin[i].delivery_started = 0;
}
@@ -1030,12 +1026,13 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
- /* are the queues empty? */
+#ifdef INVARIANTS
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
+ panic("Queues are not empty when handling SHUTDOWN-ACK");
}
+#endif
/* stop the timer */
sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
/* send SHUTDOWN-COMPLETE */
@@ -1877,9 +1874,14 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
cookie->tie_tag_peer_vtag != 0) {
struct sctpasochead *head;
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
if (asoc->peer_supports_nat) {
/*
- * This is a gross gross hack. just call the
+ * This is a gross gross hack. Just call the
* cookie_new code since we are allowing a duplicate
* association. I hope this works...
*/
@@ -1941,6 +1943,10 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
asoc->mapping_array_size);
}
SCTP_TCB_UNLOCK(stcb);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ SCTP_SOCKET_LOCK(so, 1);
+#endif
SCTP_INP_INFO_WLOCK();
SCTP_INP_WLOCK(stcb->sctp_ep);
SCTP_TCB_LOCK(stcb);
@@ -1948,7 +1954,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
/* send up all the data */
SCTP_TCB_SEND_LOCK(stcb);
- sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_NOT_LOCKED);
+ sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED);
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
stcb->asoc.strmout[i].chunks_on_queues = 0;
stcb->asoc.strmout[i].stream_no = i;
@@ -1970,11 +1976,15 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- /* process the INIT info (peer's info) */
SCTP_TCB_SEND_UNLOCK(stcb);
SCTP_INP_WUNLOCK(stcb->sctp_ep);
SCTP_INP_INFO_WUNLOCK();
-
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ /* process the INIT info (peer's info) */
retval = sctp_process_init(init_cp, stcb);
if (retval < 0) {
if (how_indx < sizeof(asoc->cookie_how))
@@ -3198,13 +3208,14 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
/* notify upper layer protocol */
if (stcb->sctp_socket) {
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
- /* are the queues empty? they should be */
- if (!TAILQ_EMPTY(&asoc->send_queue) ||
- !TAILQ_EMPTY(&asoc->sent_queue) ||
- !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
- }
}
+#ifdef INVARIANTS
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
+ }
+#endif
/* stop the timer */
sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
@@ -3493,18 +3504,13 @@ sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *
}
static void
-sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
{
- int i;
+ uint32_t i;
+ uint16_t temp;
- if (number_entries == 0) {
- for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
- stcb->asoc.strmout[i].next_sequence_send = 0;
- }
- } else if (number_entries) {
+ if (number_entries > 0) {
for (i = 0; i < number_entries; i++) {
- uint16_t temp;
-
temp = ntohs(list[i]);
if (temp >= stcb->asoc.streamoutcnt) {
/* no such stream */
@@ -3512,6 +3518,10 @@ sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * lis
}
stcb->asoc.strmout[temp].next_sequence_send = 0;
}
+ } else {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].next_sequence_send = 0;
+ }
}
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
}
@@ -3598,7 +3608,7 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
struct sctp_association *asoc = &stcb->asoc;
struct sctp_tmit_chunk *chk;
struct sctp_stream_reset_out_request *srparam;
- int number_entries;
+ uint32_t number_entries;
if (asoc->stream_reset_outstanding == 0) {
/* duplicate */
@@ -4556,8 +4566,10 @@ __attribute__((noinline))
if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
(ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
(ch->chunk_type == SCTP_PACKET_DROPPED)) {
- if ((vtag_in == asoc->my_vtag) ||
- ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+ /* Take the T-bit always into account. */
+ if ((((ch->chunk_flags & SCTP_HAD_NO_TCB) == 0) &&
+ (vtag_in == asoc->my_vtag)) ||
+ (((ch->chunk_flags & SCTP_HAD_NO_TCB) == SCTP_HAD_NO_TCB) &&
(vtag_in == asoc->peer_vtag))) {
/* this is valid */
} else {
@@ -5695,7 +5707,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
#ifdef INET
case AF_INET:
if (ipsec4_in_reject(m, &inp->ip_inp.inp)) {
- MODULE_GLOBAL(ipsec4stat).in_polvio++;
+ IPSECSTAT_INC(in_polvio);
SCTP_STAT_INCR(sctps_hdrops);
goto out;
}
@@ -5704,7 +5716,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
#ifdef INET6
case AF_INET6:
if (ipsec6_in_reject(m, &inp->ip_inp.inp)) {
- MODULE_GLOBAL(ipsec6stat).in_polvio++;
+ IPSEC6STAT_INC(in_polvio);
SCTP_STAT_INCR(sctps_hdrops);
goto out;
}
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index 1bca9771..61260fb7 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -1967,7 +1967,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
while (SCTP_BUF_NEXT(mret) != NULL) {
mret = SCTP_BUF_NEXT(mret);
}
- SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA);
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_DONTWAIT, 1, MT_DATA);
if (SCTP_BUF_NEXT(mret) == NULL) {
/* We are hosed, can't add more addresses */
return (m);
@@ -4131,10 +4131,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
if (net == NULL) {
/* free tempy routes */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- ro->ro_rt = NULL;
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
@@ -4449,8 +4446,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
+ m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
+ m->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
/* send it out. table id is taken from stcb */
@@ -4487,9 +4485,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
}
if (net == NULL) {
/* Now if we had a temp route free it */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
@@ -10683,6 +10679,7 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
struct sctp_abort_chunk *abort;
struct sctp_auth_chunk *auth = NULL;
struct sctp_nets *net;
+ uint32_t vtag;
uint32_t auth_offset = 0;
uint16_t cause_len, chunk_len, padding_len;
@@ -10738,7 +10735,14 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
/* Fill in the ABORT chunk header. */
abort = mtod(m_abort, struct sctp_abort_chunk *);
abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
- abort->ch.chunk_flags = 0;
+ if (stcb->asoc.peer_vtag == 0) {
+ /* This happens iff the assoc is in COOKIE-WAIT state. */
+ vtag = stcb->asoc.my_vtag;
+ abort->ch.chunk_flags = SCTP_HAD_NO_TCB;
+ } else {
+ vtag = stcb->asoc.peer_vtag;
+ abort->ch.chunk_flags = 0;
+ }
abort->ch.chunk_length = htons(chunk_len);
/* Add padding, if necessary. */
if (padding_len > 0) {
@@ -10750,7 +10754,7 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
(void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
- stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+ stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag),
stcb->asoc.primary_destination->port, NULL,
0, 0,
so_locked);
@@ -11032,8 +11036,9 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
#else
- shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr));
- SCTP_STAT_INCR(sctps_sendswcrc);
+ mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
+ mout->m_pkthdr.csum_data = 0;
+ SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
#ifdef SCTP_PACKET_LOGGING
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index 47877ef1..e21c2e03 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -2378,8 +2378,13 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
inp->sctp_socket = so;
inp->ip_inp.inp.inp_socket = so;
#ifdef INET6
- if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
- inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+ if (INP_SOCKAF(so) == AF_INET6) {
+ if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
+ inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+ }
+ if (MODULE_GLOBAL(ip6_v6only)) {
+ inp->ip_inp.inp.inp_flags |= IN6P_IPV6_V6ONLY;
+ }
}
#endif
inp->sctp_associd_counter = 1;
@@ -2500,9 +2505,6 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default);
m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default);
m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default);
-
- m->max_open_streams_intome = MAX_SCTP_STREAMS;
-
m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default);
m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default);
m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default);
@@ -2514,6 +2516,7 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module);
m->sctp_default_ss_module = SCTP_BASE_SYSCTL(sctp_default_ss_module);
+ m->max_open_streams_intome = SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default);
/* number of streams to pre-open on a association */
m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default);
@@ -4450,23 +4453,21 @@ sctp_delete_from_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
int i;
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
- if (!LIST_EMPTY(chain)) {
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- found = 1;
- break;
- }
- }
- if (found)
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ found = 1;
break;
+ }
}
+ if (found)
+ break;
}
}
@@ -4480,19 +4481,17 @@ sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport)
SCTP_INP_INFO_WLOCK();
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
- if (!LIST_EMPTY(chain)) {
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- found = 1;
- break;
- }
- }
- if (found)
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ found = 1;
break;
+ }
}
+ if (found)
+ break;
}
SCTP_INP_INFO_WUNLOCK();
return (found);
@@ -4514,42 +4513,40 @@ sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t
(void)SCTP_GETTIME_TIMEVAL(&now);
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
set = 0;
- if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
/* Block(s) present, lets find space, and expire on the fly */
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == 0) &&
- !set) {
- twait_block->vtag_block[i].tv_sec_at_expire =
- now.tv_sec + time;
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == 0) &&
+ !set) {
+ twait_block->vtag_block[i].tv_sec_at_expire =
+ now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ twait_block->vtag_block[i].lport = lport;
+ twait_block->vtag_block[i].rport = rport;
+ set = 1;
+ } else if ((twait_block->vtag_block[i].v_tag) &&
+ ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ if (set == 0) {
+ /* Reuse it for my new tag */
+ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
twait_block->vtag_block[i].v_tag = tag;
twait_block->vtag_block[i].lport = lport;
twait_block->vtag_block[i].rport = rport;
set = 1;
- } else if ((twait_block->vtag_block[i].v_tag) &&
- ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
- /* Audit expires this guy */
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- if (set == 0) {
- /* Reuse it for my new tag */
- twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
- twait_block->vtag_block[i].v_tag = tag;
- twait_block->vtag_block[i].lport = lport;
- twait_block->vtag_block[i].rport = rport;
- set = 1;
- }
}
}
- if (set) {
- /*
- * We only do up to the block where we can
- * place our tag for audits
- */
- break;
- }
+ }
+ if (set) {
+ /*
+ * We only do up to the block where we can place our
+ * tag for audits
+ */
+ break;
}
}
/* Need to add a new block to chain */
@@ -6699,30 +6696,28 @@ skip_vtag_check:
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
/* Now what about timed wait ? */
- if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
/*
* Block(s) are present, lets see if we have this tag in the
* list
*/
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if (twait_block->vtag_block[i].v_tag == 0) {
- /* not used */
- continue;
- } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
- now->tv_sec) {
- /* Audit expires this guy */
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- } else if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- /* Bad tag, sorry :< */
- SCTP_INP_INFO_RUNLOCK();
- return (0);
- }
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == 0) {
+ /* not used */
+ continue;
+ } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+ now->tv_sec) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ } else if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ /* Bad tag, sorry :< */
+ SCTP_INP_INFO_RUNLOCK();
+ return (0);
}
}
}
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
index abecdabd..bc18f0e8 100644
--- a/freebsd/sys/netinet/sctp_structs.h
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -189,6 +189,8 @@ struct iterator_control {
struct sctp_net_route {
sctp_rtentry_t *ro_rt;
void *ro_lle;
+ void *ro_ia;
+ int ro_flags;
union sctp_sockstore _l_addr; /* remote peer addr */
struct sctp_ifa *_s_addr; /* our selected src addr */
};
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
index ca462b7a..95e3c589 100644
--- a/freebsd/sys/netinet/sctp_sysctl.c
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -83,6 +83,7 @@ sctp_init_sysctls()
SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT;
SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT;
SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
/* EY */
@@ -625,6 +626,7 @@ sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_path_pf_threshold), SCTPCTL_PATH_PF_THRESHOLD_MIN, SCTPCTL_PATH_PF_THRESHOLD_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
+ RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default), SCTPCTL_INCOMING_STREAMS_MIN, SCTPCTL_INCOMING_STREAMS_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
/* EY */
@@ -967,6 +969,10 @@ SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_UINT | CT
&SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU",
SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
+SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, incoming_streams, CTLTYPE_UINT | CTLFLAG_RW,
+ &SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default), 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INCOMING_STREAMS_DESC);
+
SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_UINT | CTLFLAG_RW,
&SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU",
SCTPCTL_OUTGOING_STREAMS_DESC);
diff --git a/freebsd/sys/netinet/sctp_sysctl.h b/freebsd/sys/netinet/sctp_sysctl.h
index 4ec37157..8090373e 100644
--- a/freebsd/sys/netinet/sctp_sysctl.h
+++ b/freebsd/sys/netinet/sctp_sysctl.h
@@ -72,6 +72,7 @@ struct sctp_sysctl {
uint32_t sctp_path_rtx_max_default;
uint32_t sctp_path_pf_threshold;
uint32_t sctp_add_more_threshold;
+ uint32_t sctp_nr_incoming_streams_default;
uint32_t sctp_nr_outgoing_streams_default;
uint32_t sctp_cmt_on_off;
uint32_t sctp_cmt_use_dac;
@@ -322,6 +323,12 @@ struct sctp_sysctl {
#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF
#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
+/* incoming_streams: Default number of incoming streams */
+#define SCTPCTL_INCOMING_STREAMS_DESC "Default number of incoming streams"
+#define SCTPCTL_INCOMING_STREAMS_MIN 1
+#define SCTPCTL_INCOMING_STREAMS_MAX 65535
+#define SCTPCTL_INCOMING_STREAMS_DEFAULT SCTP_ISTREAM_INITIAL
+
/* outgoing_streams: Default number of outgoing streams */
#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams"
#define SCTPCTL_OUTGOING_STREAMS_MIN 1
diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h
index d8e7da45..063fd9f1 100644
--- a/freebsd/sys/netinet/sctp_uio.h
+++ b/freebsd/sys/netinet/sctp_uio.h
@@ -1267,44 +1267,50 @@ sctp_sorecvmsg(struct socket *so,
#if !(defined(_KERNEL)) && !(defined(__Userspace__))
__BEGIN_DECLS
-int sctp_peeloff __P((int, sctp_assoc_t));
-int sctp_bindx __P((int, struct sockaddr *, int, int));
-int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *));
-int sctp_getaddrlen __P((sa_family_t));
-int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **));
-void sctp_freepaddrs __P((struct sockaddr *));
-int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
-void sctp_freeladdrs __P((struct sockaddr *));
-int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
+int sctp_peeloff(int, sctp_assoc_t);
+int sctp_bindx(int, struct sockaddr *, int, int);
+int sctp_connectx(int, const struct sockaddr *, int, sctp_assoc_t *);
+int sctp_getaddrlen(sa_family_t);
+int sctp_getpaddrs(int, sctp_assoc_t, struct sockaddr **);
+void sctp_freepaddrs(struct sockaddr *);
+int sctp_getladdrs(int, sctp_assoc_t, struct sockaddr **);
+void sctp_freeladdrs(struct sockaddr *);
+int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *);
/* deprecated */
-ssize_t sctp_sendmsg
-__P((int, const void *, size_t, const struct sockaddr *,
- socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+ssize_t
+sctp_sendmsg(int, const void *, size_t, const struct sockaddr *,
+ socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
/* deprecated */
- ssize_t sctp_send __P((int, const void *, size_t,
- const struct sctp_sndrcvinfo *, int));
+ssize_t
+sctp_send(int, const void *, size_t,
+ const struct sctp_sndrcvinfo *, int);
/* deprecated */
- ssize_t sctp_sendx __P((int, const void *, size_t, struct sockaddr *,
- int, struct sctp_sndrcvinfo *, int));
+ssize_t
+sctp_sendx(int, const void *, size_t, struct sockaddr *,
+ int, struct sctp_sndrcvinfo *, int);
/* deprecated */
- ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, struct sockaddr *,
- int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+ssize_t
+sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
+ int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
- sctp_assoc_t sctp_getassocid __P((int, struct sockaddr *));
+sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
/* deprecated */
- ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, socklen_t *,
- struct sctp_sndrcvinfo *, int *));
+ssize_t
+sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
+ struct sctp_sndrcvinfo *, int *);
- ssize_t sctp_sendv __P((int, const struct iovec *, int, struct sockaddr *,
- int, void *, socklen_t, unsigned int, int));
+ssize_t
+sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
+ int, void *, socklen_t, unsigned int, int);
- ssize_t sctp_recvv __P((int, const struct iovec *, int, struct sockaddr *,
- socklen_t *, void *, socklen_t *, unsigned int *, int *));
+ssize_t
+sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
+ socklen_t *, void *, socklen_t *, unsigned int *, int *);
__END_DECLS
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index 527790ce..81db1dc1 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -2054,18 +2054,29 @@ flags_out:
}
case SCTP_MAX_BURST:
{
- uint8_t *value;
+ struct sctp_assoc_value *av;
- SCTP_CHECK_AND_CAST(value, optval, uint8_t, *optsize);
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
- SCTP_INP_RLOCK(inp);
- if (inp->sctp_ep.max_burst < 256) {
- *value = inp->sctp_ep.max_burst;
+ if (stcb) {
+ av->assoc_value = stcb->asoc.max_burst;
+ SCTP_TCB_UNLOCK(stcb);
} else {
- *value = 255;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_ep.max_burst;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
}
- SCTP_INP_RUNLOCK(inp);
- *optsize = sizeof(uint8_t);
break;
}
case SCTP_MAXSEG:
@@ -4378,13 +4389,34 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
case SCTP_MAX_BURST:
{
- uint8_t *burst;
+ struct sctp_assoc_value *av;
- SCTP_CHECK_AND_CAST(burst, optval, uint8_t, optsize);
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
- SCTP_INP_WLOCK(inp);
- inp->sctp_ep.max_burst = *burst;
- SCTP_INP_WUNLOCK(inp);
+ if (stcb) {
+ stcb->asoc.max_burst = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.max_burst = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
+ (av->assoc_id == SCTP_ALL_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ stcb->asoc.max_burst = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ }
break;
}
case SCTP_MAXSEG:
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
index 3862b90b..d88a2376 100644
--- a/freebsd/sys/netinet/sctp_var.h
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -321,48 +321,34 @@ struct sctphdr;
void sctp_close(struct socket *so);
int sctp_disconnect(struct socket *so);
+void sctp_ctlinput(int, struct sockaddr *, void *);
+int sctp_ctloutput(struct socket *, struct sockopt *);
-void sctp_ctlinput __P((int, struct sockaddr *, void *));
-int sctp_ctloutput __P((struct socket *, struct sockopt *));
-
-#ifdef INET
-void sctp_input_with_port __P((struct mbuf *, int, uint16_t));
-
-#endif
#ifdef INET
-void sctp_input __P((struct mbuf *, int));
+void sctp_input_with_port(struct mbuf *, int, uint16_t);
+void sctp_input(struct mbuf *, int);
#endif
-void sctp_pathmtu_adjustment __P((struct sctp_tcb *, uint16_t));
-void sctp_drain __P((void));
-void sctp_init __P((void));
-
+void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
+void sctp_drain(void);
+void sctp_init(void);
void sctp_finish(void);
-
int sctp_flush(struct socket *, int);
-int sctp_shutdown __P((struct socket *));
-void sctp_notify
-__P((struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+int sctp_shutdown(struct socket *);
+void
+sctp_notify(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
struct sockaddr *, struct sctp_tcb *,
- struct sctp_nets *));
-
- int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
- int, int, struct proc *);
+ struct sctp_nets *);
+int
+sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+ int, int, struct proc *);
/* can't use sctp_assoc_t here */
- int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
-
- int sctp_ingetaddr(struct socket *,
- struct sockaddr **
-);
-
- int sctp_peeraddr(struct socket *,
- struct sockaddr **
-);
-
- int sctp_listen(struct socket *, int, struct thread *);
-
- int sctp_accept(struct socket *, struct sockaddr **);
+int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+int sctp_ingetaddr(struct socket *, struct sockaddr **);
+int sctp_peeraddr(struct socket *, struct sockaddr **);
+int sctp_listen(struct socket *, int, struct thread *);
+int sctp_accept(struct socket *, struct sockaddr **);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index 3a88b894..15928d8b 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -2690,8 +2690,14 @@ set_error:
stcb->sctp_socket->so_error = ECONNRESET;
}
} else {
- SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
- stcb->sctp_socket->so_error = ECONNABORTED;
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
+ stcb->sctp_socket->so_error = ETIMEDOUT;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
+ stcb->sctp_socket->so_error = ECONNABORTED;
+ }
}
}
/* Wake ANY sleepers */
@@ -3532,8 +3538,8 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
return;
}
- if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
- (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+ if ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+ (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED)) {
if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
(notification == SCTP_NOTIFY_INTERFACE_UP) ||
(notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -3607,16 +3613,16 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
}
case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
- if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
}
break;
case SCTP_NOTIFY_ASSOC_REM_ABORTED:
- if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
@@ -3969,7 +3975,7 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
if (stcb == NULL) {
/* Got to have a TCB */
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
- if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ if (LIST_EMPTY(&inp->sctp_asoc_list)) {
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_DIRECTLY_NOCMPSET);
}
@@ -4024,7 +4030,7 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
/* Generate a TO address for future reference */
if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
- if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ if (LIST_EMPTY(&inp->sctp_asoc_list)) {
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_DIRECTLY_NOCMPSET);
}
diff --git a/freebsd/sys/netinet/tcp.h b/freebsd/sys/netinet/tcp.h
index affc4df0..5925b4da 100644
--- a/freebsd/sys/netinet/tcp.h
+++ b/freebsd/sys/netinet/tcp.h
@@ -34,6 +34,7 @@
#define _NETINET_TCP_H_
#include <sys/cdefs.h>
+#include <rtems/bsd/sys/types.h>
#if __BSD_VISIBLE
@@ -52,11 +53,11 @@ struct tcphdr {
tcp_seq th_seq; /* sequence number */
tcp_seq th_ack; /* acknowledgement number */
#if BYTE_ORDER == LITTLE_ENDIAN
- u_int th_x2:4, /* (unused) */
+ u_char th_x2:4, /* (unused) */
th_off:4; /* data offset */
#endif
#if BYTE_ORDER == BIG_ENDIAN
- u_int th_off:4, /* data offset */
+ u_char th_off:4, /* data offset */
th_x2:4; /* (unused) */
#endif
u_char th_flags;
@@ -103,29 +104,37 @@ struct tcphdr {
/*
- * Default maximum segment size for TCP.
- * With an IP MTU of 576, this is 536,
- * but 512 is probably more convenient.
- * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
- */
-#define TCP_MSS 512
-/*
- * TCP_MINMSS is defined to be 216 which is fine for the smallest
- * link MTU (256 bytes, AX.25 packet radio) in the Internet.
- * However it is very unlikely to come across such low MTU interfaces
- * these days (anno dato 2003).
- * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments.
- * Setting this to "0" disables the minmss check.
+ * The default maximum segment size (MSS) to be used for new TCP connections
+ * when path MTU discovery is not enabled.
+ *
+ * RFC879 derives the default MSS from the largest datagram size hosts are
+ * minimally required to handle directly or through IP reassembly minus the
+ * size of the IP and TCP header. With IPv6 the minimum MTU is specified
+ * in RFC2460.
+ *
+ * For IPv4 the MSS is 576 - sizeof(struct tcpiphdr)
+ * For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct tcphdr)
+ *
+ * We use explicit numerical definition here to avoid header pollution.
*/
-#define TCP_MINMSS 216
+#define TCP_MSS 536
+#define TCP6_MSS 1220
/*
- * Default maximum segment size for TCP6.
- * With an IP6 MSS of 1280, this is 1220,
- * but 1024 is probably more convenient. (xxx kazu in doubt)
- * This should be defined as MIN(1024, IP6_MSS - sizeof (struct tcpip6hdr))
+ * Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS
+ * option. Allowing low values of MSS can consume significant resources and
+ * be used to mount a resource exhaustion attack.
+ * Connections requesting lower MSS values will be rounded up to this value
+ * and the IP_DF flag will be cleared to allow fragmentation along the path.
+ *
+ * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments. Setting
+ * it to "0" disables the minmss check.
+ *
+ * The default value is fine for TCP across the Internet's smallest official
+ * link MTU (256 bytes for AX.25 packet radio). However, a connection is very
+ * unlikely to come across such low MTU interfaces these days (anno domini 2003).
*/
-#define TCP6_MSS 1024
+#define TCP_MINMSS 216
#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */
#define TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */
@@ -152,6 +161,10 @@ struct tcphdr {
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
+#define TCP_KEEPINIT 128 /* N, time to establish connection */
+#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
+#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
+#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -218,7 +231,7 @@ struct tcp_info {
/* FreeBSD extensions to tcp_info. */
u_int32_t tcpi_snd_wnd; /* Advertised send window. */
- u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */
+ u_int32_t tcpi_snd_bwnd; /* No longer used. */
u_int32_t tcpi_snd_nxt; /* Next egress seqno */
u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */
u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c
index a0d38ff7..ee98af3f 100644
--- a/freebsd/sys/netinet/tcp_hostcache.c
+++ b/freebsd/sys/netinet/tcp_hostcache.c
@@ -120,7 +120,7 @@ static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
static void tcp_hc_purge_internal(int);
static void tcp_hc_purge(void *);
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
"TCP Host cache");
SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index 25afbb26..50dfc1ce 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -7,6 +7,7 @@
* Swinburne University of Technology, Melbourne, Australia.
* Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* Portions of this software were developed at the Centre for Advanced Internet
@@ -18,6 +19,9 @@
* Internet Architectures, Swinburne University of Technology, Melbourne,
* Australia by David Hayes under sponsorship from the FreeBSD Foundation.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -103,6 +107,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -153,6 +160,14 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3390), 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0,
+ "Experimental TCP extensions");
+
+VNET_DEFINE(int, tcp_do_initcwnd10) = 0;
+SYSCTL_VNET_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_initcwnd10), 0,
+ "Enable draft-ietf-tcpm-initcwnd-05 (Increasing initial CWND to 10)");
+
VNET_DEFINE(int, tcp_do_rfc3465) = 1;
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3465), 0,
@@ -163,7 +178,7 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
&VNET_NAME(tcp_abc_l_var), 2,
"Cap the max cwnd increment during slow-start to this number of segments");
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
VNET_DEFINE(int, tcp_do_ecn) = 0;
SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW,
@@ -181,6 +196,11 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
&VNET_NAME(tcp_insecure_rst), 0,
"Follow the old (insecure) criteria for accepting RST packets");
+VNET_DEFINE(int, tcp_recvspace) = 1024*64;
+#define V_tcp_recvspace VNET(tcp_recvspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
+
VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
@@ -193,16 +213,12 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_inc), 0,
"Incrementor step size of automatic receive buffer");
-VNET_DEFINE(int, tcp_autorcvbuf_max) = 256*1024;
+VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
-int tcp_read_locking = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
- &tcp_read_locking, 0, "Enable read locking strategy");
-
VNET_DEFINE(struct inpcbhead, tcb);
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
@@ -217,18 +233,18 @@ static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
- uint16_t type);
-static void inline cc_conn_init(struct tcpcb *tp);
-static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
static void inline tcp_fields_to_host(struct tcphdr *);
-static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to);
#ifdef TCP_SIGNATURE
static void inline tcp_fields_to_net(struct tcphdr *);
static int inline tcp_signature_verify_input(struct mbuf *, int, int,
int, struct tcpopt *, struct tcphdr *, u_int);
#endif
+static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+static void inline cc_conn_init(struct tcpcb *tp);
+static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
/*
* Kernel module interface for updating tcpstat. The argument is an index
@@ -271,7 +287,7 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
INP_WLOCK_ASSERT(tp->t_inpcb);
tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
- if (tp->snd_cwnd == min(tp->snd_cwnd, tp->snd_wnd))
+ if (tp->snd_cwnd <= tp->snd_wnd)
tp->ccv->flags |= CCF_CWND_LIMITED;
else
tp->ccv->flags &= ~CCF_CWND_LIMITED;
@@ -303,9 +319,6 @@ cc_conn_init(struct tcpcb *tp)
struct hc_metrics_lite metrics;
struct inpcb *inp = tp->t_inpcb;
int rtt;
-#ifdef INET6
- int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -339,44 +352,33 @@ cc_conn_init(struct tcpcb *tp)
}
/*
- * Set the slow-start flight size depending on whether this
- * is a local network or not.
- *
- * Extend this so we cache the cwnd too and retrieve it here.
- * Make cwnd even bigger than RFC3390 suggests but only if we
- * have previous experience with the remote host. Be careful
- * not make cwnd bigger than remote receive window or our own
- * send socket buffer. Maybe put some additional upper bound
- * on the retrieved cwnd. Should do incremental updates to
- * hostcache when cwnd collapses so next connection doesn't
- * overloads the path again.
+ * Set the initial slow-start flight size.
*
- * XXXAO: Initializing the CWND from the hostcache is broken
- * and in its current form not RFC conformant. It is disabled
- * until fixed or removed entirely.
+ * RFC5681 Section 3.1 specifies the default conservative values.
+ * RFC3390 specifies slightly more aggressive values.
+ * Draft-ietf-tcpm-initcwnd-05 increases it to ten segments.
*
- * RFC3390 says only do this if SYN or SYN/ACK didn't got lost.
- * We currently check only in syncache_socket for that.
+ * If a SYN or SYN/ACK was lost and retransmitted, we have to
+ * reduce the initial CWND to one segment as congestion is likely
+ * requiring us to be cautious.
*/
-/* #define TCP_METRICS_CWND */
-#ifdef TCP_METRICS_CWND
- if (metrics.rmx_cwnd)
- tp->snd_cwnd = max(tp->t_maxseg, min(metrics.rmx_cwnd / 2,
- min(tp->snd_wnd, so->so_snd.sb_hiwat)));
- else
-#endif
- if (V_tcp_do_rfc3390)
+ if (tp->snd_cwnd == 1)
+ tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */
+ else if (V_tcp_do_initcwnd10)
+ tp->snd_cwnd = min(10 * tp->t_maxseg,
+ max(2 * tp->t_maxseg, 14600));
+ else if (V_tcp_do_rfc3390)
tp->snd_cwnd = min(4 * tp->t_maxseg,
max(2 * tp->t_maxseg, 4380));
-#ifdef INET6
- else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
- (!isipv6 && in_localaddr(inp->inp_faddr)))
-#else
- else if (in_localaddr(inp->inp_faddr))
-#endif
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
- else
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz;
+ else {
+ /* Per RFC5681 Section 3.1 */
+ if (tp->t_maxseg > 2190)
+ tp->snd_cwnd = 2 * tp->t_maxseg;
+ else if (tp->t_maxseg > 1095)
+ tp->snd_cwnd = 3 * tp->t_maxseg;
+ else
+ tp->snd_cwnd = 4 * tp->t_maxseg;
+ }
if (CC_ALGO(tp)->conn_init != NULL)
CC_ALGO(tp)->conn_init(tp->ccv);
@@ -546,43 +548,44 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
tcp_input(m, *offp);
return IPPROTO_DONE;
}
-#endif
+#endif /* INET6 */
void
tcp_input(struct mbuf *m, int off0)
{
- struct tcphdr *th;
+ struct tcphdr *th = NULL;
struct ip *ip = NULL;
+#ifdef INET
struct ipovly *ipov;
+#endif
struct inpcb *inp = NULL;
struct tcpcb *tp = NULL;
struct socket *so = NULL;
u_char *optp = NULL;
int optlen = 0;
- int len, tlen, off;
+#ifdef INET
+ int len;
+#endif
+ int tlen = 0, off;
int drop_hdrlen;
int thflags;
int rstreason = 0; /* For badport_bandlim accounting purposes */
- uint8_t iptos;
#ifdef TCP_SIGNATURE
uint8_t sig_checked = 0;
#endif
-#ifdef IPFIREWALL_FORWARD
- struct m_tag *fwd_tag;
-#endif
+ uint8_t iptos = 0;
+ struct m_tag *fwd_tag = NULL;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
#else
const void *ip6 = NULL;
- const int isipv6 = 0;
-#endif
+#endif /* INET6 */
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-#define TI_WLOCKED 3
+#define TI_WLOCKED 2
#ifdef TCPDEBUG
/*
@@ -601,16 +604,34 @@ tcp_input(struct mbuf *m, int off0)
to.to_flags = 0;
TCPSTAT_INC(tcps_rcvtotal);
- if (isipv6) {
#ifdef INET6
+ if (isipv6) {
/* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
+
+ if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
+ m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
+ if (m == NULL) {
+ TCPSTAT_INC(tcps_rcvshort);
+ return;
+ }
+ }
+
ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)((caddr_t)ip6 + off0);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
- if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+ th->th_sum = m->m_pkthdr.csum_data;
+ else
+ th->th_sum = in6_cksum_pseudo(ip6, tlen,
+ IPPROTO_TCP, m->m_pkthdr.csum_data);
+ th->th_sum ^= 0xffff;
+ } else
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
+ if (th->th_sum) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
- th = (struct tcphdr *)((caddr_t)ip6 + off0);
/*
* Be proactive about unspecified IPv6 address in source.
@@ -624,10 +645,13 @@ tcp_input(struct mbuf *m, int off0)
/* XXX stat */
goto drop;
}
-#else
- th = NULL; /* XXX: Avoid compiler warning. */
+ }
#endif
- } else {
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ {
/*
* Get IP and TCP header together in first mbuf.
* Note: IP leaves IP header in first mbuf.
@@ -679,13 +703,18 @@ tcp_input(struct mbuf *m, int off0)
/* Re-initialization for later version check */
ip->ip_v = IPVERSION;
}
+#endif /* INET */
#ifdef INET6
if (isipv6)
iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+#endif
+#if defined(INET) && defined(INET6)
else
#endif
+#ifdef INET
iptos = ip->ip_tos;
+#endif
/*
* Check that TCP offset makes sense,
@@ -698,13 +727,18 @@ tcp_input(struct mbuf *m, int off0)
}
tlen -= off; /* tlen is used instead of ti->ti_len */
if (off > sizeof (struct tcphdr)) {
- if (isipv6) {
#ifdef INET6
+ if (isipv6) {
IP6_EXTHDR_CHECK(m, off0, off, );
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
+ }
#endif
- } else {
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ {
if (m->m_len < sizeof(struct ip) + off) {
if ((m = m_pullup(m, sizeof (struct ip) + off))
== NULL) {
@@ -716,6 +750,7 @@ tcp_input(struct mbuf *m, int off0)
th = (struct tcphdr *)((caddr_t)ip + off0);
}
}
+#endif
optlen = off - sizeof (struct tcphdr);
optp = (u_char *)(th + 1);
}
@@ -732,39 +767,83 @@ tcp_input(struct mbuf *m, int off0)
drop_hdrlen = off0 + off;
/*
- * Locate pcb for segment, which requires a lock on tcbinfo.
- * Optimisticaly acquire a global read lock rather than a write lock
- * unless header flags necessarily imply a state change. There are
- * two cases where we might discover later we need a write lock
- * despite the flags: ACKs moving a connection out of the syncache,
- * and ACKs for a connection in TIMEWAIT.
+ * Locate pcb for segment; if we're likely to add or remove a
+ * connection then first acquire pcbinfo lock. There are two cases
+ * where we might discover later we need a write lock despite the
+ * flags: ACKs moving a connection out of the syncache, and ACKs for
+ * a connection in TIMEWAIT.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
- } else {
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- }
+ } else
+ ti_locked = TI_UNLOCKED;
findpcb:
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: findpcb ti_locked %d\n", __func__, ti_locked);
+ } else {
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
-#ifdef IPFIREWALL_FORWARD
/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
- fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (
+#ifdef INET6
+ (isipv6 && (m->m_flags & M_IP6_NEXTHOP))
+#ifdef INET
+ || (!isipv6 && (m->m_flags & M_IP_NEXTHOP))
+#endif
+#endif
+#if defined(INET) && !defined(INET6)
+ (m->m_flags & M_IP_NEXTHOP)
+#endif
+ )
+ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
- if (fwd_tag != NULL && isipv6 == 0) { /* IPv6 support is not yet */
+#ifdef INET6
+ if (isipv6 && fwd_tag != NULL) {
+ struct sockaddr_in6 *next_hop6;
+
+ next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
+ /*
+ * Transparently forwarded. Pretend to be the destination.
+ * Already got one like this?
+ */
+ inp = in6_pcblookup_mbuf(&V_tcbinfo,
+ &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport,
+ INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m);
+ if (!inp) {
+ /*
+ * It's new. Try to find the ambushing socket.
+ * Because we've rewritten the destination address,
+ * any hardware-generated hash is ignored.
+ */
+ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
+ th->th_sport, &next_hop6->sin6_addr,
+ next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) :
+ th->th_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
+ }
+ /* Remove the tag from the packet. We don't need it anymore. */
+ m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP6_NEXTHOP;
+ fwd_tag = NULL;
+ } else if (isipv6) {
+ inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
+ th->th_sport, &ip6->ip6_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
+ }
+#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ if (fwd_tag != NULL) {
struct sockaddr_in *next_hop;
next_hop = (struct sockaddr_in *)(fwd_tag+1);
@@ -772,41 +851,31 @@ findpcb:
* Transparently forwarded. Pretend to be the destination.
* already got one like this?
*/
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- 0, m->m_pkthdr.rcvif);
+ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
+ ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
if (!inp) {
- /* It's new. Try to find the ambushing socket. */
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- next_hop->sin_addr,
- next_hop->sin_port ?
- ntohs(next_hop->sin_port) :
- th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ /*
+ * It's new. Try to find the ambushing socket.
+ * Because we've rewritten the destination address,
+ * any hardware-generated hash is ignored.
+ */
+ inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
+ th->th_sport, next_hop->sin_addr,
+ next_hop->sin_port ? ntohs(next_hop->sin_port) :
+ th->th_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
/* Remove the tag from the packet. We don't need it anymore. */
m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP_NEXTHOP;
+ fwd_tag = NULL;
} else
-#endif /* IPFIREWALL_FORWARD */
- {
- if (isipv6) {
-#ifdef INET6
- inp = in6_pcblookup_hash(&V_tcbinfo,
- &ip6->ip6_src, th->th_sport,
- &ip6->ip6_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
-#endif
- } else
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
- }
+ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
+ th->th_sport, ip->ip_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif, m);
+#endif /* INET */
/*
* If the INPCB does not exist then all data in the incoming
@@ -835,7 +904,7 @@ findpcb:
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
- INP_WLOCK(inp);
+ INP_WLOCK_ASSERT(inp);
if (!(inp->inp_flags & INP_HW_FLOWID)
&& (m->m_flags & M_FLOWID)
&& ((inp->inp_socket == NULL)
@@ -847,12 +916,12 @@ findpcb:
#ifdef IPSEC
#ifdef INET6
if (isipv6 && ipsec6_in_reject(m, inp)) {
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(in_polvio);
goto dropunlock;
} else
#endif /* INET6 */
if (ipsec4_in_reject(m, inp) != 0) {
- V_ipsec4stat.in_polvio++;
+ IPSECSTAT_INC(in_polvio);
goto dropunlock;
}
#endif /* IPSEC */
@@ -876,28 +945,26 @@ findpcb:
* legitimate new connection attempt the old INPCB gets removed and
* we can try again to find a listening socket.
*
- * At this point, due to earlier optimism, we may hold a read lock on
- * the inpcbinfo, rather than a write lock. If so, we need to
- * upgrade, or if that fails, acquire a reference on the inpcb, drop
- * all locks, acquire a global write lock, and then re-acquire the
- * inpcb lock. We may at that point discover that another thread has
- * tried to free the inpcb, in which case we need to loop back and
- * try to find a new inpcb to deliver to.
+ * At this point, due to earlier optimism, we may hold only an inpcb
+ * lock, and not the inpcbinfo write lock. If so, we need to try to
+ * acquire it, or if that fails, acquire a reference on the inpcb,
+ * drop all locks, acquire a global write lock, and then re-acquire
+ * the inpcb lock. We may at that point discover that another thread
+ * has tried to free the inpcb, in which case we need to loop back
+ * and try to find a new inpcb to deliver to.
+ *
+ * XXXRW: It may be time to rethink timewait locking.
*/
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -927,28 +994,34 @@ relocked:
goto dropwithreset;
}
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_input(tp, m);
+ m = NULL; /* consumed by the TOE driver */
+ goto dropunlock;
+ }
+#endif
+
/*
* We've identified a valid inpcb, but it could be that we need an
- * inpcbinfo write lock and have only a read lock. In this case,
- * attempt to upgrade/relock using the same strategy as the TIMEWAIT
- * case above. If we relock, we have to jump back to 'relocked' as
- * the connection might now be in TIMEWAIT.
+ * inpcbinfo write lock but don't hold it. In this case, attempt to
+ * acquire using the same strategy as the TIMEWAIT case above. If we
+ * relock, we have to jump back to 'relocked' as the connection might
+ * now be in TIMEWAIT.
*/
- if (tp->t_state != TCPS_ESTABLISHED ||
- (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: upgrade check ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+#ifdef INVARIANTS
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#endif
+ if (tp->t_state != TCPS_ESTABLISHED) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -969,25 +1042,28 @@ relocked:
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG) {
ostate = tp->t_state;
- if (isipv6) {
#ifdef INET6
+ if (isipv6) {
bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
-#endif
} else
+#endif
bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
tcp_savetcp = *th;
}
-#endif
+#endif /* TCPDEBUG */
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one.
+ * attempt or the completion of a previous one. Because listen
+ * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
+ * held in this case.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
bzero(&inc, sizeof(inc));
#ifdef INET6
@@ -1151,7 +1227,7 @@ relocked:
"SYN|FIN segment ignored (based on "
"sysctl setting)\n", s, __func__);
TCPSTAT_INC(tcps_badsyn);
- goto dropunlock;
+ goto dropunlock;
}
/*
* Segment's flags are (SYN) or (SYN|FIN).
@@ -1213,7 +1289,7 @@ relocked:
if (ia6)
ifa_free(&ia6->ia_ifa);
}
-#endif
+#endif /* INET6 */
/*
* Basic sanity checks on incoming SYN requests:
* Don't respond if the destination is a link layer
@@ -1232,8 +1308,8 @@ relocked:
"link layer address ignored\n", s, __func__);
goto dropunlock;
}
- if (isipv6) {
#ifdef INET6
+ if (isipv6) {
if (th->th_dport == th->th_sport &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
@@ -1250,8 +1326,13 @@ relocked:
"address ignored\n", s, __func__);
goto dropunlock;
}
+ }
#endif
- } else {
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ {
if (th->th_dport == th->th_sport &&
ip->ip_dst.s_addr == ip->ip_src.s_addr) {
if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
@@ -1272,6 +1353,7 @@ relocked:
goto dropunlock;
}
}
+#endif
/*
* SYN appears to be valid. Create compressed TCP state
* for syncache.
@@ -1289,6 +1371,15 @@ relocked:
*/
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return;
+ } else if (tp->t_state == TCPS_LISTEN) {
+ /*
+ * When a listen socket is torn down the SO_ACCEPTCONN
+ * flag is removed first while connections are drained
+ * from the accept queue in a unlock/lock cycle of the
+ * ACCEPT_LOCK, opening a race condition allowing a SYN
+ * attempt go through unhandled.
+ */
+ goto dropunlock;
}
#ifdef TCP_SIGNATURE
@@ -1320,13 +1411,17 @@ relocked:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -1337,13 +1432,17 @@ dropwithreset:
goto drop;
dropunlock:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropunlock ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL)
INP_WUNLOCK(inp);
@@ -1398,13 +1497,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: ti_locked %d for EST", __func__,
- ti_locked);
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
}
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -1421,7 +1520,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
tp->t_rcvtime = ticks;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
/*
* Unscale the window into a 32-bit value.
@@ -1550,13 +1649,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure ACK",
- __func__, ti_locked);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1595,7 +1689,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_xmit_timer(tp,
ticks - tp->t_rtttime);
}
- tcp_xmit_bandwidth_limit(tp, th->th_ack);
acked = BYTES_THIS_ACK(tp, th);
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
@@ -1660,13 +1753,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure data "
- "segment", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -1877,7 +1965,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->t_state = TCPS_ESTABLISHED;
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp));
}
} else {
/*
@@ -2281,7 +2370,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->t_state = TCPS_ESTABLISHED;
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -2362,7 +2451,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* Compute the amount of data in flight first.
* We can inject new data into the pipe iff
- * we have less than 1/2 the original window's
+ * we have less than 1/2 the original window's
* worth of data in flight.
*/
awnd = (tp->snd_nxt - tp->snd_fack) +
@@ -2448,6 +2537,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
u_long oldcwnd = tp->snd_cwnd;
tcp_seq oldsndmax = tp->snd_max;
u_int sent;
+ int avail;
KASSERT(tp->t_dupacks == 1 ||
tp->t_dupacks == 2,
@@ -2469,7 +2559,17 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
break;
}
- (void) tcp_output(tp);
+ /*
+ * Only call tcp_output when there
+ * is new data available to be sent.
+ * Otherwise we would send pure ACKs.
+ */
+ SOCKBUF_LOCK(&so->so_snd);
+ avail = so->so_snd.sb_cc -
+ (tp->snd_nxt - tp->snd_una);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (avail > 0)
+ (void) tcp_output(tp);
sent = tp->snd_max - oldsndmax;
if (sent > tp->t_maxseg) {
KASSERT((tp->t_dupacks == 2 &&
@@ -2529,9 +2629,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
process_ACK:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_input: process_ACK ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
@@ -2575,7 +2672,6 @@ process_ACK:
tp->t_rttlow = ticks - tp->t_rtttime;
tcp_xmit_timer(tp, ticks - tp->t_rtttime);
}
- tcp_xmit_bandwidth_limit(tp, th->th_ack);
/*
* If all outstanding data is acked, stop retransmit
@@ -2654,12 +2750,11 @@ process_ACK:
* compressed state.
*/
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- int timeout;
-
soisdisconnected(so);
- timeout = (tcp_fast_finwait2_recycle) ?
- tcp_finwait2_timeout : tcp_maxidle;
- tcp_timer_activate(tp, TT_2MSL, timeout);
+ tcp_timer_activate(tp, TT_2MSL,
+ (tcp_fast_finwait2_recycle ?
+ tcp_finwait2_timeout :
+ TP_MAXIDLE(tp)));
}
tp->t_state = TCPS_FIN_WAIT_2;
}
@@ -2698,9 +2793,6 @@ process_ACK:
}
step6:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: step6 ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2786,9 +2878,6 @@ step6:
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dodata ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2920,13 +3009,8 @@ dodata: /* XXX */
return;
}
}
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dodata epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -2955,9 +3039,6 @@ check_delack:
return;
dropafterack:
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dropafterack ti_locked %d", ti_locked));
-
/*
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
@@ -2984,13 +3065,8 @@ dropafterack:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropafterack epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3000,12 +3076,8 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3016,15 +3088,14 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
#ifdef INVARIANTS
else
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
#endif
- ti_locked = TI_UNLOCKED;
/*
* Drop space held by incoming segment and return.
@@ -3048,7 +3119,9 @@ static void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
int tlen, int rstreason)
{
+#ifdef INET
struct ip *ip;
+#endif
#ifdef INET6
struct ip6_hdr *ip6;
#endif
@@ -3067,8 +3140,12 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
goto drop;
/* IPv6 anycast check is done at tcp6_input() */
- } else
+ }
#endif
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
{
ip = mtod(m, struct ip *);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
@@ -3077,6 +3154,7 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
goto drop;
}
+#endif
/* Perform bandwidth limiting. */
if (badport_bandlim(rstreason) < 0)
@@ -3307,10 +3385,8 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
/*
* Determine a reasonable value for maxseg size.
* If the route is known, check route for mtu.
- * If none, use an mss that can be handled on the outgoing
- * interface without forcing IP to fragment; if bigger than
- * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
- * to utilize large mbufs. If no route is found, route has no mtu,
+ * If none, use an mss that can be handled on the outgoing interface
+ * without forcing IP to fragment. If no route is found, route has no mtu,
* or the destination isn't local, use a default, hopefully conservative
* size (usually 512 or the default IP max size, but no more than the mtu
* of the interface), as we can't discover anything about intervening
@@ -3331,10 +3407,10 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
*/
void
tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
- struct hc_metrics_lite *metricptr, int *mtuflags)
+ struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
{
- int mss;
- u_long maxmtu;
+ int mss = 0;
+ u_long maxmtu = 0;
struct inpcb *inp = tp->t_inpcb;
struct hc_metrics_lite metrics;
int origoffer;
@@ -3358,14 +3434,19 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
/* Initialize. */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
- } else
+ }
+#endif
+#if defined(INET) && defined(INET6)
+ else
#endif
+#ifdef INET
{
- maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
}
+#endif
/*
* No route to sender, stay with default mss and return.
@@ -3426,14 +3507,19 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
if (!V_path_mtu_discovery &&
!in6_localaddr(&inp->in6p_faddr))
mss = min(mss, V_tcp_v6mssdflt);
- } else
+ }
+#endif
+#if defined(INET) && defined(INET6)
+ else
#endif
+#ifdef INET
{
mss = maxmtu - min_protoh;
if (!V_path_mtu_discovery &&
!in_localaddr(inp->inp_faddr))
mss = min(mss, V_tcp_mssdflt);
}
+#endif
/*
* XXX - The above conditional (mss = maxmtu - min_protoh)
* probably violates the TCP spec.
@@ -3481,13 +3567,6 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
(tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
mss -= TCPOLEN_TSTAMP_APPA;
-#if (MCLBYTES & (MCLBYTES - 1)) == 0
- if (mss > MCLBYTES)
- mss &= ~(MCLBYTES-1);
-#else
- if (mss > MCLBYTES)
- mss = mss / MCLBYTES * MCLBYTES;
-#endif
tp->t_maxseg = mss;
}
@@ -3499,11 +3578,12 @@ tcp_mss(struct tcpcb *tp, int offer)
struct inpcb *inp;
struct socket *so;
struct hc_metrics_lite metrics;
- int mtuflags = 0;
+ struct tcp_ifcap cap;
KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
-
- tcp_mss_update(tp, offer, -1, &metrics, &mtuflags);
+
+ bzero(&cap, sizeof(cap));
+ tcp_mss_update(tp, offer, -1, &metrics, &cap);
mss = tp->t_maxseg;
inp = tp->t_inpcb;
@@ -3517,7 +3597,7 @@ tcp_mss(struct tcpcb *tp, int offer)
*/
so = inp->inp_socket;
SOCKBUF_LOCK(&so->so_snd);
- if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+ if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
bufsize = metrics.rmx_sendpipe;
else
bufsize = so->so_snd.sb_hiwat;
@@ -3534,7 +3614,7 @@ tcp_mss(struct tcpcb *tp, int offer)
tp->t_maxseg = mss;
SOCKBUF_LOCK(&so->so_rcv);
- if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+ if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
bufsize = metrics.rmx_recvpipe;
else
bufsize = so->so_rcv.sb_hiwat;
@@ -3548,8 +3628,10 @@ tcp_mss(struct tcpcb *tp, int offer)
SOCKBUF_UNLOCK(&so->so_rcv);
/* Check the interface for TSO capabilities. */
- if (mtuflags & CSUM_TSO)
+ if (cap.ifcap & CSUM_TSO) {
tp->t_flags |= TF_TSO;
+ tp->t_tsomax = cap.tsomax;
+ }
}
/*
@@ -3569,16 +3651,23 @@ tcp_mssopt(struct in_conninfo *inc)
if (inc->inc_flags & INC_ISIPV6) {
mss = V_tcp_v6mssdflt;
maxmtu = tcp_maxmtu6(inc, NULL);
- thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
- } else
+ }
+#endif
+#if defined(INET) && defined(INET6)
+ else
#endif
+#ifdef INET
{
mss = V_tcp_mssdflt;
maxmtu = tcp_maxmtu(inc, NULL);
- thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
+#endif
+#if defined(INET6) || defined(INET)
+ thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+#endif
+
if (maxmtu && thcmtu)
mss = min(maxmtu, thcmtu) - min_protoh;
else if (maxmtu || thcmtu)
diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c
index 9f1d13c3..52d92aa0 100644
--- a/freebsd/sys/netinet/tcp_lro.c
+++ b/freebsd/sys/netinet/tcp_lro.c
@@ -3,8 +3,12 @@
/*-
* Copyright (c) 2007, Myricom Inc.
* Copyright (c) 2008, Intel Corporation.
+ * Copyright (c) 2012 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by Bjoern Zeeb
+ * under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -25,359 +29,589 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD$
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/endian.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/ethernet.h>
-#include <net/if_media.h>
+#include <net/vnet.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
+#include <netinet/ip6.h>
#include <netinet/ip.h>
+#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
-#include <machine/bus.h>
+#include <netinet6/ip6_var.h>
+
#include <machine/in_cksum.h>
+#ifndef LRO_ENTRIES
+#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */
+#endif
-static uint16_t do_csum_data(uint16_t *raw, int len)
-{
- uint32_t csum;
- csum = 0;
- while (len > 0) {
- csum += *raw;
- raw++;
- csum += *raw;
- raw++;
- len -= 4;
- }
- csum = (csum >> 16) + (csum & 0xffff);
- csum = (csum >> 16) + (csum & 0xffff);
- return (uint16_t)csum;
-}
+#define TCP_LRO_UPDATE_CSUM 1
+#ifndef TCP_LRO_UPDATE_CSUM
+#define TCP_LRO_INVALID_CSUM 0x0000
+#endif
-/*
- * Allocate and init the LRO data structures
- */
int
-tcp_lro_init(struct lro_ctrl *cntl)
+tcp_lro_init(struct lro_ctrl *lc)
{
- struct lro_entry *lro;
- int i, error = 0;
+ struct lro_entry *le;
+ int error, i;
- SLIST_INIT(&cntl->lro_free);
- SLIST_INIT(&cntl->lro_active);
-
- cntl->lro_bad_csum = 0;
- cntl->lro_queued = 0;
- cntl->lro_flushed = 0;
+ lc->lro_bad_csum = 0;
+ lc->lro_queued = 0;
+ lc->lro_flushed = 0;
+ lc->lro_cnt = 0;
+ SLIST_INIT(&lc->lro_free);
+ SLIST_INIT(&lc->lro_active);
+ error = 0;
for (i = 0; i < LRO_ENTRIES; i++) {
- lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (lro == NULL) {
+ le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (le == NULL) {
if (i == 0)
error = ENOMEM;
break;
}
- cntl->lro_cnt = i;
- SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+ lc->lro_cnt = i + 1;
+ SLIST_INSERT_HEAD(&lc->lro_free, le, next);
}
return (error);
}
void
-tcp_lro_free(struct lro_ctrl *cntl)
+tcp_lro_free(struct lro_ctrl *lc)
{
- struct lro_entry *entry;
+ struct lro_entry *le;
- while (!SLIST_EMPTY(&cntl->lro_free)) {
- entry = SLIST_FIRST(&cntl->lro_free);
- SLIST_REMOVE_HEAD(&cntl->lro_free, next);
- free(entry, M_DEVBUF);
+ while (!SLIST_EMPTY(&lc->lro_free)) {
+ le = SLIST_FIRST(&lc->lro_free);
+ SLIST_REMOVE_HEAD(&lc->lro_free, next);
+ free(le, M_DEVBUF);
}
}
+#ifdef TCP_LRO_UPDATE_CSUM
+static uint16_t
+tcp_lro_csum_th(struct tcphdr *th)
+{
+ uint32_t ch;
+ uint16_t *p, l;
+
+ ch = th->th_sum = 0x0000;
+ l = th->th_off;
+ p = (uint16_t *)th;
+ while (l > 0) {
+ ch += *p;
+ p++;
+ ch += *p;
+ p++;
+ l--;
+ }
+ while (ch > 0xffff)
+ ch = (ch >> 16) + (ch & 0xffff);
+
+ return (ch & 0xffff);
+}
+
+static uint16_t
+tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
+ uint16_t tcp_data_len, uint16_t csum)
+{
+ uint32_t c;
+ uint16_t cs;
+
+ c = csum;
+
+ /* Remove length from checksum. */
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6;
+
+ ip6 = (struct ip6_hdr *)l3hdr;
+ if (le->append_cnt == 0)
+ cs = ip6->ip6_plen;
+ else {
+ uint32_t cx;
+
+ cx = ntohs(ip6->ip6_plen);
+ cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0);
+ }
+ break;
+ }
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip4;
+
+ ip4 = (struct ip *)l3hdr;
+ if (le->append_cnt == 0)
+ cs = ip4->ip_len;
+ else {
+ cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4),
+ IPPROTO_TCP);
+ cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr,
+ htons(cs));
+ }
+ break;
+ }
+#endif
+ default:
+ cs = 0; /* Keep compiler happy. */
+ }
+
+ cs = ~cs;
+ c += cs;
+
+ /* Remove TCP header csum. */
+ cs = ~tcp_lro_csum_th(th);
+ c += cs;
+ while (c > 0xffff)
+ c = (c >> 16) + (c & 0xffff);
+
+ return (c & 0xffff);
+}
+#endif
+
void
-tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
+tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
{
- struct ifnet *ifp;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- uint32_t tcplen, tcp_csum;
-
-
- if (lro->append_cnt) {
- /* incorporate the new len into the ip header and
- * re-calculate the checksum */
- ip = lro->ip;
- ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
- ip->ip_sum = 0;
- ip->ip_sum = 0xffff ^
- do_csum_data((uint16_t*)ip,
- sizeof (*ip));
-
- lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- lro->m_head->m_pkthdr.csum_data = 0xffff;
- lro->m_head->m_pkthdr.len = lro->len;
-
- /* incorporate the latest ack into the tcp header */
- tcp = (struct tcphdr *) (ip + 1);
- tcp->th_ack = lro->ack_seq;
- tcp->th_win = lro->window;
- /* incorporate latest timestamp into the tcp header */
- if (lro->timestamp) {
- ts_ptr = (uint32_t *)(tcp + 1);
- ts_ptr[1] = htonl(lro->tsval);
- ts_ptr[2] = lro->tsecr;
+
+ if (le->append_cnt > 0) {
+ struct tcphdr *th;
+ uint16_t p_len;
+
+ p_len = htons(le->p_len);
+ switch (le->eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6;
+
+ ip6 = le->le_ip6;
+ ip6->ip6_plen = p_len;
+ th = (struct tcphdr *)(ip6 + 1);
+ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
+ CSUM_PSEUDO_HDR;
+ le->p_len += ETHER_HDR_LEN + sizeof(*ip6);
+ break;
}
- /*
- * update checksum in tcp header by re-calculating the
- * tcp pseudoheader checksum, and adding it to the checksum
- * of the tcp payload data
- */
- tcp->th_sum = 0;
- tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
- tcp_csum = lro->data_csum;
- tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tcplen + IPPROTO_TCP));
- tcp_csum += do_csum_data((uint16_t*)tcp,
- tcp->th_off << 2);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
- tcp->th_sum = 0xffff ^ tcp_csum;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip4;
+#ifdef TCP_LRO_UPDATE_CSUM
+ uint32_t cl;
+ uint16_t c;
+#endif
+
+ ip4 = le->le_ip4;
+#ifdef TCP_LRO_UPDATE_CSUM
+ /* Fix IP header checksum for new length. */
+ c = ~ip4->ip_sum;
+ cl = c;
+ c = ~ip4->ip_len;
+ cl += c + p_len;
+ while (cl > 0xffff)
+ cl = (cl >> 16) + (cl & 0xffff);
+ c = cl;
+ ip4->ip_sum = ~c;
+#else
+ ip4->ip_sum = TCP_LRO_INVALID_CSUM;
+#endif
+ ip4->ip_len = p_len;
+ th = (struct tcphdr *)(ip4 + 1);
+ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID |
+ CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID;
+ le->p_len += ETHER_HDR_LEN;
+ break;
+ }
+#endif
+ default:
+ th = NULL; /* Keep compiler happy. */
+ }
+ le->m_head->m_pkthdr.csum_data = 0xffff;
+ le->m_head->m_pkthdr.len = le->p_len;
+
+ /* Incorporate the latest ACK into the TCP header. */
+ th->th_ack = le->ack_seq;
+ th->th_win = le->window;
+ /* Incorporate latest timestamp into the TCP header. */
+ if (le->timestamp != 0) {
+ uint32_t *ts_ptr;
+
+ ts_ptr = (uint32_t *)(th + 1);
+ ts_ptr[1] = htonl(le->tsval);
+ ts_ptr[2] = le->tsecr;
+ }
+#ifdef TCP_LRO_UPDATE_CSUM
+ /* Update the TCP header checksum. */
+ le->ulp_csum += p_len;
+ le->ulp_csum += tcp_lro_csum_th(th);
+ while (le->ulp_csum > 0xffff)
+ le->ulp_csum = (le->ulp_csum >> 16) +
+ (le->ulp_csum & 0xffff);
+ th->th_sum = (le->ulp_csum & 0xffff);
+ th->th_sum = ~th->th_sum;
+#else
+ th->th_sum = TCP_LRO_INVALID_CSUM;
+#endif
}
- ifp = cntl->ifp;
- (*ifp->if_input)(cntl->ifp, lro->m_head);
- cntl->lro_queued += lro->append_cnt + 1;
- cntl->lro_flushed++;
- lro->m_head = NULL;
- lro->timestamp = 0;
- lro->append_cnt = 0;
- SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
+
+ (*lc->ifp->if_input)(lc->ifp, le->m_head);
+ lc->lro_queued += le->append_cnt + 1;
+ lc->lro_flushed++;
+ bzero(le, sizeof(*le));
+ SLIST_INSERT_HEAD(&lc->lro_free, le, next);
}
-int
-tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
+#ifdef INET6
+static int
+tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6,
+ struct tcphdr **th)
{
- struct ether_header *eh;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- struct mbuf *m_nxt, *m_tail;
- struct lro_entry *lro;
- int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
- int opt_bytes, trim, csum_flags;
- uint32_t seq, tmp_csum, device_mtu;
-
-
- eh = mtod(m_head, struct ether_header *);
- if (eh->ether_type != htons(ETHERTYPE_IP))
- return 1;
- ip = (struct ip *) (eh + 1);
- if (ip->ip_p != IPPROTO_TCP)
- return 1;
-
- /* ensure there are no options */
- if ((ip->ip_hl << 2) != sizeof (*ip))
- return -1;
-
- /* .. and the packet is not fragmented */
- if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
- return -1;
-
- /* verify that the IP header checksum is correct */
- csum_flags = m_head->m_pkthdr.csum_flags;
+
+ /* XXX-BZ we should check the flow-label. */
+
+ /* XXX-BZ We do not yet support ext. hdrs. */
+ if (ip6->ip6_nxt != IPPROTO_TCP)
+ return (TCP_LRO_NOT_SUPPORTED);
+
+ /* Find the TCP header. */
+ *th = (struct tcphdr *)(ip6 + 1);
+
+ return (0);
+}
+#endif
+
+#ifdef INET
+static int
+tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
+ struct tcphdr **th)
+{
+ int csum_flags;
+ uint16_t csum;
+
+ if (ip4->ip_p != IPPROTO_TCP)
+ return (TCP_LRO_NOT_SUPPORTED);
+
+ /* Ensure there are no options. */
+ if ((ip4->ip_hl << 2) != sizeof (*ip4))
+ return (TCP_LRO_CANNOT);
+
+ /* .. and the packet is not fragmented. */
+ if (ip4->ip_off & htons(IP_MF|IP_OFFMASK))
+ return (TCP_LRO_CANNOT);
+
+ /* Legacy IP has a header checksum that needs to be correct. */
+ csum_flags = m->m_pkthdr.csum_flags;
if (csum_flags & CSUM_IP_CHECKED) {
if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) {
- cntl->lro_bad_csum++;
- return -1;
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
}
} else {
- tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
- if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
- cntl->lro_bad_csum++;
- return -1;
+ csum = in_cksum_hdr(ip4);
+ if (__predict_false((csum) != 0)) {
+ lc->lro_bad_csum++;
+ return (TCP_LRO_CANNOT);
}
}
-
- /* find the TCP header */
- tcp = (struct tcphdr *) (ip + 1);
-
- /* Get the TCP checksum if we dont have it */
- if (!csum)
- csum = tcp->th_sum;
-
- /* ensure no bits set besides ack or psh */
- if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return -1;
-
- /* check for timestamps. Since the only option we handle are
- timestamps, we only have to handle the simple case of
- aligned timestamps */
-
- opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
- tcp_hdr_len = sizeof (*tcp) + opt_bytes;
- ts_ptr = (uint32_t *)(tcp + 1);
- if (opt_bytes != 0) {
- if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
- (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
- return -1;
- }
- ip_len = ntohs(ip->ip_len);
- tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
-
+ /* Find the TCP header (we assured there are no IP options). */
+ *th = (struct tcphdr *)(ip4 + 1);
- /*
- * If frame is padded beyond the end of the IP packet,
- * then we must trim the extra bytes off the end.
- */
- tot_len = m_head->m_pkthdr.len;
- trim = tot_len - (ip_len + ETHER_HDR_LEN);
- if (trim != 0) {
- if (trim < 0) {
- /* truncated packet */
- return -1;
+ return (0);
+}
+#endif
+
+int
+tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+{
+ struct lro_entry *le;
+ struct ether_header *eh;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
+#endif
+#ifdef INET
+ struct ip *ip4 = NULL; /* Keep compiler happy. */
+#endif
+ struct tcphdr *th;
+ void *l3hdr = NULL; /* Keep compiler happy. */
+ uint32_t *ts_ptr;
+ tcp_seq seq;
+ int error, ip_len, l;
+ uint16_t eh_type, tcp_data_len;
+
+ /* We expect a contiguous header [eh, ip, tcp]. */
+
+ eh = mtod(m, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ CURVNET_SET(lc->ifp->if_vnet);
+ if (V_ip6_forwarding != 0) {
+ /* XXX-BZ stats but changing lro_ctrl is a problem. */
+ CURVNET_RESTORE();
+ return (TCP_LRO_CANNOT);
+ }
+ CURVNET_RESTORE();
+ l3hdr = ip6 = (struct ip6_hdr *)(eh + 1);
+ error = tcp_lro_rx_ipv6(lc, m, ip6, &th);
+ if (error != 0)
+ return (error);
+ tcp_data_len = ntohs(ip6->ip6_plen);
+ ip_len = sizeof(*ip6) + tcp_data_len;
+ break;
+ }
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ CURVNET_SET(lc->ifp->if_vnet);
+ if (V_ipforwarding != 0) {
+ /* XXX-BZ stats but changing lro_ctrl is a problem. */
+ CURVNET_RESTORE();
+ return (TCP_LRO_CANNOT);
}
- m_adj(m_head, -trim);
- tot_len = m_head->m_pkthdr.len;
+ CURVNET_RESTORE();
+ l3hdr = ip4 = (struct ip *)(eh + 1);
+ error = tcp_lro_rx_ipv4(lc, m, ip4, &th);
+ if (error != 0)
+ return (error);
+ ip_len = ntohs(ip4->ip_len);
+ tcp_data_len = ip_len - sizeof(*ip4);
+ break;
}
+#endif
+ /* XXX-BZ what happens in case of VLAN(s)? */
+ default:
+ return (TCP_LRO_NOT_SUPPORTED);
+ }
+
+ /*
+ * If the frame is padded beyond the end of the IP packet, then we must
+ * trim the extra bytes off.
+ */
+ l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len);
+ if (l != 0) {
+ if (l < 0)
+ /* Truncated packet. */
+ return (TCP_LRO_CANNOT);
- m_nxt = m_head;
- m_tail = NULL; /* -Wuninitialized */
- while (m_nxt != NULL) {
- m_tail = m_nxt;
- m_nxt = m_tail->m_next;
+ m_adj(m, -l);
}
- hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
- seq = ntohl(tcp->th_seq);
-
- SLIST_FOREACH(lro, &cntl->lro_active, next) {
- if (lro->source_port == tcp->th_sport &&
- lro->dest_port == tcp->th_dport &&
- lro->source_ip == ip->ip_src.s_addr &&
- lro->dest_ip == ip->ip_dst.s_addr) {
- /* Try to append it */
-
- if (__predict_false(seq != lro->next_seq)) {
- /* out of order packet */
- SLIST_REMOVE(&cntl->lro_active, lro,
- lro_entry, next);
- tcp_lro_flush(cntl, lro);
- return -1;
- }
-
- if (opt_bytes) {
- uint32_t tsval = ntohl(*(ts_ptr + 1));
- /* make sure timestamp values are increasing */
- if (__predict_false(lro->tsval > tsval ||
- *(ts_ptr + 2) == 0)) {
- return -1;
- }
- lro->tsval = tsval;
- lro->tsecr = *(ts_ptr + 2);
- }
-
- lro->next_seq += tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
- lro->append_cnt++;
- if (tcp_data_len == 0) {
- m_freem(m_head);
- return 0;
- }
- /* subtract off the checksum of the tcp header
- * from the hardware checksum, and add it to the
- * stored tcp data checksum. Byteswap the checksum
- * if the total length so far is odd
- */
- tmp_csum = do_csum_data((uint16_t*)tcp,
- tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- if (lro->len & 0x1) {
- /* Odd number of bytes so far, flip bytes */
- csum = ((csum << 8) | (csum >> 8)) & 0xffff;
- }
- csum = csum + lro->data_csum;
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->len += tcp_data_len;
-
- /* adjust mbuf so that m->m_data points to
- the first byte of the payload */
- m_adj(m_head, hlen);
- /* append mbuf chain */
- lro->m_tail->m_next = m_head;
- /* advance the last pointer */
- lro->m_tail = m_tail;
- /* flush packet if required */
- device_mtu = cntl->ifp->if_mtu;
- if (lro->len > (65535 - device_mtu)) {
- SLIST_REMOVE(&cntl->lro_active, lro,
- lro_entry, next);
- tcp_lro_flush(cntl, lro);
- }
- return 0;
+ /*
+ * Check TCP header constraints.
+ */
+ /* Ensure no bits set besides ACK or PSH. */
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
+ return (TCP_LRO_CANNOT);
+
+ /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
+ /* XXX-BZ Ideally we'd flush on PUSH? */
+
+ /*
+ * Check for timestamps.
+ * Since the only option we handle are timestamps, we only have to
+ * handle the simple case of aligned timestamps.
+ */
+ l = (th->th_off << 2);
+ tcp_data_len -= l;
+ l -= sizeof(*th);
+ ts_ptr = (uint32_t *)(th + 1);
+ if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
+ (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
+ return (TCP_LRO_CANNOT);
+
+ /* If the driver did not pass in the checksum, set it now. */
+ if (csum == 0x0000)
+ csum = th->th_sum;
+
+ seq = ntohl(th->th_seq);
+
+ /* Try to find a matching previous segment. */
+ SLIST_FOREACH(le, &lc->lro_active, next) {
+ if (le->eh_type != eh_type)
+ continue;
+ if (le->source_port != th->th_sport ||
+ le->dest_port != th->th_dport)
+ continue;
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if (bcmp(&le->source_ip6, &ip6->ip6_src,
+ sizeof(struct in6_addr)) != 0 ||
+ bcmp(&le->dest_ip6, &ip6->ip6_dst,
+ sizeof(struct in6_addr)) != 0)
+ continue;
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ if (le->source_ip4 != ip4->ip_src.s_addr ||
+ le->dest_ip4 != ip4->ip_dst.s_addr)
+ continue;
+ break;
+#endif
+ }
+
+ /* Flush now if appending will result in overflow. */
+ if (le->p_len > (65535 - tcp_data_len)) {
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ break;
+ }
+
+ /* Try to append the new segment. */
+ if (__predict_false(seq != le->next_seq ||
+ (tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
+ /* Out of order packet or duplicate ACK. */
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ return (TCP_LRO_CANNOT);
}
+
+ if (l != 0) {
+ uint32_t tsval = ntohl(*(ts_ptr + 1));
+ /* Make sure timestamp values are increasing. */
+ /* XXX-BZ flip and use TSTMP_GEQ macro for this? */
+ if (__predict_false(le->tsval > tsval ||
+ *(ts_ptr + 2) == 0))
+ return (TCP_LRO_CANNOT);
+ le->tsval = tsval;
+ le->tsecr = *(ts_ptr + 2);
+ }
+
+ le->next_seq += tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ le->append_cnt++;
+
+#ifdef TCP_LRO_UPDATE_CSUM
+ le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th,
+ tcp_data_len, ~csum);
+#endif
+
+ if (tcp_data_len == 0) {
+ m_freem(m);
+ return (0);
+ }
+
+ le->p_len += tcp_data_len;
+
+ /*
+ * Adjust the mbuf so that m_data points to the first byte of
+ * the ULP payload. Adjust the mbuf to avoid complications and
+ * append new segment to existing mbuf chain.
+ */
+ m_adj(m, m->m_pkthdr.len - tcp_data_len);
+ m->m_flags &= ~M_PKTHDR;
+
+ le->m_tail->m_next = m;
+ le->m_tail = m_last(m);
+
+ /*
+ * If a possible next full length packet would cause an
+ * overflow, pro-actively flush now.
+ */
+ if (le->p_len > (65535 - lc->ifp->if_mtu)) {
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ }
+
+ return (0);
}
- if (SLIST_EMPTY(&cntl->lro_free))
- return -1;
-
- /* start a new chain */
- lro = SLIST_FIRST(&cntl->lro_free);
- SLIST_REMOVE_HEAD(&cntl->lro_free, next);
- SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
- lro->source_port = tcp->th_sport;
- lro->dest_port = tcp->th_dport;
- lro->source_ip = ip->ip_src.s_addr;
- lro->dest_ip = ip->ip_dst.s_addr;
- lro->next_seq = seq + tcp_data_len;
- lro->mss = tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
-
- /* save the checksum of just the TCP payload by
- * subtracting off the checksum of the TCP header from
- * the entire hardware checksum
- * Since IP header checksum is correct, checksum over
- * the IP header is -0. Substracting -0 is unnecessary.
- */
- tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->ip = ip;
- /* record timestamp if it is present */
- if (opt_bytes) {
- lro->timestamp = 1;
- lro->tsval = ntohl(*(ts_ptr + 1));
- lro->tsecr = *(ts_ptr + 2);
+ /* Try to find an empty slot. */
+ if (SLIST_EMPTY(&lc->lro_free))
+ return (TCP_LRO_CANNOT);
+
+ /* Start a new segment chain. */
+ le = SLIST_FIRST(&lc->lro_free);
+ SLIST_REMOVE_HEAD(&lc->lro_free, next);
+ SLIST_INSERT_HEAD(&lc->lro_active, le, next);
+
+ /* Start filling in details. */
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ le->le_ip6 = ip6;
+ le->source_ip6 = ip6->ip6_src;
+ le->dest_ip6 = ip6->ip6_dst;
+ le->eh_type = eh_type;
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ le->le_ip4 = ip4;
+ le->source_ip4 = ip4->ip_src.s_addr;
+ le->dest_ip4 = ip4->ip_dst.s_addr;
+ le->eh_type = eh_type;
+ le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN;
+ break;
+#endif
+ }
+ le->source_port = th->th_sport;
+ le->dest_port = th->th_dport;
+
+ le->next_seq = seq + tcp_data_len;
+ le->ack_seq = th->th_ack;
+ le->window = th->th_win;
+ if (l != 0) {
+ le->timestamp = 1;
+ le->tsval = ntohl(*(ts_ptr + 1));
+ le->tsecr = *(ts_ptr + 2);
}
- lro->len = tot_len;
- lro->m_head = m_head;
- lro->m_tail = m_tail;
- return 0;
+
+#ifdef TCP_LRO_UPDATE_CSUM
+ /*
+ * Do not touch the csum of the first packet. However save the
+ * "adjusted" checksum of just the source and destination addresses,
+ * the next header and the TCP payload. The length and TCP header
+ * parts may change, so we remove those from the saved checksum and
+ * re-add with final values on tcp_lro_flush() if needed.
+ */
+ KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n",
+ __func__, le, le->ulp_csum));
+
+ le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len,
+ ~csum);
+ th->th_sum = csum; /* Restore checksum on first packet. */
+#endif
+
+ le->m_head = m;
+ le->m_tail = m_last(m);
+
+ return (0);
}
+
+/* end */
diff --git a/freebsd/sys/netinet/tcp_lro.h b/freebsd/sys/netinet/tcp_lro.h
index 7e498871..b3a50179 100644
--- a/freebsd/sys/netinet/tcp_lro.h
+++ b/freebsd/sys/netinet/tcp_lro.h
@@ -30,31 +30,46 @@
#ifndef _TCP_LRO_H_
#define _TCP_LRO_H_
-struct lro_entry;
struct lro_entry
{
- SLIST_ENTRY(lro_entry) next;
- struct mbuf *m_head;
- struct mbuf *m_tail;
- int timestamp;
- struct ip *ip;
- uint32_t tsval;
- uint32_t tsecr;
- uint32_t source_ip;
- uint32_t dest_ip;
- uint32_t next_seq;
- uint32_t ack_seq;
- uint32_t len;
- uint32_t data_csum;
- uint16_t window;
- uint16_t source_port;
- uint16_t dest_port;
- uint16_t append_cnt;
- uint16_t mss;
-
+ SLIST_ENTRY(lro_entry) next;
+ struct mbuf *m_head;
+ struct mbuf *m_tail;
+ union {
+ struct ip *ip4;
+ struct ip6_hdr *ip6;
+ } leip;
+ union {
+ in_addr_t s_ip4;
+ struct in6_addr s_ip6;
+ } lesource;
+ union {
+ in_addr_t d_ip4;
+ struct in6_addr d_ip6;
+ } ledest;
+ uint16_t source_port;
+ uint16_t dest_port;
+ uint16_t eh_type; /* EthernetHeader type. */
+ uint16_t append_cnt;
+ uint32_t p_len; /* IP header payload length. */
+ uint32_t ulp_csum; /* TCP, etc. checksum. */
+ uint32_t next_seq; /* tcp_seq */
+ uint32_t ack_seq; /* tcp_seq */
+ uint32_t tsval;
+ uint32_t tsecr;
+ uint16_t window;
+ uint16_t timestamp; /* flag, not a TCP hdr field. */
};
SLIST_HEAD(lro_head, lro_entry);
+#define le_ip4 leip.ip4
+#define le_ip6 leip.ip6
+#define source_ip4 lesource.s_ip4
+#define dest_ip4 ledest.d_ip4
+#define source_ip6 lesource.s_ip6
+#define dest_ip6 ledest.d_ip6
+
+/* NB: This is part of driver structs. */
struct lro_ctrl {
struct ifnet *ifp;
int lro_queued;
@@ -66,13 +81,12 @@ struct lro_ctrl {
struct lro_head lro_free;
};
-
int tcp_lro_init(struct lro_ctrl *);
void tcp_lro_free(struct lro_ctrl *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
-/* Number of LRO entries - these are per rx queue */
-#define LRO_ENTRIES 8
+#define TCP_LRO_CANNOT -1
+#define TCP_LRO_NOT_SUPPORTED 1
#endif /* _TCP_LRO_H_ */
diff --git a/freebsd/sys/netinet/tcp_offload.c b/freebsd/sys/netinet/tcp_offload.c
index 93b7d8de..cd41edab 100644
--- a/freebsd/sys/netinet/tcp_offload.c
+++ b/freebsd/sys/netinet/tcp_offload.c
@@ -1,147 +1,178 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <rtems/bsd/sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
#include <net/route.h>
-#include <net/vnet.h>
-
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
-uint32_t toedev_registration_count;
+int registered_toedevs;
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
- struct toedev *tdev;
+ struct toedev *tod;
struct rtentry *rt;
- int error;
-
- if (toedev_registration_count == 0)
- return (EINVAL);
-
- /*
- * Look up the route used for the connection to
- * determine if it uses an interface capable of
- * offloading the connection.
- */
- rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
- if (rt)
+ int error = EOPNOTSUPP;
+
+ INP_WLOCK_ASSERT(sotoinpcb(so));
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: called with sa_family %d", __func__, nam->sa_family));
+
+ if (registered_toedevs == 0)
+ return (error);
+
+ rt = rtalloc1(nam, 0, 0);
+ if (rt)
RT_UNLOCK(rt);
- else
+ else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
- if ((ifp->if_capenable & IFCAP_TOE) == 0) {
- error = EINVAL;
- goto fail;
- }
-
- tdev = TOEDEV(ifp);
- if (tdev == NULL) {
- error = EPERM;
- goto fail;
- }
-
- if (tdev->tod_can_offload(tdev, so) == 0) {
- error = EPERM;
- goto fail;
- }
-
- return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+ if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+ goto done;
+ if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+ goto done;
+
+ tod = TOEDEV(ifp);
+ if (tod != NULL)
+ error = tod->tod_connect(tod, so, rt, nam);
+done:
RTFREE(rt);
return (error);
}
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
-/*
- * This file contains code as a short-term staging area before it is moved in
- * to sys/netinet/tcp_offload.c
- */
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
{
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
}
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
{
+ struct toedev *tod = tp->tod;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_close(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ tod->tod_input(tod, tp, m);
}
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
{
+ struct toedev *tod = tp->tod;
+ int error, flags;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_drop(tp, error);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ flags = tcp_outflags[tp->t_state];
+
+ if (flags & TH_RST) {
+ /* XXX: avoid repeated calls like we do for FIN */
+ error = tod->tod_send_rst(tod, tp);
+ } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+ (tp->t_flags & TF_SENTFIN) == 0) {
+ error = tod->tod_send_fin(tod, tp);
+ if (error == 0)
+ tp->t_flags |= TF_SENTFIN;
+ } else
+ error = tod->tod_output(tod, tp);
+
+ return (error);
+}
+
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_rcvd(tod, tp);
+}
+
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
}
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_pcb_detach(tod, tp);
+}
diff --git a/freebsd/sys/netinet/tcp_offload.h b/freebsd/sys/netinet/tcp_offload.h
index 313185f6..a0523665 100644
--- a/freebsd/sys/netinet/tcp_offload.h
+++ b/freebsd/sys/netinet/tcp_offload.h
@@ -1,30 +1,30 @@
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*
* $FreeBSD$
+ *
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,321 +34,15 @@
#error "no user-serviceable parts inside"
#endif
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the
- * the entire connection from set up to teardown, with some provision
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- * - tells the driver that new data may have been added to the
- * socket's send buffer - the driver should not fail if the
- * buffer is in fact unchanged
- * - the driver is responsible for providing credits (bytes in the send window)
- * back to the socket by calling sbdrop() as segments are acknowledged.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_rcvd
- * - returns credits to the driver and triggers window updates
- * to the peer (a credit as used here is a byte in the peer's receive window)
- * - the driver is expected to determine how many bytes have been
- * consumed and credit that back to the card so that it can grow
- * the window again by maintaining its own state between invocations.
- * - In principle this could be used to shrink the window as well as
- * grow the window, although it is not used for that now.
- * - this function needs to correctly handle being called any number of
- * times without any bytes being consumed from the receive buffer.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_disconnect
- * - tells the driver to send FIN to peer
- * - driver is expected to send the remaining data and then do a clean half close
- * - disconnect implies at least half-close so only send, reset, and detach
- * are legal
- * - the driver is expected to handle transition through the shutdown
- * state machine and allow the stack to support SO_LINGER.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_reset
- * - closes the connection and sends a RST to peer
- * - driver is expectd to trigger an RST and detach the toepcb
- * - no further calls are legal after reset
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * The following fields in the tcpcb are expected to be referenced by the driver:
- * + iss
- * + rcv_nxt
- * + rcv_wnd
- * + snd_isn
- * + snd_max
- * + snd_nxt
- * + snd_una
- * + t_flags
- * + t_inpcb
- * + t_maxseg
- * + t_toe
- *
- * The following fields in the inpcb are expected to be referenced by the driver:
- * + inp_lport
- * + inp_fport
- * + inp_laddr
- * + inp_fport
- * + inp_socket
- * + inp_ip_tos
- *
- * The following fields in the socket are expected to be referenced by the
- * driver:
- * + so_comp
- * + so_error
- * + so_linger
- * + so_options
- * + so_rcv
- * + so_snd
- * + so_state
- * + so_timeo
- *
- * These functions all return 0 on success and can return the following errors
- * as appropriate:
- * + EPERM:
- * + ENOBUFS: memory allocation failed
- * + EMSGSIZE: MTU changed during the call
- * + EHOSTDOWN:
- * + EHOSTUNREACH:
- * + ENETDOWN:
- * * ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- * - tells driver that the socket is going away so disconnect
- * the toepcb and free appropriate resources
- * - allows the driver to cleanly handle the case of connection state
- * outliving the socket
- * - no further calls are legal after detach
- * - the driver is expected to provide its own synchronization between
- * detach and receiving new data.
- *
- * + tu_syncache_event
- * - even if it is not actually needed, the driver is expected to
- * call syncache_add for the initial SYN and then syncache_expand
- * for the SYN,ACK
- * - tells driver that a connection either has not been added or has
- * been dropped from the syncache
- * - the driver is expected to maintain state that lives outside the
- * software stack so the syncache needs to be able to notify the
- * toe driver that the software stack is not going to create a connection
- * for a received SYN
- * - The driver is responsible for any synchronization required between
- * the syncache dropping an entry and the driver processing the SYN,ACK.
- *
- */
-struct toe_usrreqs {
- int (*tu_send)(struct tcpcb *tp);
- int (*tu_rcvd)(struct tcpcb *tp);
- int (*tu_disconnect)(struct tcpcb *tp);
- int (*tu_reset)(struct tcpcb *tp);
- void (*tu_detach)(struct tcpcb *tp);
- void (*tu_syncache_event)(int event, void *toep);
-};
-
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
- u_int64_t to_flags; /* see tcpopt in tcp_var.h */
- u_int16_t to_mss; /* maximum segment size */
- u_int8_t to_wscale; /* window scaling */
+extern int registered_toedevs;
- u_int8_t _pad1; /* explicit pad for 64bit alignment */
- u_int32_t _pad2; /* explicit pad for 64bit alignment */
- u_int64_t _pad3[4]; /* TBD */
-};
+int tcp_offload_connect(struct socket *, struct sockaddr *);
+void tcp_offload_listen_start(struct tcpcb *);
+void tcp_offload_listen_stop(struct tcpcb *);
+void tcp_offload_input(struct tcpcb *, struct mbuf *);
+int tcp_offload_output(struct tcpcb *);
+void tcp_offload_rcvd(struct tcpcb *);
+void tcp_offload_ctloutput(struct tcpcb *, int, int);
+void tcp_offload_detach(struct tcpcb *);
-#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
-#define TOE_SC_DROP 2 /* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
-
-EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
-EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
-
-/*
- * Check if the socket can be offloaded by the following steps:
- * - determine the egress interface
- * - check the interface for TOE capability and TOE is enabled
- * - check if the device has resources to offload the connection
- */
-int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
-
-/*
- * The tcp_output_* routines are wrappers around the toe_usrreqs calls
- * which trigger packet transmission. In the non-offloaded case they
- * translate to tcp_output. The tcp_offload_* routines notify TOE
- * of specific events. I the non-offloaded case they are no-ops.
- *
- * Listen is a special case because it is a 1 to many relationship
- * and there can be more than one offload driver in the system.
- */
-
-/*
- * Connection is offloaded
- */
-#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
-
-/*
- * hackish way of allowing this file to also be included by TOE
- * which needs to be kept ignorant of socket implementation details
- */
-#ifdef _SYS_SOCKETVAR_H_
-/*
- * The socket has not been marked as "do not offload"
- */
-#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
-
-static __inline int
-tcp_output_connect(struct socket *so, struct sockaddr *nam)
-{
- struct tcpcb *tp = sototcpcb(so);
- int error;
-
- /*
- * If offload has been disabled for this socket or the
- * connection cannot be offloaded just call tcp_output
- * to start the TCP state machine.
- */
-#ifndef TCP_OFFLOAD_DISABLE
- if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
-#endif
- error = tcp_output(tp);
- return (error);
-}
-
-static __inline int
-tcp_output_send(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_send(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_rcvd(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_rcvd(tp));
#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_disconnect(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_disconnect(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_reset(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_reset(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline void
-tcp_offload_detach(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- tp->t_tu->tu_detach(tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_open(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
- EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_close(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
-#endif
-}
-#undef SO_OFFLOADABLE
-#endif /* _SYS_SOCKETVAR_H_ */
-#undef tp_offload
-
-void tcp_offload_twstart(struct tcpcb *tp);
-struct tcpcb *tcp_offload_close(struct tcpcb *tp);
-struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
-
-#endif /* _NETINET_TCP_OFFLOAD_H_ */
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index c73fe099..6215c4e2 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -77,6 +77,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -86,31 +89,22 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-#ifdef notyet
-extern struct mbuf *m_copypack();
-#endif
-
VNET_DEFINE(int, path_mtu_discovery) = 1;
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW,
&VNET_NAME(path_mtu_discovery), 1,
"Enable Path MTU Discovery");
-VNET_DEFINE(int, ss_fltsz) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
- &VNET_NAME(ss_fltsz), 1,
- "Slow start flight size");
-
-VNET_DEFINE(int, ss_fltsz_local) = 4;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize,
- CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1,
- "Slow start flight size for local networks");
-
VNET_DEFINE(int, tcp_do_tso) = 1;
#define V_tcp_do_tso VNET(tcp_do_tso)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
&VNET_NAME(tcp_do_tso), 0,
"Enable TCP Segmentation Offload");
+VNET_DEFINE(int, tcp_sendspace) = 1024*32;
+#define V_tcp_sendspace VNET(tcp_sendspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+ &VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
+
VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
@@ -123,7 +117,7 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_inc), 0,
"Incrementor step size of automatic send buffer");
-VNET_DEFINE(int, tcp_autosndbuf_max) = 256*1024;
+VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_max), 0,
@@ -175,7 +169,7 @@ tcp_output(struct tcpcb *tp)
{
struct socket *so = tp->t_inpcb->inp_socket;
long len, recwin, sendwin;
- int off, flags, error;
+ int off, flags, error = 0; /* Keep compiler happy */
struct mbuf *m;
struct ip *ip = NULL;
struct ipovly *ipov = NULL;
@@ -188,7 +182,7 @@ tcp_output(struct tcpcb *tp)
int idle, sendalot;
int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
- int tso;
+ int tso, mtu;
struct tcpopt to;
#if 0
int maxburst = TCP_MAXBURST;
@@ -202,6 +196,11 @@ tcp_output(struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return (tcp_offload_output(tp));
+#endif
+
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -229,9 +228,9 @@ again:
tcp_sack_adjust(tp);
sendalot = 0;
tso = 0;
+ mtu = 0;
off = tp->snd_nxt - tp->snd_una;
sendwin = min(tp->snd_wnd, tp->snd_cwnd);
- sendwin = min(sendwin, tp->snd_bwnd);
flags = tcp_outflags[tp->t_state];
/*
@@ -472,9 +471,8 @@ after_sack_rexmit:
}
/*
- * Truncate to the maximum segment length or enable TCP Segmentation
- * Offloading (if supported by hardware) and ensure that FIN is removed
- * if the length no longer contains the last data byte.
+ * Decide if we can use TCP Segmentation Offloading (if supported by
+ * hardware).
*
* TSO may only be used if we are in a pure bulk sending state. The
* presence of TCP-MD5, SACK retransmits, SACK advertizements and
@@ -482,10 +480,6 @@ after_sack_rexmit:
* (except for the sequence number) for all generated packets. This
* makes it impossible to transmit any options which vary per generated
* segment or packet.
- *
- * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
- * removal of FIN (if not already catched here) are handled later after
- * the exact length of the TCP options are known.
*/
#ifdef IPSEC
/*
@@ -494,22 +488,15 @@ after_sack_rexmit:
*/
ipsec_optlen = ipsec_hdrsiz_tcp(tp);
#endif
- if (len > tp->t_maxseg) {
- if ((tp->t_flags & TF_TSO) && V_tcp_do_tso &&
- ((tp->t_flags & TF_SIGNATURE) == 0) &&
- tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
- tp->t_inpcb->inp_options == NULL &&
- tp->t_inpcb->in6p_options == NULL
+ if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
#ifdef IPSEC
- && ipsec_optlen == 0
+ ipsec_optlen == 0 &&
#endif
- ) {
- tso = 1;
- } else {
- len = tp->t_maxseg;
- sendalot = 1;
- }
- }
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL)
+ tso = 1;
if (sack_rxmit) {
if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -560,19 +547,39 @@ after_sack_rexmit:
}
/*
- * Compare available window to amount of window
- * known to peer (as advertised window less
- * next expected input). If the difference is at least two
- * max size segments, or at least 50% of the maximum possible
- * window, then want to send a window update to peer.
- * Skip this if the connection is in T/TCP half-open state.
- * Don't send pure window updates when the peer has closed
- * the connection and won't ever send more data.
+ * Sending of standalone window updates.
+ *
+ * Window updates are important when we close our window due to a
+ * full socket buffer and are opening it again after the application
+ * reads data from it. Once the window has opened again and the
+ * remote end starts to send again the ACK clock takes over and
+ * provides the most current window information.
+ *
+ * We must avoid the silly window syndrome whereas every read
+ * from the receive buffer, no matter how small, causes a window
+ * update to be sent. We also should avoid sending a flurry of
+ * window updates when the socket buffer had queued a lot of data
+ * and the application is doing small reads.
+ *
+ * Prevent a flurry of pointless window updates by only sending
+ * an update when we can increase the advertized window by more
+ * than 1/4th of the socket buffer capacity. When the buffer is
+ * getting full or is very small be more aggressive and send an
+ * update whenever we can increase by two mss sized segments.
+ * In all other situations the ACK's to new incoming data will
+ * carry further window increases.
+ *
+ * Don't send an independent window update if a delayed
+ * ACK is pending (it will get piggy-backed on it) or the
+ * remote side already has done a half-close and won't send
+ * more data. Skip this if the connection is in T/TCP
+ * half-open state.
*/
if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+ !(tp->t_flags & TF_DELACK) &&
!TCPS_HAVERCVDFIN(tp->t_state)) {
/*
- * "adv" is the amount we can increase the window,
+ * "adv" is the amount we could increase the window,
* taking into account that we are limited by
* TCP_MAXWIN << tp->rcv_scale.
*/
@@ -592,9 +599,11 @@ after_sack_rexmit:
*/
if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
goto dontupdate;
- if (adv >= (long) (2 * tp->t_maxseg))
- goto send;
- if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+
+ if (adv >= (long)(2 * tp->t_maxseg) &&
+ (adv >= (long)(so->so_rcv.sb_hiwat / 4) ||
+ recwin <= (long)(so->so_rcv.sb_hiwat / 8) ||
+ so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
goto send;
}
dontupdate:
@@ -680,7 +689,7 @@ send:
hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
else
#endif
- hdrlen = sizeof (struct tcpiphdr);
+ hdrlen = sizeof (struct tcpiphdr);
/*
* Compute options for segment.
@@ -753,28 +762,54 @@ send:
* bump the packet length beyond the t_maxopd length.
* Clear the FIN bit because we cut off the tail of
* the segment.
- *
- * When doing TSO limit a burst to TCP_MAXWIN minus the
- * IP, TCP and Options length to keep ip->ip_len from
- * overflowing. Prevent the last segment from being
- * fractional thus making them all equal sized and set
- * the flag to continue sending. TSO is disabled when
- * IP options or IPSEC are present.
*/
if (len + optlen + ipoptlen > tp->t_maxopd) {
flags &= ~TH_FIN;
+
if (tso) {
- if (len > TCP_MAXWIN - hdrlen - optlen) {
- len = TCP_MAXWIN - hdrlen - optlen;
- len = len - (len % (tp->t_maxopd - optlen));
+ KASSERT(ipoptlen == 0,
+ ("%s: TSO can't do IP options", __func__));
+
+ /*
+ * Limit a burst to t_tsomax minus IP,
+ * TCP and options length to keep ip->ip_len
+ * from overflowing or exceeding the maximum
+ * length allowed by the network interface.
+ */
+ if (len > tp->t_tsomax - hdrlen) {
+ len = tp->t_tsomax - hdrlen;
+ sendalot = 1;
+ }
+
+ /*
+ * Prevent the last segment from being
+ * fractional unless the send sockbuf can
+ * be emptied.
+ */
+ if (sendalot && off + len < so->so_snd.sb_cc) {
+ len -= len % (tp->t_maxopd - optlen);
sendalot = 1;
- } else if (tp->t_flags & TF_NEEDFIN)
+ }
+
+ /*
+ * Send the FIN in a separate segment
+ * after the bulk sending is done.
+ * We don't trust the TSO implementations
+ * to clear the FIN flag on all but the
+ * last segment.
+ */
+ if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
+
} else {
len = tp->t_maxopd - optlen - ipoptlen;
sendalot = 1;
}
- }
+ } else
+ tso = 0;
+
+ KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
+ ("%s: len > IP_MAXPACKET", __func__));
/*#ifdef DIAGNOSTIC*/
#ifdef INET6
@@ -810,19 +845,6 @@ send:
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
}
-#ifdef notyet
- if ((m = m_copypack(so->so_snd.sb_mb, off,
- (int)len, max_linkhdr + hdrlen)) == 0) {
- SOCKBUF_UNLOCK(&so->so_snd);
- error = ENOBUFS;
- goto out;
- }
- /*
- * m_copypack left space for our hdr; use it.
- */
- m->m_len += hdrlen;
- m->m_data -= hdrlen;
-#else
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
@@ -862,7 +884,7 @@ send:
goto out;
}
}
-#endif
+
/*
* If we're sending everything we've got, set PUSH.
* (This will keep happy those implementations which only
@@ -1059,19 +1081,24 @@ send:
* checksum extended header and data.
*/
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
- if (isipv6)
+ if (isipv6) {
/*
* ip6_plen is not need to be filled now, and will be filled
* in ip6_output.
*/
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen + len);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
+ optlen + len, IPPROTO_TCP, 0);
+ }
+#endif
+#if defined(INET6) && defined(INET)
else
-#endif /* INET6 */
+#endif
+#ifdef INET
{
m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
@@ -1079,6 +1106,7 @@ send:
KASSERT(ip->ip_v == IPVERSION,
("%s: IP version incorrect: %d", __func__, ip->ip_v));
}
+#endif
/*
* Enable TSO and specify the size of the segments.
@@ -1092,6 +1120,16 @@ send:
m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
}
+#ifdef IPSEC
+ KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL),
+ ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u",
+ __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL)));
+#else
+ KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL),
+ ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u",
+ __func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
+#endif
+
/*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
@@ -1183,7 +1221,7 @@ timer:
#endif
ipov->ih_len = save;
}
-#endif
+#endif /* TCPDEBUG */
/*
* Fill in IP length and desired time to live and
@@ -1197,6 +1235,9 @@ timer:
*/
#ifdef INET6
if (isipv6) {
+ struct route_in6 ro;
+
+ bzero(&ro, sizeof(ro));
/*
* we separately set hoplimit for every segment, since the
* user might want to change the value via setsockopt.
@@ -1206,13 +1247,23 @@ timer:
ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
/* TODO: IPv6 IP6TOS_ECT bit on */
- error = ip6_output(m,
- tp->t_inpcb->in6p_outputopts, NULL,
- ((so->so_options & SO_DONTROUTE) ?
- IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb);
- } else
+ error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
+ ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
+ NULL, NULL, tp->t_inpcb);
+
+ if (error == EMSGSIZE && ro.ro_rt != NULL)
+ mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ RO_RTFREE(&ro);
+ }
#endif /* INET6 */
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
{
+ struct route ro;
+
+ bzero(&ro, sizeof(ro));
ip->ip_len = m->m_pkthdr.len;
#ifdef INET6
if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
@@ -1229,10 +1280,15 @@ timer:
if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
ip->ip_off |= IP_DF;
- error = ip_output(m, tp->t_inpcb->inp_options, NULL,
+ error = ip_output(m, tp->t_inpcb->inp_options, &ro,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
tp->t_inpcb);
+
+ if (error == EMSGSIZE && ro.ro_rt != NULL)
+ mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ RO_RTFREE(&ro);
}
+#endif /* INET */
if (error) {
/*
@@ -1277,21 +1333,18 @@ out:
* For some reason the interface we used initially
* to send segments changed to another or lowered
* its MTU.
- *
- * tcp_mtudisc() will find out the new MTU and as
- * its last action, initiate retransmission, so it
- * is important to not do so here.
- *
* If TSO was active we either got an interface
* without TSO capabilits or TSO was turned off.
- * Disable it for this connection as too and
- * immediatly retry with MSS sized segments generated
- * by this function.
+ * If we obtained mtu from ip_output() then update
+ * it and try again.
*/
if (tso)
tp->t_flags &= ~TF_TSO;
- tcp_mtudisc(tp->t_inpcb, -1);
- return (0);
+ if (mtu != 0) {
+ tcp_mss_update(tp, -1, mtu, NULL, NULL);
+ goto again;
+ }
+ return (error);
case EHOSTDOWN:
case EHOSTUNREACH:
case ENETDOWN:
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index 6b2605ce..aebda9db 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -76,24 +76,19 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-static int tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS);
static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
static VNET_DEFINE(int, tcp_reass_maxseg) = 0;
#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg)
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments,
- CTLTYPE_INT | CTLFLAG_RDTUN,
- &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I",
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &VNET_NAME(tcp_reass_maxseg), 0,
"Global maximum number of TCP Segments in Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_qsize) = 0;
-#define V_tcp_reass_qsize VNET(tcp_reass_qsize)
SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
- CTLTYPE_INT | CTLFLAG_RD,
- &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I",
+ (CTLTYPE_INT | CTLFLAG_RD), NULL, 0, &tcp_reass_sysctl_qsize, "I",
"Global number of TCP Segments currently in Reassembly Queue");
static VNET_DEFINE(int, tcp_reass_overflows) = 0;
@@ -111,8 +106,10 @@ static void
tcp_reass_zone_change(void *tag)
{
+ /* Set the zone limit and read back the effective value. */
V_tcp_reass_maxseg = nmbclusters / 16;
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
+ V_tcp_reass_maxseg);
}
void
@@ -124,7 +121,9 @@ tcp_reass_init(void)
&V_tcp_reass_maxseg);
V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ /* Set the zone limit and read back the effective value. */
+ V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
+ V_tcp_reass_maxseg);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}
@@ -158,17 +157,12 @@ tcp_reass_flush(struct tcpcb *tp)
}
static int
-tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS)
-{
- V_tcp_reass_maxseg = uma_zone_get_max(V_tcp_reass_zone);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
-
-static int
tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
{
- V_tcp_reass_qsize = uma_zone_get_cur(V_tcp_reass_zone);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
+ int qsize;
+
+ qsize = uma_zone_get_cur(V_tcp_reass_zone);
+ return (sysctl_handle_int(oidp, &qsize, 0, req));
}
int
@@ -299,7 +293,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
th->th_seq += i;
}
}
- tp->t_rcvoopack++;
+ tp->t_rcvoopack++;
TCPSTAT_INC(tcps_rcvoopack);
TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
diff --git a/freebsd/sys/netinet/tcp_sack.c b/freebsd/sys/netinet/tcp_sack.c
index 449b538f..9cc1d86a 100644
--- a/freebsd/sys/netinet/tcp_sack.c
+++ b/freebsd/sys/netinet/tcp_sack.c
@@ -579,7 +579,7 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
/* Send one or 2 segments based on how much new data was acked. */
- if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) > 2)
+ if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2)
num_segs = 2;
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
(tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg);
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index e23a0997..4c6d14eb 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -72,29 +72,25 @@ __FBSDID("$FreeBSD$");
#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet/ip6.h>
-#endif
-#include <netinet/in_pcb.h>
-#ifdef INET6
#include <netinet6/in6_pcb.h>
-#endif
-#include <netinet/in_var.h>
-#include <netinet/ip_var.h>
-#ifdef INET6
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif
-#include <netinet/ip_icmp.h>
+
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -102,7 +98,12 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef INET6
#include <netinet6/ip6protosw.h>
+#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -166,15 +167,7 @@ SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
&sysctl_net_inet_tcp_mss_v6_check, "I",
"Default TCP Maximum Segment Size for IPv6");
-#endif
-
-static int
-vnet_sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
-{
-
- VNET_SYSCTL_ARG(req, arg1);
- return (sysctl_msec_to_ticks(oidp, arg1, arg2, req));
-}
+#endif /* INET6 */
/*
* Minimum MSS we accept and use. This prevents DoS attacks where
@@ -187,7 +180,7 @@ vnet_sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
&VNET_NAME(tcp_minmss), 0,
- "Minmum TCP Maximum Segment Size");
+ "Minimum TCP Maximum Segment Size");
VNET_DEFINE(int, tcp_do_rfc1323) = 1;
SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
@@ -221,49 +214,9 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
&VNET_NAME(tcp_isn_reseed_interval), 0,
"Seconds between reseeding of ISN secret");
-/*
- * TCP bandwidth limiting sysctls. Note that the default lower bound of
- * 1024 exists only for debugging. A good production default would be
- * something like 6100.
- */
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0,
- "TCP inflight data limiting");
-
-static VNET_DEFINE(int, tcp_inflight_enable) = 0;
-#define V_tcp_inflight_enable VNET(tcp_inflight_enable)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_enable), 0,
- "Enable automatic TCP inflight data limiting");
-
-static int tcp_inflight_debug = 0;
-SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW,
- &tcp_inflight_debug, 0,
- "Debug TCP inflight calculations");
-
-static VNET_DEFINE(int, tcp_inflight_rttthresh);
-#define V_tcp_inflight_rttthresh VNET(tcp_inflight_rttthresh)
-SYSCTL_VNET_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_inflight_rttthresh), 0,
- vnet_sysctl_msec_to_ticks, "I",
- "RTT threshold below which inflight will deactivate itself");
-
-static VNET_DEFINE(int, tcp_inflight_min) = 6144;
-#define V_tcp_inflight_min VNET(tcp_inflight_min)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_min), 0,
- "Lower-bound for TCP inflight window");
-
-static VNET_DEFINE(int, tcp_inflight_max) = TCP_MAXWIN << TCP_MAX_WINSHIFT;
-#define V_tcp_inflight_max VNET(tcp_inflight_max)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_max), 0,
- "Upper-bound for TCP inflight window");
-
-static VNET_DEFINE(int, tcp_inflight_stab) = 20;
-#define V_tcp_inflight_stab VNET(tcp_inflight_stab)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_stab), 0,
- "Inflight Algorithm Stabilization 20 = 2 packets");
+static int tcp_soreceive_stream = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
+ &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
#ifdef TCP_SIGNATURE
static int tcp_sig_checksigs = 1;
@@ -278,7 +231,6 @@ VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
static struct inpcb *tcp_notify(struct inpcb *, int);
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
-static void tcp_isn_tick(void *);
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
@@ -309,7 +261,6 @@ static VNET_DEFINE(uma_zone_t, tcpcb_zone);
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
-struct callout isn_callout;
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -342,13 +293,6 @@ tcp_init(void)
{
int hashsize;
- INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp");
- LIST_INIT(&V_tcb);
-#ifdef VIMAGE
- V_tcbinfo.ipi_vnet = curvnet;
-#endif
- V_tcbinfo.ipi_listhead = &V_tcb;
-
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
&V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
@@ -362,14 +306,9 @@ tcp_init(void)
printf("WARNING: TCB hash size not a power of 2\n");
hashsize = 512; /* safe default */
}
- V_tcbinfo.ipi_hashbase = hashinit(hashsize, M_PCB,
- &V_tcbinfo.ipi_hashmask);
- V_tcbinfo.ipi_porthashbase = hashinit(hashsize, M_PCB,
- &V_tcbinfo.ipi_porthashmask);
- V_tcbinfo.ipi_zone = uma_zcreate("tcp_inpcb", sizeof(struct inpcb),
- NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
- V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
+ in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
+ "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
@@ -405,6 +344,16 @@ tcp_init(void)
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
+ TUNABLE_INT_FETCH("net.inet.tcp.soreceive_stream", &tcp_soreceive_stream);
+ if (tcp_soreceive_stream) {
+#ifdef INET
+ tcp_usrreqs.pru_soreceive = soreceive_stream;
+#endif
+#ifdef INET6
+ tcp6_usrreqs.pru_soreceive = soreceive_stream;
+#endif /* INET6 */
+ }
+
#ifdef INET6
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
#else /* INET6 */
@@ -417,8 +366,6 @@ tcp_init(void)
#undef TCP_MINPROTOHDR
ISN_LOCK_INIT();
- callout_init(&isn_callout, CALLOUT_MPSAFE);
- callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
@@ -434,18 +381,9 @@ tcp_destroy(void)
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
-
- /* XXX check that hashes are empty! */
- hashdestroy(V_tcbinfo.ipi_hashbase, M_PCB,
- V_tcbinfo.ipi_hashmask);
- hashdestroy(V_tcbinfo.ipi_porthashbase, M_PCB,
- V_tcbinfo.ipi_porthashmask);
-
+ in_pcbinfo_destroy(&V_tcbinfo);
uma_zdestroy(V_sack_hole_zone);
uma_zdestroy(V_tcpcb_zone);
- uma_zdestroy(V_tcbinfo.ipi_zone);
-
- INP_INFO_LOCK_DESTROY(&V_tcbinfo);
}
#endif
@@ -453,7 +391,6 @@ void
tcp_fini(void *xtp)
{
- callout_stop(&isn_callout);
}
/*
@@ -481,8 +418,12 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
ip6->ip6_plen = htons(sizeof(struct tcphdr));
ip6->ip6_src = inp->in6p_laddr;
ip6->ip6_dst = inp->in6p_faddr;
- } else
+ }
+#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
#endif
+#ifdef INET
{
struct ip *ip;
@@ -499,6 +440,7 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
ip->ip_src = inp->inp_laddr;
ip->ip_dst = inp->inp_faddr;
}
+#endif /* INET */
th->th_sport = inp->inp_lport;
th->th_dport = inp->inp_fport;
th->th_seq = 0;
@@ -560,7 +502,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
#ifdef INET6
- isipv6 = ((struct ip *)ipgen)->ip_v == 6;
+ isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
ip6 = ipgen;
#endif /* INET6 */
ip = ipgen;
@@ -608,6 +550,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
m_freem(m->m_next);
m->m_next = NULL;
m->m_data = (caddr_t)ipgen;
+ m_addr_changed(m);
/* m_len is set later */
tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
@@ -638,11 +581,14 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
- ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
- tlen));
+ ip6->ip6_plen = 0; /* Set in ip6_output(). */
tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
- } else
+ }
+#endif
+#if defined(INET) && defined(INET6)
+ else
#endif
+#ifdef INET
{
tlen += sizeof (struct tcpiphdr);
ip->ip_len = tlen;
@@ -650,6 +596,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
if (V_path_mtu_discovery)
ip->ip_off |= IP_DF;
}
+#endif
m->m_len = tlen;
m->m_pkthdr.len = tlen;
m->m_pkthdr.rcvif = NULL;
@@ -679,22 +626,27 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
else
nth->th_win = htons((u_short)win);
nth->th_urp = 0;
+
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- nth->th_sum = 0;
- nth->th_sum = in6_cksum(m, IPPROTO_TCP,
- sizeof(struct ip6_hdr),
- tlen - sizeof(struct ip6_hdr));
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ nth->th_sum = in6_cksum_pseudo(ip6,
+ tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
NULL, NULL);
- } else
+ }
#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
}
+#endif /* INET */
#ifdef TCPDEBUG
if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
@@ -702,9 +654,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#ifdef INET6
if (isipv6)
(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
- else
#endif /* INET6 */
- (void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ (void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+#endif
}
/*
@@ -786,10 +742,8 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_rttmin = tcp_rexmit_min;
tp->t_rxtcur = TCPTV_RTOBASE;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->t_rcvtime = ticks;
- tp->t_bw_rtttime = ticks;
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
@@ -878,7 +832,7 @@ tcp_drop(struct tcpcb *tp, int errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
- (void) tcp_output_reset(tp);
+ (void) tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -900,8 +854,19 @@ tcp_discardcb(struct tcpcb *tp)
INP_WLOCK_ASSERT(inp);
/*
- * Make sure that all of our timers are stopped before we
- * delete the PCB.
+ * Make sure that all of our timers are stopped before we delete the
+ * PCB.
+ *
+ * XXXRW: Really, we would like to use callout_drain() here in order
+ * to avoid races experienced in tcp_timer.c where a timer is already
+ * executing at this point. However, we can't, both because we're
+ * running in a context where we can't sleep, and also because we
+ * hold locks required by the timers. What we instead need to do is
+ * test to see if callout_drain() is required, and if so, defer some
+ * portion of the remainder of tcp_discardcb() to an asynchronous
+ * context that can callout_drain() and then continue. Some care
+ * will be required to ensure that no further processing takes place
+ * on the tcpcb, even though it hasn't been freed (a flag?).
*/
callout_stop(&tp->t_timers->tt_rexmt);
callout_stop(&tp->t_timers->tt_persist);
@@ -958,8 +923,6 @@ tcp_discardcb(struct tcpcb *tp)
metrics.rmx_rtt = tp->t_srtt;
metrics.rmx_rttvar = tp->t_rttvar;
- /* XXX: This wraps if the pipe is more than 4 Gbit per second */
- metrics.rmx_bandwidth = tp->snd_bandwidth;
metrics.rmx_cwnd = tp->snd_cwnd;
metrics.rmx_sendpipe = 0;
metrics.rmx_recvpipe = 0;
@@ -969,8 +932,12 @@ tcp_discardcb(struct tcpcb *tp)
/* free the reassembly queue, if any */
tcp_reass_flush(tp);
+
+#ifdef TCP_OFFLOAD
/* Disconnect offload device, if any. */
- tcp_offload_detach(tp);
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_detach(tp);
+#endif
tcp_free_sackholes(tp);
@@ -999,9 +966,10 @@ tcp_close(struct tcpcb *tp)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
- /* Notify any offload devices of listener close */
+#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
- tcp_offload_listen_close(tp);
+ tcp_offload_listen_stop(tp);
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@@ -1211,8 +1179,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
else if (inp->inp_flags & INP_TIMEWAIT) {
bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
xt.xt_tp.t_state = TCPS_TIME_WAIT;
- } else
+ } else {
bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+ if (xt.xt_tp.t_timers)
+ tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer);
+ }
if (inp->inp_socket != NULL)
sotoxsocket(inp->inp_socket, &xt.xt_socket);
else {
@@ -1228,9 +1199,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1257,6 +1228,7 @@ SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist,
CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+#ifdef INET
static int
tcp_getcred(SYSCTL_HANDLER_ARGS)
{
@@ -1271,12 +1243,9 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, addrs[1].sin_addr,
- addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1284,10 +1253,8 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1296,6 +1263,7 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
+#endif /* INET */
#ifdef INET6
static int
@@ -1304,7 +1272,10 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct inpcb *inp;
- int error, mapped = 0;
+ int error;
+#ifdef INET
+ int mapped = 0;
+#endif
error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
@@ -1317,27 +1288,28 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
return (error);
}
if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
+#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
mapped = 1;
else
+#endif
return (EINVAL);
}
- INP_INFO_RLOCK(&V_tcbinfo);
+#ifdef INET
if (mapped == 1)
- inp = in_pcblookup_hash(&V_tcbinfo,
+ inp = in_pcblookup(&V_tcbinfo,
*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
addrs[1].sin6_port,
*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
- addrs[0].sin6_port,
- 0, NULL);
+ addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
else
- inp = in6_pcblookup_hash(&V_tcbinfo,
+#endif
+ inp = in6_pcblookup(&V_tcbinfo,
&addrs[1].sin6_addr, addrs[1].sin6_port,
- &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL);
+ &addrs[0].sin6_addr, addrs[0].sin6_port,
+ INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1345,10 +1317,8 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1357,9 +1327,10 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
-#endif
+#endif /* INET6 */
+#ifdef INET
void
tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
@@ -1408,10 +1379,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
INP_INFO_WLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, faddr, th->th_dport,
- ip->ip_src, th->th_sport, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
+ ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
- INP_WLOCK(inp);
if (!(inp->inp_flags & INP_TIMEWAIT) &&
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
@@ -1473,6 +1443,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
} else
in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
}
+#endif /* INET */
#ifdef INET6
void
@@ -1600,11 +1571,13 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
#define ISN_RANDOM_INCREMENT (4096 - 1)
static VNET_DEFINE(u_char, isn_secret[32]);
+static VNET_DEFINE(int, isn_last);
static VNET_DEFINE(int, isn_last_reseed);
static VNET_DEFINE(u_int32_t, isn_offset);
static VNET_DEFINE(u_int32_t, isn_offset_old);
#define V_isn_secret VNET(isn_secret)
+#define V_isn_last VNET(isn_last)
#define V_isn_last_reseed VNET(isn_last_reseed)
#define V_isn_offset VNET(isn_offset)
#define V_isn_offset_old VNET(isn_offset_old)
@@ -1615,6 +1588,7 @@ tcp_new_isn(struct tcpcb *tp)
MD5_CTX isn_ctx;
u_int32_t md5_buffer[4];
tcp_seq new_isn;
+ u_int32_t projected_offset;
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -1650,38 +1624,17 @@ tcp_new_isn(struct tcpcb *tp)
new_isn = (tcp_seq) md5_buffer[0];
V_isn_offset += ISN_STATIC_INCREMENT +
(arc4random() & ISN_RANDOM_INCREMENT);
- new_isn += V_isn_offset;
- ISN_UNLOCK();
- return (new_isn);
-}
-
-/*
- * Increment the offset to the next ISN_BYTES_PER_SECOND / 100 boundary
- * to keep time flowing at a relatively constant rate. If the random
- * increments have already pushed us past the projected offset, do nothing.
- */
-static void
-tcp_isn_tick(void *xtp)
-{
- VNET_ITERATOR_DECL(vnet_iter);
- u_int32_t projected_offset;
-
- VNET_LIST_RLOCK_NOSLEEP();
- ISN_LOCK();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */
- projected_offset =
- V_isn_offset_old + ISN_BYTES_PER_SECOND / 100;
-
+ if (ticks != V_isn_last) {
+ projected_offset = V_isn_offset_old +
+ ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last);
if (SEQ_GT(projected_offset, V_isn_offset))
V_isn_offset = projected_offset;
-
V_isn_offset_old = V_isn_offset;
- CURVNET_RESTORE();
+ V_isn_last = ticks;
}
+ new_isn += V_isn_offset;
ISN_UNLOCK();
- VNET_LIST_RUNLOCK_NOSLEEP();
- callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
+ return (new_isn);
}
/*
@@ -1755,10 +1708,11 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output_send(tp);
+ tcp_output(tp);
return (inp);
}
+#ifdef INET
/*
* Look-up the routing entry to the peer of this inpcb. If no route
* is found and it cannot be allocated, then return 0. This routine
@@ -1766,7 +1720,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
* tcp_mss_update to get the peer/interface MTU.
*/
u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1791,19 +1745,21 @@ tcp_maxmtu(struct in_conninfo *inc, int *flags)
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
/* Report additional interface capabilities. */
- if (flags != NULL) {
+ if (cap != NULL) {
if (ifp->if_capenable & IFCAP_TSO4 &&
ifp->if_hwassist & CSUM_TSO)
- *flags |= CSUM_TSO;
+ cap->ifcap |= CSUM_TSO;
+ cap->tsomax = ifp->if_hw_tsomax;
}
RTFREE(sro.ro_rt);
}
return (maxmtu);
}
+#endif /* INET */
#ifdef INET6
u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1827,10 +1783,11 @@ tcp_maxmtu6(struct in_conninfo *inc, int *flags)
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
/* Report additional interface capabilities. */
- if (flags != NULL) {
+ if (cap != NULL) {
if (ifp->if_capenable & IFCAP_TSO6 &&
ifp->if_hwassist & CSUM_TSO)
- *flags |= CSUM_TSO;
+ cap->ifcap |= CSUM_TSO;
+ cap->tsomax = ifp->if_hw_tsomax;
}
RTFREE(sro6.ro_rt);
}
@@ -1882,154 +1839,6 @@ ipsec_hdrsiz_tcp(struct tcpcb *tp)
}
#endif /* IPSEC */
-/*
- * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
- *
- * This code attempts to calculate the bandwidth-delay product as a
- * means of determining the optimal window size to maximize bandwidth,
- * minimize RTT, and avoid the over-allocation of buffers on interfaces and
- * routers. This code also does a fairly good job keeping RTTs in check
- * across slow links like modems. We implement an algorithm which is very
- * similar (but not meant to be) TCP/Vegas. The code operates on the
- * transmitter side of a TCP connection and so only effects the transmit
- * side of the connection.
- *
- * BACKGROUND: TCP makes no provision for the management of buffer space
- * at the end points or at the intermediate routers and switches. A TCP
- * stream, whether using NewReno or not, will eventually buffer as
- * many packets as it is able and the only reason this typically works is
- * due to the fairly small default buffers made available for a connection
- * (typicaly 16K or 32K). As machines use larger windows and/or window
- * scaling it is now fairly easy for even a single TCP connection to blow-out
- * all available buffer space not only on the local interface, but on
- * intermediate routers and switches as well. NewReno makes a misguided
- * attempt to 'solve' this problem by waiting for an actual failure to occur,
- * then backing off, then steadily increasing the window again until another
- * failure occurs, ad-infinitum. This results in terrible oscillation that
- * is only made worse as network loads increase and the idea of intentionally
- * blowing out network buffers is, frankly, a terrible way to manage network
- * resources.
- *
- * It is far better to limit the transmit window prior to the failure
- * condition being achieved. There are two general ways to do this: First
- * you can 'scan' through different transmit window sizes and locate the
- * point where the RTT stops increasing, indicating that you have filled the
- * pipe, then scan backwards until you note that RTT stops decreasing, then
- * repeat ad-infinitum. This method works in principle but has severe
- * implementation issues due to RTT variances, timer granularity, and
- * instability in the algorithm which can lead to many false positives and
- * create oscillations as well as interact badly with other TCP streams
- * implementing the same algorithm.
- *
- * The second method is to limit the window to the bandwidth delay product
- * of the link. This is the method we implement. RTT variances and our
- * own manipulation of the congestion window, bwnd, can potentially
- * destabilize the algorithm. For this reason we have to stabilize the
- * elements used to calculate the window. We do this by using the minimum
- * observed RTT, the long term average of the observed bandwidth, and
- * by adding two segments worth of slop. It isn't perfect but it is able
- * to react to changing conditions and gives us a very stable basis on
- * which to extend the algorithm.
- */
-void
-tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
-{
- u_long bw;
- u_long bwnd;
- int save_ticks;
-
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- /*
- * If inflight_enable is disabled in the middle of a tcp connection,
- * make sure snd_bwnd is effectively disabled.
- */
- if (V_tcp_inflight_enable == 0 ||
- tp->t_rttlow < V_tcp_inflight_rttthresh) {
- tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->snd_bandwidth = 0;
- return;
- }
-
- /*
- * Figure out the bandwidth. Due to the tick granularity this
- * is a very rough number and it MUST be averaged over a fairly
- * long period of time. XXX we need to take into account a link
- * that is not using all available bandwidth, but for now our
- * slop will ramp us up if this case occurs and the bandwidth later
- * increases.
- *
- * Note: if ticks rollover 'bw' may wind up negative. We must
- * effectively reset t_bw_rtttime for this case.
- */
- save_ticks = ticks;
- if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
- return;
-
- bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
- (save_ticks - tp->t_bw_rtttime);
- tp->t_bw_rtttime = save_ticks;
- tp->t_bw_rtseq = ack_seq;
- if (tp->t_bw_rtttime == 0 || (int)bw < 0)
- return;
- bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
-
- tp->snd_bandwidth = bw;
-
- /*
- * Calculate the semi-static bandwidth delay product, plus two maximal
- * segments. The additional slop puts us squarely in the sweet
- * spot and also handles the bandwidth run-up case and stabilization.
- * Without the slop we could be locking ourselves into a lower
- * bandwidth.
- *
- * Situations Handled:
- * (1) Prevents over-queueing of packets on LANs, especially on
- * high speed LANs, allowing larger TCP buffers to be
- * specified, and also does a good job preventing
- * over-queueing of packets over choke points like modems
- * (at least for the transmit side).
- *
- * (2) Is able to handle changing network loads (bandwidth
- * drops so bwnd drops, bandwidth increases so bwnd
- * increases).
- *
- * (3) Theoretically should stabilize in the face of multiple
- * connections implementing the same algorithm (this may need
- * a little work).
- *
- * (4) Stability value (defaults to 20 = 2 maximal packets) can
- * be adjusted with a sysctl but typically only needs to be
- * on very slow connections. A value no smaller then 5
- * should be used, but only reduce this default if you have
- * no other choice.
- */
-#define USERTT ((tp->t_srtt + tp->t_rttbest) / 2)
- bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + V_tcp_inflight_stab * tp->t_maxseg / 10;
-#undef USERTT
-
- if (tcp_inflight_debug > 0) {
- static int ltime;
- if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
- ltime = ticks;
- printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
- tp,
- bw,
- tp->t_rttbest,
- tp->t_srtt,
- bwnd
- );
- }
- }
- if ((long)bwnd < V_tcp_inflight_min)
- bwnd = V_tcp_inflight_min;
- if (bwnd > V_tcp_inflight_max)
- bwnd = V_tcp_inflight_max;
- if ((long)bwnd < tp->t_maxseg * 2)
- bwnd = tp->t_maxseg * 2;
- tp->snd_bwnd = bwnd;
-}
-
#ifdef TCP_SIGNATURE
/*
* Callback function invoked by m_apply() to digest TCP segment data
@@ -2071,11 +1880,15 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
u_char *buf, u_int direction)
{
union sockaddr_union dst;
+#ifdef INET
struct ippseudo ippseudo;
+#endif
MD5_CTX ctx;
int doff;
struct ip *ip;
+#ifdef INET
struct ipovly *ipovly;
+#endif
struct secasvar *sav;
struct tcphdr *th;
#ifdef INET6
@@ -2097,12 +1910,14 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
ip6 = NULL; /* Make the compiler happy. */
#endif
switch (ip->ip_v) {
+#ifdef INET
case IPVERSION:
dst.sa.sa_len = sizeof(struct sockaddr_in);
dst.sa.sa_family = AF_INET;
dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ?
ip->ip_src : ip->ip_dst;
break;
+#endif
#ifdef INET6
case (IPV6_VERSION >> 4):
ip6 = mtod(m, struct ip6_hdr *);
@@ -2142,6 +1957,7 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
* tcp_output(), the underlying ip_len member has not yet been set.
*/
switch (ip->ip_v) {
+#ifdef INET
case IPVERSION:
ipovly = (struct ipovly *)ip;
ippseudo.ippseudo_src = ipovly->ih_src;
@@ -2155,6 +1971,7 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip));
doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen;
break;
+#endif
#ifdef INET6
/*
* RFC 2385, 2.0 Proposal
@@ -2335,6 +2152,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
return (error);
break;
#endif
+#ifdef INET
case AF_INET:
fin = (struct sockaddr_in *)&addrs[0];
lin = (struct sockaddr_in *)&addrs[1];
@@ -2342,6 +2160,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
lin->sin_len != sizeof(struct sockaddr_in))
return (EINVAL);
break;
+#endif
default:
return (EINVAL);
}
@@ -2349,18 +2168,19 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup_hash(&V_tcbinfo, &fin6->sin6_addr,
- fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, 0,
- NULL);
+ inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
+ fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
+ INPLOOKUP_WLOCKPCB, NULL);
break;
#endif
+#ifdef INET
case AF_INET:
- inp = in_pcblookup_hash(&V_tcbinfo, fin->sin_addr,
- fin->sin_port, lin->sin_addr, lin->sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
+ lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
break;
+#endif
}
if (inp != NULL) {
- INP_WLOCK(inp);
if (inp->inp_flags & INP_TIMEWAIT) {
/*
* XXXRW: There currently exists a state where an
@@ -2387,7 +2207,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
+SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
0, sysctl_drop, "", "Drop TCP connection");
@@ -2485,6 +2305,7 @@ tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
sp = s + strlen(s);
sprintf(sp, "]:%i", ntohs(th->th_dport));
#endif /* INET6 */
+#ifdef INET
} else if (ip && th) {
inet_ntoa_r(ip->ip_src, sp);
sp = s + strlen(s);
@@ -2493,6 +2314,7 @@ tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
inet_ntoa_r(ip->ip_dst, sp);
sp = s + strlen(s);
sprintf(sp, "]:%i", ntohs(th->th_dport));
+#endif /* INET */
} else {
free(s, M_TCPLOG);
return (NULL);
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index 80da0349..10bd00ae 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_pcbgroup.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -82,10 +83,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/toecore.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -111,10 +114,8 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-#ifdef TCP_OFFLOAD_DISABLE
-#define TOEPCB_ISSET(sc) (0)
-#else
-#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#ifdef TCP_OFFLOAD
+#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
@@ -124,6 +125,7 @@ struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
static int syncache_respond(struct syncache *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
+static int syncache_sysctl_count(SYSCTL_HANDLER_ARGS);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
int docallout);
static void syncache_timer(void *);
@@ -148,7 +150,8 @@ static struct syncache
static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
#define V_tcp_syncache VNET(tcp_syncache)
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
+ "TCP SYN cache");
SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
&VNET_NAME(tcp_syncache.bucket_limit), 0,
@@ -158,8 +161,8 @@ SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
&VNET_NAME(tcp_syncache.cache_limit), 0,
"Overall entry limit for syncache");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
- &VNET_NAME(tcp_syncache.cache_count), 0,
+SYSCTL_VNET_PROC(_net_inet_tcp_syncache, OID_AUTO, count, (CTLTYPE_UINT|CTLFLAG_RD),
+ NULL, 0, &syncache_sysctl_count, "IU",
"Current number of entries in syncache");
SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
@@ -225,7 +228,6 @@ syncache_init(void)
{
int i;
- V_tcp_syncache.cache_count = 0;
V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
@@ -268,7 +270,8 @@ syncache_init(void)
/* Create the syncache entry zone. */
V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
+ V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
+ V_tcp_syncache.cache_limit);
}
#ifdef VIMAGE
@@ -296,8 +299,8 @@ syncache_destroy(void)
mtx_destroy(&sch->sch_mtx);
}
- KASSERT(V_tcp_syncache.cache_count == 0, ("%s: cache_count %d not 0",
- __func__, V_tcp_syncache.cache_count));
+ KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
+ ("%s: cache_count not 0", __func__));
/* Free the allocated global resources. */
uma_zdestroy(V_tcp_syncache.zone);
@@ -305,6 +308,15 @@ syncache_destroy(void)
}
#endif
+static int
+syncache_sysctl_count(SYSCTL_HANDLER_ARGS)
+{
+ int count;
+
+ count = uma_zone_get_cur(V_tcp_syncache.zone);
+ return (sysctl_handle_int(oidp, &count, 0, req));
+}
+
/*
* Inserts a syncache entry into the specified bucket row.
* Locks and unlocks the syncache_head autonomously.
@@ -332,6 +344,14 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_added(tod, sc->sc_todctx);
+ }
+#endif
+
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@@ -339,7 +359,6 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
SCH_UNLOCK(sch);
- V_tcp_syncache.cache_count++;
TCPSTAT_INC(tcps_sc_added);
}
@@ -356,12 +375,15 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
-#endif
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
+
syncache_free(sc);
- V_tcp_syncache.cache_count--;
}
/*
@@ -629,7 +651,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
- int error = 0;
+ int error;
char *s;
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
@@ -663,6 +685,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
@@ -677,8 +700,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#ifdef INET6
}
#endif
+
+ /*
+ * Install in the reservation hash table for now, but don't yet
+ * install a connection group since the full 4-tuple isn't yet
+ * configured.
+ */
inp->inp_lport = sc->sc_inc.inc_lport;
- if ((error = in_pcbinshash(inp)) != 0) {
+ if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
/*
* Undo the assignments above if we failed to
* put the PCB on the hash lists.
@@ -696,6 +725,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
#ifdef IPSEC
@@ -730,13 +760,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr6 = inp->in6p_laddr;
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = sc->sc_inc.inc6_laddr;
+ if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
#ifndef __rtems__
- if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6,
- thread0.td_ucred)) != 0) {
-#else /* __rtems__ */
- if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6,
- NULL)) != 0) {
-#endif /* __rtems__ */
+ thread0.td_ucred, m)) != 0) {
+#else /* __rtems__ */
+ NULL, m)) != 0) {
+#endif /* __rtems__ */
inp->in6p_laddr = laddr6;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
@@ -744,13 +773,18 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
/* Override flowlabel from in6_pcbconnect. */
inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
inp->inp_flow |= sc->sc_flowlabel;
- } else
+ }
+#endif /* INET6 */
+#if defined(INET) && defined(INET6)
+ else
#endif
+#ifdef INET
{
struct in_addr laddr;
struct sockaddr_in sin;
@@ -770,14 +804,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
laddr = inp->inp_laddr;
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = sc->sc_inc.inc_laddr;
+ if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
#ifndef __rtems__
- if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
- thread0.td_ucred)) != 0) {
-#else /* __rtems__ */
- if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin,
- NULL)) != 0) {
-#endif /* __rtems__ */
-
+ thread0.td_ucred, m)) != 0) {
+#else /* __rtems__ */
+ NULL, m)) != 0) {
+#endif /* __rtems__ */
inp->inp_laddr = laddr;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
@@ -785,9 +817,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
}
+#endif /* INET */
+ INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
@@ -835,12 +870,33 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tcp_mss(tp, sc->sc_peer_mss);
/*
- * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
+ * If the SYN,ACK was retransmitted, indicate that CWND to be
+ * limited to one segment in cc_conn_init().
* NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
*/
if (sc->sc_rxmits > 1)
- tp->snd_cwnd = tp->t_maxseg;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ tp->snd_cwnd = 1;
+
+#ifdef TCP_OFFLOAD
+ /*
+ * Allow a TOE driver to install its hooks. Note that we hold the
+ * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
+ * new connection before the TOE driver has done its thing.
+ */
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_offload_socket(tod, sc->sc_todctx, so);
+ }
+#endif
+ /*
+ * Copy and activate timers.
+ */
+ tp->t_keepinit = sototcpcb(lso)->t_keepinit;
+ tp->t_keepidle = sototcpcb(lso)->t_keepidle;
+ tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
+ tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
INP_WUNLOCK(inp);
@@ -913,7 +969,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
- V_tcp_syncache.cache_count--;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
SCH_UNLOCK(sch);
}
@@ -921,7 +983,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
- if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+ if (th->th_ack != sc->sc_iss + 1) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -932,9 +994,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
- if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
- !TOEPCB_ISSET(sc)) {
+ if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -951,8 +1012,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
- if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
- !TOEPCB_ISSET(sc)) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@@ -980,25 +1040,6 @@ failed:
return (0);
}
-int
-tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m)
-{
- struct tcpopt to;
- int rc;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- rc = syncache_expand(inc, &to, th, lsop, m);
- INP_INFO_WUNLOCK(&V_tcbinfo);
-
- return (rc);
-}
-
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1014,8 +1055,8 @@ tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
*/
static void
_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m,
- struct toe_usrreqs *tu, void *toepcb)
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ void *todctx)
{
struct tcpcb *tp;
struct socket *so;
@@ -1080,7 +1121,11 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#ifdef INET6
if (!(inc->inc_flags & INC_ISIPV6))
#endif
+#ifdef INET
ipopts = (m) ? ip_srcroute(m) : NULL;
+#else
+ ipopts = NULL;
+#endif
/*
* See if we already have an entry for this connection.
@@ -1097,11 +1142,6 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
- sc->sc_toepcb);
-#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1134,7 +1174,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
free(s, M_TCPLOG);
}
- if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1185,9 +1225,9 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
-#ifndef TCP_OFFLOAD_DISABLE
- sc->sc_tu = tu;
- sc->sc_toepcb = toepcb;
+#ifdef TCP_OFFLOAD
+ sc->sc_tod = tod;
+ sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@@ -1282,7 +1322,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* Do a standard 3-way handshake.
*/
- if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1314,8 +1354,8 @@ syncache_respond(struct syncache *sc)
{
struct ip *ip = NULL;
struct mbuf *m;
- struct tcphdr *th;
- int optlen, error;
+ struct tcphdr *th = NULL;
+ int optlen, error = 0; /* Make compiler happy */
u_int16_t hlen, tlen, mssopt;
struct tcpopt to;
#ifdef INET6
@@ -1363,8 +1403,12 @@ syncache_respond(struct syncache *sc)
ip6->ip6_flow |= sc->sc_flowlabel;
th = (struct tcphdr *)(ip6 + 1);
- } else
+ }
+#endif
+#if defined(INET6) && defined(INET)
+ else
#endif
+#ifdef INET
{
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
@@ -1391,6 +1435,7 @@ syncache_respond(struct syncache *sc)
th = (struct tcphdr *)(ip + 1);
}
+#endif /* INET */
th->th_sport = sc->sc_inc.inc_lport;
th->th_dport = sc->sc_inc.inc_fport;
@@ -1451,22 +1496,45 @@ syncache_respond(struct syncache *sc)
optlen = 0;
M_SETFIB(m, sc->sc_inc.inc_fibnum);
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
- th->th_sum = 0;
- th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
- tlen + optlen - hlen);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
+ IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
- } else
+ }
+#endif
+#if defined(INET6) && defined(INET)
+ else
#endif
+#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
+#endif
return (error);
}
@@ -1478,23 +1546,12 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
- struct toe_usrreqs *tu, void *toepcb)
+tcp_offload_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, void *tod, void *todctx)
{
- struct tcpopt to;
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
-
- _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
+ _syncache_add(inc, to, th, inp, lsop, NULL, tod, todctx);
}
-
/*
* The purpose of SYN cookies is to avoid keeping track of all SYN's we
* receive and to be able to handle SYN floods from bogus source addresses
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
index 93c7aaa2..c55bfbcd 100644
--- a/freebsd/sys/netinet/tcp_syncache.h
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -34,8 +34,6 @@
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
-struct toeopt;
-
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@@ -43,14 +41,10 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m);
void syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
- struct tcphdr *, struct inpcb *, struct socket **,
- struct toe_usrreqs *tu, void *toepcb);
-
+void tcp_offload_syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@@ -75,12 +69,14 @@ struct syncache {
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
-#ifndef TCP_OFFLOAD_DISABLE
- struct toe_usrreqs *sc_tu; /* TOE operations */
- void *sc_toepcb; /* TOE protocol block */
-#endif
+#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
+ struct toedev *sc_tod; /* entry added by this TOE */
+ void *sc_todctx; /* TOE driver context */
+#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
+
+ u_int32_t sc_spare[2]; /* UTO */
};
/*
@@ -117,7 +113,6 @@ struct tcp_syncache {
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
- u_int cache_count; /* XXX: unprotected */
u_int cache_limit;
u_int rexmt_limit;
u_int hash_secret;
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index 77cc1feb..db952e42 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -34,6 +34,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
@@ -43,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -112,18 +114,25 @@ int tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
&tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
+int tcp_keepcnt = TCPTV_KEEPCNT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
+ "Number of keepalive probes to send");
-static int tcp_keepcnt = TCPTV_KEEPCNT;
/* max idle probes */
int tcp_maxpersistidle;
- /* max idle time in persist */
-int tcp_maxidle;
static int tcp_rexmit_drop_options = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+static int per_cpu_timers = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
+ &per_cpu_timers , 0, "run tcp timers on all cpus");
+
+#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
+ ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
+
/*
* Tcp protocol timeout routine called every 500 ms.
* Updates timestamps used for TCP
@@ -137,7 +146,6 @@ tcp_slowtimo(void)
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
INP_INFO_WLOCK(&V_tcbinfo);
(void) tcp_tw_2msl_scan(0);
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -265,9 +273,9 @@ tcp_timer_2msl(void *xtp)
tp = tcp_close(tp);
} else {
if (tp->t_state != TCPS_TIME_WAIT &&
- ticks - tp->t_rcvtime <= tcp_maxidle)
- callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
- tcp_timer_2msl, tp);
+ ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+ callout_reset_on(&tp->t_timers->tt_2msl,
+ TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
else
tp = tcp_close(tp);
}
@@ -334,7 +342,7 @@ tcp_timer_keep(void *xtp)
goto dropit;
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
- if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
+ if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
goto dropit;
/*
* Send a packet designed to force a response
@@ -356,9 +364,11 @@ tcp_timer_keep(void *xtp)
tp->rcv_nxt, tp->snd_una - 1, 0);
free(t_template, M_TEMP);
}
- callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+ callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+ tcp_timer_keep, tp, INP_CPU(inp));
} else
- callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+ tcp_timer_keep, tp, INP_CPU(inp));
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
@@ -445,6 +455,16 @@ tcp_timer_persist(void *xtp)
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
}
+ /*
+ * If the user has closed the socket then drop a persisting
+ * connection after a much reduced timeout.
+ */
+ if (tp->t_state > TCPS_CLOSE_WAIT &&
+ (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+ TCPSTAT_INC(tcps_persistdrop);
+ tp = tcp_drop(tp, ETIMEDOUT);
+ goto out;
+ }
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
(void) tcp_output(tp);
@@ -474,8 +494,7 @@ tcp_timer_rexmt(void * xtp)
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
- headlocked = 1;
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
/*
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
@@ -486,7 +505,7 @@ tcp_timer_rexmt(void * xtp)
*/
if (inp == NULL) {
tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -494,14 +513,14 @@ tcp_timer_rexmt(void * xtp)
if (callout_pending(&tp->t_timers->tt_rexmt) ||
!callout_active(&tp->t_timers->tt_rexmt)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_rexmt);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -514,13 +533,37 @@ tcp_timer_rexmt(void * xtp)
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop);
+ in_pcbref(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_WUNLOCK(inp);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+
tp = tcp_drop(tp, tp->t_softerror ?
tp->t_softerror : ETIMEDOUT);
+ headlocked = 1;
goto out;
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
headlocked = 0;
- if (tp->t_rxtshift == 1) {
+ if (tp->t_state == TCPS_SYN_SENT) {
+ /*
+ * If the SYN was retransmitted, indicate CWND to be
+ * limited to 1 segment in cc_conn_init().
+ */
+ tp->snd_cwnd = 1;
+ } else if (tp->t_rxtshift == 1) {
/*
* first retransmit; record ssthresh and cwnd so they can
* be recovered if this turns out to be a "bad" retransmit.
@@ -547,13 +590,13 @@ tcp_timer_rexmt(void * xtp)
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
if (tp->t_state == TCPS_SYN_SENT)
- rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
+ rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
/*
- * Disable rfc1323 if we haven't got any response to
+ * Disable RFC1323 and SACK if we haven't got any response to
* our third SYN to work-around some broken terminal servers
* (most of which have hopefully been retired) that have bad VJ
* header compression code which trashes TCP segments containing
@@ -561,7 +604,7 @@ tcp_timer_rexmt(void * xtp)
*/
if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
(tp->t_rxtshift == 3))
- tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
+ tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
/*
* If we backed off this far, our srtt estimate is probably bogus.
* Clobber it so we'll take the next rtt measurement as our srtt;
@@ -572,7 +615,6 @@ tcp_timer_rexmt(void * xtp)
#ifdef INET6
if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
in6_losing(tp->t_inpcb);
- else
#endif
tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
tp->t_srtt = 0;
@@ -610,6 +652,13 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
struct callout *t_callout;
void *f_callout;
+ struct inpcb *inp = tp->t_inpcb;
+ int cpu = INP_CPU(inp);
+
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return;
+#endif
switch (timer_type) {
case TT_DELACK:
@@ -638,7 +687,7 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
if (delta == 0) {
callout_stop(t_callout);
} else {
- callout_reset(t_callout, delta, f_callout, tp);
+ callout_reset_on(t_callout, delta, f_callout, tp, cpu);
}
}
@@ -668,3 +717,24 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
}
return callout_active(t_callout);
}
+
+#define ticks_to_msecs(t) (1000*(t) / hz)
+
+void
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+{
+ bzero(xtimer, sizeof(struct xtcp_timer));
+ if (timer == NULL)
+ return;
+ if (callout_active(&timer->tt_delack))
+ xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
+ if (callout_active(&timer->tt_rexmt))
+ xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
+ if (callout_active(&timer->tt_persist))
+ xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
+ if (callout_active(&timer->tt_keep))
+ xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+ if (callout_active(&timer->tt_2msl))
+ xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+ xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
+}
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index ff455b6b..0da58fd8 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -86,9 +86,6 @@
#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
-#define TCPTV_INFLIGHT_RTTTHRESH (10*hz/1000) /* below which inflight
- disengages, in msec */
-
#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
/*
@@ -121,7 +118,7 @@
#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */
-#define TCPTV_DELACK (hz / PR_FASTHZ / 2) /* 100ms timeout */
+#define TCPTV_DELACK ( hz/10 ) /* 100ms timeout */
#ifdef TCPTIMERS
static const char *tcptimers[] =
@@ -141,6 +138,8 @@ static const char *tcptimers[] =
#ifdef _KERNEL
+struct xtcp_timer;
+
struct tcp_timer {
struct callout tt_rexmt; /* retransmit timer */
struct callout tt_persist; /* retransmit persistence */
@@ -154,10 +153,16 @@ struct tcp_timer {
#define TT_KEEP 0x08
#define TT_2MSL 0x10
+#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
+#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
+#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
+#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
+#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
-extern int tcp_maxidle; /* time to drop after starting probes */
+extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
@@ -177,6 +182,8 @@ void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
void tcp_timer_delack(void *xtp);
+void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+ struct xtcp_timer *xtimer);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
index f9b613a7..9034fab4 100644
--- a/freebsd/sys/netinet/tcp_timewait.c
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -59,23 +59,19 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet/ip6.h>
-#endif
-#include <netinet/in_pcb.h>
-#ifdef INET6
#include <netinet6/in6_pcb.h>
-#endif
-#include <netinet/in_var.h>
-#include <netinet/ip_var.h>
-#ifdef INET6
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif
-#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -88,7 +84,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef INET6
#include <netinet6/ip6protosw.h>
+#endif
#include <machine/in_cksum.h>
@@ -204,15 +202,31 @@ tcp_twstart(struct tcpcb *tp)
struct inpcb *inp = tp->t_inpcb;
int acknow;
struct socket *so;
+#ifdef INET6
+ int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
+#endif
INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_reset(). */
INP_WLOCK_ASSERT(inp);
- if (V_nolocaltimewait && in_localip(inp->inp_faddr)) {
- tp = tcp_close(tp);
- if (tp != NULL)
- INP_WUNLOCK(inp);
- return;
+ if (V_nolocaltimewait) {
+ int error = 0;
+#ifdef INET6
+ if (isipv6)
+ error = in6_localaddr(&inp->in6p_faddr);
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ error = in_localip(inp->inp_faddr);
+#endif
+ if (error) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ return;
+ }
}
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
@@ -493,16 +507,21 @@ int
tcp_twrespond(struct tcptw *tw, int flags)
{
struct inpcb *inp = tw->tw_inpcb;
- struct tcphdr *th;
+#if defined(INET6) || defined(INET)
+ struct tcphdr *th = NULL;
+#endif
struct mbuf *m;
+#ifdef INET
struct ip *ip = NULL;
+#endif
u_int hdrlen, optlen;
- int error;
+ int error = 0; /* Keep compiler happy */
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
+ hdrlen = 0; /* Keep compiler happy */
INP_WLOCK_ASSERT(inp);
@@ -521,14 +540,19 @@ tcp_twrespond(struct tcptw *tw, int flags)
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)(ip6 + 1);
tcpip_fillheaders(inp, ip6, th);
- } else
+ }
#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
{
hdrlen = sizeof(struct tcpiphdr);
ip = mtod(m, struct ip *);
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp, ip, th);
}
+#endif
to.to_flags = 0;
/*
@@ -553,20 +577,25 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_flags = flags;
th->th_win = htons(tw->last_win);
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen);
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ th->th_sum = in6_cksum_pseudo(ip6,
+ sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
error = ip6_output(m, inp->in6p_outputopts, NULL,
(tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
- } else
+ }
#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
{
+ m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
ip->ip_len = m->m_pkthdr.len;
if (V_path_mtu_discovery)
ip->ip_off |= IP_DF;
@@ -574,6 +603,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, inp);
}
+#endif
if (flags & TH_ACK)
TCPSTAT_INC(tcps_sndacks);
else
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index 2c1cd615..61711a6e 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -4,8 +4,12 @@
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California.
* Copyright (c) 2006-2007 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -43,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -66,17 +71,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/cc.h>
#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#ifdef INET6
-#include <netinet/ip6.h>
-#endif
#include <netinet/in_pcb.h>
-#ifdef INET6
-#include <netinet6/in6_pcb.h>
-#endif
+#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
@@ -88,14 +89,18 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
+#endif
/*
* TCP protocol interface to socket abstraction.
*/
static int tcp_attach(struct socket *);
+#ifdef INET
static int tcp_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
+#endif /* INET */
#ifdef INET6
static int tcp6_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
@@ -233,6 +238,7 @@ tcp_usr_detach(struct socket *so)
INP_INFO_WUNLOCK(&V_tcbinfo);
}
+#ifdef INET
/*
* Give the socket an address.
*/
@@ -256,7 +262,6 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -266,14 +271,16 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
+#endif /* INET */
#ifdef INET6
static int
@@ -296,7 +303,6 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -306,8 +312,10 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
+#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
inp->inp_vflag |= INP_IPV4;
@@ -319,18 +327,21 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag &= ~INP_IPV6;
error = in_pcbbind(inp, (struct sockaddr *)&sin,
td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto out;
}
}
+#endif
error = in6_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
+#ifdef INET
/*
* Prepare to accept connections.
*/
@@ -342,7 +353,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -354,21 +364,26 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
- tcp_offload_listen_open(tp);
+#ifdef TCP_OFFLOAD
+ if ((so->so_options & SO_NO_OFFLOAD) == 0)
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
+#endif /* INET */
#ifdef INET6
static int
@@ -379,7 +394,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -391,26 +405,32 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
+#ifdef TCP_OFFLOAD
+ if ((so->so_options & SO_NO_OFFLOAD) == 0)
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
+#ifdef INET
/*
* Initiate connection to peer.
* Create a template for use in transmissions on this connection.
@@ -439,7 +459,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
return (error);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -451,13 +470,20 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
+#endif /* INET */
#ifdef INET6
static int
@@ -480,7 +506,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
&& IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -490,6 +515,12 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+#ifdef INET
+ /*
+ * XXXRW: Some confusion: V4/V6 flags relate to binding, and
+ * therefore probably require the hash lock, which isn't held here.
+ * Is this a significant problem?
+ */
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
@@ -506,9 +537,16 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ error = tcp_output(tp);
goto out;
}
+#endif
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
inp->inp_inc.inc_flags |= INC_ISIPV6;
@@ -516,12 +554,18 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -563,6 +607,7 @@ out:
return (error);
}
+#ifdef INET
/*
* Accept a connection. Essentially all the work is done at higher levels;
* just return the address of the peer, storing through addr.
@@ -614,6 +659,7 @@ out:
*nam = in_sockaddr(port, &addr);
return error;
}
+#endif /* INET */
#ifdef INET6
static int
@@ -633,6 +679,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -658,6 +705,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -692,7 +740,7 @@ tcp_usr_shutdown(struct socket *so)
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output_disconnect(tp);
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -722,7 +770,12 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
- tcp_output_rcvd(tp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_rcvd(tp);
+ else
+#endif
+ tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -744,25 +797,17 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- int headlocked = 0;
#ifdef INET6
int isipv6;
#endif
TCPDEBUG0;
/*
- * We require the pcbinfo lock in two cases:
- *
- * (1) An implied connect is taking place, which can result in
- * binding IPs and ports and hence modification of the pcb hash
- * chains.
- *
- * (2) PRUS_EOF is set, resulting in explicit close on the send.
+ * We require the pcbinfo lock if we will close the socket as part of
+ * this call.
*/
- if ((nam != NULL) || (flags & PRUS_EOF)) {
+ if (flags & PRUS_EOF)
INP_INFO_WLOCK(&V_tcbinfo);
- headlocked = 1;
- }
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -799,13 +844,16 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
- else
#endif /* INET6 */
- error = tcp_connect(tp, nam, td);
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ error = tcp_connect(tp, nam, td);
+#endif
if (error)
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
@@ -820,14 +868,10 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
socantsendmore(so);
tcp_usrclosed(tp);
}
- if (headlocked) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- }
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -859,33 +903,31 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
- else
#endif /* INET6 */
- error = tcp_connect(tp, nam, td);
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ error = tcp_connect(tp, nam, td);
+#endif
if (error)
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- } else if (nam) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
- if (headlocked)
+ if (flags & PRUS_EOF)
INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
@@ -1009,6 +1051,7 @@ out:
return (error);
}
+#ifdef INET
struct pr_usrreqs tcp_usrreqs = {
.pru_abort = tcp_usr_abort,
.pru_accept = tcp_usr_accept,
@@ -1025,12 +1068,10 @@ struct pr_usrreqs tcp_usrreqs = {
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in_getsockaddr,
-#if 0
- .pru_soreceive = soreceive_stream,
-#endif
.pru_sosetlabel = in_pcbsosetlabel,
.pru_close = tcp_usr_close,
};
+#endif /* INET */
#ifdef INET6
struct pr_usrreqs tcp6_usrreqs = {
@@ -1049,14 +1090,12 @@ struct pr_usrreqs tcp6_usrreqs = {
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
-#if 0
- .pru_soreceive = soreceive_stream,
-#endif
- .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_sosetlabel = in_pcbsosetlabel,
.pru_close = tcp_usr_close,
};
#endif /* INET6 */
+#ifdef INET
/*
* Common subroutine to open a TCP connection to remote host specified
* by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
@@ -1076,13 +1115,13 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
u_short lport;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1095,11 +1134,14 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
&inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
if (error && oinp == NULL)
- return error;
- if (oinp)
- return EADDRINUSE;
+ goto out;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
inp->inp_laddr = laddr;
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/*
* Compute window scaling to request:
@@ -1113,13 +1155,16 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return (error);
}
+#endif /* INET */
#ifdef INET6
static int
@@ -1131,13 +1176,13 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
struct in6_addr addr6;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1145,18 +1190,23 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
* earlier incarnation of this same connection still in
* TIME_WAIT state, creating an ADDRINUSE error.
* in6_pcbladdr() also handles scope zone IDs.
+ *
+ * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
+ * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
*/
error = in6_pcbladdr(inp, nam, &addr6);
if (error)
- return error;
- oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
+ goto out;
+ oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
&sin6->sin6_addr, sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
? &addr6
: &inp->in6p_laddr,
inp->inp_lport, 0, NULL);
- if (oinp)
- return EADDRINUSE;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = addr6;
inp->in6p_faddr = sin6->sin6_addr;
@@ -1167,6 +1217,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
inp->inp_flow |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
@@ -1176,12 +1227,14 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return error;
}
#endif /* INET6 */
@@ -1224,7 +1277,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
ti->tcpi_rcv_space = tp->rcv_wnd;
ti->tcpi_rcv_nxt = tp->rcv_nxt;
ti->tcpi_snd_wnd = tp->snd_wnd;
- ti->tcpi_snd_bwnd = tp->snd_bwnd;
+ ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
ti->tcpi_snd_nxt = tp->snd_nxt;
ti->tcpi_snd_mss = tp->t_maxseg;
ti->tcpi_rcv_mss = tp->t_maxseg;
@@ -1254,6 +1307,7 @@ int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error, opt, optval;
+ u_int ui;
struct inpcb *inp;
struct tcpcb *tp;
struct tcp_info ti;
@@ -1269,11 +1323,15 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (inp->inp_vflag & INP_IPV6PROTO) {
INP_WUNLOCK(inp);
error = ip6_ctloutput(so, sopt);
- } else {
+ }
#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ {
INP_WUNLOCK(inp);
error = ip_ctloutput(so, sopt);
-#ifdef INET6
}
#endif
return (error);
@@ -1299,9 +1357,9 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
#endif /* TCP_SIGNATURE */
+
case TCP_NODELAY:
case TCP_NOOPT:
INP_WUNLOCK(inp);
@@ -1327,6 +1385,13 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
+unlock_and_done:
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
INP_WUNLOCK(inp);
break;
@@ -1345,8 +1410,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (TCPS_HAVEESTABLISHED(tp->t_state))
error = tcp_output(tp);
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
@@ -1361,8 +1425,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_maxseg = optval;
else
error = EINVAL;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_INFO:
INP_WUNLOCK(inp);
@@ -1414,6 +1477,64 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
}
}
CC_LIST_RUNLOCK();
+ goto unlock_and_done;
+
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ case TCP_KEEPINIT:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+ if (error)
+ return (error);
+
+ if (ui > (UINT_MAX / hz)) {
+ error = EINVAL;
+ break;
+ }
+ ui *= hz;
+
+ INP_WLOCK_RECHECK(inp);
+ switch (sopt->sopt_name) {
+ case TCP_KEEPIDLE:
+ tp->t_keepidle = ui;
+ /*
+ * XXX: better check current remaining
+ * timeout and "merge" it with new value.
+ */
+ if ((tp->t_state > TCPS_LISTEN) &&
+ (tp->t_state <= TCPS_CLOSING))
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp));
+ break;
+ case TCP_KEEPINTVL:
+ tp->t_keepintvl = ui;
+ if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+ (TP_MAXIDLE(tp) > 0))
+ tcp_timer_activate(tp, TT_2MSL,
+ TP_MAXIDLE(tp));
+ break;
+ case TCP_KEEPINIT:
+ tp->t_keepinit = ui;
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ tp->t_state == TCPS_SYN_SENT)
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPINIT(tp));
+ break;
+ }
+ goto unlock_and_done;
+
+ case TCP_KEEPCNT:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ tp->t_keepcnt = ui;
+ if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+ (TP_MAXIDLE(tp) > 0))
+ tcp_timer_activate(tp, TT_2MSL,
+ TP_MAXIDLE(tp));
INP_WUNLOCK(inp);
break;
@@ -1478,18 +1599,6 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
#undef INP_WLOCK_RECHECK
/*
- * tcp_sendspace and tcp_recvspace are the default send and receive window
- * sizes, respectively. These are obsolescent (this information should
- * be set by the route).
- */
-u_long tcp_sendspace = 1024*32;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
- &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
-u_long tcp_recvspace = 1024*64;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
- &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
-
-/*
* Attach TCP protocol to socket, allocating
* internet protocol control block, tcp control block,
* bufer space, and entering LISTEN state if to accept connections.
@@ -1502,7 +1611,7 @@ tcp_attach(struct socket *so)
int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
if (error)
return (error);
}
@@ -1570,7 +1679,7 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output_disconnect(tp);
+ tcp_output(tp);
}
}
@@ -1593,7 +1702,9 @@ tcp_usrclosed(struct tcpcb *tp)
switch (tp->t_state) {
case TCPS_LISTEN:
- tcp_offload_listen_close(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_stop(tp);
+#endif
/* FALLTHROUGH */
case TCPS_CLOSED:
tp->t_state = TCPS_CLOSED;
@@ -1626,7 +1737,7 @@ tcp_usrclosed(struct tcpcb *tp)
int timeout;
timeout = (tcp_fast_finwait2_recycle) ?
- tcp_finwait2_timeout : tcp_maxidle;
+ tcp_finwait2_timeout : TP_MAXIDLE(tp);
tcp_timer_activate(tp, TT_2MSL, timeout);
}
}
@@ -1865,26 +1976,24 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
db_print_indent(indent);
- db_printf("snd_wnd: %lu snd_cwnd: %lu snd_bwnd: %lu\n",
- tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
+ db_printf("snd_wnd: %lu snd_cwnd: %lu\n",
+ tp->snd_wnd, tp->snd_cwnd);
db_print_indent(indent);
- db_printf("snd_ssthresh: %lu snd_bandwidth: %lu snd_recover: "
- "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
- tp->snd_recover);
+ db_printf("snd_ssthresh: %lu snd_recover: "
+ "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
db_print_indent(indent);
db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
db_print_indent(indent);
- db_printf("t_rttime: %u t_rtsq: 0x%08x t_bw_rtttime: %u\n",
- tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
+ db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
+ tp->t_rtttime, tp->t_rtseq);
db_print_indent(indent);
- db_printf("t_bw_rtseq: 0x%08x t_rxtcur: %d t_maxseg: %u "
- "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
- tp->t_srtt);
+ db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n",
+ tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
db_print_indent(indent);
db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index 618250cd..171eafb6 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -72,6 +72,7 @@ struct sackhint {
int sack_bytes_rexmit;
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
+ int ispare; /* explicit pad for 64bit alignment */
uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */
};
@@ -131,12 +132,12 @@ struct tcpcb {
u_long snd_wnd; /* send window */
u_long snd_cwnd; /* congestion-controlled window */
- u_long snd_bwnd; /* bandwidth-controlled window */
+ u_long snd_spare1; /* unused */
u_long snd_ssthresh; /* snd_cwnd size threshold for
* for slow start exponential to
* linear switch
*/
- u_long snd_bandwidth; /* calculated bandwidth or 0 */
+ u_long snd_spare2; /* unused */
tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
u_int t_maxopd; /* mss plus options */
@@ -146,8 +147,8 @@ struct tcpcb {
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
- u_int t_bw_rtttime; /* used for bandwidth calculation */
- tcp_seq t_bw_rtseq; /* used for bandwidth calculation */
+ u_int t_bw_spare1; /* unused */
+ tcp_seq t_bw_spare2; /* unused */
int t_rxtcur; /* current retransmit value (ticks) */
u_int t_maxseg; /* maximum segment size */
@@ -177,6 +178,7 @@ struct tcpcb {
u_long snd_cwnd_prev; /* cwnd prior to retransmit */
u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
+ int t_sndzerowin; /* zero-window updates sent */
u_int t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
/* SACK related state */
@@ -192,21 +194,25 @@ struct tcpcb {
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
- struct toe_usrreqs *t_tu; /* offload operations vector */
+ struct toedev *tod; /* toedev handling this connection */
+ int t_sndrexmitpack; /* retransmit packets sent */
+ int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */
int t_bytes_acked; /* # bytes acked during current RTT */
-
- int t_sndzerowin; /* zero-window updates sent */
-
struct cc_algo *cc_algo; /* congestion control algorithm */
struct cc_var *ccv; /* congestion control specific vars */
struct osd *osd; /* storage for Khelp module data */
- void *t_pspare2[3]; /* 3 TBD */
- uint64_t _pad[10]; /* 7 UTO, 3 TBD (1-2 CC/RTT?) */
+ u_int t_keepinit; /* time to establish connection */
+ u_int t_keepidle; /* time before keepalive probes begin */
+ u_int t_keepintvl; /* interval between keepalives */
+ u_int t_keepcnt; /* number of keepalives before close */
- uint64_t t_sndrexmitpack;/* retransmit packets sent */
- uint64_t t_rcvoopack; /* out-of-order packets received */
+ u_int t_tsomax; /* tso burst length limit */
+
+ uint32_t t_ispare[7]; /* 5 UTO, 2 TBD */
+ void *t_pspare2[4]; /* 4 TBD */
+ uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
};
/*
@@ -301,6 +307,7 @@ struct tcpopt {
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int32_t to_spare; /* UTO */
};
/*
@@ -319,6 +326,15 @@ struct hc_metrics_lite { /* must stay in sync with hc_metrics */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
};
+/*
+ * Used by tcp_maxmtu() to communicate interface specific features
+ * and limits at the time of connection setup.
+ */
+struct tcp_ifcap {
+ int ifcap;
+ u_int tsomax;
+};
+
#ifndef _NETINET_IN_PCB_H_
struct in_conninfo;
#endif /* _NETINET_IN_PCB_H_ */
@@ -478,7 +494,7 @@ struct tcpstat {
u_long tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
u_long tcps_sack_rcv_blocks; /* SACK blocks (options) received */
u_long tcps_sack_send_blocks; /* SACK blocks (options) sent */
- u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
+ u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
u_long tcps_ecn_ce; /* ECN Congestion Experienced */
@@ -494,7 +510,7 @@ struct tcpstat {
u_long tcps_sig_err_sigopt; /* No signature expected by socket */
u_long tcps_sig_err_nosigopt; /* No signature provided by segment */
- u_long _pad[7]; /* 6 UTO, 1 TBD */
+ u_long _pad[12]; /* 6 UTO, 6 TBD */
};
#ifdef _KERNEL
@@ -535,11 +551,20 @@ struct tcp_hhook_data {
* included. Not all of our clients do.
*/
#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
+struct xtcp_timer {
+ int tt_rexmt; /* retransmit timer */
+ int tt_persist; /* retransmit persistence */
+ int tt_keep; /* keepalive */
+ int tt_2msl; /* 2*msl TIME_WAIT timer */
+ int tt_delack; /* delayed ACK timer */
+ int t_rcvtime; /* Time since last packet received */
+};
struct xtcpcb {
size_t xt_len;
struct inpcb xt_inp;
struct tcpcb xt_tp;
struct xsocket xt_socket;
+ struct xtcp_timer xt_timer;
u_quad_t xt_alignment_hack;
};
#endif
@@ -597,9 +622,10 @@ VNET_DECLARE(int, tcp_mssdflt); /* XXX */
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_delack_enabled);
VNET_DECLARE(int, tcp_do_rfc3390);
+VNET_DECLARE(int, tcp_do_initcwnd10);
+VNET_DECLARE(int, tcp_sendspace);
+VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, path_mtu_discovery);
-VNET_DECLARE(int, ss_fltsz);
-VNET_DECLARE(int, ss_fltsz_local);
VNET_DECLARE(int, tcp_do_rfc3465);
VNET_DECLARE(int, tcp_abc_l_var);
#define V_tcb VNET(tcb)
@@ -609,9 +635,10 @@ VNET_DECLARE(int, tcp_abc_l_var);
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
+#define V_tcp_do_initcwnd10 VNET(tcp_do_initcwnd10)
+#define V_tcp_sendspace VNET(tcp_sendspace)
+#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
-#define V_ss_fltsz VNET(ss_fltsz)
-#define V_ss_fltsz_local VNET(ss_fltsz_local)
#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
@@ -648,7 +675,7 @@ void tcp_init(void);
void tcp_destroy(void);
#endif
void tcp_fini(void *);
-char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
+char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
@@ -659,10 +686,10 @@ void tcp_reass_flush(struct tcpcb *);
void tcp_reass_destroy(void);
#endif
void tcp_input(struct mbuf *, int);
-u_long tcp_maxmtu(struct in_conninfo *, int *);
-u_long tcp_maxmtu6(struct in_conninfo *, int *);
+u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
+u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
- int *);
+ struct tcp_ifcap *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
@@ -695,7 +722,6 @@ void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcp_timer_activate(struct tcpcb *, int, u_int);
int tcp_timer_active(struct tcpcb *, int);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
-void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
@@ -709,8 +735,6 @@ void tcp_hc_updatemtu(struct in_conninfo *, u_long);
void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
-extern u_long tcp_sendspace;
-extern u_long tcp_recvspace;
tcp_seq tcp_new_isn(struct tcpcb *);
void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
diff --git a/freebsd/sys/netinet/toecore.h b/freebsd/sys/netinet/toecore.h
new file mode 100644
index 00000000..6ea98518
--- /dev/null
+++ b/freebsd/sys/netinet/toecore.h
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TOE_H_
+#define _NETINET_TOE_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct tcpopt;
+struct tcphdr;
+struct in_conninfo;
+
+struct toedev {
+ TAILQ_ENTRY(toedev) link; /* glue for toedev_list */
+ void *tod_softc; /* TOE driver private data */
+
+ /*
+ * Active open. If a failure occurs, it is reported back by the driver
+ * via toe_connect_failed.
+ */
+ int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
+ struct sockaddr *);
+
+ /* Passive open. */
+ int (*tod_listen_start)(struct toedev *, struct tcpcb *);
+ int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel uses this routine to pass on any frame it receives for an
+ * offloaded connection to the TOE driver. This is an unusual event.
+ */
+ void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
+
+ /*
+ * This is called by the kernel during pru_rcvd for an offloaded TCP
+ * connection and provides an opportunity for the TOE driver to manage
+ * its rx window and credits.
+ */
+ void (*tod_rcvd)(struct toedev *, struct tcpcb *);
+
+ /*
+ * Transmit routine. The kernel calls this to have the TOE driver
+ * evaluate whether there is data to be transmitted, and transmit it.
+ */
+ int (*tod_output)(struct toedev *, struct tcpcb *);
+
+ /* Immediate teardown: send RST to peer. */
+ int (*tod_send_rst)(struct toedev *, struct tcpcb *);
+
+ /* Initiate orderly disconnect by sending FIN to the peer. */
+ int (*tod_send_fin)(struct toedev *, struct tcpcb *);
+
+ /* Called to indicate that the kernel is done with this TCP PCB. */
+ void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel calls this once it has information about an L2 entry that
+ * the TOE driver enquired about previously (via toe_l2_resolve).
+ */
+ void (*tod_l2_update)(struct toedev *, struct ifnet *,
+ struct sockaddr *, uint8_t *, uint16_t);
+
+ /* XXX. Route has been redirected. */
+ void (*tod_route_redirect)(struct toedev *, struct ifnet *,
+ struct rtentry *, struct rtentry *);
+
+ /* Syncache interaction. */
+ void (*tod_syncache_added)(struct toedev *, void *);
+ void (*tod_syncache_removed)(struct toedev *, void *);
+ int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
+ void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
+
+ /* TCP socket option */
+ void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
+};
+
+#include <sys/eventhandler.h>
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+void init_toedev(struct toedev *);
+int register_toedev(struct toedev *);
+int unregister_toedev(struct toedev *);
+
+/*
+ * General interface for looking up L2 information for an IP address. If an
+ * answer is not available right away then the TOE driver's tod_l2_update will
+ * be called later.
+ */
+int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
+ uint8_t *, uint16_t *);
+
+void toe_connect_failed(struct toedev *, struct inpcb *, int);
+
+void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct inpcb *, void *, void *);
+int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct socket **);
+
+int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
+#endif
diff --git a/freebsd/sys/netinet/udp.h b/freebsd/sys/netinet/udp.h
index 5ec55970..c2d638dd 100644
--- a/freebsd/sys/netinet/udp.h
+++ b/freebsd/sys/netinet/udp.h
@@ -57,7 +57,7 @@ struct udphdr {
* UDP Encapsulation of IPsec Packets options.
*/
/* Encapsulation types. */
-#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
+#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */
/* Default ESP in UDP encapsulation port. */
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index 6c0e61c1..bf95e954 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -4,8 +4,12 @@
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
* The Regents of the University of California.
* Copyright (c) 2008 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -41,6 +45,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
@@ -149,9 +154,12 @@ SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
&VNET_NAME(udpstat), udpstat,
"UDP statistics (struct udpstat, netinet/udp_var.h)");
+#ifdef INET
static void udp_detach(struct socket *so);
static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct thread *);
+#endif
+
#ifdef IPSEC
#ifdef IPSEC_NAT_T
#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
@@ -183,25 +191,12 @@ void
udp_init(void)
{
-
- INP_INFO_LOCK_INIT(&V_udbinfo, "udp");
- LIST_INIT(&V_udb);
-#ifdef VIMAGE
- V_udbinfo.ipi_vnet = curvnet;
-#endif
- V_udbinfo.ipi_listhead = &V_udb;
- V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
- &V_udbinfo.ipi_hashmask);
- V_udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB,
- &V_udbinfo.ipi_porthashmask);
- V_udbinfo.ipi_zone = uma_zcreate("udp_inpcb", sizeof(struct inpcb),
- NULL, NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
-
+ in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
+ "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_udpcb_zone, maxsockets);
-
EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
@@ -244,17 +239,12 @@ void
udp_destroy(void)
{
- hashdestroy(V_udbinfo.ipi_hashbase, M_PCB,
- V_udbinfo.ipi_hashmask);
- hashdestroy(V_udbinfo.ipi_porthashbase, M_PCB,
- V_udbinfo.ipi_porthashmask);
-
+ in_pcbinfo_destroy(&V_udbinfo);
uma_zdestroy(V_udpcb_zone);
- uma_zdestroy(V_udbinfo.ipi_zone);
- INP_INFO_LOCK_DESTROY(&V_udbinfo);
}
#endif
+#ifdef INET
/*
* Subroutine of udp_input(), which appends the provided mbuf chain to the
* passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
@@ -272,25 +262,32 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
#ifdef INET6
struct sockaddr_in6 udp_in6;
#endif
-#ifdef IPSEC
-#ifdef IPSEC_NAT_T
-#ifdef INET
struct udpcb *up;
-#endif
-#endif
-#endif
- INP_RLOCK_ASSERT(inp);
+ INP_LOCK_ASSERT(inp);
+
+ /*
+ * Engage the tunneling protocol.
+ */
+ up = intoudpcb(inp);
+ if (up->u_tun_func != NULL) {
+ (*up->u_tun_func)(n, off, inp);
+ return;
+ }
+
+ if (n == NULL)
+ return;
+
+ off += sizeof(struct udphdr);
#ifdef IPSEC
/* Check AH/ESP integrity. */
if (ipsec4_in_reject(n, inp)) {
m_freem(n);
- V_ipsec4stat.in_polvio++;
+ IPSECSTAT_INC(in_polvio);
return;
}
#ifdef IPSEC_NAT_T
-#ifdef INET
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */
@@ -298,7 +295,6 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
if (n == NULL) /* Consumed. */
return;
}
-#endif /* INET */
#endif /* IPSEC_NAT_T */
#endif /* IPSEC */
#ifdef MAC
@@ -306,14 +302,14 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
m_freem(n);
return;
}
-#endif
+#endif /* MAC */
if (inp->inp_flags & INP_CONTROLOPTS ||
inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
(void)ip6_savecontrol_v4(inp, n, &opts, NULL);
else
-#endif
+#endif /* INET6 */
ip_savecontrol(inp, &opts, ip, n);
}
#ifdef INET6
@@ -324,7 +320,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
in6_sin_2_v4mapsin6(udp_in, &udp_in6);
append_sa = (struct sockaddr *)&udp_in6;
} else
-#endif
+#endif /* INET6 */
append_sa = (struct sockaddr *)udp_in;
m_adj(n, off);
@@ -348,13 +344,10 @@ udp_input(struct mbuf *m, int off)
struct udphdr *uh;
struct ifnet *ifp;
struct inpcb *inp;
- struct udpcb *up;
int len;
struct ip save_ip;
struct sockaddr_in udp_in;
-#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag;
-#endif
ifp = m->m_pkthdr.rcvif;
UDPSTAT_INC(udps_ipackets);
@@ -452,34 +445,12 @@ udp_input(struct mbuf *m, int off)
} else
UDPSTAT_INC(udps_nosum);
-#ifdef IPFIREWALL_FORWARD
- /*
- * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
- */
- fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
- if (fwd_tag != NULL) {
- struct sockaddr_in *next_hop;
-
- /*
- * Do the hack.
- */
- next_hop = (struct sockaddr_in *)(fwd_tag + 1);
- ip->ip_dst = next_hop->sin_addr;
- uh->uh_dport = ntohs(next_hop->sin_port);
-
- /*
- * Remove the tag from the packet. We don't need it anymore.
- */
- m_tag_delete(m, fwd_tag);
- }
-#endif
-
- INP_INFO_RLOCK(&V_udbinfo);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
struct ip_moptions *imo;
+ INP_INFO_RLOCK(&V_udbinfo);
last = NULL;
LIST_FOREACH(inp, &V_udb, inp_list) {
if (inp->inp_lport != uh->uh_dport)
@@ -501,24 +472,24 @@ udp_input(struct mbuf *m, int off)
INP_RLOCK(inp);
/*
- * Detached PCBs can linger in the list if someone
- * holds a reference. (e.g. udp_pcblist)
+ * XXXRW: Because we weren't holding either the inpcb
+ * or the hash lock when we checked for a match
+ * before, we should probably recheck now that the
+ * inpcb lock is held.
*/
- if (inp->inp_socket == NULL) {
- INP_RUNLOCK(inp);
- continue;
- }
/*
* Handle socket delivery policy for any-source
* and source-specific multicast. [RFC3678]
*/
imo = inp->inp_moptions;
- if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
- imo != NULL) {
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct sockaddr_in group;
int blocked;
-
+ if (imo == NULL) {
+ INP_RUNLOCK(inp);
+ continue;
+ }
bzero(&group, sizeof(struct sockaddr_in));
group.sin_len = sizeof(struct sockaddr_in);
group.sin_family = AF_INET;
@@ -541,24 +512,7 @@ udp_input(struct mbuf *m, int off)
struct mbuf *n;
n = m_copy(m, 0, M_COPYALL);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- if (n != NULL)
- udp_append(last,
- ip, n,
- iphlen +
- sizeof(struct udphdr),
- &udp_in);
- } else {
- /*
- * Engage the tunneling protocol we
- * will have to leave the info_lock
- * up, since we are hunting through
- * multiple UDP's.
- */
-
- (*up->u_tun_func)(n, iphlen, last);
- }
+ udp_append(last, ip, n, iphlen, &udp_in);
INP_RUNLOCK(last);
}
last = inp;
@@ -582,18 +536,12 @@ udp_input(struct mbuf *m, int off)
* or multicast datgram.)
*/
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
- }
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
- &udp_in);
- } else {
- /*
- * Engage the tunneling protocol.
- */
- (*up->u_tun_func)(m, iphlen, last);
+ if (inp)
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ goto badunlocked;
}
+ udp_append(last, ip, m, iphlen, &udp_in);
INP_RUNLOCK(last);
INP_INFO_RUNLOCK(&V_udbinfo);
return;
@@ -602,8 +550,41 @@ udp_input(struct mbuf *m, int off)
/*
* Locate pcb for datagram.
*/
- inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, 1, ifp);
+
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
+ if ((m->m_flags & M_IP_NEXTHOP) &&
+ (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
+ struct sockaddr_in *next_hop;
+
+ next_hop = (struct sockaddr_in *)(fwd_tag + 1);
+
+ /*
+ * Transparently forwarded. Pretend to be the destination.
+ * Already got one like this?
+ */
+ inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
+ if (!inp) {
+ /*
+ * It's new. Try to find the ambushing socket.
+ * Because we've rewritten the destination address,
+ * any hardware-generated hash is ignored.
+ */
+ inp = in_pcblookup(&V_udbinfo, ip->ip_src,
+ uh->uh_sport, next_hop->sin_addr,
+ next_hop->sin_port ? htons(next_hop->sin_port) :
+ uh->uh_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, ifp);
+ }
+ /* Remove the tag from the packet. We don't need it anymore. */
+ m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP_NEXTHOP;
+ } else
+ inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, ifp, m);
if (inp == NULL) {
if (udp_log_in_vain) {
char buf[4*sizeof "123"];
@@ -617,57 +598,35 @@ udp_input(struct mbuf *m, int off)
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
+ goto badunlocked;
}
if (V_udp_blackhole)
- goto badheadlocked;
+ goto badunlocked;
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
- goto badheadlocked;
+ goto badunlocked;
*ip = save_ip;
ip->ip_len += iphlen;
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
- INP_INFO_RUNLOCK(&V_udbinfo);
return;
}
/*
* Check the minimum TTL for socket.
*/
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
-
- /*
- * Detached PCBs can linger in the hash table if someone holds a
- * reference. (e.g. udp_pcblist)
- */
- if (inp->inp_socket == NULL) {
- INP_RUNLOCK(inp);
- goto badunlocked;
- }
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
INP_RUNLOCK(inp);
- goto badunlocked;
- }
- up = intoudpcb(inp);
- if (up->u_tun_func == NULL) {
- udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
- } else {
- /*
- * Engage the tunneling protocol.
- */
-
- (*up->u_tun_func)(m, iphlen, inp);
+ m_freem(m);
+ return;
}
+ udp_append(inp, ip, m, iphlen, &udp_in);
INP_RUNLOCK(inp);
return;
-badheadlocked:
- if (inp)
- INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
badunlocked:
m_freem(m);
}
+#endif /* INET */
/*
* Notify a udp user of an asynchronous error; just wake up so that they can
@@ -691,6 +650,7 @@ udp_notify(struct inpcb *inp, int errno)
return (inp);
}
+#ifdef INET
void
udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
@@ -721,21 +681,20 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
return;
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport,
- ip->ip_src, uh->uh_sport, 0, NULL);
+ inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+ ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket != NULL) {
udp_notify(inp, inetctlerrmap[cmd]);
}
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
udp_notify);
}
+#endif /* INET */
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
@@ -820,9 +779,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_udbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_udbinfo);
@@ -848,6 +807,7 @@ SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+#ifdef INET
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
{
@@ -862,12 +822,11 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
- addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
+ inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -875,10 +834,8 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -887,6 +844,7 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
+#endif /* INET */
int
udp_ctloutput(struct socket *so, struct sockopt *sopt)
@@ -905,11 +863,15 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
if (INP_CHECK_SOCKAF(so, AF_INET6)) {
INP_WUNLOCK(inp);
error = ip6_ctloutput(so, sopt);
- } else {
+ }
#endif
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ {
INP_WUNLOCK(inp);
error = ip_ctloutput(so, sopt);
-#ifdef INET6
}
#endif
return (error);
@@ -981,6 +943,10 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
return (error);
}
+#ifdef INET
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
@@ -1010,6 +976,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
}
src.sin_family = 0;
+ INP_RLOCK(inp);
tos = inp->inp_ip_tos;
if (control != NULL) {
/*
@@ -1017,6 +984,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* stored in a single mbuf.
*/
if (control->m_next) {
+ INP_RUNLOCK(inp);
m_freem(control);
m_freem(m);
return (EINVAL);
@@ -1066,6 +1034,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
m_freem(control);
}
if (error) {
+ INP_RUNLOCK(inp);
m_freem(m);
return (error);
}
@@ -1083,29 +1052,26 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* conservative locks than required the second time around, so later
* assertions have to accept that. Further analysis of the number of
* misses under contention is required.
+ *
+ * XXXRW: Check that hash locking update here is correct.
*/
sin = (struct sockaddr_in *)addr;
- INP_RLOCK(inp);
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_RUNLOCK(inp);
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
- unlock_udbinfo = 2;
+ INP_HASH_WLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_WLOCKED;
} else if ((sin != NULL && (
(sin->sin_addr.s_addr == INADDR_ANY) ||
(sin->sin_addr.s_addr == INADDR_BROADCAST) ||
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) {
- INP_RUNLOCK(inp);
- INP_INFO_RLOCK(&V_udbinfo);
- INP_RLOCK(inp);
- }
- unlock_udbinfo = 1;
+ INP_HASH_RLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_RLOCKED;
} else
- unlock_udbinfo = 0;
+ unlock_udbinfo = UH_UNLOCKED;
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -1115,7 +1081,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
laddr = inp->inp_laddr;
lport = inp->inp_lport;
if (src.sin_family == AF_INET) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
if ((lport == 0) ||
(laddr.s_addr == INADDR_ANY &&
src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1166,7 +1132,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
inp->inp_lport == 0 ||
sin->sin_addr.s_addr == INADDR_ANY ||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
&lport, &faddr.s_addr, &fport, NULL,
td->td_ucred);
@@ -1180,8 +1146,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
/* Commit the local port if newly assigned. */
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
- INP_INFO_WLOCK_ASSERT(&V_udbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(&V_udbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1276,25 +1242,25 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
- if (unlock_udbinfo == 2)
- INP_INFO_WUNLOCK(&V_udbinfo);
- else if (unlock_udbinfo == 1)
- INP_INFO_RUNLOCK(&V_udbinfo);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(&V_udbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK(&V_udbinfo);
error = ip_output(m, inp->inp_options, NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_udbinfo == 2)
+ if (unlock_udbinfo == UH_WLOCKED)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
return (error);
release:
- if (unlock_udbinfo == 2) {
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
- } else if (unlock_udbinfo == 1) {
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK(&V_udbinfo);
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
INP_RUNLOCK(inp);
m_freem(m);
@@ -1303,7 +1269,6 @@ release:
#if defined(IPSEC) && defined(IPSEC_NAT_T)
-#ifdef INET
/*
* Potentially decap ESP in UDP frame. Check for an ESP header
* and optional marker; if present, strip the UDP header and
@@ -1332,7 +1297,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
if (minlen > m->m_pkthdr.len)
minlen = m->m_pkthdr.len;
if ((m = m_pullup(m, minlen)) == NULL) {
- V_ipsec4stat.in_inval++;
+ IPSECSTAT_INC(in_inval);
return (NULL); /* Bypass caller processing. */
}
data = mtod(m, caddr_t); /* Points to ip header. */
@@ -1372,7 +1337,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
uint32_t spi;
if (payload <= sizeof(struct esp)) {
- V_ipsec4stat.in_inval++;
+ IPSECSTAT_INC(in_inval);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1393,7 +1358,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
2 * sizeof(uint16_t), M_NOWAIT);
if (tag == NULL) {
- V_ipsec4stat.in_nomem++;
+ IPSECSTAT_INC(in_nomem);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1435,7 +1400,6 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
(void) ipsec4_common_input(m, iphlen, ip->ip_p);
return (NULL); /* NB: consumed, bypass processing. */
}
-#endif /* INET */
#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
static void
@@ -1445,15 +1409,15 @@ udp_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1490,6 +1454,7 @@ udp_attach(struct socket *so, int proto, struct thread *td)
INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
+#endif /* INET */
int
udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
@@ -1512,6 +1477,7 @@ udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
return (0);
}
+#ifdef INET
static int
udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
@@ -1520,11 +1486,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1535,15 +1501,15 @@ udp_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_close: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1555,25 +1521,23 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (EISCONN);
}
sin = (struct sockaddr_in *)nam;
error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
if (error != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbconnect(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1605,21 +1569,19 @@ udp_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr == INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (ENOTCONN);
}
-
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
@@ -1633,6 +1595,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
KASSERT(inp != NULL, ("udp_send: inp == NULL"));
return (udp_output(inp, m, addr, control, td));
}
+#endif /* INET */
int
udp_shutdown(struct socket *so)
@@ -1647,6 +1610,7 @@ udp_shutdown(struct socket *so)
return (0);
}
+#ifdef INET
struct pr_usrreqs udp_usrreqs = {
.pru_abort = udp_abort,
.pru_attach = udp_attach,
@@ -1664,3 +1628,4 @@ struct pr_usrreqs udp_usrreqs = {
.pru_sosetlabel = in_pcbsosetlabel,
.pru_close = udp_close,
};
+#endif /* INET */
diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h
index 5cf7dc9f..6b9b5362 100644
--- a/freebsd/sys/netinet/udp_var.h
+++ b/freebsd/sys/netinet/udp_var.h
@@ -152,7 +152,7 @@ int udp_newudpcb(struct inpcb *);
void udp_discardcb(struct udpcb *);
void udp_ctlinput(int, struct sockaddr *, void *);
-int udp_ctloutput(struct socket *, struct sockopt *);
+int udp_ctloutput(struct socket *, struct sockopt *);
void udp_init(void);
#ifdef VIMAGE
void udp_destroy(void);
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c
index cb7bb73b..15240dfc 100644
--- a/freebsd/sys/netinet6/dest6.c
+++ b/freebsd/sys/netinet6/dest6.c
@@ -95,7 +95,7 @@ dest6_input(struct mbuf **mp, int *offp, int proto)
for (optlen = 0; dstoptlen > 0; dstoptlen -= optlen, opt += optlen) {
if (*opt != IP6OPT_PAD1 &&
(dstoptlen < IP6OPT_MINLEN || *(opt + 1) + 2 > dstoptlen)) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
goto bad;
}
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 3a58a48e..8e6b0680 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -217,7 +217,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
return IPPROTO_DONE;
}
- V_ip6stat.ip6s_fragments++;
+ IP6STAT_INC(ip6s_fragments);
in6_ifstat_inc(dstifp, ifs6_reass_reqd);
/* offset now points to data portion */
@@ -230,7 +230,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
*/
if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
/* XXX-BZ we want dedicated counters for this. */
- V_ip6stat.ip6s_reassembled++;
+ IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
*offp = offset;
return (ip6f->ip6f_nxt);
@@ -605,7 +605,7 @@ insert:
m->m_pkthdr.len = plen;
}
- V_ip6stat.ip6s_reassembled++;
+ IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
/*
@@ -621,7 +621,7 @@ insert:
dropfrag:
IP6Q_UNLOCK();
in6_ifstat_inc(dstifp, ifs6_reass_fail);
- V_ip6stat.ip6s_fragdropped++;
+ IP6STAT_INC(ip6s_fragdropped);
m_freem(m);
return IPPROTO_DONE;
}
@@ -745,7 +745,7 @@ frag6_slowtimo(void)
--q6->ip6q_ttl;
q6 = q6->ip6q_next;
if (q6->ip6q_prev->ip6q_ttl == 0) {
- V_ip6stat.ip6s_fragtimeout++;
+ IP6STAT_INC(ip6s_fragtimeout);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(q6->ip6q_prev);
}
@@ -757,7 +757,7 @@ frag6_slowtimo(void)
*/
while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
V_ip6q.ip6q_prev) {
- V_ip6stat.ip6s_fragoverflow++;
+ IP6STAT_INC(ip6s_fragoverflow);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(V_ip6q.ip6q_prev);
}
@@ -783,7 +783,7 @@ frag6_drain(void)
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
while (V_ip6q.ip6q_next != &V_ip6q) {
- V_ip6stat.ip6s_fragdropped++;
+ IP6STAT_INC(ip6s_fragdropped);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(V_ip6q.ip6q_next);
}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 8c6bc0c5..32d50e94 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -107,6 +107,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/scope6_var.h>
#include <netinet6/mld6_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/send.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -131,18 +132,18 @@ VNET_DECLARE(int, icmp6_nodeinfo);
#define V_icmp6errppslim_last VNET(icmp6errppslim_last)
#define V_icmp6_nodeinfo VNET(icmp6_nodeinfo)
-static void icmp6_errcount(struct icmp6errstat *, int, int);
+static void icmp6_errcount(int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
-static const char *icmp6_redirect_diag __P((struct in6_addr *,
- struct in6_addr *, struct in6_addr *));
+static const char *icmp6_redirect_diag(struct in6_addr *,
+ struct in6_addr *, struct in6_addr *);
static struct mbuf *ni6_input(struct mbuf *, int);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
-static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
- struct ifnet **, struct in6_addr *));
-static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
- struct ifnet *, int));
+static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
+ struct ifnet **, struct in6_addr *);
+static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
+ struct ifnet *, int);
static int icmp6_notify_error(struct mbuf **, int, int, int);
/*
@@ -160,59 +161,59 @@ kmod_icmp6stat_inc(int statnum)
}
static void
-icmp6_errcount(struct icmp6errstat *stat, int type, int code)
+icmp6_errcount(int type, int code)
{
switch (type) {
case ICMP6_DST_UNREACH:
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
- stat->icp6errs_dst_unreach_noroute++;
+ ICMP6STAT_INC(icp6s_odst_unreach_noroute);
return;
case ICMP6_DST_UNREACH_ADMIN:
- stat->icp6errs_dst_unreach_admin++;
+ ICMP6STAT_INC(icp6s_odst_unreach_admin);
return;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
- stat->icp6errs_dst_unreach_beyondscope++;
+ ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
return;
case ICMP6_DST_UNREACH_ADDR:
- stat->icp6errs_dst_unreach_addr++;
+ ICMP6STAT_INC(icp6s_odst_unreach_addr);
return;
case ICMP6_DST_UNREACH_NOPORT:
- stat->icp6errs_dst_unreach_noport++;
+ ICMP6STAT_INC(icp6s_odst_unreach_noport);
return;
}
break;
case ICMP6_PACKET_TOO_BIG:
- stat->icp6errs_packet_too_big++;
+ ICMP6STAT_INC(icp6s_opacket_too_big);
return;
case ICMP6_TIME_EXCEEDED:
switch (code) {
case ICMP6_TIME_EXCEED_TRANSIT:
- stat->icp6errs_time_exceed_transit++;
+ ICMP6STAT_INC(icp6s_otime_exceed_transit);
return;
case ICMP6_TIME_EXCEED_REASSEMBLY:
- stat->icp6errs_time_exceed_reassembly++;
+ ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
return;
}
break;
case ICMP6_PARAM_PROB:
switch (code) {
case ICMP6_PARAMPROB_HEADER:
- stat->icp6errs_paramprob_header++;
+ ICMP6STAT_INC(icp6s_oparamprob_header);
return;
case ICMP6_PARAMPROB_NEXTHEADER:
- stat->icp6errs_paramprob_nextheader++;
+ ICMP6STAT_INC(icp6s_oparamprob_nextheader);
return;
case ICMP6_PARAMPROB_OPTION:
- stat->icp6errs_paramprob_option++;
+ ICMP6STAT_INC(icp6s_oparamprob_option);
return;
}
break;
case ND_REDIRECT:
- stat->icp6errs_redirect++;
+ ICMP6STAT_INC(icp6s_oredirect);
return;
}
- stat->icp6errs_unknown++;
+ ICMP6STAT_INC(icp6s_ounknown);
}
/*
@@ -263,7 +264,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
ICMP6STAT_INC(icp6s_error);
/* count per-type-code statistics */
- icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);
+ icmp6_errcount(type, code);
#ifdef M_DECRYPTED /*not openbsd*/
if (m->m_flags & M_DECRYPTED) {
@@ -416,6 +417,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
int icmp6len = m->m_pkthdr.len - *offp;
int code, sum, noff;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ int ip6len, error;
ifp = m->m_pkthdr.rcvif;
@@ -430,6 +432,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
*/
ip6 = mtod(m, struct ip6_hdr *);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
if (icmp6len < sizeof(struct icmp6_hdr)) {
ICMP6STAT_INC(icp6s_tooshort);
goto freeit;
@@ -772,11 +775,33 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
- nd6_rs_input(m, off, icmp6len);
+
+ /* Send incoming SeND packet to user space. */
+ if (send_sendso_input_hook != NULL) {
+ IP6_EXTHDR_CHECK(m, off,
+ icmp6len, IPPROTO_DONE);
+ error = send_sendso_input_hook(m, ifp,
+ SND_IN, ip6len);
+ /* -1 == no app on SEND socket */
+ if (error == 0)
+ return (IPPROTO_DONE);
+ nd6_rs_input(m, off, icmp6len);
+ } else
+ nd6_rs_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
- nd6_rs_input(n, off, icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ IP6_EXTHDR_CHECK(n, off,
+ icmp6len, IPPROTO_DONE);
+ error = send_sendso_input_hook(n, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ goto freeit;
+ /* -1 == no app on SEND socket */
+ nd6_rs_input(n, off, icmp6len);
+ } else
+ nd6_rs_input(n, off, icmp6len);
/* m stays. */
break;
@@ -787,12 +812,27 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
- /* give up local */
- nd6_ra_input(m, off, icmp6len);
+
+ /* Send incoming SeND-protected/ND packet to user space. */
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(m, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ return (IPPROTO_DONE);
+ nd6_ra_input(m, off, icmp6len);
+ } else
+ nd6_ra_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
- nd6_ra_input(n, off, icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(n, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ goto freeit;
+ nd6_ra_input(n, off, icmp6len);
+ } else
+ nd6_ra_input(n, off, icmp6len);
/* m stays. */
break;
@@ -803,12 +843,25 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
- /* give up local */
- nd6_ns_input(m, off, icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(m, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ return (IPPROTO_DONE);
+ nd6_ns_input(m, off, icmp6len);
+ } else
+ nd6_ns_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
- nd6_ns_input(n, off, icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(n, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ goto freeit;
+ nd6_ns_input(n, off, icmp6len);
+ } else
+ nd6_ns_input(n, off, icmp6len);
/* m stays. */
break;
@@ -819,12 +872,27 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
- /* give up local */
- nd6_na_input(m, off, icmp6len);
+
+ /* Send incoming SeND-protected/ND packet to user space. */
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(m, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ return (IPPROTO_DONE);
+ nd6_na_input(m, off, icmp6len);
+ } else
+ nd6_na_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
- nd6_na_input(n, off, icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(n, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ goto freeit;
+ nd6_na_input(n, off, icmp6len);
+ } else
+ nd6_na_input(n, off, icmp6len);
/* m stays. */
break;
@@ -835,12 +903,25 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
- /* give up local */
- icmp6_redirect_input(m, off);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(m, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ return (IPPROTO_DONE);
+ icmp6_redirect_input(m, off);
+ } else
+ icmp6_redirect_input(m, off);
m = NULL;
goto freeit;
}
- icmp6_redirect_input(n, off);
+ if (send_sendso_input_hook != NULL) {
+ error = send_sendso_input_hook(n, ifp,
+ SND_IN, ip6len);
+ if (error == 0)
+ goto freeit;
+ icmp6_redirect_input(n, off);
+ } else
+ icmp6_redirect_input(n, off);
/* m stays. */
break;
@@ -1102,6 +1183,8 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
ip6cp.ip6c_src = &icmp6src;
ip6cp.ip6c_nxt = nxt;
+ m_addr_changed(m);
+
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
notifymtu = ntohl(icmp6->icmp6_mtu);
ip6cp.ip6c_cmdarg = (void *)&notifymtu;
@@ -2229,6 +2312,8 @@ icmp6_reflect(struct mbuf *m, size_t off)
m->m_flags &= ~(M_BCAST|M_MCAST);
+ m_addr_changed(m);
+
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif)
icmp6_ifoutstat_inc(outif, type, code);
@@ -2290,14 +2375,11 @@ icmp6_redirect_input(struct mbuf *m, int off)
union nd_opts ndopts;
char ip6buf[INET6_ADDRSTRLEN];
- if (!m)
- return;
+ M_ASSERTPKTHDR(m);
+ KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__));
ifp = m->m_pkthdr.rcvif;
- if (!ifp)
- return;
-
/* XXX if we are router, we don't update route by icmp6 redirect */
if (V_ip6_forwarding)
goto freeit;
@@ -2350,23 +2432,23 @@ icmp6_redirect_input(struct mbuf *m, int off)
if (rt) {
if (rt->rt_gateway == NULL ||
rt->rt_gateway->sa_family != AF_INET6) {
+ RTFREE_LOCKED(rt);
nd6log((LOG_ERR,
"ICMP6 redirect rejected; no route "
"with inet6 gateway found for redirect dst: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
- RTFREE_LOCKED(rt);
goto bad;
}
gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
+ RTFREE_LOCKED(rt);
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"not equal to gw-for-src=%s (must be same): "
"%s\n",
ip6_sprintf(ip6buf, gw6),
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
- RTFREE_LOCKED(rt);
goto bad;
}
} else {
@@ -2404,9 +2486,8 @@ icmp6_redirect_input(struct mbuf *m, int off)
icmp6len -= sizeof(*nd_rd);
nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
- nd6log((LOG_INFO, "icmp6_redirect_input: "
- "invalid ND option, rejected: %s\n",
- icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
+ nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n",
+ __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
/* nd6_options have incremented stats */
goto freeit;
}
@@ -2417,10 +2498,9 @@ icmp6_redirect_input(struct mbuf *m, int off)
}
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
- nd6log((LOG_INFO,
- "icmp6_redirect_input: lladdrlen mismatch for %s "
+ nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s "
"(if %d, icmp6 packet %d): %s\n",
- ip6_sprintf(ip6buf, &redtgt6),
+ __func__, ip6_sprintf(ip6buf, &redtgt6),
ifp->if_addrlen, lladdrlen - 2,
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
@@ -2483,6 +2563,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
struct in6_addr *router_ll6;
struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */
struct mbuf *m = NULL; /* newly allocated one */
+ struct m_tag *mtag;
struct ip6_hdr *ip6; /* m as struct ip6_hdr */
struct nd_redirect *nd_rd;
struct llentry *ln = NULL;
@@ -2491,7 +2572,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
struct ifnet *outif = NULL;
struct sockaddr_in6 src_sa;
- icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
+ icmp6_errcount(ND_REDIRECT, 0);
/* if we are not router, we don't send icmp6 redirect */
if (!V_ip6_forwarding)
@@ -2743,6 +2824,15 @@ noredhdropt:;
nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
sizeof(*ip6), ntohs(ip6->ip6_plen));
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short),
+ M_NOWAIT);
+ if (mtag == NULL)
+ goto fail;
+ *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type;
+ m_tag_prepend(m, mtag);
+ }
+
/* send the packet to outside... */
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif) {
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 1eed3901..eac5e11e 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -107,6 +107,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/scope6_var.h>
#include <netinet6/in6_pcb.h>
+VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
+#define V_icmp6_nodeinfo_oldmcprefix VNET(icmp6_nodeinfo_oldmcprefix)
+
/*
* Definitions of some costant IP6 addresses.
*/
@@ -130,15 +133,93 @@ const struct in6_addr in6mask128 = IN6MASK128;
const struct sockaddr_in6 sa6_any =
{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
-static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
- struct ifnet *, struct thread *));
-static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *,
- struct sockaddr_in6 *, int));
+static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t,
+ struct ifnet *, struct thread *);
+static int in6_ifinit(struct ifnet *, struct in6_ifaddr *,
+ struct sockaddr_in6 *, int);
static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
int (*faithprefix_p)(struct in6_addr *);
+#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
+#define ia62ifa(ia6) (&((ia6)->ia_ifa))
+
+void
+in6_ifaddloop(struct ifaddr *ifa)
+{
+ struct sockaddr_dl gateway;
+ struct sockaddr_in6 mask, addr;
+ struct rtentry rt;
+ struct in6_ifaddr *ia;
+ struct ifnet *ifp;
+ struct llentry *ln;
+
+ ia = ifa2ia6(ifa);
+ ifp = ifa->ifa_ifp;
+ IF_AFDATA_LOCK(ifp);
+ ifa->ifa_rtrequest = nd6_rtrequest;
+ ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
+ LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
+ IF_AFDATA_UNLOCK(ifp);
+ if (ln != NULL) {
+ ln->la_expire = 0; /* for IPv6 this means permanent */
+ ln->ln_state = ND6_LLINFO_REACHABLE;
+ /*
+ * initialize for rtmsg generation
+ */
+ bzero(&gateway, sizeof(gateway));
+ gateway.sdl_len = sizeof(gateway);
+ gateway.sdl_family = AF_LINK;
+ gateway.sdl_nlen = 0;
+ gateway.sdl_alen = 6;
+ memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
+ sizeof(ln->ll_addr));
+ LLE_WUNLOCK(ln);
+ }
+
+ bzero(&rt, sizeof(rt));
+ rt.rt_gateway = (struct sockaddr *)&gateway;
+ memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
+ memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
+ rt_mask(&rt) = (struct sockaddr *)&mask;
+ rt_key(&rt) = (struct sockaddr *)&addr;
+ rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC;
+ /* Announce arrival of local address to all FIBs. */
+ rt_newaddrmsg(RTM_ADD, ifa, 0, &rt);
+}
+
+void
+in6_ifremloop(struct ifaddr *ifa)
+{
+ struct sockaddr_dl gateway;
+ struct sockaddr_in6 mask, addr;
+ struct rtentry rt0;
+ struct in6_ifaddr *ia;
+ struct ifnet *ifp;
+ ia = ifa2ia6(ifa);
+ ifp = ifa->ifa_ifp;
+ memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
+ memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
+ lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr,
+ (struct sockaddr *)&mask, LLE_STATIC);
+
+ /*
+ * initialize for rtmsg generation
+ */
+ bzero(&gateway, sizeof(gateway));
+ gateway.sdl_len = sizeof(gateway);
+ gateway.sdl_family = AF_LINK;
+ gateway.sdl_nlen = 0;
+ gateway.sdl_alen = ifp->if_addrlen;
+ bzero(&rt0, sizeof(rt0));
+ rt0.rt_gateway = (struct sockaddr *)&gateway;
+ rt_mask(&rt0) = (struct sockaddr *)&mask;
+ rt_key(&rt0) = (struct sockaddr *)&addr;
+ rt0.rt_flags = RTF_HOST | RTF_STATIC;
+ /* Announce removal of local address to all FIBs. */
+ rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0);
+}
int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
@@ -176,15 +257,12 @@ in6_mask2len(struct in6_addr *mask, u_char *lim0)
return x * 8 + y;
}
-#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
-#define ia62ifa(ia6) (&((ia6)->ia_ifa))
-
#ifdef COMPAT_FREEBSD32
struct in6_ndifreq32 {
- char ifname[IFNAMSIZ];
- uint32_t ifindex;
+ char ifname[IFNAMSIZ];
+ uint32_t ifindex;
};
-#define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32)
+#define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32)
#endif
int
@@ -200,7 +278,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
switch (cmd) {
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
- /*
+ /*
* XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c.
* We cannot see how that would be needed, so do not adjust the
* KPI blindly; more likely should clean up the IPv4 variant.
@@ -413,7 +491,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
}
if (td != NULL) {
- error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
+ error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
if (error)
goto out;
@@ -671,8 +749,32 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
- if (error == 0 && ia)
+ if (error == 0 && ia) {
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ /*
+ * Try to clear the flag when a new
+ * IPv6 address is added onto an
+ * IFDISABLED interface and it
+ * succeeds.
+ */
+ struct in6_ndireq nd;
+
+ memset(&nd, 0, sizeof(nd));
+ nd.ndi.flags = ND_IFINFO(ifp)->flags;
+ nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+ if (nd6_ioctl(SIOCSIFINFO_FLAGS,
+ (caddr_t)&nd, ifp) < 0)
+ log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+ "SIOCSIFINFO_FLAGS for -ifdisabled "
+ "failed.");
+ /*
+ * Ignore failure of clearing the flag
+ * intentionally. The failure means
+ * address duplication was detected.
+ */
+ }
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
break;
}
@@ -714,6 +816,7 @@ out:
return (error);
}
+
/*
* Join necessary multicast groups. Factored out from in6_update_ifa().
* This entire work should only be done once, for the default FIB.
@@ -835,6 +938,17 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
}
+ if (V_icmp6_nodeinfo_oldmcprefix &&
+ in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
+ imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ if (imm == NULL)
+ nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
+ &mltaddr.sin6_addr), if_name(ifp), error));
+ /* XXX not very fatal, go on... */
+ else
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+ }
/*
* Join interface-local all-nodes address.
@@ -1123,6 +1237,10 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
if (hostIsNew && in6if_do_dad(ifp))
ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
+
/*
* We are done if we have simply modified an existing address.
*/
@@ -1247,7 +1365,7 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0)
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(sin6);
sin6.sin6_family = AF_INET6;
- memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr,
+ memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr,
sizeof(sin6.sin6_addr));
error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL);
if (error != 0)
@@ -1256,16 +1374,17 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0)
rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
+ (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
&ia->ia_addr.sin6_addr,
sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
+ /*
* If no more IPv6 address exists on this interface then
* remove the multicast address route.
*/
if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
+ memcpy(&mltaddr.sin6_addr,
+ &satosin6(rt_key(rt))->sin6_addr,
+ sizeof(mltaddr.sin6_addr));
RTFREE_LOCKED(rt);
error = in6_rtrequest(RTM_DELETE,
(struct sockaddr *)&mltaddr,
@@ -1297,16 +1416,17 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0)
rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
+ (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
&ia->ia_addr.sin6_addr,
sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
+ /*
* If no more IPv6 address exists on this interface then
* remove the multicast address route.
*/
if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
+ memcpy(&mltaddr.sin6_addr,
+ &satosin6(rt_key(rt))->sin6_addr,
+ sizeof(mltaddr.sin6_addr));
RTFREE_LOCKED(rt);
error = in6_rtrequest(RTM_DELETE,
@@ -1338,9 +1458,6 @@ in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
- struct sockaddr_dl gateway;
- struct sockaddr_in6 mask, addr;
- struct rtentry rt0;
int plen, error;
struct ifaddr *ifa0;
@@ -1353,8 +1470,7 @@ in6_purgeaddr(struct ifaddr *ifa)
TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) {
if ((ifa0->ifa_addr->sa_family != AF_INET6) ||
memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr,
- &ia->ia_addr.sin6_addr,
- sizeof(struct in6_addr)) == 0)
+ &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0)
continue;
else
break;
@@ -1365,12 +1481,12 @@ in6_purgeaddr(struct ifaddr *ifa)
/*
* Remove the loopback route to the interface address.
- * The check for the current setting of "nd6_useloopback"
+ * The check for the current setting of "nd6_useloopback"
* is not needed.
*/
if (ia->ia_flags & IFA_RTSELF) {
error = ifa_del_loopback_route((struct ifaddr *)ia,
- (struct sockaddr *)&ia->ia_addr);
+ (struct sockaddr *)&ia->ia_addr);
if (error == 0)
ia->ia_flags &= ~IFA_RTSELF;
}
@@ -1379,28 +1495,7 @@ in6_purgeaddr(struct ifaddr *ifa)
nd6_dad_stop(ifa);
/* Remove local address entry from lltable. */
- IF_AFDATA_LOCK(ifp);
- lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR),
- (struct sockaddr *)&ia->ia_addr);
- IF_AFDATA_UNLOCK(ifp);
-
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = ifp->if_addrlen;
- /* */
- bzero(&rt0, sizeof(rt0));
- rt0.rt_gateway = (struct sockaddr *)&gateway;
- memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
- memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
- rt_mask(&rt0) = (struct sockaddr *)&mask;
- rt_key(&rt0) = (struct sockaddr *)&addr;
- rt0.rt_flags = RTF_HOST | RTF_STATIC;
- rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0);
+ in6_ifremloop(ifa);
/* Leave multicast groups. */
error = in6_purgeaddr_mc(ifp, ia, ifa0);
@@ -1793,7 +1888,7 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
* Special case:
* If a new destination address is specified for a point-to-point
* interface, install a route to the destination as an interface
- * direct route.
+ * direct route.
* XXX: the logic below rejects assigning multiple addresses on a p2p
* interface that share the same destination.
*/
@@ -1817,49 +1912,14 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
*/
if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
error = ifa_add_loopback_route((struct ifaddr *)ia,
- (struct sockaddr *)&ia->ia_addr);
+ (struct sockaddr *)&ia->ia_addr);
if (error == 0)
ia->ia_flags |= IFA_RTSELF;
}
/* Add local address to lltable, if necessary (ex. on p2p link). */
- if (newhost) {
- struct llentry *ln;
- struct rtentry rt;
- struct sockaddr_dl gateway;
- struct sockaddr_in6 mask, addr;
-
- IF_AFDATA_LOCK(ifp);
- ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE),
- (struct sockaddr *)&ia->ia_addr);
- IF_AFDATA_UNLOCK(ifp);
- if (ln != NULL) {
- ln->la_expire = 0; /* for IPv6 this means permanent */
- ln->ln_state = ND6_LLINFO_REACHABLE;
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = 6;
- memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned, sizeof(ln->ll_addr));
- /* */
- LLE_WUNLOCK(ln);
- }
-
- bzero(&rt, sizeof(rt));
- rt.rt_gateway = (struct sockaddr *)&gateway;
- memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
- memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
- rt_mask(&rt) = (struct sockaddr *)&mask;
- rt_key(&rt) = (struct sockaddr *)&addr;
- rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC;
- /* Announce arrival of local address to all FIBs. */
- rt_newaddrmsg(RTM_ADD, &ia->ia_ifa, 0, &rt);
- }
+ if (newhost)
+ in6_ifaddloop(&(ia->ia_ifa));
return (error);
}
@@ -1879,7 +1939,7 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
continue;
if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
if ((((struct in6_ifaddr *)ifa)->ia6_flags &
- ignoreflags) != 0)
+ ignoreflags) != 0)
continue;
ifa_ref(ifa);
break;
@@ -1915,6 +1975,32 @@ in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
}
/*
+ * Find a link-local scoped address on ifp and return it if any.
+ */
+struct in6_ifaddr *
+in6ifa_llaonifp(struct ifnet *ifp)
+{
+ struct sockaddr_in6 *sin6;
+ struct ifaddr *ifa;
+
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ return (NULL);
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
+ break;
+ }
+ if_addr_runlock(ifp);
+
+ return ((struct in6_ifaddr *)ifa);
+}
+
+/*
* Convert IP6 address to printable (loggable) representation. Caller
* has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
*/
@@ -1922,7 +2008,7 @@ static char digits[] = "0123456789abcdef";
char *
ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
{
- int i;
+ int i, cnt = 0, maxcnt = 0, idx = 0, index = 0;
char *cp;
const u_int16_t *a = (const u_int16_t *)addr;
const u_int8_t *d;
@@ -1931,6 +2017,23 @@ ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
cp = ip6buf;
for (i = 0; i < 8; i++) {
+ if (*(a + i) == 0) {
+ cnt++;
+ if (cnt == 1)
+ idx = i;
+ }
+ else if (maxcnt < cnt) {
+ maxcnt = cnt;
+ index = idx;
+ cnt = 0;
+ }
+ }
+ if (maxcnt < cnt) {
+ maxcnt = cnt;
+ index = idx;
+ }
+
+ for (i = 0; i < 8; i++) {
if (dcolon == 1) {
if (*a == 0) {
if (i == 7)
@@ -1941,7 +2044,7 @@ ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
dcolon = 2;
}
if (*a == 0) {
- if (dcolon == 0 && *(a + 1) == 0) {
+ if (dcolon == 0 && *(a + 1) == 0 && i == index) {
if (i == 0)
*cp++ = ':';
*cp++ = ':';
@@ -2000,6 +2103,27 @@ in6_localaddr(struct in6_addr *in6)
return (0);
}
+/*
+ * Return 1 if an internet address is for the local host and configured
+ * on one of its interfaces.
+ */
+int
+in6_localip(struct in6_addr *in6)
+{
+ struct in6_ifaddr *ia;
+
+ IN6_IFADDR_RLOCK();
+ TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
+ IN6_IFADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ IN6_IFADDR_RUNLOCK();
+ return (0);
+}
+
+
int
in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
@@ -2008,7 +2132,7 @@ in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
IN6_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &sa6->sin6_addr) &&
+ &sa6->sin6_addr) &&
(ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
IN6_IFADDR_RUNLOCK();
return (1); /* true */
@@ -2226,6 +2350,9 @@ in6if_do_dad(struct ifnet *ifp)
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ return (0);
+
switch (ifp->if_type) {
#ifdef IFT_DUMMY
case IFT_DUMMY:
@@ -2276,7 +2403,7 @@ in6_setmaxmtu(void)
maxmtu = IN6_LINKMTU(ifp);
}
IFNET_RUNLOCK_NOSLEEP();
- if (maxmtu) /* update only when maxmtu is positive */
+ if (maxmtu) /* update only when maxmtu is positive */
V_in6_maxmtu = maxmtu;
}
@@ -2305,6 +2432,7 @@ in6_if2idlen(struct ifnet *ifp)
#ifdef IFT_MIP
case IFT_MIP: /* ditto */
#endif
+ case IFT_INFINIBAND:
return (64);
case IFT_FDDI: /* RFC2467 */
return (64);
@@ -2346,76 +2474,73 @@ struct in6_llentry {
struct sockaddr_in6 l3_addr6;
};
+/*
+ * Deletes an address from the address table.
+ * This function is called by the timer functions
+ * such as arptimer() and nd6_llinfo_timer(), and
+ * the caller does the locking.
+ */
+static void
+in6_lltable_free(struct lltable *llt, struct llentry *lle)
+{
+ LLE_WUNLOCK(lle);
+ LLE_LOCK_DESTROY(lle);
+ free(lle, M_LLTABLE);
+}
+
static struct llentry *
in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
{
struct in6_llentry *lle;
- lle = malloc(sizeof(struct in6_llentry), M_LLTABLE,
- M_DONTWAIT | M_ZERO);
+ lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
lle->base.lle_refcnt = 1;
+ lle->base.lle_free = in6_lltable_free;
LLE_LOCK_INIT(&lle->base);
callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
CALLOUT_RETURNUNLOCKED);
- return &lle->base;
-}
-
-/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
- */
-static void
-in6_lltable_free(struct lltable *llt, struct llentry *lle)
-{
- LLE_WUNLOCK(lle);
- LLE_LOCK_DESTROY(lle);
- free(lle, M_LLTABLE);
+ return (&lle->base);
}
static void
-in6_lltable_prefix_free(struct lltable *llt,
- const struct sockaddr *prefix,
- const struct sockaddr *mask,
- u_int flags)
+in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
+ const struct sockaddr *mask, u_int flags)
{
const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix;
const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask;
struct llentry *lle, *next;
- register int i;
+ int i;
/*
- * (flags & LLE_STATIC) means deleting all entries
- * including static ND6 entries
+ * (flags & LLE_STATIC) means deleting all entries
+ * including static ND6 entries.
*/
- for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
if (IN6_ARE_MASKED_ADDR_EQUAL(
- &((struct sockaddr_in6 *)L3_ADDR(lle))->sin6_addr,
- &pfx->sin6_addr,
- &msk->sin6_addr) &&
- ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
- int canceled;
-
- canceled = callout_drain(&lle->la_timer);
+ &satosin6(L3_ADDR(lle))->sin6_addr,
+ &pfx->sin6_addr, &msk->sin6_addr) &&
+ ((flags & LLE_STATIC) ||
+ !(lle->la_flags & LLE_STATIC))) {
LLE_WLOCK(lle);
- if (canceled)
+ if (callout_stop(&lle->la_timer))
LLE_REMREF(lle);
llentry_free(lle);
}
}
}
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
}
static int
-in6_lltable_rtcheck(struct ifnet *ifp,
- u_int flags,
+in6_lltable_rtcheck(struct ifnet *ifp,
+ u_int flags,
const struct sockaddr *l3addr)
{
struct rtentry *rt;
@@ -2430,8 +2555,8 @@ in6_lltable_rtcheck(struct ifnet *ifp,
RT_DEFAULT_FIB);
if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
struct ifaddr *ifa;
- /*
- * Create an ND6 cache for an IPv6 neighbor
+ /*
+ * Create an ND6 cache for an IPv6 neighbor
* that is not covered by our own prefix.
*/
/* XXX ifaof_ifpforaddr should take a const param */
@@ -2472,8 +2597,8 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle);
if (lle->la_flags & LLE_DELETED)
continue;
- if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr,
- sizeof(struct in6_addr)) == 0)
+ if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr)) == 0)
break;
}
@@ -2502,15 +2627,20 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
lle->lle_tbl = llt;
lle->lle_head = lleh;
+ lle->la_flags |= LLE_LINKED;
LIST_INSERT_HEAD(lleh, lle, lle_next);
} else if (flags & LLE_DELETE) {
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
- lle->la_flags = LLE_DELETED;
- LLE_WUNLOCK(lle);
+ lle->la_flags |= LLE_DELETED;
#ifdef DIAGNOSTIC
- log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
-#endif
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+ if ((lle->la_flags &
+ (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
+ llentry_free(lle);
+ else
+ LLE_WUNLOCK(lle);
}
lle = (void *)-1;
}
@@ -2620,7 +2750,6 @@ in6_domifattach(struct ifnet *ifp)
ext->scope6_id = scope6_ifattach(ifp);
ext->lltable = lltable_init(ifp, AF_INET6);
if (ext->lltable != NULL) {
- ext->lltable->llt_free = in6_lltable_free;
ext->lltable->llt_prefix_free = in6_lltable_prefix_free;
ext->lltable->llt_lookup = in6_lltable_lookup;
ext->lltable->llt_dump = in6_lltable_dump;
@@ -2697,8 +2826,7 @@ in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
struct sockaddr_in *sin_p;
struct sockaddr_in6 *sin6_p;
- sin6_p = malloc(sizeof *sin6_p, M_SONAME,
- M_WAITOK);
+ sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK);
sin_p = (struct sockaddr_in *)*nam;
in6_sin_2_v4mapsin6(sin_p, sin6_p);
free(*nam, M_SONAME);
diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h
index 7abbfa40..616f1009 100644
--- a/freebsd/sys/netinet6/in6.h
+++ b/freebsd/sys/netinet6/in6.h
@@ -235,37 +235,37 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
* Unspecified
*/
#define IN6_IS_ADDR_UNSPECIFIED(a) \
- ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == 0))
+ ((a)->__u6_addr.__u6_addr32[0] == 0 && \
+ (a)->__u6_addr.__u6_addr32[1] == 0 && \
+ (a)->__u6_addr.__u6_addr32[2] == 0 && \
+ (a)->__u6_addr.__u6_addr32[3] == 0)
/*
* Loopback
*/
#define IN6_IS_ADDR_LOOPBACK(a) \
- ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1)))
+ ((a)->__u6_addr.__u6_addr32[0] == 0 && \
+ (a)->__u6_addr.__u6_addr32[1] == 0 && \
+ (a)->__u6_addr.__u6_addr32[2] == 0 && \
+ (a)->__u6_addr.__u6_addr32[3] == ntohl(1))
/*
* IPv4 compatible
*/
#define IN6_IS_ADDR_V4COMPAT(a) \
- ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1)))
+ ((a)->__u6_addr.__u6_addr32[0] == 0 && \
+ (a)->__u6_addr.__u6_addr32[1] == 0 && \
+ (a)->__u6_addr.__u6_addr32[2] == 0 && \
+ (a)->__u6_addr.__u6_addr32[3] != 0 && \
+ (a)->__u6_addr.__u6_addr32[3] != ntohl(1))
/*
* Mapped
*/
#define IN6_IS_ADDR_V4MAPPED(a) \
- ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
- (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff)))
+ ((a)->__u6_addr.__u6_addr32[0] == 0 && \
+ (a)->__u6_addr.__u6_addr32[1] == 0 && \
+ (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff))
/*
* KAME Scope Values
@@ -376,6 +376,8 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
struct route_in6 {
struct rtentry *ro_rt;
struct llentry *ro_lle;
+ struct in6_addr *ro_ia6;
+ int ro_flags;
struct sockaddr_in6 ro_dst;
};
#endif
@@ -397,8 +399,8 @@ struct route_in6 {
#define IPV6_MULTICAST_IF 9 /* u_int; set/get IP6 multicast i/f */
#define IPV6_MULTICAST_HOPS 10 /* int; set/get IP6 multicast hops */
#define IPV6_MULTICAST_LOOP 11 /* u_int; set/get IP6 multicast loopback */
-#define IPV6_JOIN_GROUP 12 /* ip6_mreq; join a group membership */
-#define IPV6_LEAVE_GROUP 13 /* ip6_mreq; leave a group membership */
+#define IPV6_JOIN_GROUP 12 /* ipv6_mreq; join a group membership */
+#define IPV6_LEAVE_GROUP 13 /* ipv6_mreq; leave a group membership */
#define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */
#define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */
/* RFC2292 options */
@@ -611,7 +613,12 @@ struct ip6_mtuinfo {
#define IPV6CTL_STEALTH 45
#define ICMPV6CTL_ND6_ONLINKNSRFC4861 47
-#define IPV6CTL_MAXID 48
+#define IPV6CTL_NO_RADR 48 /* No defroute from RA */
+#define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA
+ * receiving IF. */
+#define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding
+ enabled */
+#define IPV6CTL_MAXID 51
#endif /* __BSD_VISIBLE */
/*
@@ -625,22 +632,25 @@ struct ip6_mtuinfo {
#ifdef _KERNEL
struct cmsghdr;
-
-int in6_cksum __P((struct mbuf *, u_int8_t, u_int32_t, u_int32_t));
-int in6_localaddr __P((struct in6_addr *));
-int in6_addrscope __P((struct in6_addr *));
-struct in6_ifaddr *in6_ifawithifp __P((struct ifnet *, struct in6_addr *));
-extern void in6_if_up __P((struct ifnet *));
+struct ip6_hdr;
+
+int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
+int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
+int in6_localaddr(struct in6_addr *);
+int in6_localip(struct in6_addr *);
+int in6_addrscope(struct in6_addr *);
+struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *);
+extern void in6_if_up(struct ifnet *);
struct sockaddr;
extern u_char ip6_protox[];
-void in6_sin6_2_sin __P((struct sockaddr_in *sin,
- struct sockaddr_in6 *sin6));
-void in6_sin_2_v4mapsin6 __P((struct sockaddr_in *sin,
- struct sockaddr_in6 *sin6));
-void in6_sin6_2_sin_in_sock __P((struct sockaddr *nam));
-void in6_sin_2_v4mapsin6_in_sock __P((struct sockaddr **nam));
-extern void addrsel_policy_init __P((void));
+void in6_sin6_2_sin(struct sockaddr_in *sin,
+ struct sockaddr_in6 *sin6);
+void in6_sin_2_v4mapsin6(struct sockaddr_in *sin,
+ struct sockaddr_in6 *sin6);
+void in6_sin6_2_sin_in_sock(struct sockaddr *nam);
+void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam);
+extern void addrsel_policy_init(void);
#define satosin6(sa) ((struct sockaddr_in6 *)(sa))
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
@@ -664,43 +674,43 @@ typedef __socklen_t socklen_t;
__BEGIN_DECLS
struct cmsghdr;
-extern int inet6_option_space __P((int));
-extern int inet6_option_init __P((void *, struct cmsghdr **, int));
-extern int inet6_option_append __P((struct cmsghdr *, const uint8_t *,
- int, int));
-extern uint8_t *inet6_option_alloc __P((struct cmsghdr *, int, int, int));
-extern int inet6_option_next __P((const struct cmsghdr *, uint8_t **));
-extern int inet6_option_find __P((const struct cmsghdr *, uint8_t **, int));
-
-extern size_t inet6_rthdr_space __P((int, int));
-extern struct cmsghdr *inet6_rthdr_init __P((void *, int));
-extern int inet6_rthdr_add __P((struct cmsghdr *, const struct in6_addr *,
- unsigned int));
-extern int inet6_rthdr_lasthop __P((struct cmsghdr *, unsigned int));
+extern int inet6_option_space(int);
+extern int inet6_option_init(void *, struct cmsghdr **, int);
+extern int inet6_option_append(struct cmsghdr *, const uint8_t *,
+ int, int);
+extern uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int);
+extern int inet6_option_next(const struct cmsghdr *, uint8_t **);
+extern int inet6_option_find(const struct cmsghdr *, uint8_t **, int);
+
+extern size_t inet6_rthdr_space(int, int);
+extern struct cmsghdr *inet6_rthdr_init(void *, int);
+extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *,
+ unsigned int);
+extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int);
#if 0 /* not implemented yet */
-extern int inet6_rthdr_reverse __P((const struct cmsghdr *, struct cmsghdr *));
+extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *);
#endif
-extern int inet6_rthdr_segments __P((const struct cmsghdr *));
-extern struct in6_addr *inet6_rthdr_getaddr __P((struct cmsghdr *, int));
-extern int inet6_rthdr_getflags __P((const struct cmsghdr *, int));
-
-extern int inet6_opt_init __P((void *, socklen_t));
-extern int inet6_opt_append __P((void *, socklen_t, int, uint8_t, socklen_t,
- uint8_t, void **));
-extern int inet6_opt_finish __P((void *, socklen_t, int));
-extern int inet6_opt_set_val __P((void *, int, void *, socklen_t));
-
-extern int inet6_opt_next __P((void *, socklen_t, int, uint8_t *, socklen_t *,
- void **));
-extern int inet6_opt_find __P((void *, socklen_t, int, uint8_t, socklen_t *,
- void **));
-extern int inet6_opt_get_val __P((void *, int, void *, socklen_t));
-extern socklen_t inet6_rth_space __P((int, int));
-extern void *inet6_rth_init __P((void *, socklen_t, int, int));
-extern int inet6_rth_add __P((void *, const struct in6_addr *));
-extern int inet6_rth_reverse __P((const void *, void *));
-extern int inet6_rth_segments __P((const void *));
-extern struct in6_addr *inet6_rth_getaddr __P((const void *, int));
+extern int inet6_rthdr_segments(const struct cmsghdr *);
+extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int);
+extern int inet6_rthdr_getflags(const struct cmsghdr *, int);
+
+extern int inet6_opt_init(void *, socklen_t);
+extern int inet6_opt_append(void *, socklen_t, int, uint8_t, socklen_t,
+ uint8_t, void **);
+extern int inet6_opt_finish(void *, socklen_t, int);
+extern int inet6_opt_set_val(void *, int, void *, socklen_t);
+
+extern int inet6_opt_next(void *, socklen_t, int, uint8_t *, socklen_t *,
+ void **);
+extern int inet6_opt_find(void *, socklen_t, int, uint8_t, socklen_t *,
+ void **);
+extern int inet6_opt_get_val(void *, int, void *, socklen_t);
+extern socklen_t inet6_rth_space(int, int);
+extern void *inet6_rth_init(void *, socklen_t, int, int);
+extern int inet6_rth_add(void *, const struct in6_addr *);
+extern int inet6_rth_reverse(const void *, void *);
+extern int inet6_rth_segments(const void *);
+extern struct in6_addr *inet6_rth_getaddr(const void *, int);
__END_DECLS
#endif /* __BSD_VISIBLE */
diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c
index e0e03f45..e129ca71 100644
--- a/freebsd/sys/netinet6/in6_cksum.c
+++ b/freebsd/sys/netinet6/in6_cksum.c
@@ -80,10 +80,70 @@ __FBSDID("$FreeBSD$");
*/
#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
-#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);}
+
+static int
+_in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum)
+{
+ int sum;
+ uint16_t scope, *w;
+ union {
+ u_int16_t phs[4];
+ struct {
+ u_int32_t ph_len;
+ u_int8_t ph_zero[3];
+ u_int8_t ph_nxt;
+ } __packed ph;
+ } uph;
+
+ sum = csum;
+
+ /*
+ * First create IP6 pseudo header and calculate a summary.
+ */
+ uph.ph.ph_len = htonl(len);
+ uph.ph.ph_zero[0] = uph.ph.ph_zero[1] = uph.ph.ph_zero[2] = 0;
+ uph.ph.ph_nxt = nxt;
+
+ /* Payload length and upper layer identifier. */
+ sum += uph.phs[0]; sum += uph.phs[1];
+ sum += uph.phs[2]; sum += uph.phs[3];
+
+ /* IPv6 source address. */
+ scope = in6_getscope(&ip6->ip6_src);
+ w = (u_int16_t *)&ip6->ip6_src;
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+ if (scope != 0)
+ sum -= scope;
+
+ /* IPv6 destination address. */
+ scope = in6_getscope(&ip6->ip6_dst);
+ w = (u_int16_t *)&ip6->ip6_dst;
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+ sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+ if (scope != 0)
+ sum -= scope;
+
+ return (sum);
+}
+
+int
+in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum)
+{
+ int sum;
+ union {
+ u_int16_t s[2];
+ u_int32_t l;
+ } l_util;
+
+ sum = _in6_cksum_pseudo(ip6, len, nxt, csum);
+ REDUCE;
+ return (sum);
+}
/*
- * m MUST contain a continuous IP6 header.
+ * m MUST contain a contiguous IP6 header.
* off is an offset where TCP/UDP/ICMP6 header starts.
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
@@ -91,12 +151,10 @@ __FBSDID("$FreeBSD$");
int
in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
{
- u_int16_t *w;
- int sum = 0;
- int mlen = 0;
- int byte_swapped = 0;
struct ip6_hdr *ip6;
- struct in6_addr in6;
+ u_int16_t *w, scope;
+ int byte_swapped, mlen;
+ int sum;
union {
u_int16_t phs[4];
struct {
@@ -114,42 +172,38 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
u_int32_t l;
} l_util;
- /* sanity check */
- if (m->m_pkthdr.len < off + len) {
- panic("in6_cksum: mbuf len (%d) < off+len (%d+%d)",
- m->m_pkthdr.len, off, len);
- }
-
- bzero(&uph, sizeof(uph));
+ /* Sanity check. */
+ KASSERT(m->m_pkthdr.len >= off + len, ("%s: mbuf len (%d) < off(%d)+"
+ "len(%d)", __func__, m->m_pkthdr.len, off, len));
/*
* First create IP6 pseudo header and calculate a summary.
*/
- ip6 = mtod(m, struct ip6_hdr *);
uph.ph.ph_len = htonl(len);
+ uph.ph.ph_zero[0] = uph.ph.ph_zero[1] = uph.ph.ph_zero[2] = 0;
uph.ph.ph_nxt = nxt;
- /*
- * IPv6 source address.
- * XXX: we'd like to avoid copying the address, but we can't due to
- * the possibly embedded scope zone ID.
- */
- in6 = ip6->ip6_src;
- in6_clearscope(&in6);
- w = (u_int16_t *)&in6;
+ /* Payload length and upper layer identifier. */
+ sum = uph.phs[0]; sum += uph.phs[1];
+ sum += uph.phs[2]; sum += uph.phs[3];
+
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ /* IPv6 source address. */
+ scope = in6_getscope(&ip6->ip6_src);
+ w = (u_int16_t *)&ip6->ip6_src;
sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+ if (scope != 0)
+ sum -= scope;
- /* IPv6 destination address */
- in6 = ip6->ip6_dst;
- in6_clearscope(&in6);
- w = (u_int16_t *)&in6;
+ /* IPv6 destination address. */
+ scope = in6_getscope(&ip6->ip6_dst);
+ w = (u_int16_t *)&ip6->ip6_dst;
sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
-
- /* Payload length and upper layer identifier */
- sum += uph.phs[0]; sum += uph.phs[1];
- sum += uph.phs[2]; sum += uph.phs[3];
+ if (scope != 0)
+ sum -= scope;
/*
* Secondly calculate a summary of the first mbuf excluding offset.
@@ -169,14 +223,16 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
/*
* Force to even boundary.
*/
- if ((1 & (long) w) && (mlen > 0)) {
+ if ((1 & (long)w) && (mlen > 0)) {
REDUCE;
sum <<= 8;
s_util.c[0] = *(u_char *)w;
w = (u_int16_t *)((char *)w + 1);
mlen--;
byte_swapped = 1;
- }
+ } else
+ byte_swapped = 0;
+
/*
* Unroll the loop to make overhead from
* branches &c small.
diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c
index bab07c38..9e0f37f0 100644
--- a/freebsd/sys/netinet6/in6_gif.c
+++ b/freebsd/sys/netinet6/in6_gif.c
@@ -266,6 +266,8 @@ in6_gif_output(struct ifnet *ifp,
#endif
}
+ m_addr_changed(m);
+
#ifdef IPV6_MINMTU
/*
* force fragmentation to minimum MTU, to avoid path MTU discovery.
@@ -301,14 +303,14 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
sc = (struct gif_softc *)encap_getarg(m);
if (sc == NULL) {
m_freem(m);
- V_ip6stat.ip6s_nogif++;
+ IP6STAT_INC(ip6s_nogif);
return IPPROTO_DONE;
}
gifp = GIF2IFP(sc);
if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
m_freem(m);
- V_ip6stat.ip6s_nogif++;
+ IP6STAT_INC(ip6s_nogif);
return IPPROTO_DONE;
}
@@ -363,7 +365,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
break;
default:
- V_ip6stat.ip6s_nogif++;
+ IP6STAT_INC(ip6s_nogif);
m_freem(m);
return IPPROTO_DONE;
}
diff --git a/freebsd/sys/netinet6/in6_gif.h b/freebsd/sys/netinet6/in6_gif.h
index f9520898..e1184175 100644
--- a/freebsd/sys/netinet6/in6_gif.h
+++ b/freebsd/sys/netinet6/in6_gif.h
@@ -36,10 +36,10 @@
#define GIF_HLIM 30
struct gif_softc;
-int in6_gif_input __P((struct mbuf **, int *, int));
-int in6_gif_output __P((struct ifnet *, int, struct mbuf *));
-int gif_encapcheck6 __P((const struct mbuf *, int, int, void *));
-int in6_gif_attach __P((struct gif_softc *));
-int in6_gif_detach __P((struct gif_softc *));
+int in6_gif_input(struct mbuf **, int *, int);
+int in6_gif_output(struct ifnet *, int, struct mbuf *);
+int gif_encapcheck6(const struct mbuf *, int, int, void *);
+int in6_gif_attach(struct gif_softc *);
+int in6_gif_detach(struct gif_softc *);
#endif /* _NETINET6_IN6_GIF_H_ */
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index 57a7efef..a8f03017 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -272,6 +272,7 @@ found:
/* get EUI64 */
switch (ifp->if_type) {
+ case IFT_BRIDGE:
case IFT_ETHER:
case IFT_L2VLAN:
case IFT_FDDI:
@@ -622,13 +623,16 @@ in6_ifattach_loopback(struct ifnet *ifp)
/*
* compute NI group address, based on the current hostname setting.
- * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later).
+ * see RFC 4620.
*
* when ifp == NULL, the caller is responsible for filling scopeid.
+ *
+ * If oldmcprefix == 1, FF02:0:0:0:0:2::/96 is used for NI group address
+ * while it is FF02:0:0:0:0:2:FF00::/104 in RFC 4620.
*/
-int
-in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
- struct in6_addr *in6)
+static int
+in6_nigroup0(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6, int oldmcprefix)
{
struct prison *pr;
const char *p;
@@ -677,7 +681,7 @@ in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
*q = *q - 'A' + 'a';
}
- /* generate 8 bytes of pseudo-random value. */
+ /* generate 16 bytes of pseudo-random value. */
bzero(&ctxt, sizeof(ctxt));
MD5Init(&ctxt);
MD5Update(&ctxt, &l, sizeof(l));
@@ -687,13 +691,36 @@ in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
bzero(in6, sizeof(*in6));
in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
in6->s6_addr8[11] = 2;
- bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+ if (oldmcprefix == 0) {
+ in6->s6_addr8[12] = 0xff;
+ /* Copy the first 24 bits of 128-bit hash into the address. */
+ bcopy(digest, &in6->s6_addr8[13], 3);
+ } else {
+ /* Copy the first 32 bits of 128-bit hash into the address. */
+ bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+ }
if (in6_setscope(in6, ifp, NULL))
return (-1); /* XXX: should not fail */
return 0;
}
+int
+in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
+{
+
+ return (in6_nigroup0(ifp, name, namelen, in6, 0));
+}
+
+int
+in6_nigroup_oldmcprefix(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
+{
+
+ return (in6_nigroup0(ifp, name, namelen, in6, 1));
+}
+
/*
* XXX multiple loopback interface needs more care. for instance,
* nodelocal address needs to be configured onto only one of them.
@@ -711,7 +738,8 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
switch (ifp->if_type) {
case IFT_PFLOG:
case IFT_PFSYNC:
- case IFT_CARP:
+ ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
+ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
return;
}
@@ -719,7 +747,6 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
* quirks based on interface type
*/
switch (ifp->if_type) {
-#ifdef IFT_STF
case IFT_STF:
/*
* 6to4 interface is a very special kind of beast.
@@ -727,8 +754,8 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
* linklocals for 6to4 interface, but there's no use and
* it is rather harmful to have one.
*/
- goto statinit;
-#endif
+ ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
+ break;
default:
break;
}
@@ -762,22 +789,23 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
/*
* assign a link-local address, if there's none.
*/
- if (V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) {
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+ ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) {
+ int error;
+
ia = in6ifa_ifpforlinklocal(ifp, 0);
if (ia == NULL) {
- if (in6_ifattach_linklocal(ifp, altifp) == 0) {
- /* linklocal address assigned */
- } else {
- /* failed to assign linklocal address. bark? */
- }
+ error = in6_ifattach_linklocal(ifp, altifp);
+#if 0
+ if (error)
+ log(LOG_NOTICE, "in6_ifattach_linklocal: "
+ "failed to add a link-local addr to %s\n",
+ if_name(ifp));
+#endif
} else
ifa_free(&ia->ia_ifa);
}
-#ifdef IFT_STF /* XXX */
-statinit:
-#endif
-
/* update dynamically. */
if (V_in6_maxmtu < ifp->if_mtu)
V_in6_maxmtu = ifp->if_mtu;
diff --git a/freebsd/sys/netinet6/in6_ifattach.h b/freebsd/sys/netinet6/in6_ifattach.h
index 441eb755..af627313 100644
--- a/freebsd/sys/netinet6/in6_ifattach.h
+++ b/freebsd/sys/netinet6/in6_ifattach.h
@@ -34,12 +34,13 @@
#define _NETINET6_IN6_IFATTACH_H_
#ifdef _KERNEL
-void in6_ifattach __P((struct ifnet *, struct ifnet *));
-void in6_ifdetach __P((struct ifnet *));
-int in6_get_tmpifid __P((struct ifnet *, u_int8_t *, const u_int8_t *, int));
-void in6_tmpaddrtimer __P((void *));
-int in6_get_hw_ifid __P((struct ifnet *, struct in6_addr *));
-int in6_nigroup __P((struct ifnet *, const char *, int, struct in6_addr *));
+void in6_ifattach(struct ifnet *, struct ifnet *);
+void in6_ifdetach(struct ifnet *);
+int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
+void in6_tmpaddrtimer(void *);
+int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
+int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
+int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *);
#endif /* _KERNEL */
#endif /* _NETINET6_IN6_IFATTACH_H_ */
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index 67c85c37..55f2fab2 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -152,7 +152,8 @@ static int sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS);
SYSCTL_DECL(_net_inet6_ip6); /* XXX Not in any common header. */
-SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv6 multicast");
+static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0,
+ "IPv6 multicast");
static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
@@ -172,7 +173,7 @@ SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
&in6_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop);
-SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
+static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
"Per-interface stack-wide source filters");
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index 6446edeb..4d607f71 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -2,8 +2,12 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -72,6 +76,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_pcbgroup.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -107,8 +112,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
-#include <security/mac/mac_framework.h>
-
struct in6_addr zeroin6_addr;
int
@@ -119,17 +122,18 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
u_short lport = 0;
- int error, wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+ int error, lookupflags = 0;
+ int reuseport = (so->so_options & SO_REUSEPORT);
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = INPLOOKUP_WILDCARD;
+ lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
@@ -189,6 +193,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
}
if (lport) {
struct inpcb *t;
+ struct tcptw *tw;
/* GROSS */
if (ntohs(lport) <= V_ipport_reservedhigh &&
@@ -208,14 +213,15 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_socket->so_options & SO_REUSEPORT)
+ (t->inp_flags2 & INP_REUSEPORT) == 0) &&
#ifndef __rtems__
- == 0) && (inp->inp_cred->cr_uid !=
+ (inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
#else /* __rtems__ */
- == 0))
+ 0)
#endif /* __rtems__ */
return (EADDRINUSE);
+#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
@@ -238,36 +244,49 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
#endif /* __rtems__ */
return (EADDRINUSE);
}
+#endif
}
t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
- lport, wild, cred);
- if (t && (reuseport & ((t->inp_flags & INP_TIMEWAIT) ?
- intotw(t)->tw_so_options :
- t->inp_socket->so_options)) == 0)
+ lport, lookupflags, cred);
+ if (t && (t->inp_flags & INP_TIMEWAIT)) {
+ /*
+ * XXXRW: If an incpb has had its timewait
+ * state recycled, we treat the address as
+ * being in use (for now). This is better
+ * than a panic, but not desirable.
+ */
+ tw = intotw(t);
+ if (tw == NULL ||
+ (reuseport & tw->tw_so_options) == 0)
+ return (EADDRINUSE);
+ } else if (t && (reuseport & inp_so_options(t)) == 0) {
return (EADDRINUSE);
+ }
+#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6);
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
- lport, wild, cred);
+ lport, lookupflags, cred);
if (t && t->inp_flags & INP_TIMEWAIT) {
- if ((reuseport &
- intotw(t)->tw_so_options) == 0 &&
- (ntohl(t->inp_laddr.s_addr) !=
+ tw = intotw(t);
+ if (tw == NULL)
+ return (EADDRINUSE);
+ if ((reuseport & tw->tw_so_options) == 0
+ && (ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY || ((inp->inp_vflag &
INP_IPV6PROTO) ==
(t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
- }
- else if (t &&
- (reuseport & t->inp_socket->so_options)
- == 0 && (ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || INP_SOCKAF(so) ==
- INP_SOCKAF(t->inp_socket)))
+ } else if (t &&
+ (reuseport & inp_so_options(t)) == 0 &&
+ (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+ (t->inp_vflag & INP_IPV6PROTO) != 0))
return (EADDRINUSE);
}
+#endif
}
inp->in6p_laddr = sin6->sin6_addr;
}
@@ -309,8 +328,8 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
int scope_ambiguous = 0;
struct in6_addr in6a;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */
if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
@@ -371,15 +390,16 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
* then pick one.
*/
int
-in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
- struct ucred *cred)
+in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred, struct mbuf *m)
{
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
struct in6_addr addr6;
int error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Call inner routine, to assign local interface address.
@@ -388,7 +408,7 @@ in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
return (error);
- if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
+ if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
? &addr6 : &inp->in6p_laddr,
@@ -411,17 +431,24 @@ in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
inp->inp_flow |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
- in_pcbrehash(inp);
+ in_pcbrehash_mbuf(inp, m);
return (0);
}
+int
+in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
+{
+
+ return (in6_pcbconnect_mbuf(inp, nam, cred, NULL));
+}
+
void
in6_pcbdisconnect(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
inp->inp_fport = 0;
@@ -512,11 +539,14 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
+#ifdef INET
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
error = in_getsockaddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
- } else {
+ } else
+#endif
+ {
/* scope issues will be handled in in6_getsockaddr(). */
error = in6_getsockaddr(so, nam);
}
@@ -533,11 +563,13 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
+#ifdef INET
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
error = in_getpeeraddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
} else
+#endif
/* scope issues will be handled in in6_getpeeraddr(). */
error = in6_getpeeraddr(so, nam);
@@ -655,18 +687,22 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
}
/*
- * Lookup a PCB based on the local address and port.
+ * Lookup a PCB based on the local address and port. Caller must hold the
+ * hash lock. No inpcb locks or references are acquired.
*/
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
- u_short lport, int wild_okay, struct ucred *cred)
+ u_short lport, int lookupflags, struct ucred *cred)
{
register struct inpcb *inp;
int matchwild = 3, wildcard;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+ KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
- if (!wild_okay) {
+ if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
/*
* Look for an unconnected (wildcard foreign addr) PCB that
@@ -817,20 +853,158 @@ in6_rtchange(struct inpcb *inp, int errno)
return inp;
}
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+ int faith;
+
+ if (faithprefix_p != NULL)
+ faith = (*faithprefix_p)(laddr);
+ else
+ faith = 0;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[
+ INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+ IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP6))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+ struct inpcb *jail_wild = NULL;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+ inp->inp_lport != lport) {
+ continue;
+ }
+
+ /* XXX inp locking */
+ if (faith && (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP6);
+ if (injail) {
+ if (prison_check_ip6(inp->inp_cred,
+ laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+ if (inp != NULL)
+ goto found;
+ } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+		panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
/*
* Lookup PCB in hash list.
*/
struct inpcb *
-in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
- u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard,
- struct ifnet *ifp)
+in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
+ int lookupflags, struct ifnet *ifp)
{
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
int faith;
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (faithprefix_p != NULL)
faith = (*faithprefix_p)(laddr);
@@ -869,7 +1043,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
/*
* Then look for a wildcard match, if requested.
*/
- if (wildcard == INPLOOKUP_WILDCARD) {
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
struct inpcb *local_wild = NULL, *local_exact = NULL;
struct inpcb *jail_wild = NULL;
int injail;
@@ -926,7 +1100,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
return (local_exact);
if (local_wild != NULL)
return (local_wild);
- } /* if (wildcard == INPLOOKUP_WILDCARD) */
+ } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
/*
* Not found.
@@ -934,6 +1108,101 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
return (NULL);
}
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
+ * hash list lock, and will return the inpcb locked (i.e., requires
+ * INPLOOKUP_LOCKPCB).
+ */
+static struct inpcb *
+in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ INP_HASH_RLOCK(pcbinfo);
+ inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+ (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+ if (inp != NULL) {
+ in_pcbref(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ } else
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
+ * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in6_pcbgroup.c.
+ */
+struct inpcb *
+in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
+ struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
+{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+
+struct inpcb *
+in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp, struct mbuf *m)
+{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
+ return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+
void
init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
{
diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h
index c54a8cf3..19d151b7 100644
--- a/freebsd/sys/netinet6/in6_pcb.h
+++ b/freebsd/sys/netinet6/in6_pcb.h
@@ -69,36 +69,56 @@
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
-void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
-void in6_losing __P((struct inpcb *));
-int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
-int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *));
-void in6_pcbdisconnect __P((struct inpcb *));
+struct inpcbgroup *
+ in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in6_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *);
+struct inpcbgroup *
+ in6_pcbgroup_bytuple(struct inpcbinfo *, const struct in6_addr *,
+ u_short, const struct in6_addr *, u_short);
+
+void in6_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
+void in6_losing(struct inpcb *);
+int in6_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
+int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
+int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *,
+ struct ucred *, struct mbuf *);
+void in6_pcbdisconnect(struct inpcb *);
int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *);
struct inpcb *
- in6_pcblookup_local __P((struct inpcbinfo *,
+ in6_pcblookup_local(struct inpcbinfo *,
struct in6_addr *, u_short, int,
- struct ucred *));
+ struct ucred *);
+struct inpcb *
+ in6_pcblookup(struct inpcbinfo *, struct in6_addr *,
+ u_int, struct in6_addr *, u_int, int,
+ struct ifnet *);
+struct inpcb *
+ in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *,
+ u_int, struct in6_addr *, u_int, int,
+ struct ifnet *);
struct inpcb *
- in6_pcblookup_hash __P((struct inpcbinfo *,
- struct in6_addr *, u_int, struct in6_addr *,
- u_int, int, struct ifnet *));
-void in6_pcbnotify __P((struct inpcbinfo *, struct sockaddr *,
+ in6_pcblookup_mbuf(struct inpcbinfo *, struct in6_addr *,
+ u_int, struct in6_addr *, u_int, int,
+ struct ifnet *ifp, struct mbuf *);
+void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *,
u_int, const struct sockaddr *, u_int, int, void *,
- struct inpcb *(*)(struct inpcb *, int)));
+ struct inpcb *(*)(struct inpcb *, int));
struct inpcb *
- in6_rtchange __P((struct inpcb *, int));
+ in6_rtchange(struct inpcb *, int);
struct sockaddr *
- in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p));
+ in6_sockaddr(in_port_t port, struct in6_addr *addr_p);
struct sockaddr *
- in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p));
-int in6_getpeeraddr __P((struct socket *so, struct sockaddr **nam));
-int in6_getsockaddr __P((struct socket *so, struct sockaddr **nam));
-int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam));
-int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam));
-int in6_selecthlim __P((struct in6pcb *, struct ifnet *));
-int in6_pcbsetport __P((struct in6_addr *, struct inpcb *, struct ucred *));
-void init_sin6 __P((struct sockaddr_in6 *sin6, struct mbuf *m));
+ in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p);
+int in6_getpeeraddr(struct socket *so, struct sockaddr **nam);
+int in6_getsockaddr(struct socket *so, struct sockaddr **nam);
+int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam);
+int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam);
+int in6_selecthlim(struct in6pcb *, struct ifnet *);
+int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *);
+void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m);
#endif /* _KERNEL */
#endif /* !_NETINET6_IN6_PCB_H_ */
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index df05febf..a6c3b4e8 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ipstealth.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_mpath.h>
+#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
@@ -127,6 +128,10 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
+#ifdef FLOWTABLE
+#include <net/flowtable.h>
+#endif
+
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -167,6 +172,9 @@ struct ip6protosw inet6sw[] = {
.pr_input = udp6_input,
.pr_ctlinput = udp6_ctlinput,
.pr_ctloutput = ip6_ctloutput,
+#ifndef INET /* Do not call initialization twice. */
+ .pr_init = udp_init,
+#endif
.pr_usrreqs = &udp6_usrreqs,
},
{
@@ -220,6 +228,9 @@ struct ip6protosw inet6sw[] = {
.pr_output = rip6_output,
.pr_ctlinput = rip6_ctlinput,
.pr_ctloutput = rip6_ctloutput,
+#ifndef INET /* Do not call initialization twice. */
+ .pr_init = rip_init,
+#endif
.pr_usrreqs = &rip6_usrreqs
},
{
@@ -388,6 +399,9 @@ VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS;
VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM;
VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS;
VNET_DEFINE(int, ip6_accept_rtadv) = 0;
+VNET_DEFINE(int, ip6_no_radr) = 0;
+VNET_DEFINE(int, ip6_norbit_raif) = 0;
+VNET_DEFINE(int, ip6_rfc6204w3) = 0;
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
@@ -436,6 +450,7 @@ VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */
/* control how to respond to NI queries */
VNET_DEFINE(int, icmp6_nodeinfo) =
(ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
+VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
/* UDP on IP6 parameters */
VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */
@@ -508,12 +523,27 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
&VNET_NAME(ip6_sendredirects), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW,
&VNET_NAME(ip6_defhlim), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW,
&VNET_NAME(ip6stat), ip6stat, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
- CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "");
+ CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
+ "Default value of per-interface flag for accepting ICMPv6 Router"
+ "Advertisement messages");
+SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
+ CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
+ "Default value of per-interface flag to control whether routers "
+ "sending ICMPv6 RA messages on that interface are added into the "
+ "default router list.");
+SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW,
+ &VNET_NAME(ip6_norbit_raif), 0,
+ "Always set 0 to R flag in ICMPv6 NA messages when accepting RA"
+ " on the interface.");
+SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
+ CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
+ "Accept the default router list from ICMPv6 RA messages even "
+ "when packet forwarding enabled.");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
&VNET_NAME(ip6_keepfaith), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
@@ -543,8 +573,10 @@ SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_RW,
&VNET_NAME(ip6_v6only), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
- CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
+ CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
+ "Default value of per-interface flag for automatically adding an IPv6"
+ " link-local address to interfaces when attached");
+SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW,
&VNET_NAME(rip6stat), rip6stat, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
@@ -559,12 +591,22 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
&VNET_NAME(ip6stealth), 0, "");
#endif
+#ifdef FLOWTABLE
+VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
+VNET_DEFINE(struct flowtable *, ip6_ft);
+#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
+
+SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
+ &VNET_NAME(ip6_output_flowtable_size), 2048,
+ "number of entries in the per-cpu output flow caches");
+#endif
+
/* net.inet6.icmp6 */
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW,
&VNET_NAME(icmp6stat), icmp6stat, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW,
&VNET_NAME(nd6_prune), 0, "");
@@ -578,6 +620,11 @@ SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW,
&VNET_NAME(icmp6_nodeinfo), 0, "");
+SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
+ nodeinfo_oldmcprefix, CTLFLAG_RW,
+ &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
+ "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
+ " for compatibility with KAME implememtation.");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index 74577cbd..cc2f5ee5 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -129,20 +129,20 @@ static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy);
VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
-static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
+static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
- struct rtentry **, int, u_int));
-static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
+ struct rtentry **, int, u_int);
+static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *ro, struct ifnet **,
- struct ifnet *, u_int));
+ struct ifnet *, u_int);
static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
static void init_policy_queue(void);
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
-static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
- void *));
+static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
+ void *);
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
@@ -153,9 +153,8 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
* an entry to the caller for later use.
*/
#define REPLACE(r) do {\
- if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
- sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
- V_ip6stat.ip6s_sources_rule[(r)]++; \
+ IP6STAT_INC(ip6s_sources_rule[(r)]); \
+ rule = (r); \
/* { \
char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
@@ -163,9 +162,6 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
goto replace; \
} while(0)
#define NEXT(r) do {\
- if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
- sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
- V_ip6stat.ip6s_sources_rule[(r)]++; \
/* { \
char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
@@ -173,9 +169,8 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
goto next; /* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
- if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
- sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
- V_ip6stat.ip6s_sources_rule[(r)]++; \
+ IP6STAT_INC(ip6s_sources_rule[(r)]); \
+ rule = (r); \
goto out; /* XXX: we can't use 'break' here */ \
} while(0)
@@ -192,7 +187,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
u_int32_t odstzone;
int prefer_tempaddr;
- int error;
+ int error, rule;
struct ip6_moptions *mopts;
KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__));
@@ -308,6 +303,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
if (error)
return (error);
+ rule = 0;
IN6_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
int new_scope = -1, new_matchlen = -1;
@@ -385,10 +381,12 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*/
/* Rule 5: Prefer outgoing interface */
- if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
- NEXT(5);
- if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
- REPLACE(5);
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) {
+ if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
+ NEXT(5);
+ if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
+ REPLACE(5);
+ }
/*
* Rule 6: Prefer matching label
@@ -487,6 +485,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
if ((ia = ia_best) == NULL) {
IN6_IFADDR_RUNLOCK();
+ IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -503,6 +502,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
IN6_IFADDR_RUNLOCK();
+ IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -510,6 +510,16 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*ifpp = ifp;
bcopy(&tmp, srcp, sizeof(*srcp));
+ if (ia->ia_ifp == ifp)
+ IP6STAT_INC(ip6s_sources_sameif[best_scope]);
+ else
+ IP6STAT_INC(ip6s_sources_otherif[best_scope]);
+ if (dst_scope == best_scope)
+ IP6STAT_INC(ip6s_sources_samescope[best_scope]);
+ else
+ IP6STAT_INC(ip6s_sources_otherscope[best_scope]);
+ if (IFA6_IS_DEPRECATED(ia))
+ IP6STAT_INC(ip6s_sources_deprecated[best_scope]);
IN6_IFADDR_RUNLOCK();
return (0);
}
@@ -733,7 +743,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
error = EHOSTUNREACH;
}
if (error == EHOSTUNREACH)
- V_ip6stat.ip6s_noroute++;
+ IP6STAT_INC(ip6s_noroute);
if (retifp != NULL) {
*retifp = ifp;
@@ -875,8 +885,7 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
RTFREE(ro6.ro_rt);
if (lifp)
return (ND_IFINFO(lifp)->chlim);
- } else
- return (V_ip6_defhlim);
+ }
}
return (V_ip6_defhlim);
}
@@ -890,13 +899,13 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
{
struct socket *so = inp->inp_socket;
u_int16_t lport = 0;
- int error, wild = 0;
+ int error, lookupflags = 0;
#ifdef INVARIANTS
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
#endif
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
error = prison_local_ip6(cred, laddr,
((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0));
@@ -905,11 +914,11 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
/* XXX: this is redundant when called from in6_pcbbind */
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = INPLOOKUP_WILDCARD;
+ lookupflags = INPLOOKUP_WILDCARD;
inp->inp_flags |= INP_ANONPORT;
- error = in_pcb_lport(inp, NULL, &lport, cred, wild);
+ error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags);
if (error != 0)
return (error);
@@ -966,7 +975,7 @@ struct walkarg {
static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
+static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
CTLFLAG_RD, in6_src_sysctl, "");
static int
@@ -1100,6 +1109,7 @@ delete_addrsel_policyent(struct in6_addrpolicy *key)
TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
ADDRSEL_UNLOCK();
ADDRSEL_XUNLOCK();
+ free(pol, M_IFADDR);
return (0);
}
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 793eb540..c7ebe523 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -750,36 +750,37 @@ int in6_leavegroup(struct in6_multi_mship *);
/* flags to in6_update_ifa */
#define IN6_IFAUPDATE_DADDELAY 0x1 /* first time to configure an address */
-int in6_mask2len __P((struct in6_addr *, u_char *));
-int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *,
- struct thread *));
-int in6_update_ifa __P((struct ifnet *, struct in6_aliasreq *,
- struct in6_ifaddr *, int));
-void in6_purgeaddr __P((struct ifaddr *));
-int in6if_do_dad __P((struct ifnet *));
-void in6_purgeif __P((struct ifnet *));
-void in6_savemkludge __P((struct in6_ifaddr *));
-void *in6_domifattach __P((struct ifnet *));
-void in6_domifdetach __P((struct ifnet *, void *));
-void in6_setmaxmtu __P((void));
-int in6_if2idlen __P((struct ifnet *));
-struct in6_ifaddr *in6ifa_ifpforlinklocal __P((struct ifnet *, int));
-struct in6_ifaddr *in6ifa_ifpwithaddr __P((struct ifnet *, struct in6_addr *));
-char *ip6_sprintf __P((char *, const struct in6_addr *));
-int in6_addr2zoneid __P((struct ifnet *, struct in6_addr *, u_int32_t *));
-int in6_matchlen __P((struct in6_addr *, struct in6_addr *));
-int in6_are_prefix_equal __P((struct in6_addr *, struct in6_addr *, int));
-void in6_prefixlen2mask __P((struct in6_addr *, int));
-int in6_prefix_ioctl __P((struct socket *, u_long, caddr_t,
- struct ifnet *));
-int in6_prefix_add_ifid __P((int, struct in6_ifaddr *));
-void in6_prefix_remove_ifid __P((int, struct in6_ifaddr *));
-void in6_purgeprefix __P((struct ifnet *));
+int in6_mask2len(struct in6_addr *, u_char *);
+int in6_control(struct socket *, u_long, caddr_t, struct ifnet *,
+ struct thread *);
+int in6_update_ifa(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int);
+void in6_purgeaddr(struct ifaddr *);
+int in6if_do_dad(struct ifnet *);
+void in6_purgeif(struct ifnet *);
+void in6_savemkludge(struct in6_ifaddr *);
+void *in6_domifattach(struct ifnet *);
+void in6_domifdetach(struct ifnet *, void *);
+void in6_setmaxmtu(void);
+int in6_if2idlen(struct ifnet *);
+struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
+struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *);
+char *ip6_sprintf(char *, const struct in6_addr *);
+int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *);
+int in6_matchlen(struct in6_addr *, struct in6_addr *);
+int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int);
+void in6_prefixlen2mask(struct in6_addr *, int);
+int in6_prefix_ioctl(struct socket *, u_long, caddr_t,
+ struct ifnet *);
+int in6_prefix_add_ifid(int, struct in6_ifaddr *);
+void in6_prefix_remove_ifid(int, struct in6_ifaddr *);
+void in6_purgeprefix(struct ifnet *);
void in6_ifremloop(struct ifaddr *);
void in6_ifaddloop(struct ifaddr *);
-int in6_is_addr_deprecated __P((struct sockaddr_in6 *));
-int in6_src_ioctl __P((u_long, caddr_t));
+int in6_is_addr_deprecated(struct sockaddr_in6 *);
+int in6_src_ioctl(u_long, caddr_t);
/*
* Extended API for IPv6 FIB support.
diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c
index fa5d373f..2b45804f 100644
--- a/freebsd/sys/netinet6/ip6_forward.c
+++ b/freebsd/sys/netinet6/ip6_forward.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_ipstealth.h>
@@ -52,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
@@ -100,11 +102,15 @@ ip6_forward(struct mbuf *m, int srcrt)
struct mbuf *mcopy = NULL;
struct ifnet *origifp; /* maybe unnecessary */
u_int32_t inzone, outzone;
- struct in6_addr src_in6, dst_in6;
+ struct in6_addr src_in6, dst_in6, odst;
#ifdef IPSEC
struct secpolicy *sp = NULL;
int ipsecrt = 0;
#endif
+#ifdef SCTP
+ int sw_csum;
+#endif
+ struct m_tag *fwd_tag;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifdef IPSEC
@@ -116,7 +122,7 @@ ip6_forward(struct mbuf *m, int srcrt)
* before forwarding packet actually.
*/
if (ipsec6_in_reject(m, NULL)) {
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(in_polvio);
m_freem(m);
return;
}
@@ -131,7 +137,7 @@ ip6_forward(struct mbuf *m, int srcrt)
if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 ||
IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
if (V_ip6_log_time + V_ip6_log_interval < time_second) {
V_ip6_log_time = time_second;
@@ -178,8 +184,8 @@ ip6_forward(struct mbuf *m, int srcrt)
sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &error);
if (sp == NULL) {
- V_ipsec6stat.out_inval++;
- V_ip6stat.ip6s_cantforward++;
+ IPSEC6STAT_INC(out_inval);
+ IP6STAT_INC(ip6s_cantforward);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -199,8 +205,8 @@ ip6_forward(struct mbuf *m, int srcrt)
/*
* This packet is just discarded.
*/
- V_ipsec6stat.out_polvio++;
- V_ip6stat.ip6s_cantforward++;
+ IPSEC6STAT_INC(out_polvio);
+ IP6STAT_INC(ip6s_cantforward);
KEY_FREESP(&sp);
if (mcopy) {
#if 0
@@ -222,7 +228,7 @@ ip6_forward(struct mbuf *m, int srcrt)
if (sp->req == NULL) {
/* XXX should be panic ? */
printf("ip6_forward: No IPsec request specified.\n");
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
KEY_FREESP(&sp);
if (mcopy) {
#if 0
@@ -306,7 +312,7 @@ ip6_forward(struct mbuf *m, int srcrt)
/* don't show these error codes to the user */
break;
}
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -347,18 +353,18 @@ ip6_forward(struct mbuf *m, int srcrt)
goto skip_routing;
skip_ipsec:
#endif
-
+again:
bzero(&rin6, sizeof(struct route_in6));
dst = (struct sockaddr_in6 *)&rin6.ro_dst;
dst->sin6_len = sizeof(struct sockaddr_in6);
dst->sin6_family = AF_INET6;
dst->sin6_addr = ip6->ip6_dst;
-
+again2:
rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
if (rin6.ro_rt != NULL)
RT_UNLOCK(rin6.ro_rt);
else {
- V_ip6stat.ip6s_noroute++;
+ IP6STAT_INC(ip6s_noroute);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
if (mcopy) {
icmp6_error(mcopy, ICMP6_DST_UNREACH,
@@ -383,13 +389,13 @@ skip_routing:
src_in6 = ip6->ip6_src;
if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) {
/* XXX: this should not happen */
- V_ip6stat.ip6s_cantforward++;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_cantforward);
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
- V_ip6stat.ip6s_cantforward++;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_cantforward);
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
if (inzone != outzone
@@ -397,8 +403,8 @@ skip_routing:
&& !ipsecrt
#endif
) {
- V_ip6stat.ip6s_cantforward++;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_cantforward);
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
if (V_ip6_log_time + V_ip6_log_interval < time_second) {
@@ -428,8 +434,8 @@ skip_routing:
if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 ||
in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 ||
inzone != outzone) {
- V_ip6stat.ip6s_cantforward++;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_cantforward);
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
@@ -556,6 +562,7 @@ skip_routing:
if (!PFIL_HOOKED(&V_inet6_pfil_hook))
goto pass;
+ odst = ip6->ip6_dst;
/* Run through list of hooks for output packets. */
error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL);
if (error != 0)
@@ -564,16 +571,66 @@ skip_routing:
goto freecopy;
ip6 = mtod(m, struct ip6_hdr *);
+ /* See if destination IP address was changed by packet filter. */
+ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ /* If destination is now ourself drop to ip6_input(). */
+ if (in6_localip(&ip6->ip6_dst)) {
+ m->m_flags |= M_FASTFWD_OURS;
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ error = netisr_queue(NETISR_IPV6, m);
+ goto out;
+ } else
+ goto again; /* Redo the routing table lookup. */
+ }
+
+ /* See if local, if yes, send it to netisr. */
+ if (m->m_flags & M_FASTFWD_OURS) {
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ error = netisr_queue(NETISR_IPV6, m);
+ goto out;
+ }
+ /* Or forward to some other address? */
+ if ((m->m_flags & M_IP6_NEXTHOP) &&
+ (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
+ dst = (struct sockaddr_in6 *)&rin6.ro_dst;
+ bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m->m_flags &= ~M_IP6_NEXTHOP;
+ m_tag_delete(m, fwd_tag);
+ goto again2;
+ }
+
pass:
error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
if (error) {
in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
} else {
- V_ip6stat.ip6s_forward++;
+ IP6STAT_INC(ip6s_forward);
in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward);
if (type)
- V_ip6stat.ip6s_redirectsent++;
+ IP6STAT_INC(ip6s_redirectsent);
else {
if (mcopy)
goto freecopy;
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index ee537a52..aba38ecf 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -67,7 +67,9 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -92,6 +94,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/ip_var.h>
#include <netinet/in_systm.h>
#include <net/if_llatbl.h>
#ifdef INET
@@ -115,6 +118,12 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
+#ifdef FLOWTABLE
+#include <net/flowtable.h>
+VNET_DECLARE(int, ip6_output_flowtable_size);
+#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
+#endif
+
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
@@ -139,6 +148,9 @@ RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
static void ip6_init2(void *);
static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
+static struct ip6aux *ip6_addaux(struct mbuf *);
+static struct ip6aux *ip6_findaux(struct mbuf *m);
+static void ip6_delaux (struct mbuf *);
static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
@@ -156,6 +168,8 @@ ip6_init(void)
TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
&V_ip6_auto_linklocal);
+ TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv);
+ TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr);
TAILQ_INIT(&V_in6_ifaddrhead);
@@ -171,6 +185,24 @@ ip6_init(void)
nd6_init();
frag6_init();
+#ifdef FLOWTABLE
+ if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
+ &V_ip6_output_flowtable_size)) {
+ if (V_ip6_output_flowtable_size < 256)
+ V_ip6_output_flowtable_size = 256;
+ if (!powerof2(V_ip6_output_flowtable_size)) {
+ printf("flowtable must be power of 2 size\n");
+ V_ip6_output_flowtable_size = 2048;
+ }
+ } else {
+ /*
+ * round up to the next power of 2
+ */
+ V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
+ }
+ V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
+#endif
+
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
@@ -301,6 +333,83 @@ ip6_init2(void *dummy)
/* This must be after route_init(), which is now SI_ORDER_THIRD */
SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
+static int
+ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
+ int *nxt, int *ours)
+{
+ struct ip6_hdr *ip6;
+ struct ip6_hbh *hbh;
+
+ if (ip6_hopopts_input(plen, rtalert, &m, off)) {
+#if 0 /*touches NULL pointer*/
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+#endif
+ goto out; /* m have already been freed */
+ }
+
+ /* adjust pointer */
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ /*
+ * if the payload length field is 0 and the next header field
+ * indicates Hop-by-Hop Options header, then a Jumbo Payload
+ * option MUST be included.
+ */
+ if (ip6->ip6_plen == 0 && *plen == 0) {
+ /*
+ * Note that if a valid jumbo payload option is
+ * contained, ip6_hopopts_input() must set a valid
+ * (non-zero) payload length to the variable plen.
+ */
+ IP6STAT_INC(ip6s_badoptions);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
+ icmp6_error(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
+ goto out;
+ }
+#ifndef PULLDOWN_TEST
+ /* ip6_hopopts_input() ensures that mbuf is contiguous */
+ hbh = (struct ip6_hbh *)(ip6 + 1);
+#else
+ IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
+ sizeof(struct ip6_hbh));
+ if (hbh == NULL) {
+ IP6STAT_INC(ip6s_tooshort);
+ goto out;
+ }
+#endif
+ *nxt = hbh->ip6h_nxt;
+
+ /*
+ * If we are acting as a router and the packet contains a
+ * router alert option, see if we know the option value.
+ * Currently, we only support the option value for MLD, in which
+ * case we should pass the packet to the multicast routing
+ * daemon.
+ */
+ if (*rtalert != ~0) {
+ switch (*rtalert) {
+ case IP6OPT_RTALERT_MLD:
+ if (V_ip6_forwarding)
+ *ours = 1;
+ break;
+ default:
+ /*
+ * RFC2711 requires unrecognized values must be
+ * silently ignored.
+ */
+ break;
+ }
+ }
+
+ return (0);
+
+out:
+ return (1);
+}
+
void
ip6_input(struct mbuf *m)
{
@@ -334,26 +443,36 @@ ip6_input(struct mbuf *m)
*/
ip6_delaux(m);
+ if (m->m_flags & M_FASTFWD_OURS) {
+ /*
+ * Firewall changed destination to local.
+ */
+ m->m_flags &= ~M_FASTFWD_OURS;
+ ours = 1;
+ deliverifp = m->m_pkthdr.rcvif;
+ ip6 = mtod(m, struct ip6_hdr *);
+ goto hbhcheck;
+ }
+
/*
* mbuf statistics
*/
if (m->m_flags & M_EXT) {
if (m->m_next)
- V_ip6stat.ip6s_mext2m++;
+ IP6STAT_INC(ip6s_mext2m);
else
- V_ip6stat.ip6s_mext1++;
+ IP6STAT_INC(ip6s_mext1);
} else {
-#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
if (m->m_next) {
if (m->m_flags & M_LOOP) {
- V_ip6stat.ip6s_m2m[V_loif->if_index]++;
- } else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
- V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
+ IP6STAT_INC(ip6s_m2m[V_loif->if_index]);
+ } else if (m->m_pkthdr.rcvif->if_index < IP6S_M2MMAX)
+ IP6STAT_INC(
+ ip6s_m2m[m->m_pkthdr.rcvif->if_index]);
else
- V_ip6stat.ip6s_m2m[0]++;
+ IP6STAT_INC(ip6s_m2m[0]);
} else
- V_ip6stat.ip6s_m1++;
-#undef M2MMAX
+ IP6STAT_INC(ip6s_m1);
}
/* drop the packet if IPv6 operation is disabled on the IF */
@@ -363,7 +482,7 @@ ip6_input(struct mbuf *m)
}
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
- V_ip6stat.ip6s_total++;
+ IP6STAT_INC(ip6s_total);
#ifndef PULLDOWN_TEST
/*
@@ -401,7 +520,7 @@ ip6_input(struct mbuf *m)
struct ifnet *inifp;
inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
return;
}
@@ -410,12 +529,12 @@ ip6_input(struct mbuf *m)
ip6 = mtod(m, struct ip6_hdr *);
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
- V_ip6stat.ip6s_badvers++;
+ IP6STAT_INC(ip6s_badvers);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
goto bad;
}
- V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
+ IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]);
/*
* Check against address spoofing/corruption.
@@ -425,7 +544,7 @@ ip6_input(struct mbuf *m)
/*
* XXX: "badscope" is not very suitable for a multicast source.
*/
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
@@ -437,7 +556,7 @@ ip6_input(struct mbuf *m)
* because ip6_mloopback() passes the "actual" interface
* as the outgoing/incoming interface.
*/
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
@@ -462,7 +581,7 @@ ip6_input(struct mbuf *m)
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
@@ -476,11 +595,18 @@ ip6_input(struct mbuf *m)
*/
if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
#endif
+#ifdef IPSEC
+ /*
+ * Bypass packet filtering for packets previously handled by IPsec.
+ */
+ if (ip6_ipsec_filtertunnel(m))
+ goto passin;
+#endif /* IPSEC */
/*
* Run through list of hooks for input packets.
@@ -503,6 +629,23 @@ ip6_input(struct mbuf *m)
ip6 = mtod(m, struct ip6_hdr *);
srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
+ if (m->m_flags & M_FASTFWD_OURS) {
+ m->m_flags &= ~M_FASTFWD_OURS;
+ ours = 1;
+ deliverifp = m->m_pkthdr.rcvif;
+ goto hbhcheck;
+ }
+ if ((m->m_flags & M_IP6_NEXTHOP) &&
+ m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
+ /*
+ * Directly ship the packet on. This allows forwarding
+ * packets originally destined to us to some other directly
+ * connected host.
+ */
+ ip6_forward(m, 1);
+ goto out;
+ }
+
passin:
/*
* Disambiguate address scope zones (if there is ambiguity).
@@ -515,12 +658,12 @@ passin:
* is not loopback.
*/
if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
- V_ip6stat.ip6s_badscope++; /* XXX */
+ IP6STAT_INC(ip6s_badscope); /* XXX */
goto bad;
}
if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
@@ -724,7 +867,7 @@ passin:
* and we're not a router.
*/
if (!V_ip6_forwarding) {
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
goto bad;
}
@@ -763,71 +906,11 @@ passin:
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
- struct ip6_hbh *hbh;
+ int error;
- if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
-#if 0 /*touches NULL pointer*/
- in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
-#endif
- goto out; /* m have already been freed */
- }
-
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
-
- /*
- * if the payload length field is 0 and the next header field
- * indicates Hop-by-Hop Options header, then a Jumbo Payload
- * option MUST be included.
- */
- if (ip6->ip6_plen == 0 && plen == 0) {
- /*
- * Note that if a valid jumbo payload option is
- * contained, ip6_hopopts_input() must set a valid
- * (non-zero) payload length to the variable plen.
- */
- V_ip6stat.ip6s_badoptions++;
- in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
- in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
- icmp6_error(m, ICMP6_PARAM_PROB,
- ICMP6_PARAMPROB_HEADER,
- (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
+ error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours);
+ if (error != 0)
goto out;
- }
-#ifndef PULLDOWN_TEST
- /* ip6_hopopts_input() ensures that mbuf is contiguous */
- hbh = (struct ip6_hbh *)(ip6 + 1);
-#else
- IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
- sizeof(struct ip6_hbh));
- if (hbh == NULL) {
- V_ip6stat.ip6s_tooshort++;
- goto out;
- }
-#endif
- nxt = hbh->ip6h_nxt;
-
- /*
- * If we are acting as a router and the packet contains a
- * router alert option, see if we know the option value.
- * Currently, we only support the option value for MLD, in which
- * case we should pass the packet to the multicast routing
- * daemon.
- */
- if (rtalert != ~0) {
- switch (rtalert) {
- case IP6OPT_RTALERT_MLD:
- if (V_ip6_forwarding)
- ours = 1;
- break;
- default:
- /*
- * RFC2711 requires unrecognized values must be
- * silently ignored.
- */
- break;
- }
- }
} else
nxt = ip6->ip6_nxt;
@@ -838,7 +921,7 @@ passin:
* Drop packet if shorter than we expect.
*/
if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
goto bad;
}
@@ -890,7 +973,7 @@ passin:
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
@@ -898,13 +981,13 @@ passin:
/*
* Tell launch routine the next header
*/
- V_ip6stat.ip6s_delivered++;
+ IP6STAT_INC(ip6s_delivered);
in6_ifstat_inc(deliverifp, ifs6_in_deliver);
nest = 0;
while (nxt != IPPROTO_DONE) {
if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
- V_ip6stat.ip6s_toomanyhdr++;
+ IP6STAT_INC(ip6s_toomanyhdr);
goto bad;
}
@@ -913,7 +996,7 @@ passin:
* more sanity checks in header chain processing.
*/
if (m->m_pkthdr.len < off) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
goto bad;
}
@@ -1007,14 +1090,14 @@ ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
if (hbh == NULL) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return -1;
}
hbhlen = (hbh->ip6h_len + 1) << 3;
IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
hbhlen);
if (hbh == NULL) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return -1;
}
#endif
@@ -1039,7 +1122,7 @@ ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
*
* The function assumes that hbh header is located right after the IPv6 header
* (RFC2460 p7), opthead is pointer into data content in m, and opthead to
- * opthead + hbhlen is located in continuous memory region.
+ * opthead + hbhlen is located in contiguous memory region.
*/
int
ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
@@ -1059,7 +1142,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
break;
case IP6OPT_PADN:
if (hbhlen < IP6OPT_MINLEN) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
goto bad;
}
optlen = *(opt + 1) + 2;
@@ -1067,7 +1150,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
case IP6OPT_ROUTER_ALERT:
/* XXX may need check for alignment */
if (hbhlen < IP6OPT_RTALERT_LEN) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
goto bad;
}
if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
@@ -1084,7 +1167,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
case IP6OPT_JUMBO:
/* XXX may need check for alignment */
if (hbhlen < IP6OPT_JUMBO_LEN) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
goto bad;
}
if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
@@ -1102,7 +1185,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
*/
ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_plen) {
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt - opthead);
@@ -1126,7 +1209,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
* there's no explicit mention in specification.
*/
if (*plenp != 0) {
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 2 - opthead);
@@ -1138,7 +1221,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
* jumbo payload length must be larger than 65535.
*/
if (jumboplen <= IPV6_MAXPACKET) {
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 2 - opthead);
@@ -1149,7 +1232,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
break;
default: /* unknown option */
if (hbhlen < IP6OPT_MINLEN) {
- V_ip6stat.ip6s_toosmall++;
+ IP6STAT_INC(ip6s_toosmall);
goto bad;
}
optlen = ip6_unknown_opt(opt, m,
@@ -1172,7 +1255,7 @@ ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
* Unknown option processing.
* The third argument `off' is the offset from the IPv6 header to the option,
* which is necessary if the IPv6 header the and option header and IPv6 header
- * is not continuous in order to return an ICMPv6 error.
+ * is not contiguous in order to return an ICMPv6 error.
*/
int
ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
@@ -1186,11 +1269,11 @@ ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
m_freem(m);
return (-1);
case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
return (-1);
case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
ip6 = mtod(m, struct ip6_hdr *);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
(m->m_flags & (M_BCAST|M_MCAST)))
@@ -1369,14 +1452,14 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
ip6->ip6_nxt);
if (ext == NULL) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return;
}
hbh = mtod(ext, struct ip6_hbh *);
hbhlen = (hbh->ip6h_len + 1) << 3;
if (hbhlen != ext->m_len) {
m_freem(ext);
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return;
}
#endif
@@ -1443,7 +1526,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
#else
ext = ip6_pullexthdr(m, off, nxt);
if (ext == NULL) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return;
}
ip6e = mtod(ext, struct ip6_ext *);
@@ -1453,7 +1536,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
elen = (ip6e->ip6e_len + 1) << 3;
if (elen != ext->m_len) {
m_freem(ext);
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return;
}
#endif
@@ -1471,7 +1554,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
mp = &(*mp)->m_next;
break;
case IPPROTO_ROUTING:
- if (!in6p->inp_flags & IN6P_RTHDR)
+ if (!(in6p->inp_flags & IN6P_RTHDR))
break;
*mp = sbcreatecontrol((caddr_t)ip6e, elen,
@@ -1753,7 +1836,7 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
}
}
-struct ip6aux *
+static struct ip6aux *
ip6_addaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1770,7 +1853,7 @@ ip6_addaux(struct mbuf *m)
return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
}
-struct ip6aux *
+static struct ip6aux *
ip6_findaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1779,7 +1862,7 @@ ip6_findaux(struct mbuf *m)
return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
}
-void
+static void
ip6_delaux(struct mbuf *m)
{
struct m_tag *mtag;
diff --git a/freebsd/sys/netinet6/ip6_ipsec.c b/freebsd/sys/netinet6/ip6_ipsec.c
index d0b075da..fe61dab9 100644
--- a/freebsd/sys/netinet6/ip6_ipsec.c
+++ b/freebsd/sys/netinet6/ip6_ipsec.c
@@ -32,6 +32,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
@@ -45,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
@@ -152,7 +154,7 @@ ip6_ipsec_fwd(struct mbuf *m)
KEY_FREESP(&sp);
splx(s);
if (error) {
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
return 1;
}
#endif /* IPSEC */
@@ -291,11 +293,16 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
/*
* Do delayed checksums now because we send before
* this is done in the normal processing path.
+ * For IPv6 we do delayed checksums in ip6_output.c.
*/
+#ifdef INET
if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ ipseclog((LOG_DEBUG,
+ "%s: we do not support IPv4 over IPv6", __func__));
in_delayed_cksum(*m);
(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
+#endif
/*
* Preserve KAME behaviour: ENOENT can be returned
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index 3188a13e..00eab8ed 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -157,13 +157,14 @@ static VNET_DEFINE(int, ip6_mrouter_ver) = 0;
SYSCTL_DECL(_net_inet6);
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
+static SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
static struct mrt6stat mrt6stat;
SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
&mrt6stat, mrt6stat,
"Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
+#define MRT6STAT_INC(name) mrt6stat.name += 1
#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
@@ -251,10 +252,11 @@ static mifi_t nummifs = 0;
static mifi_t reg_mif_num = (mifi_t)-1;
static struct pim6stat pim6stat;
-SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
&pim6stat, pim6stat,
- "PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
+ "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
+#define PIM6STAT_INC(name) pim6stat.name += 1
static VNET_DEFINE(int, pim6);
#define V_pim6 VNET(pim6)
@@ -272,7 +274,7 @@ static VNET_DEFINE(int, pim6);
#define MF6CFIND(o, g, rt) do { \
struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
rt = NULL; \
- mrt6stat.mrt6s_mfc_lookups++; \
+ MRT6STAT_INC(mrt6s_mfc_lookups); \
while (_rt) { \
if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
@@ -283,7 +285,7 @@ static VNET_DEFINE(int, pim6);
_rt = _rt->mf6c_next; \
} \
if (rt == NULL) { \
- mrt6stat.mrt6s_mfc_misses++; \
+ MRT6STAT_INC(mrt6s_mfc_misses); \
} \
} while (/*CONSTCOND*/ 0)
@@ -718,7 +720,6 @@ add_m6if(struct mif6ctl *mifcp)
mifp->m6_pkt_out = 0;
mifp->m6_bytes_in = 0;
mifp->m6_bytes_out = 0;
- bzero(&mifp->m6_route, sizeof(mifp->m6_route));
/* Adjust nummifs up if the mifi is higher than nummifs */
if (nummifs <= mifcp->mif6c_mifi)
@@ -1103,7 +1104,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
* (although such packets must normally set 1 to the hop limit field).
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
- V_ip6stat.ip6s_cantforward++;
+ IP6STAT_INC(ip6s_cantforward);
if (V_ip6_log_time + V_ip6_log_interval < time_second) {
V_ip6_log_time = time_second;
log(LOG_DEBUG,
@@ -1145,7 +1146,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
GET_TIME(tp);
#endif /* UPCALL_TIMING */
- mrt6stat.mrt6s_no_route++;
+ MRT6STAT_INC(mrt6s_no_route);
#ifdef MRT6DEBUG
if (V_mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
@@ -1272,7 +1273,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
"socket queue full\n");
- mrt6stat.mrt6s_upq_sockfull++;
+ MRT6STAT_INC(mrt6s_upq_sockfull);
free(rte, M_MRTABLE6);
m_freem(mb0);
free(rt, M_MRTABLE6);
@@ -1280,7 +1281,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
return (ENOBUFS);
}
- mrt6stat.mrt6s_upcalls++;
+ MRT6STAT_INC(mrt6s_upcalls);
/* insert new entry at head of hash chain */
bzero(rt, sizeof(*rt));
@@ -1306,7 +1307,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
if (++npkts > MAX_UPQ6) {
- mrt6stat.mrt6s_upq_ovflw++;
+ MRT6STAT_INC(mrt6s_upq_ovflw);
free(rte, M_MRTABLE6);
m_freem(mb0);
MFC6_UNLOCK();
@@ -1375,7 +1376,7 @@ expire_upcalls(void *unused)
free(rte, M_MRTABLE6);
rte = n;
} while (rte != NULL);
- mrt6stat.mrt6s_cache_cleanups++;
+ MRT6STAT_INC(mrt6s_cache_cleanups);
n6expire[i]--;
*nptr = mfc->mf6c_next;
@@ -1431,7 +1432,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
ifp->if_index, mifi,
mif6table[mifi].m6_ifp->if_index);
#endif
- mrt6stat.mrt6s_wrong_if++;
+ MRT6STAT_INC(mrt6s_wrong_if);
rt->mf6c_wrong_if++;
/*
* If we are doing PIM processing, and we are forwarding
@@ -1504,14 +1505,14 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
break;
}
- mrt6stat.mrt6s_upcalls++;
+ MRT6STAT_INC(mrt6s_upcalls);
if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n");
#endif
- ++mrt6stat.mrt6s_upq_sockfull;
+ MRT6STAT_INC(mrt6s_upq_sockfull);
return (ENOBUFS);
} /* if socket Q full */
} /* if PIM */
@@ -1538,7 +1539,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
dst0 = ip6->ip6_dst;
if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 ||
(error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
return (error);
}
for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
@@ -1558,7 +1559,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
&odzone) ||
iszone != oszone ||
idzone != odzone) {
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
continue;
}
}
@@ -1577,11 +1578,8 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
int error = 0;
- struct sockaddr_in6 *dst6;
u_long linkmtu;
- dst6 = &mifp->m6_route.ro_dst;
-
/*
* Make a new reference to the packet; make sure that
* the IPv6 header is actually copied, not just referenced,
@@ -1611,8 +1609,8 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
/* XXX: ip6_output will override ip6->ip6_hlim */
im6o.im6o_multicast_hlim = ip6->ip6_hlim;
im6o.im6o_multicast_loop = 1;
- error = ip6_output(mb_copy, NULL, &mifp->m6_route,
- IPV6_FORWARDING, &im6o, NULL, NULL);
+ error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o,
+ NULL, NULL);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT)
@@ -1627,10 +1625,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
* loop back a copy now.
*/
if (in6_mcast_loop) {
- dst6->sin6_len = sizeof(struct sockaddr_in6);
- dst6->sin6_family = AF_INET6;
- dst6->sin6_addr = ip6->ip6_dst;
- ip6_mloopback(ifp, m, &mifp->m6_route.ro_dst);
+ struct sockaddr_in6 dst6;
+
+ bzero(&dst6, sizeof(dst6));
+ dst6.sin6_len = sizeof(struct sockaddr_in6);
+ dst6.sin6_family = AF_INET6;
+ dst6.sin6_addr = ip6->ip6_dst;
+ ip6_mloopback(ifp, m, &dst6);
}
/*
@@ -1639,15 +1640,18 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
*/
linkmtu = IN6_LINKMTU(ifp);
if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) {
- dst6->sin6_len = sizeof(struct sockaddr_in6);
- dst6->sin6_family = AF_INET6;
- dst6->sin6_addr = ip6->ip6_dst;
+ struct sockaddr_in6 dst6;
+
+ bzero(&dst6, sizeof(dst6));
+ dst6.sin6_len = sizeof(struct sockaddr_in6);
+ dst6.sin6_family = AF_INET6;
+ dst6.sin6_addr = ip6->ip6_dst;
/*
* We just call if_output instead of nd6_output here, since
* we need no ND for a multicast forwarded packet...right?
*/
error = (*ifp->if_output)(ifp, mb_copy,
- (struct sockaddr *)&mifp->m6_route.ro_dst, NULL);
+ (struct sockaddr *)&dst6, NULL);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT)
log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
@@ -1696,7 +1700,7 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
ip6_sprintf(ip6bufd, &ip6->ip6_dst));
}
#endif
- ++pim6stat.pim6s_snd_registers;
+ PIM6STAT_INC(pim6s_snd_registers);
/* Make a copy of the packet to send to the user level process */
MGETHDR(mm, M_DONTWAIT, MT_HEADER);
@@ -1731,7 +1735,7 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
im6->im6_mif = mif - mif6table;
/* iif info is not given for reg. encap.n */
- mrt6stat.mrt6s_upcalls++;
+ MRT6STAT_INC(mrt6s_upcalls);
if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
#ifdef MRT6DEBUG
@@ -1739,7 +1743,7 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
log(LOG_WARNING,
"register_send: ip6_mrouter socket queue full\n");
#endif
- ++mrt6stat.mrt6s_upq_sockfull;
+ MRT6STAT_INC(mrt6s_upq_sockfull);
return (ENOBUFS);
}
return (0);
@@ -1780,7 +1784,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
int minlen;
int off = *offp;
- ++pim6stat.pim6s_rcv_total;
+ PIM6STAT_INC(pim6s_rcv_total);
ip6 = mtod(m, struct ip6_hdr *);
pimlen = m->m_pkthdr.len - *offp;
@@ -1789,7 +1793,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
* Validate lengths
*/
if (pimlen < PIM_MINLEN) {
- ++pim6stat.pim6s_rcv_tooshort;
+ PIM6STAT_INC(pim6s_rcv_tooshort);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_PIM)
log(LOG_DEBUG,"pim6_input: PIM packet too short\n");
@@ -1822,7 +1826,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
#else
IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
if (pim == NULL) {
- pim6stat.pim6s_rcv_tooshort++;
+ PIM6STAT_INC(pim6s_rcv_tooshort);
return (IPPROTO_DONE);
}
#endif
@@ -1842,7 +1846,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
cksumlen = pimlen;
if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
- ++pim6stat.pim6s_rcv_badsum;
+ PIM6STAT_INC(pim6s_rcv_badsum);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_PIM)
log(LOG_DEBUG,
@@ -1856,7 +1860,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
/* PIM version check */
if (pim->pim_ver != PIM_VERSION) {
- ++pim6stat.pim6s_rcv_badversion;
+ PIM6STAT_INC(pim6s_rcv_badversion);
#ifdef MRT6DEBUG
log(LOG_ERR,
"pim6_input: incorrect version %d, expecting %d\n",
@@ -1882,7 +1886,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#endif
- ++pim6stat.pim6s_rcv_registers;
+ PIM6STAT_INC(pim6s_rcv_registers);
if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
#ifdef MRT6DEBUG
@@ -1904,8 +1908,8 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
* Validate length
*/
if (pimlen < PIM6_REG_MINLEN) {
- ++pim6stat.pim6s_rcv_tooshort;
- ++pim6stat.pim6s_rcv_badregisters;
+ PIM6STAT_INC(pim6s_rcv_tooshort);
+ PIM6STAT_INC(pim6s_rcv_badregisters);
#ifdef MRT6DEBUG
log(LOG_ERR,
"pim6_input: register packet size too "
@@ -1929,7 +1933,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
/* verify the version number of the inner packet */
if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
- ++pim6stat.pim6s_rcv_badregisters;
+ PIM6STAT_INC(pim6s_rcv_badregisters);
#ifdef MRT6DEBUG
log(LOG_DEBUG, "pim6_input: invalid IP version (%d) "
"of the inner packet\n",
@@ -1941,7 +1945,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
/* verify the inner packet is destined to a mcast group */
if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
- ++pim6stat.pim6s_rcv_badregisters;
+ PIM6STAT_INC(pim6s_rcv_badregisters);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_PIM)
log(LOG_DEBUG,
diff --git a/freebsd/sys/netinet6/ip6_mroute.h b/freebsd/sys/netinet6/ip6_mroute.h
index 701d5345..d2df0dbe 100644
--- a/freebsd/sys/netinet6/ip6_mroute.h
+++ b/freebsd/sys/netinet6/ip6_mroute.h
@@ -212,7 +212,6 @@ struct mif6 {
u_quad_t m6_pkt_out; /* # pkts out on interface */
u_quad_t m6_bytes_in; /* # bytes in on interface */
u_quad_t m6_bytes_out; /* # bytes out on interface */
- struct route_in6 m6_route; /* cached route */
#ifdef notyet
u_int m6_rsvp_on; /* RSVP listening on this vif */
struct socket *m6_rsvpd; /* RSVP daemon socket */
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index a92e68ec..06f1246a 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -67,8 +67,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_sctp.h>
+#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -83,6 +85,8 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/ucred.h>
+#include <machine/in_cksum.h>
+
#include <net/if.h>
#include <net/netisr.h>
#include <net/route.h>
@@ -91,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -113,6 +118,10 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#include <netinet6/scope6_var.h>
+#ifdef FLOWTABLE
+#include <net/flowtable.h>
+#endif
+
extern int in6_mcast_loop;
struct ip6_exthdrs {
@@ -123,21 +132,21 @@ struct ip6_exthdrs {
struct mbuf *ip6e_dest2;
};
-static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
- struct ucred *, int));
-static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
- struct socket *, struct sockopt *));
+static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
+ struct ucred *, int);
+static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
+ struct socket *, struct sockopt *);
static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
-static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
- struct ucred *, int, int, int));
+static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
+ struct ucred *, int, int, int);
static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
-static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
- struct ip6_frag **));
+static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
+ struct ip6_frag **);
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
-static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
- struct ifnet *, struct in6_addr *, u_long *, int *, u_int));
+static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
+ struct ifnet *, struct in6_addr *, u_long *, int *, u_int);
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
@@ -177,12 +186,39 @@ static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
}\
} while (/*CONSTCOND*/ 0)
+static void
+in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
+{
+ u_short csum;
+
+ csum = in_cksum_skip(m, offset + plen, offset);
+ if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
+ csum = 0xffff;
+ offset += m->m_pkthdr.csum_data; /* checksum offset */
+
+ if (offset + sizeof(u_short) > m->m_len) {
+ printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
+ "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
+ m->m_pkthdr.csum_flags);
+ /*
+ * XXX this should not happen, but if it does, the correct
+ * behavior may be to insert the checksum in the appropriate
+ * next mbuf in the chain.
+ */
+ return;
+ }
+ *(u_short *)(m->m_data + offset) = csum;
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
* This function may modify ver and hlim only.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
+ * If route_in6 ro is present and has ro_rt initialized, route lookup would be
+ * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
+ * then result of route lookup is stored in ro->ro_rt.
*
* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
@@ -215,9 +251,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
int needipsec = 0;
-#ifdef SCTP
- int sw_csum;
-#endif
+ int sw_csum, tso;
#ifdef IPSEC
struct ipsec_output_state state;
struct ip6_rthdr *rh = NULL;
@@ -225,6 +259,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
int segleft_org = 0;
struct secpolicy *sp = NULL;
#endif /* IPSEC */
+ struct m_tag *fwd_tag = NULL;
ip6 = mtod(m, struct ip6_hdr *);
if (ip6 == NULL) {
@@ -236,9 +271,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
M_SETFIB(m, inp->inp_inc.inc_fibnum);
finaldst = ip6->ip6_dst;
-
bzero(&exthdrs, sizeof(exthdrs));
-
if (opt) {
/* Hop-by-Hop options header */
MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
@@ -273,6 +306,20 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
goto freehdrs;
case -1: /* Do IPSec */
needipsec = 1;
+ /*
+ * Do delayed checksums now, as we may send before returning.
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ plen = m->m_pkthdr.len - sizeof(*ip6);
+ in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
+ m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+ }
+#endif
case 0: /* No IPSec */
default:
break;
@@ -453,16 +500,16 @@ skip_ipsec2:;
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
(flags & IPV6_UNSPECSRC) == 0) {
error = EOPNOTSUPP;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
error = EOPNOTSUPP;
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
goto bad;
}
- V_ip6stat.ip6s_localout++;
+ IP6STAT_INC(ip6s_localout);
/*
* Route packet.
@@ -475,7 +522,21 @@ skip_ipsec2:;
if (opt && opt->ip6po_rthdr)
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
+#ifdef FLOWTABLE
+ if (ro->ro_rt == NULL) {
+ struct flentry *fle;
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that long for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
+ if (fle != NULL)
+ flow_to_route_in6(fle, ro);
+ }
+#endif
again:
/*
* if specified, try to fill in the traffic class field.
@@ -577,23 +638,23 @@ again:
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
- bzero(&dst_sa, sizeof(dst_sa));
- dst_sa.sin6_family = AF_INET6;
- dst_sa.sin6_len = sizeof(dst_sa);
- dst_sa.sin6_addr = ip6->ip6_dst;
- if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro,
- &ifp, &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) {
- switch (error) {
- case EHOSTUNREACH:
- V_ip6stat.ip6s_noroute++;
- break;
- case EADDRNOTAVAIL:
- default:
- break; /* XXX statistics? */
+ if (ro->ro_rt && fwd_tag == NULL) {
+ rt = ro->ro_rt;
+ ifp = ro->ro_rt->rt_ifp;
+ } else {
+ if (fwd_tag == NULL) {
+ bzero(&dst_sa, sizeof(dst_sa));
+ dst_sa.sin6_family = AF_INET6;
+ dst_sa.sin6_len = sizeof(dst_sa);
+ dst_sa.sin6_addr = ip6->ip6_dst;
+ }
+ error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
+ &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+ if (error != 0) {
+ if (ifp != NULL)
+ in6_ifstat_inc(ifp, ifs6_out_discard);
+ goto bad;
}
- if (ifp != NULL)
- in6_ifstat_inc(ifp, ifs6_out_discard);
- goto bad;
}
if (rt == NULL) {
/*
@@ -618,7 +679,7 @@ again:
/*
* The outgoing interface must be in the zone of source and
- * destination addresses.
+ * destination addresses.
*/
origifp = ifp;
@@ -644,7 +705,7 @@ again:
goto badscope;
}
- /* We should use ia_ifp to support the case of
+ /* We should use ia_ifp to support the case of
* sending packets to an address of our own.
*/
if (ia != NULL && ia->ia_ifp)
@@ -654,7 +715,7 @@ again:
goto routefound;
badscope:
- V_ip6stat.ip6s_badscope++;
+ IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(origifp, ifs6_out_discard);
if (error == 0)
error = EHOSTUNREACH; /* XXX */
@@ -683,7 +744,7 @@ again:
* Confirm that the outgoing interface supports multicast.
*/
if (!(ifp->if_flags & IFF_MULTICAST)) {
- V_ip6stat.ip6s_noroute++;
+ IP6STAT_INC(ip6s_noroute);
in6_ifstat_inc(ifp, ifs6_out_discard);
error = ENETUNREACH;
goto bad;
@@ -796,13 +857,13 @@ again:
#ifdef DIAGNOSTIC
if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
- panic("ip6e_hbh is not continuous");
+ panic("ip6e_hbh is not contiguous");
#endif
/*
* XXX: if we have to send an ICMPv6 error to the sender,
* we need the M_LOOP flag since icmp6_error() expects
* the IPv6 and the hop-by-hop options header are
- * continuous unless the flag is set.
+ * contiguous unless the flag is set.
*/
m->m_flags |= M_LOOP;
m->m_pkthdr.rcvif = ifp;
@@ -832,18 +893,17 @@ again:
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip6_input(). */
- if (in6_localaddr(&ip6->ip6_dst)) {
+ if (in6_localip(&ip6->ip6_dst)) {
+ m->m_flags |= M_FASTFWD_OURS;
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED | CSUM_IP_VALID;
#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
#endif
error = netisr_queue(NETISR_IPV6, m);
@@ -852,7 +912,32 @@ again:
goto again; /* Redo the routing table lookup. */
}
- /* XXX: IPFIREWALL_FORWARD */
+ /* See if local, if yes, send it to netisr. */
+ if (m->m_flags & M_FASTFWD_OURS) {
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ error = netisr_queue(NETISR_IPV6, m);
+ goto done;
+ }
+ /* Or forward to some other address? */
+ if ((m->m_flags & M_IP6_NEXTHOP) &&
+ (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
+ dst = (struct sockaddr_in6 *)&ro->ro_dst;
+ bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m->m_flags &= ~M_IP6_NEXTHOP;
+ m_tag_delete(m, fwd_tag);
+ goto again;
+ }
passout:
/*
@@ -874,16 +959,32 @@ passout:
* 4: if dontfrag == 1 && alwaysfrag == 1
* error, as we cannot handle this conflicting request
*/
+ sw_csum = m->m_pkthdr.csum_flags;
+ if (!hdrsplit) {
+ tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0;
+ sw_csum &= ~ifp->if_hwassist;
+ } else
+ tso = 0;
+ /*
+ * If we added extension headers, we will not do TSO and calculate the
+ * checksums ourselves for now.
+ * XXX-BZ Need a framework to know when the NIC can handle it, even
+ * with ext. hdrs.
+ */
+ if (sw_csum & CSUM_DELAY_DATA_IPV6) {
+ sw_csum &= ~CSUM_DELAY_DATA_IPV6;
+ in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
+ }
#ifdef SCTP
- sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
- if (sw_csum & CSUM_SCTP) {
+ if (sw_csum & CSUM_SCTP_IPV6) {
+ sw_csum &= ~CSUM_SCTP_IPV6;
sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
- sw_csum &= ~CSUM_SCTP;
}
#endif
+ m->m_pkthdr.csum_flags &= ifp->if_hwassist;
tlen = m->m_pkthdr.len;
- if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
+ if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
dontfrag = 1;
else
dontfrag = 0;
@@ -892,7 +993,7 @@ passout:
error = EMSGSIZE;
goto bad;
}
- if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
+ if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */
/*
* Even if the DONTFRAG option is specified, we cannot send the
* packet when the data length is larger than the MTU of the
@@ -976,10 +1077,26 @@ passout:
if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
< tlen /* - hlen */)) {
error = ENOBUFS;
- V_ip6stat.ip6s_odropped++;
+ IP6STAT_INC(ip6s_odropped);
goto bad;
}
+
+ /*
+ * If the interface will not calculate checksums on
+ * fragmented packets, then do it here.
+ * XXX-BZ handle the hw offloading case. Need flags.
+ */
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ in6_delayed_cksum(m, plen, hlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ sctp_delayed_cksum(m, hlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+ }
+#endif
mnext = &m->m_nextpkt;
/*
@@ -1010,7 +1127,7 @@ passout:
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (!m) {
error = ENOBUFS;
- V_ip6stat.ip6s_odropped++;
+ IP6STAT_INC(ip6s_odropped);
goto sendorfree;
}
m->m_pkthdr.rcvif = NULL;
@@ -1023,7 +1140,7 @@ passout:
m->m_len = sizeof(*mhip6);
error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
if (error) {
- V_ip6stat.ip6s_odropped++;
+ IP6STAT_INC(ip6s_odropped);
goto sendorfree;
}
ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
@@ -1035,7 +1152,7 @@ passout:
sizeof(*ip6f) - sizeof(struct ip6_hdr)));
if ((m_frgpart = m_copy(m0, off, len)) == 0) {
error = ENOBUFS;
- V_ip6stat.ip6s_odropped++;
+ IP6STAT_INC(ip6s_odropped);
goto sendorfree;
}
m_cat(m, m_frgpart);
@@ -1044,7 +1161,7 @@ passout:
ip6f->ip6f_reserved = 0;
ip6f->ip6f_ident = id;
ip6f->ip6f_nxt = nextproto;
- V_ip6stat.ip6s_ofragments++;
+ IP6STAT_INC(ip6s_ofragments);
in6_ifstat_inc(ifp, ifs6_out_fragcreat);
}
@@ -1073,14 +1190,13 @@ sendorfree:
}
if (error == 0)
- V_ip6stat.ip6s_fragmented++;
+ IP6STAT_INC(ip6s_fragmented);
done:
- if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
- RTFREE(ro->ro_rt);
- } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
- RTFREE(ro_pmtu->ro_rt);
- }
+ if (ro == &ip6route)
+ RO_RTFREE(ro);
+ if (ro_pmtu == &ip6route)
+ RO_RTFREE(ro_pmtu);
#ifdef IPSEC
if (sp != NULL)
KEY_FREESP(&sp);
@@ -1375,6 +1491,24 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
if (sopt->sopt_level == SOL_SOCKET &&
sopt->sopt_dir == SOPT_SET) {
switch (sopt->sopt_name) {
+ case SO_REUSEADDR:
+ INP_WLOCK(in6p);
+ if ((so->so_options & SO_REUSEADDR) != 0)
+ in6p->inp_flags2 |= INP_REUSEADDR;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEADDR;
+ INP_WUNLOCK(in6p);
+ error = 0;
+ break;
+ case SO_REUSEPORT:
+ INP_WLOCK(in6p);
+ if ((so->so_options & SO_REUSEPORT) != 0)
+ in6p->inp_flags2 |= INP_REUSEPORT;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEPORT;
+ INP_WUNLOCK(in6p);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(in6p);
in6p->inp_inc.inc_fibnum = so->so_fibnum;
@@ -1385,7 +1519,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
break;
}
}
- } else {
+ } else { /* level == IPPROTO_IPV6 */
switch (op) {
case SOPT_SET:
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index 9e210e2d..4a094d42 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -204,12 +204,14 @@ struct ip6stat {
u_quad_t ip6s_rawout; /* total raw ip packets generated */
u_quad_t ip6s_badscope; /* scope error */
u_quad_t ip6s_notmember; /* don't join this multicast group */
- u_quad_t ip6s_nxthist[256]; /* next header history */
+#define IP6S_HDRCNT 256 /* headers count */
+ u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
u_quad_t ip6s_m1; /* one mbuf */
- u_quad_t ip6s_m2m[32]; /* two or more mbuf */
+#define IP6S_M2MMAX 32
+ u_quad_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
u_quad_t ip6s_mext1; /* one ext mbuf */
u_quad_t ip6s_mext2m; /* two or more ext mbuf */
- u_quad_t ip6s_exthdrtoolong; /* ext hdr are not continuous */
+ u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
u_quad_t ip6s_nogif; /* no match gif found */
u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */
@@ -218,27 +220,29 @@ struct ip6stat {
* algorithm:
* XXX: hardcoded 16 = # of ip6 multicast scope types + 1
*/
+#define IP6S_RULESMAX 16
+#define IP6S_SCOPECNT 16
/* number of times that address selection fails */
u_quad_t ip6s_sources_none;
/* number of times that an address on the outgoing I/F is chosen */
- u_quad_t ip6s_sources_sameif[16];
+ u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT];
/* number of times that an address on a non-outgoing I/F is chosen */
- u_quad_t ip6s_sources_otherif[16];
+ u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT];
/*
* number of times that an address that has the same scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_samescope[16];
+ u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT];
/*
* number of times that an address that has a different scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_otherscope[16];
+ u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT];
/* number of times that a deprecated address is chosen */
- u_quad_t ip6s_sources_deprecated[16];
+ u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT];
/* number of times that each rule of source selection is applied. */
- u_quad_t ip6s_sources_rule[16];
+ u_quad_t ip6s_sources_rule[IP6S_RULESMAX];
};
#ifdef _KERNEL
@@ -285,6 +289,8 @@ struct ip6aux {
#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */
+#define M_IP6_NEXTHOP M_PROTO7 /* explicit ip nexthop */
+
#ifdef __NO_STRICT_ALIGNMENT
#define IP6_HDR_ALIGNED_P(ip) 1
#else
@@ -316,6 +322,11 @@ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly
* queue */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
+VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
+VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
+ * receiving IF. */
+VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when
+ forwarding enabled */
VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */
VNET_DECLARE(int, ip6_log_interval);
VNET_DECLARE(time_t, ip6_log_time);
@@ -327,6 +338,9 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
#define V_ip6_maxfrags VNET(ip6_maxfrags)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
+#define V_ip6_no_radr VNET(ip6_no_radr)
+#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
+#define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3)
#define V_ip6_keepfaith VNET(ip6_keepfaith)
#define V_ip6_log_interval VNET(ip6_log_interval)
#define V_ip6_log_time VNET(ip6_log_time)
@@ -361,87 +375,87 @@ struct sockopt;
struct inpcb;
-int icmp6_ctloutput __P((struct socket *, struct sockopt *sopt));
+int icmp6_ctloutput(struct socket *, struct sockopt *sopt);
struct in6_ifaddr;
-void ip6_init __P((void));
+void ip6_init(void);
#ifdef VIMAGE
-void ip6_destroy __P((void));
+void ip6_destroy(void);
#endif
int ip6proto_register(short);
int ip6proto_unregister(short);
-void ip6_input __P((struct mbuf *));
-struct in6_ifaddr *ip6_getdstifaddr __P((struct mbuf *));
-void ip6_freepcbopts __P((struct ip6_pktopts *));
+void ip6_input(struct mbuf *);
+struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *);
+void ip6_freepcbopts(struct ip6_pktopts *);
-int ip6_unknown_opt __P((u_int8_t *, struct mbuf *, int));
-char * ip6_get_prevhdr __P((struct mbuf *, int));
-int ip6_nexthdr __P((struct mbuf *, int, int, int *));
-int ip6_lasthdr __P((struct mbuf *, int, int, int *));
+int ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
+char * ip6_get_prevhdr(struct mbuf *, int);
+int ip6_nexthdr(struct mbuf *, int, int, int *);
+int ip6_lasthdr(struct mbuf *, int, int, int *);
-struct ip6aux *ip6_addaux __P((struct mbuf *));
-struct ip6aux *ip6_findaux __P((struct mbuf *));
-void ip6_delaux __P((struct mbuf *));
+#ifdef __notyet__
+struct ip6aux *ip6_findaux(struct mbuf *);
+#endif
extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
struct mbuf *);
-int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *,
- u_int32_t *));
+int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *,
+ u_int32_t *);
struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
struct mbuf **, int *);
-void ip6_savecontrol __P((struct inpcb *, struct mbuf *, struct mbuf **));
-void ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *,
- u_int32_t *));
-int ip6_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+void ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
+void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *,
+ u_int32_t *);
+int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
-void ip6_forward __P((struct mbuf *, int));
+void ip6_forward(struct mbuf *, int);
-void ip6_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in6 *));
-int ip6_output __P((struct mbuf *, struct ip6_pktopts *,
+void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
+int ip6_output(struct mbuf *, struct ip6_pktopts *,
struct route_in6 *,
int,
struct ip6_moptions *, struct ifnet **,
- struct inpcb *));
-int ip6_ctloutput __P((struct socket *, struct sockopt *));
-int ip6_raw_ctloutput __P((struct socket *, struct sockopt *));
-void ip6_initpktopts __P((struct ip6_pktopts *));
-int ip6_setpktopts __P((struct mbuf *, struct ip6_pktopts *,
- struct ip6_pktopts *, struct ucred *, int));
-void ip6_clearpktopts __P((struct ip6_pktopts *, int));
-struct ip6_pktopts *ip6_copypktopts __P((struct ip6_pktopts *, int));
-int ip6_optlen __P((struct inpcb *));
-
-int route6_input __P((struct mbuf **, int *, int));
-
-void frag6_init __P((void));
-int frag6_input __P((struct mbuf **, int *, int));
-void frag6_slowtimo __P((void));
-void frag6_drain __P((void));
-
-void rip6_init __P((void));
-int rip6_input __P((struct mbuf **, int *, int));
-void rip6_ctlinput __P((int, struct sockaddr *, void *));
-int rip6_ctloutput __P((struct socket *, struct sockopt *));
-int rip6_output __P((struct mbuf *, ...));
-int rip6_usrreq __P((struct socket *,
- int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *));
-
-int dest6_input __P((struct mbuf **, int *, int));
-int none_input __P((struct mbuf **, int *, int));
+ struct inpcb *);
+int ip6_ctloutput(struct socket *, struct sockopt *);
+int ip6_raw_ctloutput(struct socket *, struct sockopt *);
+void ip6_initpktopts(struct ip6_pktopts *);
+int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *,
+ struct ip6_pktopts *, struct ucred *, int);
+void ip6_clearpktopts(struct ip6_pktopts *, int);
+struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
+int ip6_optlen(struct inpcb *);
+
+int route6_input(struct mbuf **, int *, int);
+
+void frag6_init(void);
+int frag6_input(struct mbuf **, int *, int);
+void frag6_slowtimo(void);
+void frag6_drain(void);
+
+void rip6_init(void);
+int rip6_input(struct mbuf **, int *, int);
+void rip6_ctlinput(int, struct sockaddr *, void *);
+int rip6_ctloutput(struct socket *, struct sockopt *);
+int rip6_output(struct mbuf *, ...);
+int rip6_usrreq(struct socket *,
+ int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *);
+
+int dest6_input(struct mbuf **, int *, int);
+int none_input(struct mbuf **, int *, int);
int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *,
struct inpcb *inp, struct route_in6 *, struct ucred *cred,
struct ifnet **, struct in6_addr *);
-int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
+int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
- struct rtentry **));
+ struct rtentry **);
int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **, u_int);
-u_int32_t ip6_randomid __P((void));
-u_int32_t ip6_randomflowlabel __P((void));
+u_int32_t ip6_randomid(void);
+u_int32_t ip6_randomflowlabel(void);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h
index 1fae44c8..ec802a51 100644
--- a/freebsd/sys/netinet6/ip6protosw.h
+++ b/freebsd/sys/netinet6/ip6protosw.h
@@ -118,26 +118,26 @@ struct ip6protosw {
/* protocol-protocol hooks */
int (*pr_input) /* input to protocol (from below) */
- __P((struct mbuf **, int *, int));
+ (struct mbuf **, int *, int);
int (*pr_output) /* output to protocol (from above) */
- __P((struct mbuf *, ...));
+ (struct mbuf *, ...);
void (*pr_ctlinput) /* control input (from below) */
- __P((int, struct sockaddr *, void *));
+ (int, struct sockaddr *, void *);
int (*pr_ctloutput) /* control output (from above) */
- __P((struct socket *, struct sockopt *));
+ (struct socket *, struct sockopt *);
/* utility hooks */
void (*pr_init) /* initialization hook */
- __P((void));
+ (void);
void (*pr_destroy) /* cleanup hook */
- __P((void));
+ (void);
void (*pr_fasttimo) /* fast timeout (200ms) */
- __P((void));
+ (void);
void (*pr_slowtimo) /* slow timeout (500ms) */
- __P((void));
+ (void);
void (*pr_drain) /* flush any excess space possible */
- __P((void));
+ (void);
struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */
};
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index d2fd82b7..25f03411 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -196,7 +196,7 @@ static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
* to a vnet in ifp->if_vnet.
*/
static struct mtx mld_mtx;
-MALLOC_DEFINE(M_MLD, "mld", "mld state");
+static MALLOC_DEFINE(M_MLD, "mld", "mld state");
#define MLD_EMBEDSCOPE(pin6, zoneid) \
if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \
@@ -234,8 +234,9 @@ SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
/*
* Non-virtualized sysctls.
*/
-SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
- sysctl_mld_ifinfo, "Per-interface MLDv2 state");
+static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
+ "Per-interface MLDv2 state");
static int mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
@@ -834,7 +835,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
- if (maxdelay >= 32678) {
+ if (maxdelay >= 32768) {
maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
(MLD_MRC_EXP(maxdelay) + 3);
}
@@ -868,16 +869,10 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
*/
if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
/*
- * General Queries SHOULD be directed to ff02::1.
* A general query with a source list has undefined
* behaviour; discard it.
*/
- struct in6_addr dst;
-
- dst = ip6->ip6_dst;
- in6_clearscope(&dst);
- if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
- nsrc > 0)
+ if (nsrc > 0)
return (EINVAL);
is_general_query = 1;
} else {
@@ -2204,6 +2199,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
#endif
mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
inm->in6m_state = MLD_NOT_MEMBER;
+ V_current_state_timers_running6 = 1;
} else if (mli->mli_version == MLD_VERSION_2) {
/*
* Stop group timer and all pending reports.
@@ -3098,7 +3094,6 @@ mld_dispatch_packet(struct mbuf *m)
m0 = mld_v2_encap_report(ifp, m);
if (m0 == NULL) {
CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
- m_freem(m);
IP6STAT_INC(ip6s_odropped);
goto out;
}
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index ead44620..b84baf18 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -72,7 +72,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/in6_ifattach.h>
#include <netinet/icmp6.h>
+#include <netinet6/send.h>
#include <sys/limits.h>
@@ -122,8 +124,10 @@ VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
static struct sockaddr_in6 all1_sa;
-static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
- struct ifnet *));
+int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
+
+static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
+ struct ifnet *);
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
@@ -172,21 +176,37 @@ nd6_ifattach(struct ifnet *ifp)
{
struct nd_ifinfo *nd;
- nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK);
- bzero(nd, sizeof(*nd));
-
+ nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
nd->initialized = 1;
nd->chlim = IPV6_DEFHLIM;
nd->basereachable = REACHABLE_TIME;
nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
nd->retrans = RETRANS_TIMER;
+
+ nd->flags = ND6_IFF_PERFORMNUD;
+
+ /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
+ * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
+ * default regardless of the V_ip6_auto_linklocal configuration to
+ * give a reasonable default behavior.
+ */
+ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
+ (ifp->if_flags & IFF_LOOPBACK))
+ nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
/*
- * Note that the default value of ip6_accept_rtadv is 0, which means
- * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
- * here.
+ * A loopback interface does not need to accept RTADV.
+ * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
+ * default regardless of the V_ip6_accept_rtadv configuration to
+	 * prevent the interface from accepting RA messages that arrived
+ * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
*/
- nd->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);
+ if (V_ip6_accept_rtadv &&
+ !(ifp->if_flags & IFF_LOOPBACK) &&
+ (ifp->if_type != IFT_BRIDGE))
+ nd->flags |= ND6_IFF_ACCEPT_RTADV;
+ if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
+ nd->flags |= ND6_IFF_NO_RADR;
/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
nd6_setmtu0(ifp, nd);
@@ -276,10 +296,9 @@ nd6_option(union nd_opts *ndopts)
struct nd_opt_hdr *nd_opt;
int olen;
- if (ndopts == NULL)
- panic("ndopts == NULL in nd6_option");
- if (ndopts->nd_opts_last == NULL)
- panic("uninitialized ndopts in nd6_option");
+ KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
+ KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
+ __func__));
if (ndopts->nd_opts_search == NULL)
return NULL;
if (ndopts->nd_opts_done)
@@ -327,10 +346,9 @@ nd6_options(union nd_opts *ndopts)
struct nd_opt_hdr *nd_opt;
int i = 0;
- if (ndopts == NULL)
- panic("ndopts == NULL in nd6_options");
- if (ndopts->nd_opts_last == NULL)
- panic("uninitialized ndopts in nd6_options");
+ KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
+ KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
+ __func__));
if (ndopts->nd_opts_search == NULL)
return 0;
@@ -505,6 +523,7 @@ nd6_llinfo_timer(void *arg)
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT);
(void)nd6_free(ln, 0);
ln = NULL;
if (m != NULL)
@@ -522,6 +541,7 @@ nd6_llinfo_timer(void *arg)
case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */
if (!ND6_LLINFO_PERMANENT(ln)) {
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
(void)nd6_free(ln, 1);
ln = NULL;
}
@@ -549,6 +569,7 @@ nd6_llinfo_timer(void *arg)
nd6_ns_output(ifp, dst, dst, ln, 0);
LLE_WLOCK(ln);
} else {
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
(void)nd6_free(ln, 0);
ln = NULL;
}
@@ -809,13 +830,9 @@ nd6_purge(struct ifnet *ifp)
if (V_nd6_defifindex == ifp->if_index)
nd6_setdefaultiface(0);
- if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
- /* refresh default router list
- *
- *
- */
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
+ /* Refresh default router list. */
defrouter_select();
-
}
/* XXXXX
@@ -949,10 +966,9 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
/*
* If the default router list is empty, all addresses are regarded
* as on-link, and thus, as a neighbor.
- * XXX: we restrict the condition to hosts, because routers usually do
- * not have the "default router list".
*/
- if (!V_ip6_forwarding && TAILQ_EMPTY(&V_nd_defrouter) &&
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
+ TAILQ_EMPTY(&V_nd_defrouter) &&
V_nd6_defifindex == ifp->if_index) {
return (1);
}
@@ -1013,8 +1029,7 @@ nd6_free(struct llentry *ln, int gc)
ifp = ln->lle_tbl->llt_ifp;
- if (!V_ip6_forwarding) {
-
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
if (dr != NULL && dr->expire &&
@@ -1113,8 +1128,14 @@ nd6_free(struct llentry *ln, int gc)
LLE_WUNLOCK(ln);
IF_AFDATA_LOCK(ifp);
LLE_WLOCK(ln);
- LLE_REMREF(ln);
- llentry_free(ln);
+
+ /* Guard against race with other llentry_free(). */
+ if (ln->la_flags & LLE_LINKED) {
+ LLE_REMREF(ln);
+ llentry_free(ln);
+ } else
+ LLE_FREE_LOCKED(ln);
+
IF_AFDATA_UNLOCK(ifp);
return (next);
@@ -1172,11 +1193,13 @@ done:
void
nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
- struct sockaddr_in6 *gateway = (struct sockaddr_in6 *)rt->rt_gateway;
+ struct sockaddr_in6 *gateway;
struct nd_defrouter *dr;
- struct ifnet *ifp = rt->rt_ifp;
+ struct ifnet *ifp;
RT_LOCK_ASSERT(rt);
+ gateway = (struct sockaddr_in6 *)rt->rt_gateway;
+ ifp = rt->rt_ifp;
switch (req) {
case RTM_ADD:
@@ -1347,6 +1370,94 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
ND_IFINFO(ifp)->chlim = ND.chlim;
/* FALLTHROUGH */
case SIOCSIFINFO_FLAGS:
+ {
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ia;
+
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+ !(ND.flags & ND6_IFF_IFDISABLED)) {
+			/* ifdisabled 1->0 transition */
+
+ /*
+ * If the interface is marked as ND6_IFF_IFDISABLED and
+ * has an link-local address with IN6_IFF_DUPLICATED,
+ * do not clear ND6_IFF_IFDISABLED.
+ * See RFC 4862, Section 5.4.5.
+ */
+ int duplicated_linklocal = 0;
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
+ IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
+ duplicated_linklocal = 1;
+ break;
+ }
+ }
+ IF_ADDR_RUNLOCK(ifp);
+
+ if (duplicated_linklocal) {
+ ND.flags |= ND6_IFF_IFDISABLED;
+ log(LOG_ERR, "Cannot enable an interface"
+ " with a link-local address marked"
+ " duplicate.\n");
+ } else {
+ ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED;
+ if (ifp->if_flags & IFF_UP)
+ in6_if_up(ifp);
+ }
+ } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+ (ND.flags & ND6_IFF_IFDISABLED)) {
+			/* ifdisabled 0->1 transition */
+ /* Mark all IPv6 address as tentative. */
+
+ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ IF_ADDR_RUNLOCK(ifp);
+ }
+
+ if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) {
+ /* auto_linklocal 0->1 transision */
+
+ /* If no link-local address on ifp, configure */
+ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
+ in6_ifattach(ifp, NULL);
+ } else if (!(ND.flags & ND6_IFF_IFDISABLED) &&
+ ifp->if_flags & IFF_UP) {
+ /*
+ * When the IF already has
+ * ND6_IFF_AUTO_LINKLOCAL, no link-local
+ * address is assigned, and IFF_UP, try to
+ * assign one.
+ */
+ int haslinklocal = 0;
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
+ haslinklocal = 1;
+ break;
+ }
+ }
+ IF_ADDR_RUNLOCK(ifp);
+ if (!haslinklocal)
+ in6_ifattach(ifp, NULL);
+ }
+ }
+ }
ND_IFINFO(ifp)->flags = ND.flags;
break;
#undef ND
@@ -1457,10 +1568,8 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
IF_AFDATA_UNLOCK_ASSERT(ifp);
- if (ifp == NULL)
- panic("ifp == NULL in nd6_cache_lladdr");
- if (from == NULL)
- panic("from == NULL in nd6_cache_lladdr");
+ KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__));
+ KASSERT(from != NULL, ("%s: from == NULL", __func__));
/* nothing must be updated for unspecified address */
if (IN6_IS_ADDR_UNSPECIFIED(from))
@@ -1521,6 +1630,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
*/
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
}
if (!is_newentry) {
@@ -1681,7 +1791,8 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* for those are not autoconfigured hosts, we explicitly avoid such
* cases for safety.
*/
- if (do_update && router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
+ if (do_update && router &&
+ ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
/*
* guaranteed recursion
*/
@@ -1754,9 +1865,12 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
struct mbuf **chain)
{
struct mbuf *m = m0;
+ struct m_tag *mtag;
struct llentry *ln = lle;
+ struct ip6_hdr *ip6;
int error = 0;
int flags = 0;
+ int ip6len;
#ifdef INVARIANTS
if (lle != NULL) {
@@ -1935,6 +2049,28 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
#ifdef MAC
mac_netinet6_nd6_send(ifp, m);
#endif
+
+ /*
+ * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
+ * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
+ * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
+ * to be diverted to user space. When re-injected into the kernel,
+ * send_output() will directly dispatch them to the outgoing interface.
+ */
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
+ if (mtag != NULL) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ /* Use the SEND socket */
+ error = send_sendso_input_hook(m, ifp, SND_OUT,
+ ip6len);
+ /* -1 == no app on SEND socket */
+ if (error == 0 || error != -1)
+ return (error);
+ }
+ }
+
/*
* We were passed in a pointer to an lle with the lock held
* this means that we can't call if_output as we will
@@ -1958,6 +2094,8 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
}
return (error);
}
+ /* Reset layer specific mbuf flags to avoid confusing lower layers. */
+ m->m_flags &= ~(M_PROTOFLAGS);
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
NULL));
@@ -2037,6 +2175,7 @@ nd6_need_cache(struct ifnet *ifp)
#ifdef IFT_CARP
case IFT_CARP:
#endif
+ case IFT_INFINIBAND:
case IFT_GIF: /* XXX need more cases? */
case IFT_PPP:
case IFT_TUNNEL:
@@ -2060,7 +2199,7 @@ nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
*lle = NULL;
IF_AFDATA_UNLOCK_ASSERT(ifp);
- if (m->m_flags & M_MCAST) {
+ if (m != NULL && m->m_flags & M_MCAST) {
int i;
switch (ifp->if_type) {
@@ -2125,7 +2264,6 @@ clear_llinfo_pqueue(struct llentry *ln)
for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
m_freem(m_hold);
}
diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h
index 0893065e..79e41e38 100644
--- a/freebsd/sys/netinet6/nd6.h
+++ b/freebsd/sys/netinet6/nd6.h
@@ -84,6 +84,9 @@ struct nd_ifinfo {
* DAD failure. (XXX: not ND-specific)
*/
#define ND6_IFF_DONT_SET_IFROUTE 0x10
+#define ND6_IFF_AUTO_LINKLOCAL 0x20
+#define ND6_IFF_NO_RADR 0x40
+#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
#define ND6_CREATE LLE_CREATE
#define ND6_EXCLUSIVE LLE_EXCLUSIVE
@@ -382,68 +385,68 @@ union nd_opts {
/* XXX: need nd6_var.h?? */
/* nd6.c */
-void nd6_init __P((void));
+void nd6_init(void);
#ifdef VIMAGE
-void nd6_destroy __P((void));
+void nd6_destroy(void);
#endif
-struct nd_ifinfo *nd6_ifattach __P((struct ifnet *));
-void nd6_ifdetach __P((struct nd_ifinfo *));
-int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *));
-void nd6_option_init __P((void *, int, union nd_opts *));
-struct nd_opt_hdr *nd6_option __P((union nd_opts *));
-int nd6_options __P((union nd_opts *));
-struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *));
-void nd6_setmtu __P((struct ifnet *));
-void nd6_llinfo_settimer __P((struct llentry *, long));
-void nd6_llinfo_settimer_locked __P((struct llentry *, long));
-void nd6_timer __P((void *));
-void nd6_purge __P((struct ifnet *));
-void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int));
-int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *,
- struct sockaddr *, u_char *));
-void nd6_rtrequest __P((int, struct rtentry *, struct rt_addrinfo *));
-int nd6_ioctl __P((u_long, caddr_t, struct ifnet *));
-struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *,
- char *, int, int, int));
-int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *));
-int nd6_output_lle __P((struct ifnet *, struct ifnet *, struct mbuf *,
+struct nd_ifinfo *nd6_ifattach(struct ifnet *);
+void nd6_ifdetach(struct nd_ifinfo *);
+int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
+void nd6_option_init(void *, int, union nd_opts *);
+struct nd_opt_hdr *nd6_option(union nd_opts *);
+int nd6_options(union nd_opts *);
+struct llentry *nd6_lookup(struct in6_addr *, int, struct ifnet *);
+void nd6_setmtu(struct ifnet *);
+void nd6_llinfo_settimer(struct llentry *, long);
+void nd6_llinfo_settimer_locked(struct llentry *, long);
+void nd6_timer(void *);
+void nd6_purge(struct ifnet *);
+void nd6_nud_hint(struct rtentry *, struct in6_addr *, int);
+int nd6_resolve(struct ifnet *, struct rtentry *, struct mbuf *,
+ struct sockaddr *, u_char *);
+void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+int nd6_ioctl(u_long, caddr_t, struct ifnet *);
+struct llentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
+ char *, int, int, int);
+int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *, struct rtentry *);
+int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *, struct rtentry *, struct llentry *,
- struct mbuf **));
-int nd6_output_flush __P((struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct route *));
-int nd6_need_cache __P((struct ifnet *));
-int nd6_storelladdr __P((struct ifnet *, struct mbuf *,
- struct sockaddr *, u_char *, struct llentry **));
+ struct mbuf **);
+int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *, struct route *);
+int nd6_need_cache(struct ifnet *);
+int nd6_storelladdr(struct ifnet *, struct mbuf *,
+ struct sockaddr *, u_char *, struct llentry **);
/* nd6_nbr.c */
-void nd6_na_input __P((struct mbuf *, int, int));
-void nd6_na_output __P((struct ifnet *, const struct in6_addr *,
- const struct in6_addr *, u_long, int, struct sockaddr *));
-void nd6_ns_input __P((struct mbuf *, int, int));
-void nd6_ns_output __P((struct ifnet *, const struct in6_addr *,
- const struct in6_addr *, struct llentry *, int));
-caddr_t nd6_ifptomac __P((struct ifnet *));
-void nd6_dad_start __P((struct ifaddr *, int));
-void nd6_dad_stop __P((struct ifaddr *));
-void nd6_dad_duplicated __P((struct ifaddr *));
+void nd6_na_input(struct mbuf *, int, int);
+void nd6_na_output(struct ifnet *, const struct in6_addr *,
+ const struct in6_addr *, u_long, int, struct sockaddr *);
+void nd6_ns_input(struct mbuf *, int, int);
+void nd6_ns_output(struct ifnet *, const struct in6_addr *,
+ const struct in6_addr *, struct llentry *, int);
+caddr_t nd6_ifptomac(struct ifnet *);
+void nd6_dad_start(struct ifaddr *, int);
+void nd6_dad_stop(struct ifaddr *);
+void nd6_dad_duplicated(struct ifaddr *);
/* nd6_rtr.c */
-void nd6_rs_input __P((struct mbuf *, int, int));
-void nd6_ra_input __P((struct mbuf *, int, int));
-void prelist_del __P((struct nd_prefix *));
-void defrouter_reset __P((void));
-void defrouter_select __P((void));
-void defrtrlist_del __P((struct nd_defrouter *));
-void prelist_remove __P((struct nd_prefix *));
-int nd6_prelist_add __P((struct nd_prefixctl *, struct nd_defrouter *,
- struct nd_prefix **));
-void pfxlist_onlink_check __P((void));
-struct nd_defrouter *defrouter_lookup __P((struct in6_addr *, struct ifnet *));
-struct nd_prefix *nd6_prefix_lookup __P((struct nd_prefixctl *));
-void rt6_flush __P((struct in6_addr *, struct ifnet *));
-int nd6_setdefaultiface __P((int));
-int in6_tmpifadd __P((const struct in6_ifaddr *, int, int));
+void nd6_rs_input(struct mbuf *, int, int);
+void nd6_ra_input(struct mbuf *, int, int);
+void prelist_del(struct nd_prefix *);
+void defrouter_reset(void);
+void defrouter_select(void);
+void defrtrlist_del(struct nd_defrouter *);
+void prelist_remove(struct nd_prefix *);
+int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
+ struct nd_prefix **);
+void pfxlist_onlink_check(void);
+struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
+void rt6_flush(struct in6_addr *, struct ifnet *);
+int nd6_setdefaultiface(int);
+int in6_tmpifadd(const struct in6_ifaddr *, int, int);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index a67fc68f..4574145f 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_carp.h>
+#include <netinet6/send.h>
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -115,10 +116,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
int lladdrlen = 0;
int anycast = 0, proxy = 0, tentative = 0;
int tlladdr;
+ int rflag;
union nd_opts ndopts;
struct sockaddr_dl proxydl;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
+ rflag = 0;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
@@ -345,8 +350,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
goto bad;
nd6_na_output_fib(ifp, &in6_all, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0),
- tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
+ rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
M_GETFIB(m));
goto freeit;
}
@@ -356,8 +360,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
nd6_na_output_fib(ifp, &saddr6, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED,
- tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
+ rflag | ND_NA_FLAG_SOLICITED, tlladdr,
+ proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
freeit:
if (ifa != NULL)
ifa_free(ifa);
@@ -394,6 +398,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
const struct in6_addr *taddr6, struct llentry *ln, int dad)
{
struct mbuf *m;
+ struct m_tag *mtag;
struct ip6_hdr *ip6;
struct nd_neighbor_solicit *nd_ns;
struct ip6_moptions im6o;
@@ -578,14 +583,23 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
nd_ns->nd_ns_cksum =
in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
+ sizeof(unsigned short), M_NOWAIT);
+ if (mtag == NULL)
+ goto bad;
+ *(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type;
+ m_tag_prepend(m, mtag);
+ }
+
ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
- if (ro.ro_rt) { /* we don't cache this route. */
- RTFREE(ro.ro_rt);
- }
+ /* We don't cache this route. */
+ RO_RTFREE(&ro);
+
return;
bad:
@@ -625,6 +639,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
struct llentry *ln = NULL;
union nd_opts ndopts;
struct mbuf *chain = NULL;
+ struct m_tag *mtag;
struct sockaddr_in6 sin6;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
@@ -742,6 +757,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
*/
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
if (is_solicited) {
ln->ln_state = ND6_LLINFO_REACHABLE;
ln->ln_byhint = 0;
@@ -817,6 +833,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
if (lladdr != NULL) {
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln,
+ LLENTRY_RESOLVED);
}
/*
@@ -860,7 +878,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
if (dr)
defrtrlist_del(dr);
- else if (!V_ip6_forwarding) {
+ else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
+ ND6_IFF_ACCEPT_RTADV) {
/*
* Even if the neighbor is not in the default
* router list, the neighbor may be used
@@ -894,6 +913,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* we assume ifp is not a loopback here, so just set
* the 2nd argument as the 1st one.
*/
+
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
+ sizeof(unsigned short), M_NOWAIT);
+ if (mtag == NULL)
+ goto bad;
+ m_tag_prepend(m, mtag);
+ }
+
nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
}
}
@@ -938,6 +966,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
struct sockaddr *sdl0, u_int fibnum)
{
struct mbuf *m;
+ struct m_tag *mtag;
struct ifnet *oifp;
struct ip6_hdr *ip6;
struct nd_neighbor_advert *nd_na;
@@ -1079,14 +1108,23 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
nd_na->nd_na_cksum =
in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
+ sizeof(unsigned short), M_NOWAIT);
+ if (mtag == NULL)
+ goto bad;
+ *(unsigned short *)(mtag + 1) = nd_na->nd_na_type;
+ m_tag_prepend(m, mtag);
+ }
+
ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
- if (ro.ro_rt) { /* we don't cache this route. */
- RTFREE(ro.ro_rt);
- }
+ /* We don't cache this route. */
+ RO_RTFREE(&ro);
+
return;
bad:
@@ -1126,6 +1164,7 @@ nd6_ifptomac(struct ifnet *ifp)
#ifdef IFT_CARP
case IFT_CARP:
#endif
+ case IFT_INFINIBAND:
case IFT_BRIDGE:
case IFT_ISO88025:
return IF_LLADDR(ifp);
@@ -1220,6 +1259,8 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
return;
}
+ if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
+ return;
if (nd6_dad_find(ifa) != NULL) {
/* DAD already in progress */
return;
@@ -1424,7 +1465,7 @@ nd6_dad_duplicated(struct ifaddr *ifa)
* identifier based on the hardware address which is supposed to be
* uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
* operation on the interface SHOULD be disabled.
- * [rfc2462bis-03 Section 5.4.5]
+ * [RFC 4862, Section 5.4.5]
*/
if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
struct in6_addr in6;
@@ -1441,6 +1482,7 @@ nd6_dad_duplicated(struct ifaddr *ifa)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
+ case IFT_INFINIBAND:
in6 = ia->ia_addr.sin6_addr;
if (in6_get_hw_ifid(ifp, &in6) == 0 &&
IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index d73ac569..bd6fa33b 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -70,11 +70,11 @@ __FBSDID("$FreeBSD$");
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
-static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *,
- struct mbuf *, int));
-static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
-static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
- struct nd_defrouter *));
+static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
+ struct mbuf *, int);
+static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
+static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
+ struct nd_defrouter *);
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router
@@ -83,8 +83,8 @@ static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);
static int in6_init_prefix_ltimes(struct nd_prefix *);
-static void in6_init_address_ltimes __P((struct nd_prefix *,
- struct in6_addrlifetime *));
+static void in6_init_address_ltimes(struct nd_prefix *,
+ struct in6_addrlifetime *);
static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);
@@ -132,8 +132,11 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
- /* If I'm not a router, ignore it. */
- if (V_ip6_accept_rtadv != 0 || V_ip6_forwarding != 1)
+ /*
+ * Accept RS only when V_ip6_forwarding=1 and the interface has
+ * no ND6_IFF_ACCEPT_RTADV.
+ */
+ if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
goto freeit;
/* Sanity checks */
@@ -218,12 +221,9 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/*
- * We only accept RAs only when
- * the system-wide variable allows the acceptance, and
- * per-interface variable allows RAs on the receiving interface.
+	 * We only accept RAs when the per-interface flag
+ * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
*/
- if (V_ip6_accept_rtadv == 0)
- goto freeit;
if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
goto freeit;
@@ -273,7 +273,17 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
dr0.flags = nd_ra->nd_ra_flags_reserved;
- dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
+ /*
+ * Effectively-disable routes from RA messages when
+ * ND6_IFF_NO_RADR enabled on the receiving interface or
+ * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
+ */
+ if (ndi->flags & ND6_IFF_NO_RADR)
+ dr0.rtlifetime = 0;
+ else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
+ dr0.rtlifetime = 0;
+ else
+ dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
dr0.expire = time_second + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
@@ -562,7 +572,7 @@ defrtrlist_del(struct nd_defrouter *dr)
* Flush all the routing table entries that use the router
* as a next hop.
*/
- if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */
+ if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
rt6_flush(&dr->rtaddr, dr->ifp);
if (dr->installed) {
@@ -621,20 +631,6 @@ defrouter_select(void)
struct llentry *ln = NULL;
/*
- * This function should be called only when acting as an autoconfigured
- * host. Although the remaining part of this function is not effective
- * if the node is not an autoconfigured host, we explicitly exclude
- * such cases here for safety.
- */
- if (V_ip6_forwarding || !V_ip6_accept_rtadv) {
- nd6log((LOG_WARNING,
- "defrouter_select: called unexpectedly (forwarding=%d, "
- "accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv));
- splx(s);
- return;
- }
-
- /*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h
index 19d0e900..060836ba 100644
--- a/freebsd/sys/netinet6/pim6_var.h
+++ b/freebsd/sys/netinet6/pim6_var.h
@@ -52,7 +52,7 @@ struct pim6stat {
};
#if (defined(KERNEL)) || (defined(_KERNEL))
-int pim6_input __P((struct mbuf **, int*, int));
+int pim6_input(struct mbuf **, int*, int);
#endif /* KERNEL */
/*
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index 06d16034..e2d6693a 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -94,6 +94,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
+#include <netinet/ip_var.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_mroute.h>
#include <netinet6/in6_pcb.h>
@@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#include <netinet6/raw_ip6.h>
#include <netinet6/scope6_var.h>
+#include <netinet6/send.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -160,7 +162,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
- V_rip6stat.rip6s_ipackets++;
+ RIP6STAT_INC(rip6s_ipackets);
if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
/* XXX Send icmp6 host/port unreach? */
@@ -199,11 +201,11 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
}
INP_RLOCK(in6p);
if (in6p->in6p_cksum != -1) {
- V_rip6stat.rip6s_isum++;
+ RIP6STAT_INC(rip6s_isum);
if (in6_cksum(m, proto, *offp,
m->m_pkthdr.len - *offp)) {
INP_RUNLOCK(in6p);
- V_rip6stat.rip6s_badsum++;
+ RIP6STAT_INC(rip6s_badsum);
continue;
}
}
@@ -263,7 +265,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(in_polvio);
/* Do not inject data into pcb. */
} else
#endif /* IPSEC */
@@ -279,7 +281,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
m_freem(n);
if (opts)
m_freem(opts);
- V_rip6stat.rip6s_fullsock++;
+ RIP6STAT_INC(rip6s_fullsock);
} else
sorwakeup(last->inp_socket);
opts = NULL;
@@ -295,8 +297,8 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if ((last != NULL) && ipsec6_in_reject(m, last)) {
m_freem(m);
- V_ipsec6stat.in_polvio++;
- V_ip6stat.ip6s_delivered--;
+ IPSEC6STAT_INC(in_polvio);
+ IP6STAT_DEC(ip6s_delivered);
/* Do not inject data into pcb. */
INP_RUNLOCK(last);
} else
@@ -312,14 +314,14 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
m_freem(m);
if (opts)
m_freem(opts);
- V_rip6stat.rip6s_fullsock++;
+ RIP6STAT_INC(rip6s_fullsock);
} else
sorwakeup(last->inp_socket);
INP_RUNLOCK(last);
} else {
- V_rip6stat.rip6s_nosock++;
+ RIP6STAT_INC(rip6s_nosock);
if (m->m_flags & M_MCAST)
- V_rip6stat.rip6s_nosockmcast++;
+ RIP6STAT_INC(rip6s_nosockmcast);
if (proto == IPPROTO_NONE)
m_freem(m);
else {
@@ -328,7 +330,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ICMP6_PARAMPROB_NEXTHEADER,
prvnxtp - mtod(m, char *));
}
- V_ip6stat.ip6s_delivered--;
+ IP6STAT_DEC(ip6s_delivered);
}
return (IPPROTO_DONE);
}
@@ -392,6 +394,7 @@ rip6_output(m, va_alist)
#endif
{
struct mbuf *control;
+ struct m_tag *mtag;
struct socket *so;
struct sockaddr_in6 *dstsock;
struct in6_addr *dst;
@@ -535,13 +538,30 @@ rip6_output(m, va_alist)
*p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
}
+ /*
+ * Send RA/RS messages to user land for protection, before sending
+ * them to rtadvd/rtsol.
+ */
+ if ((send_sendso_input_hook != NULL) &&
+ so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
+ switch (type) {
+ case ND_ROUTER_ADVERT:
+ case ND_ROUTER_SOLICIT:
+ mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
+ sizeof(unsigned short), M_NOWAIT);
+ if (mtag == NULL)
+ goto bad;
+ m_tag_prepend(m, mtag);
+ }
+ }
+
error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p);
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
if (oifp)
icmp6_ifoutstat_inc(oifp, type, code);
ICMP6STAT_INC(icp6s_outhist[type]);
} else
- V_rip6stat.rip6s_opackets++;
+ RIP6STAT_INC(rip6s_opackets);
goto freectl;
diff --git a/freebsd/sys/netinet6/raw_ip6.h b/freebsd/sys/netinet6/raw_ip6.h
index 23395a67..cc4bcdd0 100644
--- a/freebsd/sys/netinet6/raw_ip6.h
+++ b/freebsd/sys/netinet6/raw_ip6.h
@@ -48,6 +48,8 @@ struct rip6stat {
};
#ifdef _KERNEL
+#define RIP6STAT_ADD(name, val) V_rip6stat.name += (val)
+#define RIP6STAT_INC(name) RIP6STAT_ADD(name, 1)
VNET_DECLARE(struct rip6stat, rip6stat);
#define V_rip6stat VNET(rip6stat)
#endif
diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c
index f91b9ea8..90738461 100644
--- a/freebsd/sys/netinet6/route6.c
+++ b/freebsd/sys/netinet6/route6.c
@@ -64,17 +64,19 @@ route6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp;
struct ip6_rthdr *rh;
int off = *offp, rhlen;
+#ifdef __notyet__
struct ip6aux *ip6a;
ip6a = ip6_findaux(m);
if (ip6a) {
/* XXX reject home-address option before rthdr */
if (ip6a->ip6a_flags & IP6A_SWAP) {
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
m_freem(m);
return IPPROTO_DONE;
}
}
+#endif
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(*rh), IPPROTO_DONE);
@@ -84,7 +86,7 @@ route6_input(struct mbuf **mp, int *offp, int proto)
ip6 = mtod(m, struct ip6_hdr *);
IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh));
if (rh == NULL) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
return IPPROTO_DONE;
}
#endif
@@ -100,7 +102,7 @@ route6_input(struct mbuf **mp, int *offp, int proto)
rhlen = (rh->ip6r_len + 1) << 3;
break; /* Final dst. Just ignore the header. */
}
- V_ip6stat.ip6s_badoptions++;
+ IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
(caddr_t)&rh->ip6r_type - (caddr_t)ip6);
return (IPPROTO_DONE);
diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h
index 2248037f..ae337b86 100644
--- a/freebsd/sys/netinet6/scope6_var.h
+++ b/freebsd/sys/netinet6/scope6_var.h
@@ -42,18 +42,18 @@ struct scope6_id {
u_int32_t s6id_list[16];
};
-void scope6_init __P((void));
-struct scope6_id *scope6_ifattach __P((struct ifnet *));
-void scope6_ifdetach __P((struct scope6_id *));
-int scope6_set __P((struct ifnet *, struct scope6_id *));
-int scope6_get __P((struct ifnet *, struct scope6_id *));
-void scope6_setdefault __P((struct ifnet *));
-int scope6_get_default __P((struct scope6_id *));
-u_int32_t scope6_addr2default __P((struct in6_addr *));
-int sa6_embedscope __P((struct sockaddr_in6 *, int));
-int sa6_recoverscope __P((struct sockaddr_in6 *));
-int in6_setscope __P((struct in6_addr *, struct ifnet *, u_int32_t *));
-int in6_clearscope __P((struct in6_addr *));
+void scope6_init(void);
+struct scope6_id *scope6_ifattach(struct ifnet *);
+void scope6_ifdetach(struct scope6_id *);
+int scope6_set(struct ifnet *, struct scope6_id *);
+int scope6_get(struct ifnet *, struct scope6_id *);
+void scope6_setdefault(struct ifnet *);
+int scope6_get_default(struct scope6_id *);
+u_int32_t scope6_addr2default(struct in6_addr *);
+int sa6_embedscope(struct sockaddr_in6 *, int);
+int sa6_recoverscope(struct sockaddr_in6 *);
+int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
+int in6_clearscope(struct in6_addr *);
uint16_t in6_getscope(struct in6_addr *);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index 65253fbb..f4dfe819 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -791,18 +791,11 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- if (!MODULE_GLOBAL(ip6_v6only)) {
- struct sockaddr_in sin;
+ struct sockaddr_in sin;
- /* convert v4-mapped into v4 addr and send */
- in6_sin6_2_sin(&sin, sin6);
- return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin,
- control, p));
- } else {
- /* mapped addresses aren't enabled */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
- }
+ /* convert v4-mapped into v4 addr and send */
+ in6_sin6_2_sin(&sin, sin6);
+ return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin, control, p));
}
#endif /* INET */
connected_type:
@@ -936,17 +929,9 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- if (!MODULE_GLOBAL(ip6_v6only)) {
- /* convert v4-mapped into v4 addr */
- in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
- addr = (struct sockaddr *)&ss;
- } else {
- /* mapped addresses aren't enabled */
- SCTP_INP_RUNLOCK(inp);
- SCTP_ASOC_CREATE_UNLOCK(inp);
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
- }
+ /* convert v4-mapped into v4 addr */
+ in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
+ addr = (struct sockaddr *)&ss;
}
#endif /* INET */
/* Now do we connect? */
diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h
index 53d1d4c5..79d4c52b 100644
--- a/freebsd/sys/netinet6/sctp6_var.h
+++ b/freebsd/sys/netinet6/sctp6_var.h
@@ -41,22 +41,16 @@ __FBSDID("$FreeBSD$");
SYSCTL_DECL(_net_inet6_sctp6);
extern struct pr_usrreqs sctp6_usrreqs;
+int sctp6_input(struct mbuf **, int *, int);
+int sctp6_input_with_port(struct mbuf **, int *, uint16_t);
+int
+sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct proc *);
+void sctp6_ctlinput(int, struct sockaddr *, void *);
+extern void
+sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *,
+ struct sctphdr *, struct sockaddr *,
+ struct sctp_tcb *, struct sctp_nets *);
-int sctp6_input __P((struct mbuf **, int *, int));
-int sctp6_input_with_port __P((struct mbuf **, int *, uint16_t));
-int sctp6_output
-__P((struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
- struct mbuf *, struct proc *));
- void sctp6_ctlinput __P((int, struct sockaddr *, void *));
-
-
- extern void sctp6_notify(struct sctp_inpcb *inp,
- struct icmp6_hdr *icmph,
- struct sctphdr *sh,
- struct sockaddr *to,
- struct sctp_tcb *stcb,
- struct sctp_nets *net);
-
-
-#endif /* _KERNEL */
+#endif
#endif
diff --git a/freebsd/sys/netinet6/send.h b/freebsd/sys/netinet6/send.h
new file mode 100644
index 00000000..9795d142
--- /dev/null
+++ b/freebsd/sys/netinet6/send.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2009-2010 Ana Kukec <anchie@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_SEND_H_
+#define _NETINET6_SEND_H_
+
+#define SND_OUT 0 /* Outgoing traffic */
+#define SND_IN 1 /* Incoming traffic. */
+
+struct sockaddr_send {
+ uint8_t send_len; /* total length */
+ sa_family_t send_family; /* address family */
+ int send_direction;
+ int send_ifidx;
+ char send_zero[8];
+};
+
+extern int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
+
+#endif /* _NETINET6_SEND_H_ */
diff --git a/freebsd/sys/netinet6/tcp6_var.h b/freebsd/sys/netinet6/tcp6_var.h
index 7da325a1..5cb04f99 100644
--- a/freebsd/sys/netinet6/tcp6_var.h
+++ b/freebsd/sys/netinet6/tcp6_var.h
@@ -71,9 +71,9 @@ VNET_DECLARE(int, tcp_v6mssdflt); /* XXX */
#endif
struct ip6_hdr;
-void tcp6_ctlinput __P((int, struct sockaddr *, void *));
-void tcp6_init __P((void));
-int tcp6_input __P((struct mbuf **, int *, int));
+void tcp6_ctlinput(int, struct sockaddr *, void *);
+void tcp6_init(void);
+int tcp6_input(struct mbuf **, int *, int);
struct rtentry *tcp_rtlookup6(struct in_conninfo *);
extern struct pr_usrreqs tcp6_usrreqs;
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index 260aedf0..8342cf7c 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -2,8 +2,12 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -69,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/sys/param.h>
@@ -138,7 +143,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off,
/* Check AH/ESP integrity. */
if (ipsec6_in_reject(n, inp)) {
m_freem(n);
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(in_polvio);
return;
}
#endif /* IPSEC */
@@ -179,6 +184,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int off = *offp;
int plen, ulen;
struct sockaddr_in6 fromsa;
+ struct m_tag *fwd_tag;
+ uint16_t uh_sum;
ifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
@@ -222,7 +229,18 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
UDPSTAT_INC(udps_nosum);
goto badunlocked;
}
- if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
+
+ if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+ uh_sum = m->m_pkthdr.csum_data;
+ else
+ uh_sum = in6_cksum_pseudo(ip6, ulen,
+ IPPROTO_UDP, m->m_pkthdr.csum_data);
+ uh_sum ^= 0xffff;
+ } else
+ uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen);
+
+ if (uh_sum != 0) {
UDPSTAT_INC(udps_badsum);
goto badunlocked;
}
@@ -233,11 +251,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
init_sin6(&fromsa, m);
fromsa.sin6_port = uh->uh_sport;
- INP_INFO_RLOCK(&V_udbinfo);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct inpcb *last;
struct ip6_moptions *imo;
+ INP_INFO_RLOCK(&V_udbinfo);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -275,11 +293,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
}
/*
- * Detached PCBs can linger in the list if someone
- * holds a reference. (e.g. udp_pcblist)
+ * XXXRW: Because we weren't holding either the inpcb
+ * or the hash lock when we checked for a match
+ * before, we should probably recheck now that the
+ * inpcb lock is (supposed to be) held.
*/
- if (inp->inp_socket == NULL)
- continue;
/*
* Handle socket delivery policy for any-source
@@ -375,8 +393,43 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
/*
* Locate pcb for datagram.
*/
- inp = in6_pcblookup_hash(&V_udbinfo, &ip6->ip6_src, uh->uh_sport,
- &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
+ if ((m->m_flags & M_IP6_NEXTHOP) &&
+ (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
+ struct sockaddr_in6 *next_hop6;
+
+ next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
+
+ /*
+ * Transparently forwarded. Pretend to be the destination.
+ * Already got one like this?
+ */
+ inp = in6_pcblookup_mbuf(&V_udbinfo,
+ &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+ INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
+ if (!inp) {
+ /*
+ * It's new. Try to find the ambushing socket.
+ * Because we've rewritten the destination address,
+ * any hardware-generated hash is ignored.
+ */
+ inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
+ uh->uh_sport, &next_hop6->sin6_addr,
+ next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
+ uh->uh_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
+ }
+ /* Remove the tag from the packet. We don't need it anymore. */
+ m_tag_delete(m, fwd_tag);
+ m->m_flags &= ~M_IP6_NEXTHOP;
+ } else
+ inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
+ uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+ m->m_pkthdr.rcvif, m);
if (inp == NULL) {
if (udp_log_in_vain) {
char ip6bufs[INET6_ADDRSTRLEN];
@@ -393,9 +446,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
UDPSTAT_INC(udps_noportmcast);
- goto badheadlocked;
+ goto badunlocked;
}
- INP_INFO_RUNLOCK(&V_udbinfo);
if (V_udp_blackhole)
goto badunlocked;
if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
@@ -403,17 +455,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
return (IPPROTO_DONE);
}
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
-
- /*
- * Detached PCBs can linger in the hash table if someone holds a
- * reference. (e.g. udp_pcblist)
- */
- if (inp->inp_socket == NULL) {
- INP_RUNLOCK(inp);
- goto badunlocked;
- }
+ INP_RLOCK_ASSERT(inp);
up = intoudpcb(inp);
if (up->u_tun_func == NULL) {
udp6_append(inp, m, off, &fromsa);
@@ -523,13 +565,11 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
(error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
return (error);
}
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in6_pcblookup_hash(&V_udbinfo, &addrs[1].sin6_addr,
- addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1,
- NULL);
+ inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
+ addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -538,10 +578,8 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -570,6 +608,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
struct sockaddr_in6 tmp;
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (addr6) {
/* addr6 has been validated in udp6_send(). */
@@ -744,10 +783,9 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
ip6->ip6_src = *laddr;
ip6->ip6_dst = *faddr;
- if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
- sizeof(struct ip6_hdr), plen)) == 0) {
- udp6->uh_sum = 0xffff;
- }
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
flags = 0;
@@ -790,15 +828,15 @@ udp6_abort(struct socket *so)
}
#endif
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -856,8 +894,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_udbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -867,6 +905,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
inp->inp_vflag |= INP_IPV4;
+#ifdef INET
else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
@@ -877,12 +916,15 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
td->td_ucred);
goto out;
}
+#endif
}
error = in6_pcbbind(inp, nam, td->td_ucred);
+#ifdef INET
out:
+#endif
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -903,15 +945,15 @@ udp6_close(struct socket *so)
return;
}
#endif
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -925,8 +967,11 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
sin6 = (struct sockaddr_in6 *)nam;
KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
+ /*
+ * XXXRW: Need to clarify locking of v4/v6 flags.
+ */
INP_WLOCK(inp);
+#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
@@ -944,12 +989,15 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
goto out;
}
+#endif
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = EISCONN;
goto out;
@@ -959,12 +1007,13 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
+ INP_HASH_WLOCK(&V_udbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
out:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -996,32 +1045,32 @@ udp6_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
- INP_WLOCK(inp);
-
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
- error = (*pru->pru_disconnect)(so);
- goto out;
+ (void)(*pru->pru_disconnect)(so);
+ return (0);
}
#endif
+ INP_WLOCK(inp);
+
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto out;
}
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
out:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
@@ -1035,7 +1084,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (addr) {
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
@@ -1072,7 +1120,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
* select the UDPv4 output routine are invalidated?
*/
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
if (sin6)
in6_sin6_2_sin_in_sock(addr);
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
@@ -1085,14 +1132,16 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
+ INP_HASH_WLOCK(&V_udbinfo);
error = udp6_output(inp, m, addr, control, td);
+ INP_HASH_WUNLOCK(&V_udbinfo);
+#ifdef INET
+#endif
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
bad:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
m_freem(m);
return (error);
}
diff --git a/freebsd/sys/netipsec/ah_var.h b/freebsd/sys/netipsec/ah_var.h
index 6145dba9..812fe2dc 100644
--- a/freebsd/sys/netipsec/ah_var.h
+++ b/freebsd/sys/netipsec/ah_var.h
@@ -75,6 +75,8 @@ VNET_DECLARE(int, ah_enable);
VNET_DECLARE(int, ah_cleartos);
VNET_DECLARE(struct ahstat, ahstat);
+#define AHSTAT_ADD(name, val) V_ahstat.name += (val)
+#define AHSTAT_INC(name) AHSTAT_ADD(name, 1)
#define V_ah_enable VNET(ah_enable)
#define V_ah_cleartos VNET(ah_cleartos)
#define V_ahstat VNET(ahstat)
diff --git a/freebsd/sys/netipsec/esp_var.h b/freebsd/sys/netipsec/esp_var.h
index 477dcbf5..c6133614 100644
--- a/freebsd/sys/netipsec/esp_var.h
+++ b/freebsd/sys/netipsec/esp_var.h
@@ -75,6 +75,8 @@ struct espstat {
VNET_DECLARE(int, esp_enable);
VNET_DECLARE(struct espstat, espstat);
+#define ESPSTAT_ADD(name, val) V_espstat.name += (val)
+#define ESPSTAT_INC(name) ESPSTAT_ADD(name, 1)
#define V_esp_enable VNET(esp_enable)
#define V_espstat VNET(espstat)
#endif /* _KERNEL */
diff --git a/freebsd/sys/netipsec/ipcomp_var.h b/freebsd/sys/netipsec/ipcomp_var.h
index c99a3be7..ee15598f 100644
--- a/freebsd/sys/netipsec/ipcomp_var.h
+++ b/freebsd/sys/netipsec/ipcomp_var.h
@@ -68,6 +68,8 @@ struct ipcompstat {
VNET_DECLARE(int, ipcomp_enable);
VNET_DECLARE(struct ipcompstat, ipcompstat);
+#define IPCOMPSTAT_ADD(name, val) V_ipcompstat.name += (val)
+#define IPCOMPSTAT_INC(name) IPCOMPSTAT_ADD(name, 1)
#define V_ipcomp_enable VNET(ipcomp_enable)
#define V_ipcompstat VNET(ipcompstat)
#endif /* _KERNEL */
diff --git a/freebsd/sys/netipsec/ipip_var.h b/freebsd/sys/netipsec/ipip_var.h
index 3c8c3974..415d5c10 100644
--- a/freebsd/sys/netipsec/ipip_var.h
+++ b/freebsd/sys/netipsec/ipip_var.h
@@ -62,6 +62,8 @@ struct ipipstat
VNET_DECLARE(int, ipip_allow);
VNET_DECLARE(struct ipipstat, ipipstat);
+#define IPIPSTAT_ADD(name, val) V_ipipstat.name += (val)
+#define IPIPSTAT_INC(name) IPIPSTAT_ADD(name, 1)
#define V_ipip_allow VNET(ipip_allow)
#define V_ipipstat VNET(ipipstat)
#endif /* _KERNEL */
diff --git a/freebsd/sys/netipsec/ipsec.c b/freebsd/sys/netipsec/ipsec.c
index dcf40918..853292c5 100644
--- a/freebsd/sys/netipsec/ipsec.c
+++ b/freebsd/sys/netipsec/ipsec.c
@@ -456,7 +456,7 @@ ipsec4_checkpolicy(struct mbuf *m, u_int dir, u_int flag, int *error,
sp = ipsec_getpolicybysock(m, dir, inp, error);
if (sp == NULL) {
IPSEC_ASSERT(*error != 0, ("getpolicy failed w/o error"));
- V_ipsec4stat.ips_out_inval++;
+ IPSECSTAT_INC(ips_out_inval);
return (NULL);
}
IPSEC_ASSERT(*error == 0, ("sp w/ error set to %u", *error));
@@ -466,7 +466,7 @@ ipsec4_checkpolicy(struct mbuf *m, u_int dir, u_int flag, int *error,
printf("%s: invalid policy %u\n", __func__, sp->policy);
/* FALLTHROUGH */
case IPSEC_POLICY_DISCARD:
- V_ipsec4stat.ips_out_polvio++;
+ IPSECSTAT_INC(ips_out_polvio);
*error = -EINVAL; /* Packet is discarded by caller. */
break;
case IPSEC_POLICY_BYPASS:
@@ -600,7 +600,7 @@ ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip),("packet too short"));
/* NB: ip_input() flips it into host endian. XXX Need more checking. */
- if (m->m_len < sizeof (struct ip)) {
+ if (m->m_len >= sizeof (struct ip)) {
struct ip *ip = mtod(m, struct ip *);
if (ip->ip_off & (IP_MF | IP_OFFMASK))
goto done;
@@ -1317,7 +1317,7 @@ ipsec4_in_reject(struct mbuf *m, struct inpcb *inp)
result = ipsec46_in_reject(m, inp);
if (result)
- V_ipsec4stat.ips_in_polvio++;
+ IPSECSTAT_INC(ips_in_polvio);
return (result);
}
@@ -1335,7 +1335,7 @@ ipsec6_in_reject(struct mbuf *m, struct inpcb *inp)
result = ipsec46_in_reject(m, inp);
if (result)
- V_ipsec6stat.ips_in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
return (result);
}
diff --git a/freebsd/sys/netipsec/ipsec.h b/freebsd/sys/netipsec/ipsec.h
index fb987ad2..f3415872 100644
--- a/freebsd/sys/netipsec/ipsec.h
+++ b/freebsd/sys/netipsec/ipsec.h
@@ -61,7 +61,7 @@
* specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code.
*/
struct secpolicyindex {
- u_int8_t dir; /* direction of packet flow, see blow */
+ u_int8_t dir; /* direction of packet flow, see below */
union sockaddr_union src; /* IP src address for SP */
union sockaddr_union dst; /* IP dst address for SP */
u_int8_t prefs; /* prefix length in bits for src */
@@ -359,6 +359,7 @@ VNET_DECLARE(int, ip4_ipsec_ecn);
VNET_DECLARE(int, ip4_esp_randpad);
VNET_DECLARE(int, crypto_support);
+#define IPSECSTAT_INC(name) V_ipsec4stat.name += 1
#define V_ipsec4stat VNET(ipsec4stat)
#define V_ip4_def_policy VNET(ip4_def_policy)
#define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev)
diff --git a/freebsd/sys/netipsec/ipsec6.h b/freebsd/sys/netipsec/ipsec6.h
index c004220e..21ec6b36 100644
--- a/freebsd/sys/netipsec/ipsec6.h
+++ b/freebsd/sys/netipsec/ipsec6.h
@@ -48,6 +48,7 @@ VNET_DECLARE(int, ip6_ah_trans_deflev);
VNET_DECLARE(int, ip6_ah_net_deflev);
VNET_DECLARE(int, ip6_ipsec_ecn);
+#define IPSEC6STAT_INC(name) V_ipsec6stat.name += 1
#define V_ipsec6stat VNET(ipsec6stat)
#define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev)
#define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev)
diff --git a/freebsd/sys/netipsec/ipsec_input.c b/freebsd/sys/netipsec/ipsec_input.c
index 9a491d41..d910de71 100644
--- a/freebsd/sys/netipsec/ipsec_input.c
+++ b/freebsd/sys/netipsec/ipsec_input.c
@@ -101,8 +101,14 @@
#endif
-#define IPSEC_ISTAT(p,x,y,z) ((p) == IPPROTO_ESP ? (x)++ : \
- (p) == IPPROTO_AH ? (y)++ : (z)++)
+#define IPSEC_ISTAT(proto, name) do { \
+ if ((proto) == IPPROTO_ESP) \
+ ESPSTAT_INC(esps_##name); \
+ else if ((proto) == IPPROTO_AH) \
+ AHSTAT_INC(ahs_##name); \
+ else \
+ IPCOMPSTAT_INC(ipcomps_##name); \
+} while (0)
#ifdef INET
static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int);
@@ -110,7 +116,7 @@ static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int);
/*
* ipsec_common_input gets called when an IPsec-protected packet
- * is received by IPv4 or IPv6. It's job is to find the right SA
+ * is received by IPv4 or IPv6. Its job is to find the right SA
* and call the appropriate transform. The transform callback
* takes care of further processing (like ingress filtering).
*/
@@ -121,12 +127,13 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
struct secasvar *sav;
u_int32_t spi;
int error;
+#ifdef INET
#ifdef IPSEC_NAT_T
struct m_tag *tag;
#endif
+#endif
- IPSEC_ISTAT(sproto, V_espstat.esps_input, V_ahstat.ahs_input,
- V_ipcompstat.ipcomps_input);
+ IPSEC_ISTAT(sproto, input);
IPSEC_ASSERT(m != NULL, ("null packet"));
@@ -138,15 +145,13 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
(sproto == IPPROTO_AH && !V_ah_enable) ||
(sproto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
m_freem(m);
- IPSEC_ISTAT(sproto, V_espstat.esps_pdrops, V_ahstat.ahs_pdrops,
- V_ipcompstat.ipcomps_pdrops);
+ IPSEC_ISTAT(sproto, pdrops);
return EOPNOTSUPP;
}
if (m->m_pkthdr.len - skip < 2 * sizeof (u_int32_t)) {
m_freem(m);
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
DPRINTF(("%s: packet too small\n", __func__));
return EINVAL;
}
@@ -197,8 +202,7 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
default:
DPRINTF(("%s: unsupported protocol family %u\n", __func__, af));
m_freem(m);
- IPSEC_ISTAT(sproto, V_espstat.esps_nopf, V_ahstat.ahs_nopf,
- V_ipcompstat.ipcomps_nopf);
+ IPSEC_ISTAT(sproto, nopf);
return EPFNOSUPPORT;
}
@@ -208,8 +212,7 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n",
__func__, ipsec_address(&dst_address),
(u_long) ntohl(spi), sproto));
- IPSEC_ISTAT(sproto, V_espstat.esps_notdb, V_ahstat.ahs_notdb,
- V_ipcompstat.ipcomps_notdb);
+ IPSEC_ISTAT(sproto, notdb);
m_freem(m);
return ENOENT;
}
@@ -218,8 +221,7 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n",
__func__, ipsec_address(&dst_address),
(u_long) ntohl(spi), sproto));
- IPSEC_ISTAT(sproto, V_espstat.esps_noxform, V_ahstat.ahs_noxform,
- V_ipcompstat.ipcomps_noxform);
+ IPSEC_ISTAT(sproto, noxform);
KEY_FREESAV(&sav);
m_freem(m);
return ENXIO;
@@ -321,8 +323,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
/* Sanity check */
if (m == NULL) {
DPRINTF(("%s: null mbuf", __func__));
- IPSEC_ISTAT(sproto, V_espstat.esps_badkcr, V_ahstat.ahs_badkcr,
- V_ipcompstat.ipcomps_badkcr);
+ IPSEC_ISTAT(sproto, badkcr);
KEY_FREESAV(&sav);
return EINVAL;
}
@@ -333,8 +334,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
DPRINTF(("%s: processing failed for SA %s/%08lx\n",
__func__, ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = ENOBUFS;
goto bad;
}
@@ -355,9 +355,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
struct ip ipn;
if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = EINVAL;
goto bad;
}
@@ -386,9 +384,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
ipsp_address(saidx->dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
- V_ahstat.ahs_pdrops,
- V_ipcompstat.ipcomps_pdrops);
+ IPSEC_ISTAT(sproto, pdrops);
error = EACCES;
goto bad;
}
@@ -399,9 +395,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
struct ip6_hdr ip6n;
if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = EINVAL;
goto bad;
}
@@ -428,9 +422,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
- V_ahstat.ahs_pdrops,
- V_ipcompstat.ipcomps_pdrops);
+ IPSEC_ISTAT(sproto, pdrops);
error = EACCES;
goto bad;
}
@@ -451,8 +443,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
sizeof(struct tdb_ident), M_NOWAIT);
if (mtag == NULL) {
DPRINTF(("%s: failed to get tag\n", __func__));
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops, V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = ENOMEM;
goto bad;
}
@@ -473,6 +464,8 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
key_sa_recordxfer(sav, m); /* record data transfer */
+ m_addr_changed(m);
+
#ifdef DEV_ENC
encif->if_ipackets++;
encif->if_ibytes += m->m_pkthdr.len;
@@ -492,9 +485,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
* Re-dispatch via software interrupt.
*/
if ((error = netisr_queue_src(NETISR_IP, (uintptr_t)sav->spi, m))) {
- IPSEC_ISTAT(sproto, V_espstat.esps_qfull, V_ahstat.ahs_qfull,
- V_ipcompstat.ipcomps_qfull);
-
+ IPSEC_ISTAT(sproto, qfull);
DPRINTF(("%s: queue full; proto %u packet dropped\n",
__func__, sproto));
return error;
@@ -546,9 +537,7 @@ ipsec6_common_input(struct mbuf **mp, int *offp, int proto)
if (protoff + l != *offp) {
DPRINTF(("%s: bad packet header chain, protoff %u, "
"l %u, off %u\n", __func__, protoff, l, *offp));
- IPSEC_ISTAT(proto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(proto, hdrops);
m_freem(*mp);
*mp = NULL;
return IPPROTO_DONE;
@@ -593,8 +582,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
/* Sanity check */
if (m == NULL) {
DPRINTF(("%s: null mbuf", __func__));
- IPSEC_ISTAT(sproto, V_espstat.esps_badkcr, V_ahstat.ahs_badkcr,
- V_ipcompstat.ipcomps_badkcr);
+ IPSEC_ISTAT(sproto, badkcr);
error = EINVAL;
goto bad;
}
@@ -607,8 +595,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
__func__, ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = EACCES;
goto bad;
}
@@ -626,9 +613,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
struct ip ipn;
if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = EINVAL;
goto bad;
}
@@ -653,8 +638,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTATsproto, (V_espstat.esps_pdrops,
- V_ahstat.ahs_pdrops, V_ipcompstat.ipcomps_pdrops);
+ IPSEC_ISTAT(sproto, pdrops);
error = EACCES;
goto bad;
}
@@ -666,9 +650,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
struct ip6_hdr ip6n;
if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops,
- V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = EINVAL;
goto bad;
}
@@ -695,8 +677,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
- V_ahstat.ahs_pdrops, V_ipcompstat.ipcomps_pdrops);
+ IPSEC_ISTAT(sproto, pdrops);
error = EACCES;
goto bad;
}
@@ -716,8 +697,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
sizeof(struct tdb_ident), M_NOWAIT);
if (mtag == NULL) {
DPRINTF(("%s: failed to get tag\n", __func__));
- IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
- V_ahstat.ahs_hdrops, V_ipcompstat.ipcomps_hdrops);
+ IPSEC_ISTAT(sproto, hdrops);
error = ENOMEM;
goto bad;
}
@@ -766,7 +746,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
nxt = nxt8;
while (nxt != IPPROTO_DONE) {
if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
- V_ip6stat.ip6s_toomanyhdr++;
+ IP6STAT_INC(ip6s_toomanyhdr);
error = EINVAL;
goto bad;
}
@@ -776,7 +756,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto
* more sanity checks in header chain processing.
*/
if (m->m_pkthdr.len < skip) {
- V_ip6stat.ip6s_tooshort++;
+ IP6STAT_INC(ip6s_tooshort);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
error = EINVAL;
goto bad;
diff --git a/freebsd/sys/netipsec/ipsec_mbuf.c b/freebsd/sys/netipsec/ipsec_mbuf.c
index 43a24b0a..2cafe058 100644
--- a/freebsd/sys/netipsec/ipsec_mbuf.c
+++ b/freebsd/sys/netipsec/ipsec_mbuf.c
@@ -137,7 +137,7 @@ m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
m = n; /* header is at front ... */
*off = 0; /* ... of new mbuf */
}
- V_ipsec4stat.ips_mbinserted++;
+ IPSECSTAT_INC(ips_mbinserted);
} else {
/*
* Copy the remainder to the back of the mbuf
@@ -243,7 +243,7 @@ m_striphdr(struct mbuf *m, int skip, int hlen)
/* Remove the header and associated data from the mbuf. */
if (roff == 0) {
/* The header was at the beginning of the mbuf */
- V_ipsec4stat.ips_input_front++;
+ IPSECSTAT_INC(ips_input_front);
m_adj(m1, hlen);
if ((m1->m_flags & M_PKTHDR) == 0)
m->m_pkthdr.len -= hlen;
@@ -255,7 +255,7 @@ m_striphdr(struct mbuf *m, int skip, int hlen)
* so first let's remove the remainder of the header from
* the beginning of the remainder of the mbuf chain, if any.
*/
- V_ipsec4stat.ips_input_end++;
+ IPSECSTAT_INC(ips_input_end);
if (roff + hlen > m1->m_len) {
/* Adjust the next mbuf by the remainder */
m_adj(m1->m_next, roff + hlen - m1->m_len);
@@ -280,7 +280,7 @@ m_striphdr(struct mbuf *m, int skip, int hlen)
* The header lies in the "middle" of the mbuf; copy
* the remainder of the mbuf down over the header.
*/
- V_ipsec4stat.ips_input_middle++;
+ IPSECSTAT_INC(ips_input_middle);
bcopy(mtod(m1, u_char *) + roff + hlen,
mtod(m1, u_char *) + roff,
m1->m_len - (roff + hlen));
diff --git a/freebsd/sys/netipsec/ipsec_output.c b/freebsd/sys/netipsec/ipsec_output.c
index b4ad609f..a02b6ce2 100644
--- a/freebsd/sys/netipsec/ipsec_output.c
+++ b/freebsd/sys/netipsec/ipsec_output.c
@@ -166,11 +166,34 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
* doing further processing.
*/
if (isr->next) {
- V_ipsec4stat.ips_out_bundlesa++;
- return ipsec4_process_packet(m, isr->next, 0, 0);
+ IPSECSTAT_INC(ips_out_bundlesa);
+ /* XXX-BZ currently only support same AF bundles. */
+ switch (saidx->dst.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ return ipsec4_process_packet(m, isr->next, 0, 0);
+ /* NOTREACHED */
+#endif
+#ifdef notyet
+#ifdef INET6
+ case AF_INET6:
+ /* XXX */
+ ipsec6_output_trans()
+ ipsec6_output_tunnel()
+ /* NOTREACHED */
+#endif /* INET6 */
+#endif
+ default:
+ DPRINTF(("%s: unknown protocol family %u\n", __func__,
+ saidx->dst.sa.sa_family));
+ error = ENXIO;
+ goto bad;
+ }
}
key_sa_recordxfer(sav, m); /* record data transfer */
+ m_addr_changed(m);
+
/*
* We're done with IPsec processing, transmit the packet using the
* appropriate network protocol (IP or IPv6). SPD lookup will be
@@ -261,8 +284,14 @@ ipsec_nextisr(
int *error
)
{
-#define IPSEC_OSTAT(x,y,z) (isr->saidx.proto == IPPROTO_ESP ? (x)++ : \
- isr->saidx.proto == IPPROTO_AH ? (y)++ : (z)++)
+#define IPSEC_OSTAT(name) do { \
+ if (isr->saidx.proto == IPPROTO_ESP) \
+ ESPSTAT_INC(esps_##name); \
+ else if (isr->saidx.proto == IPPROTO_AH)\
+ AHSTAT_INC(ahs_##name); \
+ else \
+ IPCOMPSTAT_INC(ipcomps_##name); \
+} while (0)
struct secasvar *sav;
IPSECREQUEST_LOCK_ASSERT(isr);
@@ -341,7 +370,7 @@ again:
* this packet because it is responsibility for
* upper layer to retransmit the packet.
*/
- V_ipsec4stat.ips_out_nosa++;
+ IPSECSTAT_INC(ips_out_nosa);
goto bad;
}
sav = isr->sav;
@@ -370,8 +399,7 @@ again:
(isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
DPRINTF(("%s: IPsec outbound packet dropped due"
" to policy (check your sysctls)\n", __func__));
- IPSEC_OSTAT(V_espstat.esps_pdrops, V_ahstat.ahs_pdrops,
- V_ipcompstat.ipcomps_pdrops);
+ IPSEC_OSTAT(pdrops);
*error = EHOSTUNREACH;
goto bad;
}
@@ -382,8 +410,7 @@ again:
*/
if (sav->tdb_xform == NULL) {
DPRINTF(("%s: no transform for SA\n", __func__));
- IPSEC_OSTAT(V_espstat.esps_noxform, V_ahstat.ahs_noxform,
- V_ipcompstat.ipcomps_noxform);
+ IPSEC_OSTAT(noxform);
*error = EHOSTUNREACH;
goto bad;
}
@@ -812,14 +839,14 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int
ipseclog((LOG_ERR, "%s: family mismatched between "
"inner and outer, spi=%u\n", __func__,
ntohl(isr->sav->spi)));
- V_ipsec6stat.ips_out_inval++;
+ IPSEC6STAT_INC(ips_out_inval);
error = EAFNOSUPPORT;
goto bad;
}
m = ipsec6_splithdr(m);
if (!m) {
- V_ipsec6stat.ips_out_nomem++;
+ IPSEC6STAT_INC(ips_out_nomem);
error = ENOMEM;
goto bad;
}
@@ -848,8 +875,8 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int
rtalloc_ign_fib(state->ro, 0UL, M_GETFIB(m));
}
if (state->ro->ro_rt == NULL) {
- V_ip6stat.ip6s_noroute++;
- V_ipsec6stat.ips_out_noroute++;
+ IP6STAT_INC(ip6s_noroute);
+ IPSEC6STAT_INC(ips_out_noroute);
error = EHOSTUNREACH;
goto bad;
}
@@ -861,7 +888,7 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int
m = ipsec6_splithdr(m);
if (!m) {
- V_ipsec6stat.ips_out_nomem++;
+ IPSEC6STAT_INC(ips_out_nomem);
error = ENOMEM;
goto bad;
}
diff --git a/freebsd/sys/netipsec/key.c b/freebsd/sys/netipsec/key.c
index 57e8714e..50a90af8 100644
--- a/freebsd/sys/netipsec/key.c
+++ b/freebsd/sys/netipsec/key.c
@@ -75,7 +75,7 @@
#include <netinet6/ip6_var.h>
#endif /* INET6 */
-#ifdef INET
+#if defined(INET) || defined(INET6)
#include <netinet/in_pcb.h>
#endif
#ifdef INET6
@@ -896,6 +896,9 @@ key_allocsa_policy(const struct secasindex *saidx)
u_int stateidx, arraysize;
const u_int *state_valid;
+ state_valid = NULL; /* silence gcc */
+ arraysize = 0; /* silence gcc */
+
SAHTREE_LOCK();
LIST_FOREACH(sah, &V_sahtree, chain) {
if (sah->state == SADB_SASTATE_DEAD)
@@ -908,15 +911,13 @@ key_allocsa_policy(const struct secasindex *saidx)
state_valid = saorder_state_valid_prefer_new;
arraysize = N(saorder_state_valid_prefer_new);
}
- SAHTREE_UNLOCK();
- goto found;
+ break;
}
}
SAHTREE_UNLOCK();
+ if (sah == NULL)
+ return NULL;
- return NULL;
-
- found:
/* search valid state */
for (stateidx = 0; stateidx < arraysize; stateidx++) {
sav = key_do_allocsa_policy(sah, state_valid[stateidx]);
@@ -1927,18 +1928,8 @@ key_spdadd(so, m, mhp)
return key_senderror(so, m, EINVAL);
}
#if 1
- if (newsp->req && newsp->req->saidx.src.sa.sa_family) {
- struct sockaddr *sa;
- sa = (struct sockaddr *)(src0 + 1);
- if (sa->sa_family != newsp->req->saidx.src.sa.sa_family) {
- _key_delsp(newsp);
- return key_senderror(so, m, EINVAL);
- }
- }
- if (newsp->req && newsp->req->saidx.dst.sa.sa_family) {
- struct sockaddr *sa;
- sa = (struct sockaddr *)(dst0 + 1);
- if (sa->sa_family != newsp->req->saidx.dst.sa.sa_family) {
+ if (newsp->req && newsp->req->saidx.src.sa.sa_family && newsp->req->saidx.dst.sa.sa_family) {
+ if (newsp->req->saidx.src.sa.sa_family != newsp->req->saidx.dst.sa.sa_family) {
_key_delsp(newsp);
return key_senderror(so, m, EINVAL);
}
@@ -7351,7 +7342,7 @@ key_parse(m, so)
if ((m->m_flags & M_PKTHDR) == 0 ||
m->m_pkthdr.len != m->m_pkthdr.len) {
ipseclog((LOG_DEBUG, "%s: invalid message length.\n",__func__));
- V_pfkeystat.out_invlen++;
+ PFKEYSTAT_INC(out_invlen);
error = EINVAL;
goto senderror;
}
@@ -7359,7 +7350,7 @@ key_parse(m, so)
if (msg->sadb_msg_version != PF_KEY_V2) {
ipseclog((LOG_DEBUG, "%s: PF_KEY version %u is mismatched.\n",
__func__, msg->sadb_msg_version));
- V_pfkeystat.out_invver++;
+ PFKEYSTAT_INC(out_invver);
error = EINVAL;
goto senderror;
}
@@ -7367,7 +7358,7 @@ key_parse(m, so)
if (msg->sadb_msg_type > SADB_MAX) {
ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n",
__func__, msg->sadb_msg_type));
- V_pfkeystat.out_invmsgtype++;
+ PFKEYSTAT_INC(out_invmsgtype);
error = EINVAL;
goto senderror;
}
@@ -7420,7 +7411,7 @@ key_parse(m, so)
ipseclog((LOG_DEBUG, "%s: must specify satype "
"when msg type=%u.\n", __func__,
msg->sadb_msg_type));
- V_pfkeystat.out_invsatype++;
+ PFKEYSTAT_INC(out_invsatype);
error = EINVAL;
goto senderror;
}
@@ -7440,7 +7431,7 @@ key_parse(m, so)
case SADB_X_SPDDELETE2:
ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n",
__func__, msg->sadb_msg_type));
- V_pfkeystat.out_invsatype++;
+ PFKEYSTAT_INC(out_invsatype);
error = EINVAL;
goto senderror;
}
@@ -7451,7 +7442,7 @@ key_parse(m, so)
case SADB_SATYPE_MIP:
ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n",
__func__, msg->sadb_msg_satype));
- V_pfkeystat.out_invsatype++;
+ PFKEYSTAT_INC(out_invsatype);
error = EOPNOTSUPP;
goto senderror;
case 1: /* XXX: What does it do? */
@@ -7461,7 +7452,7 @@ key_parse(m, so)
default:
ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n",
__func__, msg->sadb_msg_satype));
- V_pfkeystat.out_invsatype++;
+ PFKEYSTAT_INC(out_invsatype);
error = EINVAL;
goto senderror;
}
@@ -7479,7 +7470,7 @@ key_parse(m, so)
if (src0->sadb_address_proto != dst0->sadb_address_proto) {
ipseclog((LOG_DEBUG, "%s: upper layer protocol "
"mismatched.\n", __func__));
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7489,7 +7480,7 @@ key_parse(m, so)
PFKEY_ADDR_SADDR(dst0)->sa_family) {
ipseclog((LOG_DEBUG, "%s: address family mismatched.\n",
__func__));
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7497,7 +7488,7 @@ key_parse(m, so)
PFKEY_ADDR_SADDR(dst0)->sa_len) {
ipseclog((LOG_DEBUG, "%s: address struct size "
"mismatched.\n", __func__));
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7506,7 +7497,7 @@ key_parse(m, so)
case AF_INET:
if (PFKEY_ADDR_SADDR(src0)->sa_len !=
sizeof(struct sockaddr_in)) {
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7514,7 +7505,7 @@ key_parse(m, so)
case AF_INET6:
if (PFKEY_ADDR_SADDR(src0)->sa_len !=
sizeof(struct sockaddr_in6)) {
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7522,7 +7513,7 @@ key_parse(m, so)
default:
ipseclog((LOG_DEBUG, "%s: unsupported address family\n",
__func__));
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EAFNOSUPPORT;
goto senderror;
}
@@ -7544,7 +7535,7 @@ key_parse(m, so)
dst0->sadb_address_prefixlen > plen) {
ipseclog((LOG_DEBUG, "%s: illegal prefixlen.\n",
__func__));
- V_pfkeystat.out_invaddr++;
+ PFKEYSTAT_INC(out_invaddr);
error = EINVAL;
goto senderror;
}
@@ -7557,7 +7548,7 @@ key_parse(m, so)
if (msg->sadb_msg_type >= sizeof(key_typesw)/sizeof(key_typesw[0]) ||
key_typesw[msg->sadb_msg_type] == NULL) {
- V_pfkeystat.out_invmsgtype++;
+ PFKEYSTAT_INC(out_invmsgtype);
error = EINVAL;
goto senderror;
}
@@ -7659,7 +7650,7 @@ key_align(m, mhp)
ipseclog((LOG_DEBUG, "%s: duplicate ext_type "
"%u\n", __func__, ext->sadb_ext_type));
m_freem(m);
- V_pfkeystat.out_dupext++;
+ PFKEYSTAT_INC(out_dupext);
return EINVAL;
}
break;
@@ -7667,7 +7658,7 @@ key_align(m, mhp)
ipseclog((LOG_DEBUG, "%s: invalid ext_type %u\n",
__func__, ext->sadb_ext_type));
m_freem(m);
- V_pfkeystat.out_invexttype++;
+ PFKEYSTAT_INC(out_invexttype);
return EINVAL;
}
@@ -7675,7 +7666,7 @@ key_align(m, mhp)
if (key_validate_ext(ext, extlen)) {
m_freem(m);
- V_pfkeystat.out_invlen++;
+ PFKEYSTAT_INC(out_invlen);
return EINVAL;
}
@@ -7693,7 +7684,7 @@ key_align(m, mhp)
if (off != end) {
m_freem(m);
- V_pfkeystat.out_invlen++;
+ PFKEYSTAT_INC(out_invlen);
return EINVAL;
}
diff --git a/freebsd/sys/netipsec/keydb.h b/freebsd/sys/netipsec/keydb.h
index f16d1ea2..7494f5f4 100644
--- a/freebsd/sys/netipsec/keydb.h
+++ b/freebsd/sys/netipsec/keydb.h
@@ -52,7 +52,7 @@ union sockaddr_union {
/* Security Assocciation Index */
/* NOTE: Ensure to be same address family */
struct secasindex {
- union sockaddr_union src; /* srouce address for SA */
+ union sockaddr_union src; /* source address for SA */
union sockaddr_union dst; /* destination address for SA */
u_int16_t proto; /* IPPROTO_ESP or IPPROTO_AH */
u_int8_t mode; /* mode of protocol, see ipsec.h */
diff --git a/freebsd/sys/netipsec/keysock.c b/freebsd/sys/netipsec/keysock.c
index e3ebc49a..8af9512b 100644
--- a/freebsd/sys/netipsec/keysock.c
+++ b/freebsd/sys/netipsec/keysock.c
@@ -93,19 +93,19 @@ key_output(struct mbuf *m, struct socket *so)
if (m == 0)
panic("%s: NULL pointer was passed.\n", __func__);
- V_pfkeystat.out_total++;
- V_pfkeystat.out_bytes += m->m_pkthdr.len;
+ PFKEYSTAT_INC(out_total);
+ PFKEYSTAT_ADD(out_bytes, m->m_pkthdr.len);
len = m->m_pkthdr.len;
if (len < sizeof(struct sadb_msg)) {
- V_pfkeystat.out_tooshort++;
+ PFKEYSTAT_INC(out_tooshort);
error = EINVAL;
goto end;
}
if (m->m_len < sizeof(struct sadb_msg)) {
if ((m = m_pullup(m, sizeof(struct sadb_msg))) == 0) {
- V_pfkeystat.out_nomem++;
+ PFKEYSTAT_INC(out_nomem);
error = ENOBUFS;
goto end;
}
@@ -116,9 +116,9 @@ key_output(struct mbuf *m, struct socket *so)
KEYDEBUG(KEYDEBUG_KEY_DUMP, kdebug_mbuf(m));
msg = mtod(m, struct sadb_msg *);
- V_pfkeystat.out_msgtype[msg->sadb_msg_type]++;
+ PFKEYSTAT_INC(out_msgtype[msg->sadb_msg_type]);
if (len != PFKEY_UNUNIT64(msg->sadb_msg_len)) {
- V_pfkeystat.out_invlen++;
+ PFKEYSTAT_INC(out_invlen);
error = EINVAL;
goto end;
}
@@ -149,7 +149,7 @@ key_sendup0(rp, m, promisc)
if (m && m->m_len < sizeof(struct sadb_msg))
m = m_pullup(m, sizeof(struct sadb_msg));
if (!m) {
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
m_freem(m);
return ENOBUFS;
}
@@ -162,12 +162,12 @@ key_sendup0(rp, m, promisc)
pmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len);
/* pid and seq? */
- V_pfkeystat.in_msgtype[pmsg->sadb_msg_type]++;
+ PFKEYSTAT_INC(in_msgtype[pmsg->sadb_msg_type]);
}
if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src,
m, NULL)) {
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
m_freem(m);
error = ENOBUFS;
} else
@@ -199,9 +199,9 @@ key_sendup(so, msg, len, target)
* we increment statistics here, just in case we have ENOBUFS
* in this function.
*/
- V_pfkeystat.in_total++;
- V_pfkeystat.in_bytes += len;
- V_pfkeystat.in_msgtype[msg->sadb_msg_type]++;
+ PFKEYSTAT_INC(in_total);
+ PFKEYSTAT_ADD(in_bytes, len);
+ PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]);
/*
* Get mbuf chain whenever possible (not clusters),
@@ -218,14 +218,14 @@ key_sendup(so, msg, len, target)
if (tlen == len) {
MGETHDR(n, M_DONTWAIT, MT_DATA);
if (n == NULL) {
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
return ENOBUFS;
}
n->m_len = MHLEN;
} else {
MGET(n, M_DONTWAIT, MT_DATA);
if (n == NULL) {
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
return ENOBUFS;
}
n->m_len = MLEN;
@@ -235,7 +235,7 @@ key_sendup(so, msg, len, target)
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
m_freem(m);
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
return ENOBUFS;
}
n->m_len = MCLBYTES;
@@ -258,9 +258,9 @@ key_sendup(so, msg, len, target)
m_copyback(m, 0, len, (caddr_t)msg);
/* avoid duplicated statistics */
- V_pfkeystat.in_total--;
- V_pfkeystat.in_bytes -= len;
- V_pfkeystat.in_msgtype[msg->sadb_msg_type]--;
+ PFKEYSTAT_ADD(in_total, -1);
+ PFKEYSTAT_ADD(in_bytes, -len);
+ PFKEYSTAT_ADD(in_msgtype[msg->sadb_msg_type], -1);
return key_sendup_mbuf(so, m, target);
}
@@ -283,19 +283,19 @@ key_sendup_mbuf(so, m, target)
if (so == NULL && target == KEY_SENDUP_ONE)
panic("%s: NULL pointer was passed.\n", __func__);
- V_pfkeystat.in_total++;
- V_pfkeystat.in_bytes += m->m_pkthdr.len;
+ PFKEYSTAT_INC(in_total);
+ PFKEYSTAT_ADD(in_bytes, m->m_pkthdr.len);
if (m->m_len < sizeof(struct sadb_msg)) {
m = m_pullup(m, sizeof(struct sadb_msg));
if (m == NULL) {
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
return ENOBUFS;
}
}
if (m->m_len >= sizeof(struct sadb_msg)) {
struct sadb_msg *msg;
msg = mtod(m, struct sadb_msg *);
- V_pfkeystat.in_msgtype[msg->sadb_msg_type]++;
+ PFKEYSTAT_INC(in_msgtype[msg->sadb_msg_type]);
}
mtx_lock(&rawcb_mtx);
LIST_FOREACH(rp, &V_rawcb_list, list)
@@ -340,14 +340,14 @@ key_sendup_mbuf(so, m, target)
sendup++;
break;
}
- V_pfkeystat.in_msgtarget[target]++;
+ PFKEYSTAT_INC(in_msgtarget[target]);
if (!sendup)
continue;
if ((n = m_copy(m, 0, (int)M_COPYALL)) == NULL) {
m_freem(m);
- V_pfkeystat.in_nomem++;
+ PFKEYSTAT_INC(in_nomem);
mtx_unlock(&rawcb_mtx);
return ENOBUFS;
}
diff --git a/freebsd/sys/netipsec/keysock.h b/freebsd/sys/netipsec/keysock.h
index 3c0cc8b8..6039dbba 100644
--- a/freebsd/sys/netipsec/keysock.h
+++ b/freebsd/sys/netipsec/keysock.h
@@ -70,6 +70,8 @@ struct keycb {
};
VNET_DECLARE(struct pfkeystat, pfkeystat);
+#define PFKEYSTAT_ADD(name, val) V_pfkeystat.name += (val)
+#define PFKEYSTAT_INC(name) PFKEYSTAT_ADD(name, 1)
#define V_pfkeystat VNET(pfkeystat)
extern int key_output(struct mbuf *m, struct socket *so);
diff --git a/freebsd/sys/netipsec/xform_ah.c b/freebsd/sys/netipsec/xform_ah.c
index cf4fa37a..f1304c24 100644
--- a/freebsd/sys/netipsec/xform_ah.c
+++ b/freebsd/sys/netipsec/xform_ah.c
@@ -93,6 +93,7 @@ VNET_DEFINE(int, ah_enable) = 1; /* control flow of packets with AH */
VNET_DEFINE(int, ah_cleartos) = 1; /* clear ip_tos when doing AH calc */
VNET_DEFINE(struct ahstat, ahstat);
+#ifdef INET
SYSCTL_DECL(_net_inet_ah);
SYSCTL_VNET_INT(_net_inet_ah, OID_AUTO,
ah_enable, CTLFLAG_RW, &VNET_NAME(ah_enable), 0, "");
@@ -100,6 +101,7 @@ SYSCTL_VNET_INT(_net_inet_ah, OID_AUTO,
ah_cleartos, CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, "");
SYSCTL_VNET_STRUCT(_net_inet_ah, IPSECCTL_STATS,
stats, CTLFLAG_RD, &VNET_NAME(ahstat), ahstat, "");
+#endif
static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */
@@ -596,14 +598,14 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
IP6_EXTHDR_GET(ah, struct newah *, m, skip, rplen);
if (ah == NULL) {
DPRINTF(("ah_input: cannot pullup header\n"));
- V_ahstat.ahs_hdrops++; /*XXX*/
+ AHSTAT_INC(ahs_hdrops); /*XXX*/
m_freem(m);
return ENOBUFS;
}
/* Check replay window, if applicable. */
if (sav->replay && !ipsec_chkreplay(ntohl(ah->ah_seq), sav)) {
- V_ahstat.ahs_replay++;
+ AHSTAT_INC(ahs_replay);
DPRINTF(("%s: packet replay failure: %s\n", __func__,
ipsec_logsastr(sav)));
m_freem(m);
@@ -620,17 +622,17 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
hl, (u_long) (authsize + rplen - sizeof (struct ah)),
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_badauthl++;
+ AHSTAT_INC(ahs_badauthl);
m_freem(m);
return EACCES;
}
- V_ahstat.ahs_ibytes += m->m_pkthdr.len - skip - hl;
+ AHSTAT_ADD(ahs_ibytes, m->m_pkthdr.len - skip - hl);
/* Get crypto descriptors. */
crp = crypto_getreq(1);
if (crp == NULL) {
DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__));
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
m_freem(m);
return ENOBUFS;
}
@@ -670,7 +672,7 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
}
if (tc == NULL) {
DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__));
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
crypto_freereq(crp);
m_freem(m);
return ENOBUFS;
@@ -694,7 +696,7 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
skip, ahx->type, 0);
if (error != 0) {
/* NB: mbuf is free'd by ah_massage_headers */
- V_ahstat.ahs_hdrops++;
+ AHSTAT_INC(ahs_hdrops);
free(tc, M_XDATA);
crypto_freereq(crp);
return error;
@@ -726,19 +728,6 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
return ah_input_cb(crp);
}
-#ifdef INET6
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) do { \
- if (saidx->dst.sa.sa_family == AF_INET6) { \
- error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); \
- } else { \
- error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); \
- } \
-} while (0)
-#else
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) \
- (error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag))
-#endif
-
/*
* AH input callback from the crypto driver.
*/
@@ -788,19 +777,19 @@ ah_input_cb(struct cryptop *crp)
return error;
}
- V_ahstat.ahs_noxform++;
+ AHSTAT_INC(ahs_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
} else {
- V_ahstat.ahs_hist[sav->alg_auth]++;
+ AHSTAT_INC(ahs_hist[sav->alg_auth]);
crypto_freereq(crp); /* No longer needed. */
crp = NULL;
}
/* Shouldn't happen... */
if (m == NULL) {
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
DPRINTF(("%s: bogus returned buffer from crypto\n", __func__));
error = EINVAL;
goto bad;
@@ -826,7 +815,7 @@ ah_input_cb(struct cryptop *crp)
"in SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_badauth++;
+ AHSTAT_INC(ahs_badauth);
error = EACCES;
goto bad;
}
@@ -857,7 +846,7 @@ ah_input_cb(struct cryptop *crp)
m_copydata(m, skip + offsetof(struct newah, ah_seq),
sizeof (seq), (caddr_t) &seq);
if (ipsec_updatereplay(ntohl(seq), sav)) {
- V_ahstat.ahs_replay++;
+ AHSTAT_INC(ahs_replay);
error = ENOBUFS; /*XXX as above*/
goto bad;
}
@@ -871,11 +860,25 @@ ah_input_cb(struct cryptop *crp)
DPRINTF(("%s: mangled mbuf chain for SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));
- V_ahstat.ahs_hdrops++;
+ AHSTAT_INC(ahs_hdrops);
goto bad;
}
- IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag);
+ switch (saidx->dst.sa.sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag);
+ break;
+#endif
+ default:
+ panic("%s: Unexpected address family: %d saidx=%p", __func__,
+ saidx->dst.sa.sa_family, saidx);
+ }
KEY_FREESAV(&sav);
return error;
@@ -918,7 +921,7 @@ ah_output(
ahx = sav->tdb_authalgxform;
IPSEC_ASSERT(ahx != NULL, ("null authentication xform"));
- V_ahstat.ahs_output++;
+ AHSTAT_INC(ahs_output);
/* Figure out header size. */
rplen = HDRSIZE(sav);
@@ -941,7 +944,7 @@ ah_output(
sav->sah->saidx.dst.sa.sa_family,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_nopf++;
+ AHSTAT_INC(ahs_nopf);
error = EPFNOSUPPORT;
goto bad;
}
@@ -952,20 +955,20 @@ ah_output(
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi),
rplen + authsize + m->m_pkthdr.len, maxpacketsize));
- V_ahstat.ahs_toobig++;
+ AHSTAT_INC(ahs_toobig);
error = EMSGSIZE;
goto bad;
}
/* Update the counters. */
- V_ahstat.ahs_obytes += m->m_pkthdr.len - skip;
+ AHSTAT_ADD(ahs_obytes, m->m_pkthdr.len - skip);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_hdrops++;
+ AHSTAT_INC(ahs_hdrops);
error = ENOBUFS;
goto bad;
}
@@ -978,7 +981,7 @@ ah_output(
rplen + authsize,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_hdrops++; /*XXX differs from openbsd */
+ AHSTAT_INC(ahs_hdrops); /*XXX differs from openbsd */
error = ENOBUFS;
goto bad;
}
@@ -1006,7 +1009,7 @@ ah_output(
__func__,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_ahstat.ahs_wrap++;
+ AHSTAT_INC(ahs_wrap);
error = EINVAL;
goto bad;
}
@@ -1023,7 +1026,7 @@ ah_output(
if (crp == NULL) {
DPRINTF(("%s: failed to acquire crypto descriptors\n",
__func__));
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
error = ENOBUFS;
goto bad;
}
@@ -1045,7 +1048,7 @@ ah_output(
if (tc == NULL) {
crypto_freereq(crp);
DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__));
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
error = ENOBUFS;
goto bad;
}
@@ -1150,7 +1153,7 @@ ah_output_cb(struct cryptop *crp)
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
if (sav != isr->sav) {
- V_ahstat.ahs_notdb++;
+ AHSTAT_INC(ahs_notdb);
DPRINTF(("%s: SA expired while in crypto\n", __func__));
error = ENOBUFS; /*XXX*/
goto bad;
@@ -1167,7 +1170,7 @@ ah_output_cb(struct cryptop *crp)
return error;
}
- V_ahstat.ahs_noxform++;
+ AHSTAT_INC(ahs_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
@@ -1175,12 +1178,12 @@ ah_output_cb(struct cryptop *crp)
/* Shouldn't happen... */
if (m == NULL) {
- V_ahstat.ahs_crypto++;
+ AHSTAT_INC(ahs_crypto);
DPRINTF(("%s: bogus returned buffer from crypto\n", __func__));
error = EINVAL;
goto bad;
}
- V_ahstat.ahs_hist[sav->alg_auth]++;
+ AHSTAT_INC(ahs_hist[sav->alg_auth]);
/*
* Copy original headers (with the new protocol number) back
diff --git a/freebsd/sys/netipsec/xform_esp.c b/freebsd/sys/netipsec/xform_esp.c
index 9d4df589..20790d0d 100644
--- a/freebsd/sys/netipsec/xform_esp.c
+++ b/freebsd/sys/netipsec/xform_esp.c
@@ -283,9 +283,15 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
IPSEC_ASSERT(sav != NULL, ("null SA"));
IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform"));
- IPSEC_ASSERT((skip&3) == 0 && (m->m_pkthdr.len&3) == 0,
- ("misaligned packet, skip %u pkt len %u",
- skip, m->m_pkthdr.len));
+
+ /* Valid IP Packet length ? */
+ if ( (skip&3) || (m->m_pkthdr.len&3) ){
+ DPRINTF(("%s: misaligned packet, skip %u pkt len %u",
+ __func__, skip, m->m_pkthdr.len));
+ ESPSTAT_INC(esps_badilen);
+ m_freem(m);
+ return EINVAL;
+ }
/* XXX don't pullup, just copy header */
IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp));
@@ -328,7 +334,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
plen, espx->blocksize,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
- V_espstat.esps_badilen++;
+ ESPSTAT_INC(esps_badilen);
m_freem(m);
return EINVAL;
}
@@ -339,13 +345,13 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
if (esph && sav->replay && !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) {
DPRINTF(("%s: packet replay check for %s\n", __func__,
ipsec_logsastr(sav))); /*XXX*/
- V_espstat.esps_replay++;
+ ESPSTAT_INC(esps_replay);
m_freem(m);
return ENOBUFS; /*XXX*/
}
/* Update the counters */
- V_espstat.esps_ibytes += m->m_pkthdr.len - (skip + hlen + alen);
+ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen));
/* Find out if we've already done crypto */
for (mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, NULL);
@@ -364,7 +370,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
if (crp == NULL) {
DPRINTF(("%s: failed to acquire crypto descriptors\n",
__func__));
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
m_freem(m);
return ENOBUFS;
}
@@ -379,7 +385,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
if (tc == NULL) {
crypto_freereq(crp);
DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__));
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
m_freem(m);
return ENOBUFS;
}
@@ -447,19 +453,6 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
return esp_input_cb(crp);
}
-#ifdef INET6
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) do { \
- if (saidx->dst.sa.sa_family == AF_INET6) { \
- error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); \
- } else { \
- error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); \
- } \
-} while (0)
-#else
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) \
- (error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag))
-#endif
-
/*
* ESP input callback from the crypto driver.
*/
@@ -510,7 +503,7 @@ esp_input_cb(struct cryptop *crp)
return error;
}
- V_espstat.esps_noxform++;
+ ESPSTAT_INC(esps_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
@@ -518,12 +511,12 @@ esp_input_cb(struct cryptop *crp)
/* Shouldn't happen... */
if (m == NULL) {
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
DPRINTF(("%s: bogus returned buffer from crypto\n", __func__));
error = EINVAL;
goto bad;
}
- V_espstat.esps_hist[sav->alg_enc]++;
+ ESPSTAT_INC(esps_hist[sav->alg_enc]);
/* If authentication was performed, check now. */
if (esph != NULL) {
@@ -542,7 +535,7 @@ esp_input_cb(struct cryptop *crp)
* the verification for us. Otherwise we need to
* check the authentication calculation.
*/
- V_ahstat.ahs_hist[sav->alg_auth]++;
+ AHSTAT_INC(ahs_hist[sav->alg_auth]);
if (mtag == NULL) {
/* Copy the authenticator from the packet */
m_copydata(m, m->m_pkthdr.len - alen,
@@ -557,7 +550,7 @@ esp_input_cb(struct cryptop *crp)
__func__,
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_espstat.esps_badauth++;
+ ESPSTAT_INC(esps_badauth);
error = EACCES;
goto bad;
}
@@ -587,7 +580,7 @@ esp_input_cb(struct cryptop *crp)
if (ipsec_updatereplay(ntohl(seq), sav)) {
DPRINTF(("%s: packet replay check for %s\n", __func__,
ipsec_logsastr(sav)));
- V_espstat.esps_replay++;
+ ESPSTAT_INC(esps_replay);
error = ENOBUFS;
goto bad;
}
@@ -602,7 +595,7 @@ esp_input_cb(struct cryptop *crp)
/* Remove the ESP header and IV from the mbuf. */
error = m_striphdr(m, skip, hlen);
if (error) {
- V_espstat.esps_hdrops++;
+ ESPSTAT_INC(esps_hdrops);
DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
@@ -614,7 +607,7 @@ esp_input_cb(struct cryptop *crp)
/* Verify pad length */
if (lastthree[1] + 2 > m->m_pkthdr.len - skip) {
- V_espstat.esps_badilen++;
+ ESPSTAT_INC(esps_badilen);
DPRINTF(("%s: invalid padding length %d for %u byte packet "
"in SA %s/%08lx\n", __func__,
lastthree[1], m->m_pkthdr.len - skip,
@@ -627,7 +620,7 @@ esp_input_cb(struct cryptop *crp)
/* Verify correct decryption by checking the last padding bytes */
if ((sav->flags & SADB_X_EXT_PMASK) != SADB_X_EXT_PRAND) {
if (lastthree[1] != lastthree[0] && lastthree[1] != 0) {
- V_espstat.esps_badenc++;
+ ESPSTAT_INC(esps_badenc);
DPRINTF(("%s: decryption failed for packet in "
"SA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
@@ -643,7 +636,21 @@ esp_input_cb(struct cryptop *crp)
/* Restore the Next Protocol field */
m_copyback(m, protoff, sizeof (u_int8_t), lastthree + 2);
- IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag);
+ switch (saidx->dst.sa.sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag);
+ break;
+#endif
+ default:
+ panic("%s: Unexpected address family: %d saidx=%p", __func__,
+ saidx->dst.sa.sa_family, saidx);
+ }
KEY_FREESAV(&sav);
return error;
@@ -721,7 +728,7 @@ esp_output(
else
alen = 0;
- V_espstat.esps_output++;
+ ESPSTAT_INC(esps_output);
saidx = &sav->sah->saidx;
/* Check for maximum packet size violations. */
@@ -741,7 +748,7 @@ esp_output(
"family %d, SA %s/%08lx\n", __func__,
saidx->dst.sa.sa_family, ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_espstat.esps_nopf++;
+ ESPSTAT_INC(esps_nopf);
error = EPFNOSUPPORT;
goto bad;
}
@@ -750,19 +757,19 @@ esp_output(
"(len %u, max len %u)\n", __func__,
ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi),
skip + hlen + rlen + padding + alen, maxpacketsize));
- V_espstat.esps_toobig++;
+ ESPSTAT_INC(esps_toobig);
error = EMSGSIZE;
goto bad;
}
/* Update the counters. */
- V_espstat.esps_obytes += m->m_pkthdr.len - skip;
+ ESPSTAT_ADD(esps_obytes, m->m_pkthdr.len - skip);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));
- V_espstat.esps_hdrops++;
+ ESPSTAT_INC(esps_hdrops);
error = ENOBUFS;
goto bad;
}
@@ -773,7 +780,7 @@ esp_output(
DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n",
__func__, hlen, ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_espstat.esps_hdrops++; /* XXX diffs from openbsd */
+ ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */
error = ENOBUFS;
goto bad;
}
@@ -837,7 +844,7 @@ esp_output(
if (crp == NULL) {
DPRINTF(("%s: failed to acquire crypto descriptors\n",
__func__));
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
error = ENOBUFS;
goto bad;
}
@@ -866,7 +873,7 @@ esp_output(
if (tc == NULL) {
crypto_freereq(crp);
DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__));
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
error = ENOBUFS;
goto bad;
}
@@ -927,7 +934,7 @@ esp_output_cb(struct cryptop *crp)
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
if (sav != isr->sav) {
- V_espstat.esps_notdb++;
+ ESPSTAT_INC(esps_notdb);
DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n",
__func__, ipsec_address(&tc->tc_dst),
(u_long) ntohl(tc->tc_spi), tc->tc_proto));
@@ -947,7 +954,7 @@ esp_output_cb(struct cryptop *crp)
return error;
}
- V_espstat.esps_noxform++;
+ ESPSTAT_INC(esps_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
@@ -955,14 +962,14 @@ esp_output_cb(struct cryptop *crp)
/* Shouldn't happen... */
if (m == NULL) {
- V_espstat.esps_crypto++;
+ ESPSTAT_INC(esps_crypto);
DPRINTF(("%s: bogus returned buffer from crypto\n", __func__));
error = EINVAL;
goto bad;
}
- V_espstat.esps_hist[sav->alg_enc]++;
+ ESPSTAT_INC(esps_hist[sav->alg_enc]);
if (sav->tdb_authalgxform != NULL)
- V_ahstat.ahs_hist[sav->alg_auth]++;
+ AHSTAT_INC(ahs_hist[sav->alg_auth]);
/* Release crypto descriptors. */
free(tc, M_XDATA);
diff --git a/freebsd/sys/netipsec/xform_ipcomp.c b/freebsd/sys/netipsec/xform_ipcomp.c
index c3134bdf..2478c948 100644
--- a/freebsd/sys/netipsec/xform_ipcomp.c
+++ b/freebsd/sys/netipsec/xform_ipcomp.c
@@ -154,7 +154,7 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
* compression it means someone is playing tricks on us.
*/
if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) {
- V_ipcompstat.ipcomps_hdrops++; /*XXX*/
+ IPCOMPSTAT_INC(ipcomps_hdrops); /*XXX*/
DPRINTF(("%s: m_pullup failed\n", __func__));
return (ENOBUFS);
}
@@ -162,7 +162,7 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
ipcomp = (struct ipcomp *)addr;
if (ipcomp->comp_nxt == IPPROTO_IPCOMP) {
m_freem(m);
- V_ipcompstat.ipcomps_pdrops++; /* XXX have our own stats? */
+ IPCOMPSTAT_INC(ipcomps_pdrops); /* XXX have our own stats? */
DPRINTF(("%s: recursive compression detected\n", __func__));
return (EINVAL);
}
@@ -172,7 +172,7 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
if (crp == NULL) {
m_freem(m);
DPRINTF(("%s: no crypto descriptors\n", __func__));
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
return ENOBUFS;
}
/* Get IPsec-specific opaque pointer */
@@ -181,7 +181,7 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
m_freem(m);
crypto_freereq(crp);
DPRINTF(("%s: cannot allocate tdb_crypto\n", __func__));
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
return ENOBUFS;
}
crdc = crp->crp_desc;
@@ -215,19 +215,6 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
return crypto_dispatch(crp);
}
-#ifdef INET6
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) do { \
- if (saidx->dst.sa.sa_family == AF_INET6) { \
- error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); \
- } else { \
- error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); \
- } \
-} while (0)
-#else
-#define IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, mtag) \
- (error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag))
-#endif
-
/*
* IPComp input callback from the crypto driver.
*/
@@ -271,19 +258,19 @@ ipcomp_input_cb(struct cryptop *crp)
if (crp->crp_etype == EAGAIN) {
return crypto_dispatch(crp);
}
- V_ipcompstat.ipcomps_noxform++;
+ IPCOMPSTAT_INC(ipcomps_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
}
/* Shouldn't happen... */
if (m == NULL) {
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
DPRINTF(("%s: null mbuf returned from crypto\n", __func__));
error = EINVAL;
goto bad;
}
- V_ipcompstat.ipcomps_hist[sav->alg_comp]++;
+ IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]);
clen = crp->crp_olen; /* Length of data after processing */
@@ -295,7 +282,7 @@ ipcomp_input_cb(struct cryptop *crp)
m->m_pkthdr.len = clen + hlen + skip;
if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == 0) {
- V_ipcompstat.ipcomps_hdrops++; /*XXX*/
+ IPCOMPSTAT_INC(ipcomps_hdrops); /*XXX*/
DPRINTF(("%s: m_pullup failed\n", __func__));
error = EINVAL; /*XXX*/
goto bad;
@@ -308,7 +295,7 @@ ipcomp_input_cb(struct cryptop *crp)
/* Remove the IPCOMP header */
error = m_striphdr(m, skip, hlen);
if (error) {
- V_ipcompstat.ipcomps_hdrops++;
+ IPCOMPSTAT_INC(ipcomps_hdrops);
DPRINTF(("%s: bad mbuf chain, IPCA %s/%08lx\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
@@ -318,7 +305,21 @@ ipcomp_input_cb(struct cryptop *crp)
/* Restore the Next Protocol field */
m_copyback(m, protoff, sizeof (u_int8_t), (u_int8_t *) &nproto);
- IPSEC_COMMON_INPUT_CB(m, sav, skip, protoff, NULL);
+ switch (saidx->dst.sa.sa_family) {
+#ifdef INET6
+ case AF_INET6:
+ error = ipsec6_common_input_cb(m, sav, skip, protoff, NULL);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ error = ipsec4_common_input_cb(m, sav, skip, protoff, NULL);
+ break;
+#endif
+ default:
+ panic("%s: Unexpected address family: %d saidx=%p", __func__,
+ saidx->dst.sa.sa_family, saidx);
+ }
KEY_FREESAV(&sav);
return error;
@@ -365,12 +366,12 @@ ipcomp_output(
* See RFC 3173, 2.2. Non-Expansion Policy.
*/
if (m->m_pkthdr.len <= ipcompx->minlen) {
- V_ipcompstat.ipcomps_threshold++;
+ IPCOMPSTAT_INC(ipcomps_threshold);
return ipsec_process_done(m, isr);
}
ralen = m->m_pkthdr.len - skip; /* Raw payload length before comp. */
- V_ipcompstat.ipcomps_output++;
+ IPCOMPSTAT_INC(ipcomps_output);
/* Check for maximum packet size violations. */
switch (sav->sah->saidx.dst.sa.sa_family) {
@@ -385,7 +386,7 @@ ipcomp_output(
break;
#endif /* INET6 */
default:
- V_ipcompstat.ipcomps_nopf++;
+ IPCOMPSTAT_INC(ipcomps_nopf);
DPRINTF(("%s: unknown/unsupported protocol family %d, "
"IPCA %s/%08lx\n", __func__,
sav->sah->saidx.dst.sa.sa_family,
@@ -395,7 +396,7 @@ ipcomp_output(
goto bad;
}
if (ralen + skip + IPCOMP_HLENGTH > maxpacketsize) {
- V_ipcompstat.ipcomps_toobig++;
+ IPCOMPSTAT_INC(ipcomps_toobig);
DPRINTF(("%s: packet in IPCA %s/%08lx got too big "
"(len %u, max len %u)\n", __func__,
ipsec_address(&sav->sah->saidx.dst),
@@ -406,11 +407,11 @@ ipcomp_output(
}
/* Update the counters */
- V_ipcompstat.ipcomps_obytes += m->m_pkthdr.len - skip;
+ IPCOMPSTAT_ADD(ipcomps_obytes, m->m_pkthdr.len - skip);
m = m_unshare(m, M_NOWAIT);
if (m == NULL) {
- V_ipcompstat.ipcomps_hdrops++;
+ IPCOMPSTAT_INC(ipcomps_hdrops);
DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n",
__func__, ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
@@ -423,7 +424,7 @@ ipcomp_output(
/* Get crypto descriptors */
crp = crypto_getreq(1);
if (crp == NULL) {
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__));
error = ENOBUFS;
goto bad;
@@ -443,7 +444,7 @@ ipcomp_output(
tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto),
M_XDATA, M_NOWAIT|M_ZERO);
if (tc == NULL) {
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__));
crypto_freereq(crp);
error = ENOBUFS;
@@ -496,7 +497,7 @@ ipcomp_output_cb(struct cryptop *crp)
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
if (sav != isr->sav) {
- V_ipcompstat.ipcomps_notdb++;
+ IPCOMPSTAT_INC(ipcomps_notdb);
DPRINTF(("%s: SA expired while in crypto\n", __func__));
error = ENOBUFS; /*XXX*/
goto bad;
@@ -512,19 +513,19 @@ ipcomp_output_cb(struct cryptop *crp)
IPSECREQUEST_UNLOCK(isr);
return crypto_dispatch(crp);
}
- V_ipcompstat.ipcomps_noxform++;
+ IPCOMPSTAT_INC(ipcomps_noxform);
DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
error = crp->crp_etype;
goto bad;
}
/* Shouldn't happen... */
if (m == NULL) {
- V_ipcompstat.ipcomps_crypto++;
+ IPCOMPSTAT_INC(ipcomps_crypto);
DPRINTF(("%s: bogus return buffer from crypto\n", __func__));
error = EINVAL;
goto bad;
}
- V_ipcompstat.ipcomps_hist[sav->alg_comp]++;
+ IPCOMPSTAT_INC(ipcomps_hist[sav->alg_comp]);
if (crp->crp_ilen - skip > crp->crp_olen) {
struct mbuf *mo;
@@ -535,7 +536,7 @@ ipcomp_output_cb(struct cryptop *crp)
/* Compression helped, inject IPCOMP header. */
mo = m_makespace(m, skip, IPCOMP_HLENGTH, &roff);
if (mo == NULL) {
- V_ipcompstat.ipcomps_wrap++;
+ IPCOMPSTAT_INC(ipcomps_wrap);
DPRINTF(("%s: IPCOMP header inject failed for IPCA %s/%08lx\n",
__func__, ipsec_address(&sav->sah->saidx.dst),
(u_long) ntohl(sav->spi)));
@@ -580,7 +581,7 @@ ipcomp_output_cb(struct cryptop *crp)
break;
#endif /* INET6 */
default:
- V_ipcompstat.ipcomps_nopf++;
+ IPCOMPSTAT_INC(ipcomps_nopf);
DPRINTF(("%s: unknown/unsupported protocol "
"family %d, IPCA %s/%08lx\n", __func__,
sav->sah->saidx.dst.sa.sa_family,
@@ -591,7 +592,7 @@ ipcomp_output_cb(struct cryptop *crp)
}
} else {
/* Compression was useless, we have lost time. */
- V_ipcompstat.ipcomps_uncompr++;
+ IPCOMPSTAT_INC(ipcomps_uncompr);
DPRINTF(("%s: compressions was useless %d - %d <= %d\n",
__func__, crp->crp_ilen, skip, crp->crp_olen));
/* XXX remember state to not compress the next couple
@@ -637,6 +638,7 @@ static void
vnet_ipcomp_attach(const void *unused __unused)
{
+ /* XXX */
V_ipcompstat.version = IPCOMPSTAT_VERSION;
}
diff --git a/freebsd/sys/netipsec/xform_ipip.c b/freebsd/sys/netipsec/xform_ipip.c
index ece6cbc7..b7234be9 100644
--- a/freebsd/sys/netipsec/xform_ipip.c
+++ b/freebsd/sys/netipsec/xform_ipip.c
@@ -117,7 +117,7 @@ ip4_input6(struct mbuf **m, int *offp, int proto)
/* If we do not accept IP-in-IP explicitly, drop. */
if (!V_ipip_allow && ((*m)->m_flags & M_IPSEC) == 0) {
DPRINTF(("%s: dropped due to policy\n", __func__));
- V_ipipstat.ipips_pdrops++;
+ IPIPSTAT_INC(ipips_pdrops);
m_freem(*m);
return IPPROTO_DONE;
}
@@ -138,7 +138,7 @@ ip4_input(struct mbuf *m, int off)
/* If we do not accept IP-in-IP explicitly, drop. */
if (!V_ipip_allow && (m->m_flags & M_IPSEC) == 0) {
DPRINTF(("%s: dropped due to policy\n", __func__));
- V_ipipstat.ipips_pdrops++;
+ IPIPSTAT_INC(ipips_pdrops);
m_freem(m);
return;
}
@@ -174,7 +174,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
u_int8_t v;
int hlen;
- V_ipipstat.ipips_ipackets++;
+ IPIPSTAT_INC(ipips_ipackets);
m_copydata(m, 0, 1, &v);
@@ -190,7 +190,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
break;
#endif
default:
- V_ipipstat.ipips_family++;
+ IPIPSTAT_INC(ipips_family);
m_freem(m);
return /* EAFNOSUPPORT */;
}
@@ -199,7 +199,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
if (m->m_len < hlen) {
if ((m = m_pullup(m, hlen)) == NULL) {
DPRINTF(("%s: m_pullup (1) failed\n", __func__));
- V_ipipstat.ipips_hdrops++;
+ IPIPSTAT_INC(ipips_hdrops);
return;
}
}
@@ -236,7 +236,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
/* Sanity check */
if (m->m_pkthdr.len < sizeof(struct ip)) {
- V_ipipstat.ipips_hdrops++;
+ IPIPSTAT_INC(ipips_hdrops);
m_freem(m);
return;
}
@@ -256,7 +256,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
break;
#endif
default:
- V_ipipstat.ipips_family++;
+ IPIPSTAT_INC(ipips_family);
m_freem(m);
return; /* EAFNOSUPPORT */
}
@@ -267,7 +267,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
if (m->m_len < hlen) {
if ((m = m_pullup(m, hlen)) == NULL) {
DPRINTF(("%s: m_pullup (2) failed\n", __func__));
- V_ipipstat.ipips_hdrops++;
+ IPIPSTAT_INC(ipips_hdrops);
return;
}
}
@@ -318,7 +318,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
if (sin->sin_addr.s_addr ==
ipo->ip_src.s_addr) {
- V_ipipstat.ipips_spoof++;
+ IPIPSTAT_INC(ipips_spoof);
m_freem(m);
IFNET_RUNLOCK_NOSLEEP();
return;
@@ -335,7 +335,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
sin6 = (struct sockaddr_in6 *) ifa->ifa_addr;
if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_src)) {
- V_ipipstat.ipips_spoof++;
+ IPIPSTAT_INC(ipips_spoof);
m_freem(m);
IFNET_RUNLOCK_NOSLEEP();
return;
@@ -349,7 +349,7 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
}
/* Statistics */
- V_ipipstat.ipips_ibytes += m->m_pkthdr.len - iphlen;
+ IPIPSTAT_ADD(ipips_ibytes, m->m_pkthdr.len - iphlen);
#ifdef DEV_ENC
switch (v >> 4) {
@@ -394,8 +394,10 @@ _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
panic("%s: bogus ip version %u", __func__, v>>4);
}
+ m_addr_changed(m);
+
if (netisr_queue(isr, m)) { /* (0) on success. */
- V_ipipstat.ipips_qfull++;
+ IPIPSTAT_INC(ipips_qfull);
DPRINTF(("%s: packet dropped because of full queue\n",
__func__));
}
@@ -414,8 +416,10 @@ ipip_output(
u_int8_t tp, otos;
struct secasindex *saidx;
int error;
-#ifdef INET
+#if defined(INET) || defined(INET6)
u_int8_t itos;
+#endif
+#ifdef INET
struct ip *ipo;
#endif /* INET */
#ifdef INET6
@@ -442,7 +446,7 @@ ipip_output(
"address in SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_ipipstat.ipips_unspec++;
+ IPIPSTAT_INC(ipips_unspec);
error = EINVAL;
goto bad;
}
@@ -450,7 +454,7 @@ ipip_output(
M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
if (m == 0) {
DPRINTF(("%s: M_PREPEND failed\n", __func__));
- V_ipipstat.ipips_hdrops++;
+ IPIPSTAT_INC(ipips_hdrops);
error = ENOBUFS;
goto bad;
}
@@ -468,7 +472,8 @@ ipip_output(
ipo->ip_id = ip_newid();
/* If the inner protocol is IP... */
- if (tp == IPVERSION) {
+ switch (tp) {
+ case IPVERSION:
/* Save ECN notification */
m_copydata(m, sizeof(struct ip) +
offsetof(struct ip, ip_tos),
@@ -486,9 +491,10 @@ ipip_output(
ipo->ip_off = ntohs(ipo->ip_off);
ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK);
ipo->ip_off = htons(ipo->ip_off);
- }
+ break;
#ifdef INET6
- else if (tp == (IPV6_VERSION >> 4)) {
+ case (IPV6_VERSION >> 4):
+ {
u_int32_t itos32;
/* Save ECN notification. */
@@ -498,9 +504,10 @@ ipip_output(
itos = ntohl(itos32) >> 20;
ipo->ip_p = IPPROTO_IPV6;
ipo->ip_off = 0;
+ break;
}
#endif /* INET6 */
- else {
+ default:
goto nofamily;
}
@@ -519,7 +526,7 @@ ipip_output(
"address in SA %s/%08lx\n", __func__,
ipsec_address(&saidx->dst),
(u_long) ntohl(sav->spi)));
- V_ipipstat.ipips_unspec++;
+ IPIPSTAT_INC(ipips_unspec);
error = ENOBUFS;
goto bad;
}
@@ -534,7 +541,7 @@ ipip_output(
M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
if (m == 0) {
DPRINTF(("%s: M_PREPEND failed\n", __func__));
- V_ipipstat.ipips_hdrops++;
+ IPIPSTAT_INC(ipips_hdrops);
error = ENOBUFS;
goto bad;
}
@@ -549,8 +556,9 @@ ipip_output(
ip6o->ip6_dst = saidx->dst.sin6.sin6_addr;
ip6o->ip6_src = saidx->src.sin6.sin6_addr;
+ switch (tp) {
#ifdef INET
- if (tp == IPVERSION) {
+ case IPVERSION:
/* Save ECN notification */
m_copydata(m, sizeof(struct ip6_hdr) +
offsetof(struct ip, ip_tos), sizeof(u_int8_t),
@@ -558,21 +566,24 @@ ipip_output(
/* This is really IPVERSION. */
ip6o->ip6_nxt = IPPROTO_IPIP;
- } else
+ break;
#endif /* INET */
- if (tp == (IPV6_VERSION >> 4)) {
- u_int32_t itos32;
-
- /* Save ECN notification. */
- m_copydata(m, sizeof(struct ip6_hdr) +
- offsetof(struct ip6_hdr, ip6_flow),
- sizeof(u_int32_t), (caddr_t) &itos32);
- itos = ntohl(itos32) >> 20;
-
- ip6o->ip6_nxt = IPPROTO_IPV6;
- } else {
- goto nofamily;
- }
+ case (IPV6_VERSION >> 4):
+ {
+ u_int32_t itos32;
+
+ /* Save ECN notification. */
+ m_copydata(m, sizeof(struct ip6_hdr) +
+ offsetof(struct ip6_hdr, ip6_flow),
+ sizeof(u_int32_t), (caddr_t) &itos32);
+ itos = ntohl(itos32) >> 20;
+
+ ip6o->ip6_nxt = IPPROTO_IPV6;
+ break;
+ }
+ default:
+ goto nofamily;
+ }
otos = 0;
ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
@@ -584,12 +595,12 @@ ipip_output(
nofamily:
DPRINTF(("%s: unsupported protocol family %u\n", __func__,
saidx->dst.sa.sa_family));
- V_ipipstat.ipips_family++;
+ IPIPSTAT_INC(ipips_family);
error = EAFNOSUPPORT; /* XXX diffs from openbsd */
goto bad;
}
- V_ipipstat.ipips_opackets++;
+ IPIPSTAT_INC(ipips_opackets);
*mp = m;
#ifdef INET
@@ -599,7 +610,8 @@ nofamily:
tdb->tdb_cur_bytes +=
m->m_pkthdr.len - sizeof(struct ip);
#endif
- V_ipipstat.ipips_obytes += m->m_pkthdr.len - sizeof(struct ip);
+ IPIPSTAT_ADD(ipips_obytes,
+ m->m_pkthdr.len - sizeof(struct ip));
}
#endif /* INET */
@@ -610,8 +622,8 @@ nofamily:
tdb->tdb_cur_bytes +=
m->m_pkthdr.len - sizeof(struct ip6_hdr);
#endif
- V_ipipstat.ipips_obytes +=
- m->m_pkthdr.len - sizeof(struct ip6_hdr);
+ IPIPSTAT_ADD(ipips_obytes,
+ m->m_pkthdr.len - sizeof(struct ip6_hdr));
}
#endif /* INET6 */
@@ -624,6 +636,7 @@ bad:
}
#ifdef IPSEC
+#if defined(INET) || defined(INET6)
static int
ipe4_init(struct secasvar *sav, struct xformsw *xsp)
{
@@ -654,6 +667,8 @@ static struct xformsw ipe4_xformsw = {
};
extern struct domain inetdomain;
+#endif /* INET || INET6 */
+#ifdef INET
static struct protosw ipe4_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -663,7 +678,8 @@ static struct protosw ipe4_protosw = {
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
-#ifdef INET6
+#endif /* INET */
+#if defined(INET6) && defined(INET)
static struct ip6protosw ipe6_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -673,8 +689,9 @@ static struct ip6protosw ipe6_protosw = {
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
-#endif
+#endif /* INET6 && INET */
+#if defined(INET)
/*
* Check the encapsulated packet to see if we want it
*/
@@ -689,6 +706,7 @@ ipe4_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
*/
return ((m->m_flags & M_IPSEC) != 0 ? 1 : 0);
}
+#endif /* INET */
static void
ipe4_attach(void)
@@ -697,9 +715,11 @@ ipe4_attach(void)
xform_register(&ipe4_xformsw);
/* attach to encapsulation framework */
/* XXX save return cookie for detach on module remove */
+#ifdef INET
(void) encap_attach_func(AF_INET, -1,
ipe4_encapcheck, &ipe4_protosw, NULL);
-#ifdef INET6
+#endif
+#if defined(INET6) && defined(INET)
(void) encap_attach_func(AF_INET6, -1,
ipe4_encapcheck, (struct protosw *)&ipe6_protosw, NULL);
#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.c b/freebsd/sys/netpfil/ipfw/dn_heap.c
new file mode 100644
index 00000000..15e2870d
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_heap.c
@@ -0,0 +1,554 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, used in dummynet
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+#include <rtems/bsd/sys/param.h>
+#ifdef _KERNEL
+__FBSDID("$FreeBSD$");
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <netpfil/ipfw/dn_heap.h>
+#ifndef log
+#define log(x, arg...)
+#endif
+
+#else /* !_KERNEL */
+
+#include <stdio.h>
+#include <dn_test.h>
+#include <strings.h>
+#include <stdlib.h>
+
+#include "dn_heap.h"
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x...) fprintf(stderr, ## x), exit(1)
+#define MALLOC_DEFINE(a, b, c)
+static void *my_malloc(int s) { return malloc(s); }
+static void my_free(void *p) { free(p); }
+#define malloc(s, t, w) my_malloc(s)
+#define free(p, t) my_free(p)
+#endif /* !_KERNEL */
+
+MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
+
+/*
+ * Heap management functions.
+ *
+ * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
+ * Some macros help finding parent/children so we can optimize them.
+ *
+ * heap_init() is called to expand the heap when needed.
+ * Increment size in blocks of 16 entries.
+ * Returns 1 on error, 0 on success
+ */
+#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
+#define HEAP_LEFT(x) ( (x)+(x) + 1 )
+#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
+#define HEAP_INCREMENT 15
+
+static int
+heap_resize(struct dn_heap *h, unsigned int new_size)
+{
+ struct dn_heap_entry *p;
+
+ if (h->size >= new_size ) /* have enough room */
+ return 0;
+#if 1 /* round to the next power of 2 */
+ new_size |= new_size >> 1;
+ new_size |= new_size >> 2;
+ new_size |= new_size >> 4;
+ new_size |= new_size >> 8;
+ new_size |= new_size >> 16;
+#else
+ new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT;
+#endif
+ p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT);
+ if (p == NULL) {
+ printf("--- %s, resize %d failed\n", __func__, new_size );
+ return 1; /* error */
+ }
+ if (h->size > 0) {
+ bcopy(h->p, p, h->size * sizeof(*p) );
+ free(h->p, M_DN_HEAP);
+ }
+ h->p = p;
+ h->size = new_size;
+ return 0;
+}
+
+int
+heap_init(struct dn_heap *h, int size, int ofs)
+{
+ if (heap_resize(h, size))
+ return 1;
+ h->elements = 0;
+ h->ofs = ofs;
+ return 0;
+}
+
+/*
+ * Insert element in heap. Normally, p != NULL, we insert p in
+ * a new position and bubble up. If p == NULL, then the element is
+ * already in place, and key is the position where to start the
+ * bubble-up.
+ * Returns 1 on failure (cannot allocate new heap entry)
+ *
+ * If ofs > 0 the position (index, int) of the element in the heap is
+ * also stored in the element itself at the given offset in bytes.
+ */
+#define SET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \
+ } while (0)
+/*
+ * RESET_OFFSET is used for sanity checks. It sets ofs
+ * to an invalid value.
+ */
+#define RESET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \
+ } while (0)
+
+int
+heap_insert(struct dn_heap *h, uint64_t key1, void *p)
+{
+ int son = h->elements;
+
+ //log("%s key %llu p %p\n", __FUNCTION__, key1, p);
+ if (p == NULL) { /* data already there, set starting point */
+ son = key1;
+ } else { /* insert new element at the end, possibly resize */
+ son = h->elements;
+ if (son == h->size) /* need resize... */
+ // XXX expand by 16 or so
+ if (heap_resize(h, h->elements+16) )
+ return 1; /* failure... */
+ h->p[son].object = p;
+ h->p[son].key = key1;
+ h->elements++;
+ }
+ /* make sure that son >= father along the path */
+ while (son > 0) {
+ int father = HEAP_FATHER(son);
+ struct dn_heap_entry tmp;
+
+ if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
+ break; /* found right position */
+ /* son smaller than father, swap and repeat */
+ HEAP_SWAP(h->p[son], h->p[father], tmp);
+ SET_OFFSET(h, son);
+ son = father;
+ }
+ SET_OFFSET(h, son);
+ return 0;
+}
+
+/*
+ * remove top element from heap, or obj if obj != NULL
+ */
+void
+heap_extract(struct dn_heap *h, void *obj)
+{
+ int child, father, max = h->elements - 1;
+
+ if (max < 0) {
+ printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h);
+ return;
+ }
+ if (obj == NULL)
+ father = 0; /* default: move up smallest child */
+ else { /* extract specific element, index is at offset */
+ if (h->ofs <= 0)
+ panic("%s: extract from middle not set on %p\n",
+ __FUNCTION__, h);
+ father = *((int *)((char *)obj + h->ofs));
+ if (father < 0 || father >= h->elements) {
+ panic("%s: father %d out of bound 0..%d\n",
+ __FUNCTION__, father, h->elements);
+ }
+ }
+ /*
+ * below, father is the index of the empty element, which
+ * we replace at each step with the smallest child until we
+ * reach the bottom level.
+ */
+ // XXX why removing RESET_OFFSET increases runtime by 10% ?
+ RESET_OFFSET(h, father);
+ while ( (child = HEAP_LEFT(father)) <= max ) {
+ if (child != max &&
+ DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
+ child++; /* take right child, otherwise left */
+ h->p[father] = h->p[child];
+ SET_OFFSET(h, father);
+ father = child;
+ }
+ h->elements--;
+ if (father != max) {
+ /*
+ * Fill hole with last entry and bubble up,
+ * reusing the insert code
+ */
+ h->p[father] = h->p[max];
+ heap_insert(h, father, NULL);
+ }
+}
+
+#if 0
+/*
+ * change object position and update references
+ * XXX this one is never used!
+ */
+static void
+heap_move(struct dn_heap *h, uint64_t new_key, void *object)
+{
+ int temp, i, max = h->elements-1;
+ struct dn_heap_entry *p, buf;
+
+ if (h->ofs <= 0)
+ panic("cannot move items on this heap");
+ p = h->p; /* shortcut */
+
+ i = *((int *)((char *)object + h->ofs));
+ if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */
+ p[i].key = new_key;
+ for (; i>0 &&
+ DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key);
+ i = temp ) { /* bubble up */
+ HEAP_SWAP(p[i], p[temp], buf);
+ SET_OFFSET(h, i);
+ }
+ } else { /* must move down */
+ p[i].key = new_key;
+ while ( (temp = HEAP_LEFT(i)) <= max ) {
+ /* found left child */
+ if (temp != max &&
+ DN_KEY_LT(p[temp+1].key, p[temp].key))
+ temp++; /* select child with min key */
+ if (DN_KEY_LT(p[temp].key, new_key)) {
+ /* go down */
+ HEAP_SWAP(p[i], p[temp], buf);
+ SET_OFFSET(h, i);
+ } else
+ break;
+ i = temp;
+ }
+ }
+ SET_OFFSET(h, i);
+}
+#endif /* heap_move, unused */
+
+/*
+ * heapify() will reorganize data inside an array to maintain the
+ * heap property. It is needed when we delete a bunch of entries.
+ */
+static void
+heapify(struct dn_heap *h)
+{
+ int i;
+
+ for (i = 0; i < h->elements; i++ )
+ heap_insert(h, i , NULL);
+}
+
+/*
+ * Invoke 'fn' on every heap entry. The callback may return a
+ * combination of HEAP_SCAN_DEL (drop the current entry) and
+ * HEAP_SCAN_END (stop the scan). Returns the number of entries
+ * removed; heapify() restores the heap property afterwards.
+ */
+int
+heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t),
+	uintptr_t arg)
+{
+	int i, ret, found;
+
+	for (i = found = 0 ; i < h->elements ;) {
+		ret = fn(h->p[i].object, arg);
+		if (ret & HEAP_SCAN_DEL) {
+			/* fill slot i with the last entry and re-test it */
+			h->elements-- ;
+			h->p[i] = h->p[h->elements] ;
+			found++ ;
+		} else
+			i++ ;
+		if (ret & HEAP_SCAN_END)
+			break;
+	}
+	if (found)
+		heapify(h);
+	return found;
+}
+
+/*
+ * Cleanup the heap and free the data structure.
+ * Only the entry array is dynamically allocated; the descriptor is
+ * zeroed afterwards so it can be passed to heap_init() again.
+ */
+void
+heap_free(struct dn_heap *h)
+{
+	if (h->size >0 )
+		free(h->p, M_DN_HEAP);
+	bzero(h, sizeof(*h) );
+}
+
+/*
+ * hash table support.
+ */
+
+struct dn_ht {
+        int buckets;            /* how many buckets, really buckets - 1*/
+	int entries;		/* how many entries */
+        int ofs;	        /* offset of link field */
+        uint32_t (*hash)(uintptr_t, int, void *arg);	/* key -> bucket */
+        int (*match)(void *_el, uintptr_t key, int, void *); /* el vs key */
+        void *(*newh)(uintptr_t, int, void *); /* optional allocator */
+        void **ht;              /* bucket heads */
+};
+/*
+ * Initialize, allocating bucket pointers inline.
+ * Recycle previous record if possible.
+ * If the 'newh' function is not supplied, we assume that the
+ * key passed to ht_find is the same object to be stored in.
+ */
+struct dn_ht *
+dn_ht_init(struct dn_ht *ht, int buckets, int ofs,
+        uint32_t (*h)(uintptr_t, int, void *),
+        int (*match)(void *, uintptr_t, int, void *),
+	void *(*newh)(uintptr_t, int, void *))
+{
+	int l;
+
+	/*
+	 * Notes about rounding bucket size to a power of two.
+	 * Given the original bucket size, we compute the nearest lower and
+	 * higher power of two, minus 1 (respectively b_min and b_max) because
+	 * this value will be used to do an AND with the index returned
+	 * by hash function.
+	 * To choose between these two values, the original bucket size is
+	 * compared with b_min. If the original size is greater than 4/3 b_min,
+	 * we round the bucket size to b_max, else to b_min.
+	 * This ratio rounds to the nearest power of two, favouring the
+	 * greater size when the gap between the two powers is relatively
+	 * big.
+	 * Rounding the bucket size to a power of two avoids using modulo
+	 * when calculating the correct bucket.
+	 * The ht->buckets variable stores the bucket size - 1 so we can
+	 * simply AND the index returned by the hash function with
+	 * ht->buckets instead of using modulo.
+	 */
+	int b_min; /* min buckets */
+	int b_max; /* max buckets */
+	int b_ori; /* original buckets */
+
+	if (h == NULL || match == NULL) {
+		printf("--- missing hash or match function");
+		return NULL;
+	}
+	if (buckets < 1 || buckets > 65536)
+		return NULL;
+
+	b_ori = buckets;
+	/* calculate next power of 2, - 1*/
+	buckets |= buckets >> 1;
+	buckets |= buckets >> 2;
+	buckets |= buckets >> 4;
+	buckets |= buckets >> 8;
+	buckets |= buckets >> 16;
+
+	b_max = buckets; /* Next power */
+	b_min = buckets >> 1; /* Previous power */
+
+	/* Calculate the 'nearest' bucket size */
+	if (b_min * 4000 / 3000 < b_ori) /* i.e. b_ori > 4/3 * b_min */
+		buckets = b_max;
+	else
+		buckets = b_min;
+
+	if (ht) {	/* see if we can reuse */
+		if (buckets <= ht->buckets) {
+			/*
+			 * NOTE(review): on reuse, entries hanging off
+			 * buckets above the new mask and the old
+			 * ht->entries count are kept as-is; callers
+			 * presumably pass an empty table -- confirm.
+			 */
+			ht->buckets = buckets;
+		} else {
+			/* free pointers if not allocated inline */
+			if (ht->ht != (void *)(ht + 1))
+				free(ht->ht, M_DN_HEAP);
+			free(ht, M_DN_HEAP);
+			ht = NULL;
+		}
+	}
+	if (ht == NULL) {
+		/* Allocate buckets + 1 entries because 'buckets' is the
+		 * mask ANDed with the index returned by the hash function.
+		 */
+		l = sizeof(*ht) + (buckets + 1) * sizeof(void **);
+		ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO);
+	}
+	if (ht) {
+		ht->ht = (void **)(ht + 1);
+		ht->buckets = buckets;
+		ht->ofs = ofs;
+		ht->hash = h;
+		ht->match = match;
+		ht->newh = newh;
+	}
+	return ht;
+}
+
+/* dummy callback for dn_ht_free to unlink all (both args unused) */
+static int
+do_del(void *obj, void *arg)
+{
+	return DNHT_SCAN_DEL;
+}
+
+void
+dn_ht_free(struct dn_ht *ht, int flags)
+{
+	if (ht == NULL)
+		return;
+	if (flags & DNHT_REMOVE) {
+		/* unlink every element but keep the table itself;
+		 * a second call without DNHT_REMOVE frees the table.
+		 */
+		(void)dn_ht_scan(ht, do_del, NULL);
+	} else {
+		/* free the bucket array if not allocated inline */
+		if (ht->ht && ht->ht != (void *)(ht + 1))
+			free(ht->ht, M_DN_HEAP);
+		free(ht, M_DN_HEAP);
+	}
+}
+
+/* number of entries stored; 0 for a NULL table */
+int
+dn_ht_entries(struct dn_ht *ht)
+{
+	return ht ? ht->entries : 0;
+}
+
+/* lookup and optionally create or delete element */
+void *
+dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
+{
+	int i;
+	void **pp, *p;
+
+	if (ht == NULL)	/* easy on an empty hash */
+		return NULL;
+	/* single-mask tables skip the hash computation */
+	i = (ht->buckets == 1) ? 0 :
+		(ht->hash(key, flags, arg) & ht->buckets);
+
+	/* pp always points at the link referencing p, so unlinking is a
+	 * single pointer update with no separate 'prev' bookkeeping.
+	 */
+	for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
+		if (flags & DNHT_MATCH_PTR) {
+			if (key == (uintptr_t)p)
+				break;
+		} else if (ht->match(p, key, flags, arg)) /* found match */
+			break;
+	}
+	if (p) {
+		if (flags & DNHT_REMOVE) {
+			/* link in the next element */
+			*pp = *(void **)((char *)p + ht->ofs);
+			*(void **)((char *)p + ht->ofs) = NULL;
+			ht->entries--;
+		}
+	} else if (flags & DNHT_INSERT) {
+		// printf("%s before calling new, bucket %d ofs %d\n",
+		//	__FUNCTION__, i, ht->ofs);
+		/* allocate via newh(), or store the key itself as object */
+		p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
+		// printf("%s newh returns %p\n", __FUNCTION__, p);
+		if (p) {
+			ht->entries++;
+			*(void **)((char *)p + ht->ofs) = ht->ht[i];
+			ht->ht[i] = p;
+		}
+	}
+	return p;
+}
+
+/*
+ * do a scan with the option to delete the object. Extract next before
+ * running the callback because the element may be destroyed there.
+ * The callback returns a combination of DNHT_SCAN_DEL (unlink the
+ * current element) and DNHT_SCAN_END (stop immediately). Returns the
+ * number of elements unlinked.
+ */
+int
+dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
+{
+	int i, ret, found = 0;
+	void **curp, *cur, *next;
+
+	if (ht == NULL || fn == NULL)
+		return 0;
+	for (i = 0; i <= ht->buckets; i++) {
+		curp = &ht->ht[i];
+		while ( (cur = *curp) != NULL) {
+			next = *(void **)((char *)cur + ht->ofs);
+			ret = fn(cur, arg);
+			if (ret & DNHT_SCAN_DEL) {
+				found++;
+				ht->entries--;
+				*curp = next;
+			} else {
+				/* advance the link pointer, not the element */
+				curp = (void **)((char *)cur + ht->ofs);
+			}
+			if (ret & DNHT_SCAN_END)
+				return found;
+		}
+	}
+	return found;
+}
+
+/*
+ * Similar to dn_ht_scan(), except that the scan is performed only
+ * in the bucket 'bucket'. The function returns a correct bucket number if
+ * the original is invalid.
+ * If the callback returns DNHT_SCAN_END, the function move the ht->ht[i]
+ * pointer to the last entry processed. Moreover, the bucket number passed
+ * by caller is decremented, because usually the caller increment it.
+ */
+int
+dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
+		 void *arg)
+{
+	int i, ret, found = 0;
+	void **curp, *cur, *next;
+
+	if (ht == NULL || fn == NULL)
+		return 0;
+	/* clamp an out-of-range bucket index back to 0 */
+	if (*bucket > ht->buckets)
+		*bucket = 0;
+	i = *bucket;
+
+	/* NOTE(review): the block comment above promises that *bucket is
+	 * decremented when the callback returns DNHT_SCAN_END, but this
+	 * implementation never modifies *bucket here -- confirm against
+	 * callers before relying on either behavior.
+	 */
+	curp = &ht->ht[i];
+	while ( (cur = *curp) != NULL) {
+		next = *(void **)((char *)cur + ht->ofs);
+		ret = fn(cur, arg);
+		if (ret & DNHT_SCAN_DEL) {
+			found++;
+			ht->entries--;
+			*curp = next;
+		} else {
+			curp = (void **)((char *)cur + ht->ofs);
+		}
+		if (ret & DNHT_SCAN_END)
+			return found;
+	}
+	return found;
+}
diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.h b/freebsd/sys/netpfil/ipfw/dn_heap.h
new file mode 100644
index 00000000..c95473ad
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_heap.h
@@ -0,0 +1,191 @@
+/*-
+ * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, header file
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_HEAP_H
+#define _IP_DN_HEAP_H
+
+#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0)
+#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0)
+
+/*
+ * This module implements a binary heap supporting random extraction.
+ *
+ * A heap entry contains an uint64_t key and a pointer to object.
+ * DN_KEY_LT(a,b) returns true if key 'a' is smaller than 'b'
+ *
+ * The heap is a struct dn_heap plus a dynamically allocated
+ * array of dn_heap_entry entries. 'size' represents the size of
+ * the array, 'elements' count entries in use. The topmost
+ * element has the smallest key.
+ * The heap supports ordered insert, and extract from the top.
+ * To extract an object from the middle of the heap, the object
+ * must reserve an 'int32_t' to store the position of the object
+ * in the heap itself, and the location of this field must be
+ * passed as an argument to heap_init() -- use -1 if the feature
+ * is not used.
+ */
+struct dn_heap_entry {
+	uint64_t key;	/* sorting key, smallest comes first */
+	void *object;	/* object pointer */
+};
+
+struct dn_heap {
+	int size;	/* the size of the array */
+	int elements;	/* elements in use */
+	int ofs;	/* offset in the object of heap index, -1 if unused */
+	struct dn_heap_entry *p;	/* array of "size" entries */
+};
+
+enum {
+	HEAP_SCAN_DEL = 1,	/* scan callback: drop current entry */
+	HEAP_SCAN_END = 2,	/* scan callback: stop the scan */
+};
+
+/*
+ * heap_init() reinitializes the heap setting the size and the offset
+ * of the index for random extraction (use -1 if not used).
+ * The 'elements' counter is set to 0.
+ *
+ * SET_HEAP_OFS() indicates where, in the object, is stored the index
+ * for random extractions from the heap.
+ *
+ * heap_free() frees the memory associated to a heap.
+ *
+ * heap_insert() adds a key-pointer pair to the heap
+ *
+ * HEAP_TOP() returns a pointer to the top element of the heap,
+ * but makes no checks on its existence (XXX should we change ?)
+ *
+ * heap_extract() removes the entry at the top, returning the pointer.
+ * (the key should have been read before).
+ *
+ * heap_scan() invokes a callback on each entry of the heap.
+ * The callback can return a combination of HEAP_SCAN_DEL and
+ * HEAP_SCAN_END. HEAP_SCAN_DEL means the current element must
+ * be removed, and HEAP_SCAN_END means to terminate the scan.
+ * heap_scan() returns the number of elements removed.
+ * Because the order is not guaranteed, we should use heap_scan()
+ * only as a last resort mechanism.
+ */
+#define HEAP_TOP(h) ((h)->p)
+#define SET_HEAP_OFS(h, n) do { (h)->ofs = n; } while (0)
+int heap_init(struct dn_heap *h, int size, int ofs);
+int heap_insert(struct dn_heap *h, uint64_t key1, void *p);
+void heap_extract(struct dn_heap *h, void *obj);
+void heap_free(struct dn_heap *h);
+int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t);
+
+/*------------------------------------------------------
+ * This module implements a generic hash table with support for
+ * running callbacks on the entire table. To avoid allocating
+ * memory during hash table operations, objects must reserve
+ * space for a link field. XXX if the heap is moderately full,
+ * an SLIST suffices, and we can tolerate the cost of a hash
+ * computation on each removal.
+ *
+ * dn_ht_init() initializes the table, setting the number of
+ * buckets, the offset of the link field, the main callbacks.
+ * Callbacks are:
+ *
+ * hash(key, flags, arg) called to return a bucket index.
+ * match(obj, key, flags, arg) called to determine if key
+ * matches the current 'obj' in the heap
+ * newh(key, flags, arg) optional, used to allocate a new
+ * object during insertions.
+ *
+ * dn_ht_free() frees the heap or unlink elements.
+ * DNHT_REMOVE unlink elements, 0 frees the heap.
+ * You need two calls to do both.
+ *
+ * dn_ht_find() is the main lookup function, which can also be
+ * used to insert or delete elements in the hash table.
+ * The final 'arg' is passed to all callbacks.
+ *
+ * dn_ht_scan() is used to invoke a callback on all entries of
+ * the heap, or possibly on just one bucket. The callback
+ * is invoked with a pointer to the object, and must return
+ * one of DNHT_SCAN_DEL or DNHT_SCAN_END to request the
+ * removal of the object from the heap and the end of the
+ * scan, respectively.
+ *
+ * dn_ht_scan_bucket() is similar to dn_ht_scan(), except that it scans
+ * only the specific bucket of the table. The bucket is an in-out
+ * parameter and returns a valid bucket number if the original
+ * is invalid.
+ *
+ * A combination of flags can be used to modify the operation
+ * of the dn_ht_find(), and of the callbacks:
+ *
+ * DNHT_KEY_IS_OBJ means the key is the object pointer.
+ * It is usually of interest for the hash and match functions.
+ *
+ * DNHT_MATCH_PTR during a lookup, match pointers instead
+ * of calling match(). Normally used when removing specific
+ * entries. Does not imply KEY_IS_OBJ as the latter _is_ used
+ * by the match function.
+ *
+ * DNHT_INSERT insert the element if not found.
+ * Calls new() to allocates a new object unless
+ * DNHT_KEY_IS_OBJ is set.
+ *
+ * DNHT_UNIQUE only insert if object not found.
+ * XXX should it imply DNHT_INSERT ?
+ *
+ * DNHT_REMOVE remove objects if we find them.
+ */
+struct dn_ht; /* should be opaque */
+
+struct dn_ht *dn_ht_init(struct dn_ht *, int buckets, int ofs,
+ uint32_t (*hash)(uintptr_t, int, void *),
+ int (*match)(void *, uintptr_t, int, void *),
+ void *(*newh)(uintptr_t, int, void *));
+void dn_ht_free(struct dn_ht *, int flags);
+
+void *dn_ht_find(struct dn_ht *, uintptr_t, int, void *);
+int dn_ht_scan(struct dn_ht *, int (*)(void *, void *), void *);
+int dn_ht_scan_bucket(struct dn_ht *, int * , int (*)(void *, void *), void *);
+int dn_ht_entries(struct dn_ht *);
+
+enum {  /* flags values.
+	 * The first two are returned by the scan callback to request
+	 * deletion of the matching element or the end of the scan;
+	 * the others modify the behavior of dn_ht_find() and callbacks.
+	 */
+	DNHT_SCAN_DEL	= 0x0001,
+	DNHT_SCAN_END	= 0x0002,
+	DNHT_KEY_IS_OBJ	= 0x0004,	/* key is the obj pointer */
+	DNHT_MATCH_PTR	= 0x0008,	/* match by pointer, not match() */
+	DNHT_INSERT	= 0x0010,	/* insert if not found */
+	DNHT_UNIQUE	= 0x0020,	/* report error if already there */
+	DNHT_REMOVE	= 0x0040,	/* remove on find or dn_ht_free */
+};
+
+#endif /* _IP_DN_HEAP_H */
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched.h b/freebsd/sys/netpfil/ipfw/dn_sched.h
new file mode 100644
index 00000000..ab823fe7
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The API to write a packet scheduling algorithm for dummynet.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DN_SCHED_H
+#define _DN_SCHED_H
+
+#define DN_MULTIQUEUE 0x01
+/*
+ * Descriptor for a scheduling algorithm.
+ * Contains all function pointers for a given scheduler
+ * This is typically created when a module is loaded, and stored
+ * in a global list of schedulers.
+ */
+struct dn_alg {
+	uint32_t type;           /* the scheduler type */
+	const char *name;   /* scheduler name */
+	uint32_t flags;	/* DN_MULTIQUEUE if supports multiple queues */
+
+	/*
+	 * The following define the size of 3 optional data structures
+	 * that may need to be allocated at runtime, and are appended
+	 * to each of the base data structures: scheduler, sched.inst,
+	 * and queue. We don't have a per-flowset structure.
+	 */
+	/* + parameters attached to the template, e.g.
+	 *   default queue sizes, weights, quantum size, and so on;
+	 */
+	size_t schk_datalen;
+
+	/* + per-instance parameters, such as timestamps,
+	 *   containers for queues, etc;
+	 */
+	size_t si_datalen;
+
+	size_t q_datalen;	/* per-queue parameters (e.g. S,F) */
+
+	/*
+	 * Methods implemented by the scheduler:
+	 * enqueue	enqueue packet 'm' on scheduler 's', queue 'q'.
+	 *	q is NULL for !MULTIQUEUE.
+	 *	Return 0 on success, 1 on drop (packet consumed anyways).
+	 *	Note that q should be interpreted only as a hint
+	 *	on the flow that the mbuf belongs to: while a
+	 *	scheduler will normally enqueue m into q, it is ok
+	 *	to leave q alone and put the mbuf elsewhere.
+	 *	This function is called in two cases:
+	 *	 - when a new packet arrives to the scheduler;
+	 *	 - when a scheduler is reconfigured. In this case the
+	 *	   call is issued by the new_queue callback, with a
+	 *	   non empty queue (q) and m pointing to the first
+	 *	   mbuf in the queue. For this reason, the function
+	 *	   should internally check for (m != q->mq.head)
+	 *	   before calling dn_enqueue().
+	 *
+	 * dequeue	Called when scheduler instance 's' can
+	 *	dequeue a packet. Return NULL if none are available.
+	 *	XXX what about non work-conserving ?
+	 *
+	 * config	called on 'sched X config ...', normally writes
+	 *	in the area of size sch_arg
+	 *
+	 * destroy	called on 'sched delete', frees everything
+	 *	in sch_arg (other parts are handled by more specific
+	 *	functions)
+	 *
+	 * new_sched    called when a new instance is created, e.g.
+	 *	to create the local queue for !MULTIQUEUE, set V or
+	 *	copy parameters for WFQ, and so on.
+	 *
+	 * free_sched	called when deleting an instance, cleans
+	 *	extra data in the per-instance area.
+	 *
+	 * new_fsk	called when a flowset is linked to a scheduler,
+	 *	e.g. to validate parameters such as weights etc.
+	 * free_fsk	when a flowset is unlinked from a scheduler.
+	 *	(probably unnecessary)
+	 *
+	 * new_queue	called to set the per-queue parameters,
+	 *	e.g. S and F, adjust sum of weights in the parent, etc.
+	 *
+	 *	The new_queue callback is normally called when
+	 *	creating a new queue. In some cases (such as a
+	 *	scheduler change or reconfiguration) it can be called
+	 *	with a non empty queue. In that case the new_queue
+	 *	callback may need to reinsert the pending traffic:
+	 *	it should do so by eventually calling enqueue()
+	 *	passing as m the first element in the queue.
+	 *
+	 * free_queue	actions related to a queue removal, e.g. undo
+	 *	all the above. If the queue has data in it, also remove
+	 *	from the scheduler. This can e.g. happen during a reconfigure.
+	 */
+	int (*enqueue)(struct dn_sch_inst *, struct dn_queue *,
+		struct mbuf *);
+	struct mbuf * (*dequeue)(struct dn_sch_inst *);
+
+	int (*config)(struct dn_schk *);
+	int (*destroy)(struct dn_schk*);
+	int (*new_sched)(struct dn_sch_inst *);
+	int (*free_sched)(struct dn_sch_inst *);
+	int (*new_fsk)(struct dn_fsk *f);
+	int (*free_fsk)(struct dn_fsk *f);
+	int (*new_queue)(struct dn_queue *q);
+	int (*free_queue)(struct dn_queue *q);
+
+	/* run-time fields */
+	int ref_count;      /* XXX number of instances in the system */
+	SLIST_ENTRY(dn_alg) next; /* Next scheduler in the list */
+};
+
+/*
+ * MSVC does not support designated initializers so we need this ugly
+ * macro: on _WIN32 the ".field =" designator is dropped and the
+ * initializers rely purely on positional order.
+ */
+#ifdef _WIN32
+#define _SI(fld)
+#else
+#define _SI(fld)	fld
+#endif
+
+/*
+ * Additionally, dummynet exports some functions and macros
+ * to be used by schedulers:
+ */
+
+void dn_free_pkts(struct mbuf *mnext);
+int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop);
+/* bound a variable between min and max */
+int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg);
+
+/*
+ * Extract the head of a queue, update stats. Must be the very last
+ * thing done on a dequeue as the queue itself may go away.
+ */
+static __inline struct mbuf*
+dn_dequeue(struct dn_queue *q)
+{
+	struct mbuf *m = q->mq.head;
+	if (m == NULL)
+		return NULL;
+	q->mq.head = m->m_nextpkt;
+
+	/* Update stats for the queue */
+	q->ni.length--;
+	q->ni.len_bytes -= m->m_pkthdr.len;
+	/* ... and for the scheduler instance, if the queue has one */
+	if (q->_si) {
+		q->_si->ni.length--;
+		q->_si->ni.len_bytes -= m->m_pkthdr.len;
+	}
+	if (q->ni.length == 0) /* queue is now idle */
+		q->q_time = dn_cfg.curr_time;
+	return m;
+}
+
+int dn_sched_modevent(module_t mod, int cmd, void *arg);
+
+/*
+ * Declare a dummynet scheduler kernel module: registers name##_mod
+ * with dn_sched_modevent() as the event handler and records the
+ * dependency on the dummynet module.
+ */
+#define DECLARE_DNSCHED_MODULE(name, dnsched)			\
+	static moduledata_t name##_mod = {			\
+		#name, dn_sched_modevent, dnsched		\
+	};							\
+	DECLARE_MODULE(name, name##_mod, 			\
+		SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 	\
+        MODULE_DEPEND(name, dummynet, 3, 3, 3);
+#endif /* _DN_SCHED_H */
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c b/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c
new file mode 100644
index 00000000..154a7ac6
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c
@@ -0,0 +1,122 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+/*
+ * This file implements a FIFO scheduler for a single queue.
+ * The queue is allocated as part of the scheduler instance,
+ * and there is a single flowset in the template which stores
+ * queue size and policy.
+ * Enqueue and dequeue use the default library functions.
+ */
+static int
+fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m)
+{
+	/* XXX if called with q != NULL and m=NULL, this is a
+	 * re-enqueue from an existing scheduler, which we should
+	 * handle.
+	 */
+	/* the single FIFO queue lives right after the instance (si+1) */
+	return dn_enqueue((struct dn_queue *)(si+1), m, 0);
+}
+
+static struct mbuf *
+fifo_dequeue(struct dn_sch_inst *si)
+{
+	/* dequeue from the queue embedded after the instance */
+	return dn_dequeue((struct dn_queue *)(si + 1));
+}
+
+static int
+fifo_new_sched(struct dn_sch_inst *si)
+{
+	/* This scheduler instance contains the queue */
+	struct dn_queue *q = (struct dn_queue *)(si + 1);
+
+	/* initialize the embedded queue and link it to instance/flowset */
+	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+	q->_si = si;
+	q->fs = si->sched->fs;
+	return 0;
+}
+
+static int
+fifo_free_sched(struct dn_sch_inst *si)
+{
+	struct dn_queue *q = (struct dn_queue *)(si + 1);
+	/* drop any packets still queued, then clear the queue state */
+	dn_free_pkts(q->mq.head);
+	bzero(q, sizeof(*q));
+	return 0;
+}
+
+/*
+ * FIFO scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fifo_desc = {
+	_SI( .type = ) DN_SCHED_FIFO,
+	_SI( .name = ) "FIFO",
+	_SI( .flags = ) 0,
+
+	_SI( .schk_datalen = ) 0,
+	_SI( .si_datalen = ) sizeof(struct dn_queue), /* embedded queue */
+	_SI( .q_datalen = ) 0,
+
+	_SI( .enqueue = ) fifo_enqueue,
+	_SI( .dequeue = ) fifo_dequeue,
+	_SI( .config = )  NULL,
+	_SI( .destroy = )  NULL,
+	_SI( .new_sched = ) fifo_new_sched,
+	_SI( .free_sched = ) fifo_free_sched,
+	_SI( .new_fsk = ) NULL,
+	_SI( .free_fsk = )  NULL,
+	_SI( .new_queue = ) NULL,
+	_SI( .free_queue = ) NULL,
+};
+
+DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_prio.c b/freebsd/sys/netpfil/ipfw/dn_sched_prio.c
new file mode 100644
index 00000000..0679db9d
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_prio.c
@@ -0,0 +1,231 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_PRIO 5 //XXX
+
+#if !defined(_KERNEL) || !defined(__linux__)
+#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
+#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif
+
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
+#endif
+
+/* Size of the array of queues pointers. */
+#define BITMAP_T unsigned long
+#define MAXPRIO (sizeof(BITMAP_T) * 8)
+
+/*
+ * The scheduler instance contains an array of pointers to queues,
+ * one for each priority, and a bitmap listing backlogged queues.
+ */
+struct prio_si {
+ BITMAP_T bitmap; /* array bitmap */
+ struct dn_queue *q_array[MAXPRIO]; /* Array of queues pointers */
+};
+
+/*
+ * If a queue with the same priority is already backlogged, use
+ * that one instead of the queue passed as argument.
+ */
+static int
+prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+	struct prio_si *si = (struct prio_si *)(_si + 1);
+	int prio = q->fs->fs.par[0]; /* priority from flowset parameter 0 */
+
+	if (test_bit(prio, &si->bitmap) == 0) {
+		/* No queue with this priority, insert */
+		__set_bit(prio, &si->bitmap);
+		si->q_array[prio] = q;
+	} else { /* use the existing queue */
+		q = si->q_array[prio];
+	}
+	if (dn_enqueue(q, m, 0))
+		return 1; /* packet was dropped */
+	return 0;
+}
+
+/*
+ * Packets are dequeued only from the highest priority queue.
+ * ffs() returns the lowest set bit in the bitmap; that bit (minus 1)
+ * is the array index holding the pointer to the highest priority
+ * queue.
+ * After the dequeue, if this queue becomes empty, its index is removed
+ * from the bitmap.
+ * The scheduler is idle if the bitmap is empty.
+ *
+ * NOTE: highest priority is 0, lowest is sched->max_prio_q
+ */
+static struct mbuf *
+prio_dequeue(struct dn_sch_inst *_si)
+{
+	struct prio_si *si = (struct prio_si *)(_si + 1);
+	struct mbuf *m;
+	struct dn_queue *q;
+	int prio;
+
+	if (si->bitmap == 0) /* scheduler idle */
+		return NULL;
+
+	/* lowest set bit == highest (numerically smallest) priority */
+	prio = ffs(si->bitmap) - 1;
+
+	/* Take the highest priority queue in the scheduler */
+	q = si->q_array[prio];
+	// assert(q)
+
+	m = dn_dequeue(q);
+	if (q->mq.head == NULL) {
+		/* Queue is now empty, remove from scheduler
+		 * and mark it
+		 */
+		si->q_array[prio] = NULL;
+		__clear_bit(prio, &si->bitmap);
+	}
+	return m;
+}
+
+static int
+prio_new_sched(struct dn_sch_inst *_si)
+{
+	struct prio_si *si = (struct prio_si *)(_si + 1);
+
+	/* start with no backlogged queues at any priority */
+	bzero(si->q_array, sizeof(si->q_array));
+	si->bitmap = 0;
+
+	return 0;
+}
+
+static int
+prio_new_fsk(struct dn_fsk *fs)
+{
+	/* Clamp the priority into the range 0 .. MAXPRIO-1 */
+	ipdn_bound_var(&fs->fs.par[0], 0, 0, MAXPRIO - 1, "PRIO priority");
+	return 0;
+}
+
+static int
+prio_new_queue(struct dn_queue *q)
+{
+	struct prio_si *si = (struct prio_si *)(q->_si + 1);
+	int prio = q->fs->fs.par[0];
+	struct dn_queue *oldq;
+
+	q->ni.oid.subtype = DN_SCHED_PRIO;
+
+	if (q->mq.head == NULL)
+		return 0;
+
+	/* Queue is not empty: must insert it in the scheduler or append
+	 * its mbufs to the existing queue for this priority. This partly
+	 * duplicates prio_enqueue().
+	 */
+	if (test_bit(prio, &si->bitmap) == 0) {
+		/* No queue with this priority, insert */
+		__set_bit(prio, &si->bitmap);
+		si->q_array[prio] = q;
+	} else if ( (oldq = si->q_array[prio]) != q) {
+		/* must append to the existing queue.
+		 * can simply append q->mq.head to q2->...
+		 * and add the counters to those of q2
+		 */
+		oldq->mq.tail->m_nextpkt = q->mq.head;
+		oldq->mq.tail = q->mq.tail;
+		oldq->ni.length += q->ni.length;
+		q->ni.length = 0;
+		oldq->ni.len_bytes += q->ni.len_bytes;
+		q->ni.len_bytes = 0;
+		q->mq.tail = q->mq.head = NULL;
+	}
+	return 0;
+}
+
+static int
+prio_free_queue(struct dn_queue *q)
+{
+	int prio = q->fs->fs.par[0];
+	struct prio_si *si = (struct prio_si *)(q->_si + 1);
+
+	/* if this queue is the backlogged one for its priority, unlink it */
+	if (si->q_array[prio] == q) {
+		si->q_array[prio] = NULL;
+		__clear_bit(prio, &si->bitmap);
+	}
+	return 0;
+}
+
+
+/* PRIO scheduler descriptor, see struct dn_alg in dn_sched.h */
+static struct dn_alg prio_desc = {
+	_SI( .type = )  DN_SCHED_PRIO,
+	_SI( .name = ) "PRIO",
+	_SI( .flags = ) DN_MULTIQUEUE,
+
+	/* we need extra space in the si and the queue */
+	_SI( .schk_datalen = ) 0,
+	_SI( .si_datalen = ) sizeof(struct prio_si),
+	_SI( .q_datalen = ) 0,
+
+	_SI( .enqueue = ) prio_enqueue,
+	_SI( .dequeue = ) prio_dequeue,
+
+	_SI( .config = )  NULL,
+	_SI( .destroy = )  NULL,
+	_SI( .new_sched = ) prio_new_sched,
+	_SI( .free_sched = ) NULL,
+
+	_SI( .new_fsk = ) prio_new_fsk,
+	_SI( .free_fsk = )  NULL,
+
+	_SI( .new_queue = ) prio_new_queue,
+	_SI( .free_queue = ) prio_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c b/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c
new file mode 100644
index 00000000..461c40a5
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c
@@ -0,0 +1,866 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#ifdef QFQ_DEBUG
+struct qfq_sched;
+static void dump_sched(struct qfq_sched *q, const char *msg);
+/* NO(x) compiles its argument only in debug builds (see uses below). */
+#define NO(x) x
+#else
+#define NO(x)
+#endif
+#define DN_SCHED_QFQ	4 // XXX Where?
+/* One machine word used as a bit set; bit ops below assume indices 0..31. */
+typedef unsigned long bitmap;
+
+/*
+ * bitmaps ops are critical. Some linux versions have __fls
+ * and the bitmap ops. Some machines have ffs
+ */
+#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
+/* Portable fallback: 1-based position of the most significant set bit,
+ * 0 when n == 0 (matches the usual fls() contract). */
+int fls(unsigned int n)
+{
+	int i = 0;
+	for (i = 0; n > 0; n >>= 1, i++)
+		;
+	return i;
+}
+#endif
+
+#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
+/* Zero-based index of the most significant set bit.
+ * NOTE(review): returns (unsigned long)-1 for word == 0 since fls(0) is 0;
+ * callers must guarantee word != 0. */
+static inline unsigned long __fls(unsigned long word)
+{
+	return fls(word) - 1;
+}
+#endif
+
+#if !defined(_KERNEL) || !defined(__linux__)
+#ifdef QFQ_DEBUG
+/* Debug variants: range-check the index before touching the word. */
+int test_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	return *p & (1<<ix);
+}
+void __set_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	*p |= (1<<ix);
+}
+void __clear_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	*p &= ~(1<<ix);
+}
+#else /* !QFQ_DEBUG */
+/* XXX do we have fast version, or leave it to the compiler ? */
+/* 1<<(ix) is int-width, so these are only valid for ix in 0..31. */
+#define test_bit(ix, pData)	((*pData) & (1<<(ix)))
+#define __set_bit(ix, pData)	(*pData) |= (1<<(ix))
+#define __clear_bit(ix, pData)	(*pData) &= ~(1<<(ix))
+#endif /* !QFQ_DEBUG */
+#endif /* !__linux__ */
+
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData)	(*pData) &= ~(1<<(ix))
+#endif
+
+/*-------------------------------------------*/
+/*
+
+Virtual time computations.
+
+S, F and V are all computed in fixed point arithmetic with
+FRAC_BITS decimal bits.
+
+ QFQ_MAX_INDEX is the maximum index allowed for a group. We need
+ one bit per index.
+ QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
+ The layout of the bits is as below:
+
+ [ MTU_SHIFT ][ FRAC_BITS ]
+ [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
+ ^.__grp->index = 0
+ *.__grp->slot_shift
+
+ where MIN_SLOT_SHIFT is derived by difference from the others.
+
+The max group index corresponds to Lmax/w_min, where
+Lmax=1<<MTU_SHIFT, w_min = 1 .
+From this, and knowing how many groups (MAX_INDEX) we want,
+we can derive the shift corresponding to each group.
+
+Because we often need to compute
+ F = S + len/w_i and V = V + len/wsum
+instead of storing w_i store the value
+ inv_w = (1<<FRAC_BITS)/w_i
+so we can do F = S + len * inv_w * wsum.
+We use W_TOT in the formulas so we can easily move between
+static and adaptive weight sum.
+
+The per-scheduler-instance data contain all the data structures
+for the scheduler: bitmaps and bucket lists.
+
+ */
+/*
+ * Maximum number of consecutive slots occupied by backlogged classes
+ * inside a group. This is approx lmax/lmin + 5.
+ * XXX check because it poses constraints on MAX_INDEX
+ */
+#define QFQ_MAX_SLOTS 32
+/*
+ * Shifts used for class<->group mapping. Class weights are
+ * in the range [1, QFQ_MAX_WEIGHT], we to map each class i to the
+ * group with the smallest index that can support the L_i / r_i
+ * configured for the class.
+ *
+ * grp->index is the index of the group; and grp->slot_shift
+ * is the shift for the corresponding (scaled) sigma_i.
+ *
+ * When computing the group index, we do (len<<FP_SHIFT)/weight,
+ * then compute an FLS (which is like a log2()), and if the result
+ * is below the MAX_INDEX region we use 0 (which is the same as
+ * using a larger len).
+ */
+#define QFQ_MAX_INDEX 19
+#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
+
+#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
+#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
+//#define IWSUM (q->i_wsum)
+#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM)
+
+#define FRAC_BITS 30 /* fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS)
+
+#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
+#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+
+/*
+ * Possible group states, also indexes for the bitmaps array in
+ * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3
+ * (E/I = eligible/ineligible, R/B = ready/blocked).
+ */
+enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
+
+struct qfq_group;
+/*
+ * additional queue info. Some of this info should come from
+ * the flowset, we copy them here for faster processing.
+ * This is an overlay of the struct dn_queue
+ */
+struct qfq_class {
+	struct dn_queue _q;	/* must be first: a qfq_class IS a dn_queue */
+	uint64_t S, F;		/* flow timestamps (exact) */
+	struct qfq_class *next; /* Link for the slot list. */
+
+	/* group we belong to. In principle we would need the index,
+	 * which is log_2(lmax/weight), but we never reference it
+	 * directly, only the group.
+	 */
+	struct qfq_group *grp;
+
+	/* these are copied from the flowset. */
+	uint32_t	inv_w;	/* ONE_FP/weight */
+	uint32_t 	lmax;	/* Max packet size for this flow. */
+};
+
+/* Group descriptor, see the paper for details.
+ * Basically this contains the bucket lists
+ */
+struct qfq_group {
+	uint64_t S, F;			/* group timestamps (approx). */
+	unsigned int slot_shift;	/* Slot shift. */
+	unsigned int index;		/* Group index. */
+	unsigned int front;		/* Index of the front slot. */
+	bitmap full_slots;		/* non-empty slots */
+
+	/* Array of lists of active classes. */
+	struct qfq_class *slots[QFQ_MAX_SLOTS];
+};
+
+/* scheduler instance descriptor. */
+struct qfq_sched {
+	uint64_t	V;		/* Precise virtual time. */
+	uint32_t	wsum;		/* weight sum */
+	NO(uint32_t	i_wsum;	/* ONE_FP/w_sum */
+	uint32_t	queued;	/* debugging; named 'queued' because
+				 * qfq_dequeue() and dump_sched() reference
+				 * q->queued -- the former '_queued' broke
+				 * QFQ_DEBUG builds. */
+	uint32_t	loops;	/* debugging */)
+	bitmap bitmaps[QFQ_MAX_STATE];	/* Group bitmaps. */
+	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
+};
+
+/*---- support functions ----------------------------*/
+
+/* Generic comparison function, handling wraparound.
+ * True iff a is "after" b in wrapping 64-bit time (signed difference > 0). */
+static inline int qfq_gt(uint64_t a, uint64_t b)
+{
+	return (int64_t)(a - b) > 0;
+}
+
+/* Round a precise timestamp to its slotted value (clear the low 'shift' bits). */
+static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
+{
+	return ts & ~((1ULL << shift) - 1);
+}
+
+/* return the pointer to the group with lowest index in the bitmap.
+ * NOTE(review): ffs() takes int, so this relies on group indices being
+ * below 32 -- guaranteed here since QFQ_MAX_INDEX is 19.
+ * Caller must ensure bitmap != 0. */
+static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
+	unsigned long bitmap)
+{
+	int index = ffs(bitmap) - 1; // zero-based
+	return &q->groups[index];
+}
+
+/*
+ * Calculate a flow index, given its weight and maximum packet length.
+ * index = log_2(maxlen/weight) but we need to apply the scaling.
+ * This is used only once at flow creation.
+ */
+static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
+{
+	uint64_t slot_size = (uint64_t)maxlen *inv_w;
+	unsigned long size_map;
+	int index = 0;
+
+	size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
+	if (!size_map)
+		goto out;
+
+	index = __fls(size_map) + 1;	// basically a log_2()
+	/* decrement by one when slot_size is exactly a power of two
+	 * (the subtraction yields 0, so !... is 1). */
+	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
+
+	if (index < 0)
+		index = 0;
+
+out:
+	ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
+	return index;
+}
+/*---- end support functions ----*/
+
+/*-------- API calls --------------------------------*/
+/*
+ * Validate and copy parameters from flowset.
+ * par[0] = weight (clamped to 1 if zero or above QFQ_MAX_WEIGHT),
+ * par[1] = max packet length.  Computes the inverse weight, maps the
+ * class to its group, and accounts the weight in the per-instance sum.
+ * Returns EINVAL if the total weight would exceed QFQ_MAX_WSUM.
+ */
+static int
+qfq_new_queue(struct dn_queue *_q)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+	struct qfq_class *cl = (struct qfq_class *)_q;
+	int i;
+	uint32_t w;	/* approximated weight */
+
+	/* import parameters from the flowset. They should be correct
+	 * already.
+	 */
+	w = _q->fs->fs.par[0];
+	cl->lmax = _q->fs->fs.par[1];
+	if (!w || w > QFQ_MAX_WEIGHT) {
+		w = 1;
+		D("rounding weight to 1");
+	}
+	cl->inv_w = ONE_FP/w;
+	/* re-derive w from inv_w so the accounted value matches what
+	 * the fixed-point representation can actually express. */
+	w = ONE_FP/cl->inv_w;
+	if (q->wsum + w > QFQ_MAX_WSUM)
+		return EINVAL;
+
+	i = qfq_calc_index(cl->inv_w, cl->lmax);
+	cl->grp = &q->groups[i];
+	q->wsum += w;
+	// XXX cl->S = q->V; ?
+	// XXX compute q->i_wsum
+	return 0;
+}
+
+/* remove an empty queue: give back the class's weight to the instance
+ * sum.  inv_w is zeroed so a second call is a no-op. */
+static int
+qfq_free_queue(struct dn_queue *_q)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+	struct qfq_class *cl = (struct qfq_class *)_q;
+	if (cl->inv_w) {
+		q->wsum -= ONE_FP/cl->inv_w;
+		cl->inv_w = 0;	/* reset weight to avoid run twice */
+	}
+	return 0;
+}
+
+/* Calculate a mask to mimic what would be ffs_from():
+ * keep only the bits at position 'from' and above. */
+static inline unsigned long
+mask_from(unsigned long bitmap, int from)
+{
+	return bitmap & ~((1UL << from) - 1);
+}
+
+/*
+ * The state computation relies on ER=0, IR=1, EB=2, IB=3
+ * First compute eligibility comparing grp->S, q->V,
+ * then check if someone is blocking us and possibly add EB
+ */
+static inline unsigned int
+qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
+{
+	/* if S > V we are not eligible */
+	unsigned int state = qfq_gt(grp->S, q->V);
+	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
+	struct qfq_group *next;
+
+	if (mask) {
+		/* lowest-index ER group at or above ours; if it finishes
+		 * earlier than us, we are blocked (add the EB/IB bit). */
+		next = qfq_ffs(q, mask);
+		if (qfq_gt(grp->F, next->F))
+			state |= EB;
+	}
+
+	return state;
+}
+
+/*
+ * In principle
+ *	q->bitmaps[dst] |= q->bitmaps[src] & mask;
+ *	q->bitmaps[src] &= ~mask;
+ * but we should make sure that src != dst
+ */
+static inline void
+qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
+{
+	q->bitmaps[dst] |= q->bitmaps[src] & mask;
+	q->bitmaps[src] &= ~mask;
+}
+
+/* After the group at 'index' finished a packet (old finish time
+ * old_finish), move the blocked groups below it to the ready sets,
+ * unless a remaining ER group still finishes no later than old_finish. */
+static inline void
+qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
+{
+	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
+	struct qfq_group *next;
+
+	if (mask) {
+		next = qfq_ffs(q, mask);
+		if (!qfq_gt(next->F, old_finish))
+			return;
+	}
+
+	mask = (1UL << index) - 1;
+	qfq_move_groups(q, mask, EB, ER);
+	qfq_move_groups(q, mask, IB, IR);
+}
+
+/*
+ * perhaps
+ *
+	old_V ^= q->V;
+	old_V >>= QFQ_MIN_SLOT_SHIFT;
+	if (old_V) {
+		...
+	}
+ *
+ */
+/* When the virtual time V crosses a slot boundary, every ineligible
+ * group whose slot is at or below the crossed one becomes eligible:
+ * move those bits from IR->ER and IB->EB. */
+static inline void
+qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+	unsigned long mask, vslot, old_vslot;
+
+	vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
+	old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
+
+	if (vslot != old_vslot) {
+		/* all groups up to the highest slot bit that changed */
+		mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
+		qfq_move_groups(q, mask, IR, ER);
+		qfq_move_groups(q, mask, IB, EB);
+	}
+}
+
+/*
+ * XXX we should make sure that slot becomes less than 32.
+ * This is guaranteed by the input values.
+ * roundedS is always cl->S rounded on grp->slot_shift bits.
+ */
+/* Insert class cl at the bucket corresponding to roundedS, relative
+ * to the group start grp->S.  Buckets form a circular array indexed
+ * from grp->front; the class is pushed at the head of the bucket list. */
+static inline void
+qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
+{
+	uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
+	unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
+
+	cl->next = grp->slots[i];
+	grp->slots[i] = cl;
+	__set_bit(slot, &grp->full_slots);
+}
+
+/*
+ * remove the entry from the slot:
+ * pop the head class of the front bucket; if the bucket becomes
+ * empty, clear bit 0 (the front bucket) in full_slots.
+ */
+static inline void
+qfq_front_slot_remove(struct qfq_group *grp)
+{
+	struct qfq_class **h = &grp->slots[grp->front];
+
+	*h = (*h)->next;
+	if (!*h)
+		__clear_bit(0, &grp->full_slots);
+}
+
+/*
+ * Returns the first full queue in a group. As a side effect,
+ * adjust the bucket list so the first non-empty bucket is at
+ * position 0 in full_slots.
+ */
+static inline struct qfq_class *
+qfq_slot_scan(struct qfq_group *grp)
+{
+	int i;
+
+	ND("grp %d full %x", grp->index, grp->full_slots);
+	if (!grp->full_slots)
+		return NULL;
+
+	i = ffs(grp->full_slots) - 1; // zero-based
+	if (i > 0) {
+		/* rotate front forward and shift the occupancy mask so
+		 * bit 0 again corresponds to the front bucket */
+		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
+		grp->full_slots >>= i;
+	}
+
+	return grp->slots[grp->front];
+}
+
+/*
+ * adjust the bucket list. When the start time of a group decreases,
+ * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
+ * move the objects. The mask of occupied slots must be shifted
+ * because we use ffs() to find the first non-empty slot.
+ * This covers decreases in the group's start time, but what about
+ * increases of the start time ?
+ * Here too we should make sure that i is less than 32
+ */
+static inline void
+qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
+{
+	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
+
+	grp->full_slots <<= i;
+	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
+}
+
+
+/* If there are ineligible groups, advance V to the earliest ineligible
+ * start time when nothing is in ER (so the scheduler does not stall),
+ * then promote the groups that became eligible. */
+static inline void
+qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+	bitmap ineligible;
+
+	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
+	if (ineligible) {
+		if (!q->bitmaps[ER]) {
+			struct qfq_group *grp;
+			grp = qfq_ffs(q, ineligible);
+			if (qfq_gt(grp->S, q->V))
+				q->V = grp->S;
+		}
+		qfq_make_eligible(q, old_V);
+	}
+}
+
+/*
+ * Updates the class, returns true if also the group needs to be updated.
+ * Called after a packet has been extracted: the new start is the old
+ * finish; with a backlogged queue the class is re-slotted unless its
+ * rounded start still matches the group start.
+ */
+static inline int
+qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
+	    struct qfq_class *cl)
+{
+
+	cl->S = cl->F;
+	if (cl->_q.mq.head == NULL) {
+		qfq_front_slot_remove(grp);
+	} else {
+		unsigned int len;
+		uint64_t roundedS;
+
+		len = cl->_q.mq.head->m_pkthdr.len;
+		cl->F = cl->S + (uint64_t)len * cl->inv_w;
+		roundedS = qfq_round_down(cl->S, grp->slot_shift);
+		if (roundedS == grp->S)
+			return 0;
+
+		qfq_front_slot_remove(grp);
+		qfq_slot_insert(grp, cl, roundedS);
+	}
+	return 1;
+}
+
+/*
+ * dequeue callback: pick the lowest-index group in ER, extract the
+ * head packet of its front bucket, advance the virtual time V by
+ * len * IWSUM, then update class/group state and unblock groups
+ * whose blocking finish time has passed.  Returns NULL when no
+ * group is eligible-and-ready.
+ * NOTE(review): the NO() statements reference q->queued and are only
+ * compiled under QFQ_DEBUG -- verify the field name in struct
+ * qfq_sched matches.
+ */
+static struct mbuf *
+qfq_dequeue(struct dn_sch_inst *si)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+	struct qfq_group *grp;
+	struct qfq_class *cl;
+	struct mbuf *m;
+	uint64_t old_V;
+
+	NO(q->loops++;)
+	if (!q->bitmaps[ER]) {
+		NO(if (q->queued)
+			dump_sched(q, "start dequeue");)
+		return NULL;
+	}
+
+	grp = qfq_ffs(q, q->bitmaps[ER]);
+
+	cl = grp->slots[grp->front];
+	/* extract from the first bucket in the bucket list */
+	m = dn_dequeue(&cl->_q);
+
+	if (!m) {
+		/* a class in ER must be backlogged; an empty queue here
+		 * means the bookkeeping is inconsistent */
+		D("BUG/* non-workconserving leaf */");
+		return NULL;
+	}
+	NO(q->queued--;)
+	old_V = q->V;
+	q->V += (uint64_t)m->m_pkthdr.len * IWSUM;
+	ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
+
+	if (qfq_update_class(q, grp, cl)) {
+		uint64_t old_F = grp->F;
+		cl = qfq_slot_scan(grp);
+		if (!cl) { /* group gone, remove from ER */
+			__clear_bit(grp->index, &q->bitmaps[ER]);
+			// grp->S = grp->F + 1; // XXX debugging only
+		} else {
+			uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
+			unsigned int s;
+
+			if (grp->S == roundedS)
+				goto skip_unblock;
+			grp->S = roundedS;
+			grp->F = roundedS + (2ULL << grp->slot_shift);
+			/* remove from ER and put in the new set */
+			__clear_bit(grp->index, &q->bitmaps[ER]);
+			s = qfq_calc_state(q, grp);
+			__set_bit(grp->index, &q->bitmaps[s]);
+		}
+		/* we need to unblock even if the group has gone away */
+		qfq_unblock_groups(q, grp->index, old_F);
+	}
+
+skip_unblock:
+	qfq_update_eligible(q, old_V);
+	NO(if (!q->bitmaps[ER] && q->queued)
+		dump_sched(q, "end dequeue");)
+
+	return m;
+}
+
+/*
+ * Assign a reasonable start time for a new flow k in group i.
+ * Admissible values for \hat(F) are multiples of \sigma_i
+ * no greater than V+\sigma_i . Larger values mean that
+ * we had a wraparound so we consider the timestamp to be stale.
+ *
+ * If F is not stale and F >= V then we set S = F.
+ * Otherwise we should assign S = V, but this may violate
+ * the ordering in ER. So, if we have groups in ER, set S to
+ * the F_j of the first group j which would be blocking us.
+ * We are guaranteed not to move S backward because
+ * otherwise our group i would still be blocked.
+ */
+static inline void
+qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
+{
+	unsigned long mask;
+	uint32_t limit, roundedF;
+	/* NOTE(review): limit and roundedF are 32-bit while cl->F and
+	 * q->V are uint64_t, so qfq_round_down()'s result is truncated
+	 * here -- confirm this is intended (other qfq implementations
+	 * keep these 64-bit). */
+	int slot_shift = cl->grp->slot_shift;
+
+	roundedF = qfq_round_down(cl->F, slot_shift);
+	limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
+
+	if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
+		/* timestamp was stale */
+		mask = mask_from(q->bitmaps[ER], cl->grp->index);
+		if (mask) {
+			struct qfq_group *next = qfq_ffs(q, mask);
+			if (qfq_gt(roundedF, next->F)) {
+				cl->S = next->F;
+				return;
+			}
+		}
+		cl->S = q->V;
+	} else { /* timestamp is not stale */
+		cl->S = cl->F;
+	}
+}
+
+/*
+ * enqueue callback.  Returns 1 if the packet was dropped by
+ * dn_enqueue(), 0 otherwise.  Timestamps and group state are only
+ * recomputed when the queue was idle (i.e. m became the new head);
+ * otherwise the packet simply joins the backlog.
+ */
+static int
+qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+	struct qfq_group *grp;
+	struct qfq_class *cl = (struct qfq_class *)_q;
+	uint64_t roundedS;
+	int s;
+
+	NO(q->loops++;)
+	DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
+		_q, cl->inv_w, cl->grp->index);
+	/* XXX verify that the packet obeys the parameters */
+	if (m != _q->mq.head) {
+		if (dn_enqueue(_q, m, 0)) /* packet was dropped */
+			return 1;
+		NO(q->queued++;)
+		if (m != _q->mq.head)
+			return 0;
+	}
+	/* If reach this point, queue q was idle */
+	grp = cl->grp;
+	qfq_update_start(q, cl); /* adjust start time */
+	/* compute new finish time and rounded start. */
+	cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
+	roundedS = qfq_round_down(cl->S, grp->slot_shift);
+
+	/*
+	 * insert cl in the correct bucket.
+	 * If cl->S >= grp->S we don't need to adjust the
+	 * bucket list and simply go to the insertion phase.
+	 * Otherwise grp->S is decreasing, we must make room
+	 * in the bucket list, and also recompute the group state.
+	 * Finally, if there were no flows in this group and nobody
+	 * was in ER make sure to adjust V.
+	 */
+	if (grp->full_slots) {
+		if (!qfq_gt(grp->S, cl->S))
+			goto skip_update;
+		/* create a slot for this cl->S */
+		qfq_slot_rotate(q, grp, roundedS);
+		/* group was surely ineligible, remove */
+		__clear_bit(grp->index, &q->bitmaps[IR]);
+		__clear_bit(grp->index, &q->bitmaps[IB]);
+	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
+		q->V = roundedS;
+
+	grp->S = roundedS;
+	grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
+	s = qfq_calc_state(q, grp);
+	__set_bit(grp->index, &q->bitmaps[s]);
+	ND("new state %d 0x%x", s, q->bitmaps[s]);
+	ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
+skip_update:
+	qfq_slot_insert(grp, cl, roundedS);
+
+	return 0;
+}
+
+
+#if 0
+/* NOTE(review): dead code kept for reference.  Before enabling:
+ * qfq_deactivate_class() below reads cl->index, but struct qfq_class
+ * declares no 'index' field (only grp) -- this would not compile. */
+static inline void
+qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
+	struct qfq_class *cl, struct qfq_class **pprev)
+{
+	unsigned int i, offset;
+	uint64_t roundedS;
+
+	roundedS = qfq_round_down(cl->S, grp->slot_shift);
+	offset = (roundedS - grp->S) >> grp->slot_shift;
+	i = (grp->front + offset) % QFQ_MAX_SLOTS;
+
+#ifdef notyet
+	if (!pprev) {
+		pprev = &grp->slots[i];
+		while (*pprev && *pprev != cl)
+			pprev = &(*pprev)->next;
+	}
+#endif
+
+	*pprev = cl->next;
+	if (!grp->slots[i])
+		__clear_bit(offset, &grp->full_slots);
+}
+
+/*
+ * called to forcibly destroy a queue.
+ * If the queue is not in the front bucket, or if it has
+ * other queues in the front bucket, we can simply remove
+ * the queue with no other side effects.
+ * Otherwise we must propagate the event up.
+ * XXX description to be completed.
+ */
+static void
+qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
+				 struct qfq_class **pprev)
+{
+	struct qfq_group *grp = &q->groups[cl->index];
+	unsigned long mask;
+	uint64_t roundedS;
+	int s;
+
+	cl->F = cl->S;	// not needed if the class goes away.
+	qfq_slot_remove(q, grp, cl, pprev);
+
+	if (!grp->full_slots) {
+		/* nothing left in the group, remove from all sets.
+		 * Do ER last because if we were blocking other groups
+		 * we must unblock them.
+		 */
+		__clear_bit(grp->index, &q->bitmaps[IR]);
+		__clear_bit(grp->index, &q->bitmaps[EB]);
+		__clear_bit(grp->index, &q->bitmaps[IB]);
+
+		if (test_bit(grp->index, &q->bitmaps[ER]) &&
+		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
+			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
+			if (mask)
+				mask = ~((1UL << __fls(mask)) - 1);
+			else
+				mask = ~0UL;
+			qfq_move_groups(q, mask, EB, ER);
+			qfq_move_groups(q, mask, IB, IR);
+		}
+		__clear_bit(grp->index, &q->bitmaps[ER]);
+	} else if (!grp->slots[grp->front]) {
+		cl = qfq_slot_scan(grp);
+		roundedS = qfq_round_down(cl->S, grp->slot_shift);
+		if (grp->S != roundedS) {
+			__clear_bit(grp->index, &q->bitmaps[ER]);
+			__clear_bit(grp->index, &q->bitmaps[IR]);
+			__clear_bit(grp->index, &q->bitmaps[EB]);
+			__clear_bit(grp->index, &q->bitmaps[IB]);
+			grp->S = roundedS;
+			grp->F = roundedS + (2ULL << grp->slot_shift);
+			s = qfq_calc_state(q, grp);
+			__set_bit(grp->index, &q->bitmaps[s]);
+		}
+	}
+	qfq_update_eligible(q, q->V);
+}
+#endif
+
+/* new_fsk callback: clamp flowset parameters into range.
+ * par[0] = weight in [1, QFQ_MAX_WEIGHT] (default 1),
+ * par[1] = max packet length in [1, 2000] (default 1500). */
+static int
+qfq_new_fsk(struct dn_fsk *f)
+{
+	ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
+	ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
+	ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
+	return 0;
+}
+
+/*
+ * initialize a new scheduler instance: assign each group its index
+ * and slot shift (larger index = coarser slots).  The private data
+ * area after the dn_sch_inst is assumed to be zeroed by the caller.
+ */
+static int
+qfq_new_sched(struct dn_sch_inst *si)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+	struct qfq_group *grp;
+	int i;
+
+	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
+		grp = &q->groups[i];
+		grp->index = i;
+		grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
+					(QFQ_MAX_INDEX - i);
+	}
+	return 0;
+}
+
+/*
+ * QFQ scheduler descriptor, registered below via
+ * DECLARE_DNSCHED_MODULE.  Per-queue private data is the part of
+ * struct qfq_class beyond the embedded dn_queue.
+ */
+static struct dn_alg qfq_desc = {
+	_SI( .type = ) DN_SCHED_QFQ,
+	_SI( .name = ) "QFQ",
+	_SI( .flags = ) DN_MULTIQUEUE,
+
+	_SI( .schk_datalen = ) 0,
+	_SI( .si_datalen = ) sizeof(struct qfq_sched),
+	_SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
+
+	_SI( .enqueue = ) qfq_enqueue,
+	_SI( .dequeue = ) qfq_dequeue,
+
+	_SI( .config = ) NULL,
+	_SI( .destroy = ) NULL,
+	_SI( .new_sched = ) qfq_new_sched,
+	_SI( .free_sched = ) NULL,
+	_SI( .new_fsk = ) qfq_new_fsk,
+	_SI( .free_fsk = ) NULL,
+	_SI( .new_queue = ) qfq_new_queue,
+	_SI( .free_queue = ) qfq_free_queue,
+};
+
+DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
+
+#ifdef QFQ_DEBUG
+/*
+ * Debug helper: print the non-empty buckets, the occupancy mask and
+ * the timestamps of every group selected by 'mask'.
+ * Format fixes vs the original: full_slots is unsigned long (%lx not
+ * %x) and S/F are uint64_t, cast to unsigned long long for %llx --
+ * mismatched printf specifiers are undefined behavior.
+ */
+static void
+dump_groups(struct qfq_sched *q, uint32_t mask)
+{
+	int i, j;
+
+	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
+		struct qfq_group *g = &q->groups[i];
+
+		if (0 == (mask & (1<<i)))
+			continue;
+		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
+			if (g->slots[j])
+				D("    bucket %d %p", j, g->slots[j]);
+		}
+		D("full_slots 0x%lx", g->full_slots);
+		D("        %2d S 0x%20llx F 0x%llx %c", i,
+			(unsigned long long)g->S, (unsigned long long)g->F,
+			mask & (1<<i) ? '1' : '0');
+	}
+}
+
+/*
+ * Debug helper: dump the whole scheduler state (the four per-state
+ * bitmaps, which are unsigned long, hence %08lx) plus every group,
+ * prefixed by 'msg'.
+ */
+static void
+dump_sched(struct qfq_sched *q, const char *msg)
+{
+	D("--- in %s: ---", msg);
+	ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V);
+	D("    ER 0x%08lx", q->bitmaps[ER]);
+	D("    EB 0x%08lx", q->bitmaps[EB]);
+	D("    IR 0x%08lx", q->bitmaps[IR]);
+	D("    IB 0x%08lx", q->bitmaps[IB]);
+	dump_groups(q, 0xffffffff);
+}	/* dropped the stray ';' that followed the function body */
+#endif /* QFQ_DEBUG */
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_rr.c b/freebsd/sys/netpfil/ipfw/dn_sched_rr.c
new file mode 100644
index 00000000..c1862ab0
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_rr.c
@@ -0,0 +1,309 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_RR	3 // XXX Where?
+
+/* Per-queue state, overlaying struct dn_queue (which must be first). */
+struct rr_queue {
+	struct dn_queue q;		/* Standard queue */
+	int status;			/* 1: queue is in the list */
+	int credit;			/* Number of bytes to transmit */
+	int quantum;			/* quantum * C */
+	struct rr_queue *qnext;		/* */
+};
+
+/* struct rr_schk contains global config parameters
+ * and is right after dn_schk
+ */
+struct rr_schk {
+	int min_q;		/* Min quantum */
+	int max_q;		/* Max quantum */
+	int q_bytes;		/* Bytes per quantum */
+};
+
+/* per-instance round robin list, right after dn_sch_inst.
+ * head/tail delimit a circular singly-linked list of active queues. */
+struct rr_si {
+	struct rr_queue *head, *tail;	/* Pointer to current queue */
+};
+
+/* Append a queue to the rr list: mark it active, reset its credit,
+ * link it at the tail and keep the list circular. */
+static inline void
+rr_append(struct rr_queue *q, struct rr_si *si)
+{
+	q->status = 1;		/* mark as in-rr_list */
+	q->credit = q->quantum;	/* initialize credit */
+
+	/* append to the tail */
+	if (si->head == NULL)
+		si->head = q;
+	else
+		si->tail->qnext = q;
+	si->tail = q;		/* advance the tail pointer */
+	q->qnext = si->head;	/* make it circular */
+}
+
+/* Remove the head queue from circular list. */
+static inline void
+rr_remove_head(struct rr_si *si)
+{
+	if (si->head == NULL)
+		return; /* empty queue */
+	si->head->status = 0;
+
+	if (si->head == si->tail) {
+		/* last element: list becomes empty */
+		si->head = si->tail = NULL;
+		return;
+	}
+
+	si->head = si->head->qnext;
+	si->tail->qnext = si->head;
+}
+
+/* Remove a queue from circular list.
+ * XXX see if ti can be merge with remove_queue()
+ * NOTE(review): the search loop relies on q being in the list
+ * (status == 1); on a circular list prev never becomes NULL, so a
+ * queue flagged active but unlinked would loop forever.
+ */
+static inline void
+remove_queue_q(struct rr_queue *q, struct rr_si *si)
+{
+	struct rr_queue *prev;
+
+	if (q->status != 1)
+		return;
+	if (q == si->head) {
+		rr_remove_head(si);
+		return;
+	}
+
+	for (prev = si->head; prev; prev = prev->qnext) {
+		if (prev->qnext != q)
+			continue;
+		prev->qnext = q->qnext;
+		if (q == si->tail)
+			si->tail = prev;
+		q->status = 0;
+		break;
+	}
+}
+
+
+/* Advance the round-robin pointer: head and tail both move one step
+ * around the circular list. */
+static inline void
+next_pointer(struct rr_si *si)
+{
+	if (si->head == NULL)
+		return; /* empty queue */
+
+	si->head = si->head->qnext;
+	si->tail = si->tail->qnext;
+}
+
+/*
+ * enqueue callback.  Returns 1 if the packet was dropped by
+ * dn_enqueue(), 0 otherwise.  A queue that becomes backlogged
+ * (m became the new head) is appended to the round-robin list
+ * unless it is already there.
+ */
+static int
+rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+	struct rr_si *si;
+	struct rr_queue *rrq;
+
+	if (m != q->mq.head) {
+		if (dn_enqueue(q, m, 0)) /* packet was dropped */
+			return 1;
+		if (m != q->mq.head)
+			return 0;
+	}
+
+	/* If reach this point, queue q was idle */
+	si = (struct rr_si *)(_si + 1);
+	rrq = (struct rr_queue *)q;
+
+	if (rrq->status == 1) /* Queue is already in the queue list */
+		return 0;
+
+	/* Insert the queue in the queue list */
+	rr_append(rrq, si);
+
+	return 0;
+}
+
+/*
+ * dequeue callback: serve the head queue while its credit covers the
+ * head packet; otherwise recharge its credit by one quantum and move
+ * to the next queue (deficit round robin).  Empty queues are dropped
+ * from the list on the way.  Returns NULL when no queue has packets.
+ */
+static struct mbuf *
+rr_dequeue(struct dn_sch_inst *_si)
+{
+	/* Access scheduler instance private data */
+	struct rr_si *si = (struct rr_si *)(_si + 1);
+	struct rr_queue *rrq;
+	uint64_t len;
+
+	while ( (rrq = si->head) ) {
+		struct mbuf *m = rrq->q.mq.head;
+		if ( m == NULL) {
+			/* empty queue, remove from list */
+			rr_remove_head(si);
+			continue;
+		}
+		len = m->m_pkthdr.len;
+
+		/* NOTE(review): credit is int and len uint64_t, so this is a
+		 * mixed-sign comparison; safe while credit stays >= 0, which
+		 * the subtraction below preserves. */
+		if (len > rrq->credit) {
+			/* Packet too big */
+			rrq->credit += rrq->quantum;
+			/* Try next queue */
+			next_pointer(si);
+		} else {
+			rrq->credit -= len;
+			return dn_dequeue(&rrq->q);
+		}
+	}
+
+	/* no packet to dequeue*/
+	return NULL;
+}
+
+/* config callback: set the global quantum limits stored right after
+ * the dn_schk (used later by rr_new_fsk to clamp per-flow values). */
+static int
+rr_config(struct dn_schk *_schk)
+{
+	struct rr_schk *schk = (struct rr_schk *)(_schk + 1);
+	ND("called");
+
+	/* use reasonable quantums (64..2k bytes, default 1500) */
+	schk->min_q = 64;
+	schk->max_q = 2048;
+	schk->q_bytes = 1500;	/* quantum */
+
+	return 0;
+}
+
+/* new_sched callback: start with an empty round-robin list. */
+static int
+rr_new_sched(struct dn_sch_inst *_si)
+{
+	struct rr_si *si = (struct rr_si *)(_si + 1);
+
+	ND("called");
+	si->head = si->tail = NULL;
+
+	return 0;
+}
+
+/* free_sched callback: nothing owned by the instance to release. */
+static int
+rr_free_sched(struct dn_sch_inst *_si)
+{
+	ND("called");
+	/* Nothing to do? */
+	return 0;
+}
+
+/* new_fsk callback: clamp flowset parameters against the scheduler
+ * config set in rr_config(). */
+static int
+rr_new_fsk(struct dn_fsk *fs)
+{
+	struct rr_schk *schk = (struct rr_schk *)(fs->sched + 1);
+	/* par[0] is the weight, par[1] is the quantum step */
+	ipdn_bound_var(&fs->fs.par[0], 1,
+		1, 65536, "RR weight");
+	ipdn_bound_var(&fs->fs.par[1], schk->q_bytes,
+		schk->min_q, schk->max_q, "RR quantum");
+	return 0;
+}
+
+/* new_queue callback: derive the per-queue quantum from the flowset
+ * parameters (weight * quantum step), start with a full credit, and
+ * join the round-robin list immediately if already backlogged. */
+static int
+rr_new_queue(struct dn_queue *_q)
+{
+	struct rr_queue *q = (struct rr_queue *)_q;
+
+	_q->ni.oid.subtype = DN_SCHED_RR;
+
+	q->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1];
+	ND("called, q->quantum %d", q->quantum);
+	q->credit = q->quantum;
+	q->status = 0;
+
+	if (_q->mq.head != NULL) {
+		/* Queue NOT empty, insert in the queue list */
+		rr_append(q, (struct rr_si *)(_q->_si + 1));
+	}
+	return 0;
+}
+
+/* free_queue callback: unlink the queue from the round-robin list
+ * if it is currently active. */
+static int
+rr_free_queue(struct dn_queue *_q)
+{
+	struct rr_queue *q = (struct rr_queue *)_q;
+
+	ND("called");
+	if (q->status == 1) {
+		struct rr_si *si = (struct rr_si *)(_q->_si + 1);
+		remove_queue_q(q, si);
+	}
+	return 0;
+}
+
+/*
+ * RR scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * structures and function pointers.  Registered below via
+ * DECLARE_DNSCHED_MODULE; per-queue private data is the part of
+ * struct rr_queue beyond the embedded dn_queue.
+ */
+static struct dn_alg rr_desc = {
+	_SI( .type = ) DN_SCHED_RR,
+	_SI( .name = ) "RR",
+	_SI( .flags = ) DN_MULTIQUEUE,
+
+	_SI( .schk_datalen = ) 0,
+	_SI( .si_datalen = ) sizeof(struct rr_si),
+	_SI( .q_datalen = ) sizeof(struct rr_queue) - sizeof(struct dn_queue),
+
+	_SI( .enqueue = ) rr_enqueue,
+	_SI( .dequeue = ) rr_dequeue,
+
+	_SI( .config = ) rr_config,
+	_SI( .destroy = ) NULL,
+	_SI( .new_sched = ) rr_new_sched,
+	_SI( .free_sched = ) rr_free_sched,
+	_SI( .new_fsk = ) rr_new_fsk,
+	_SI( .free_fsk = ) NULL,
+	_SI( .new_queue = ) rr_new_queue,
+	_SI( .free_queue = ) rr_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c b/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c
new file mode 100644
index 00000000..77c4bbad
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c
@@ -0,0 +1,375 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#ifndef MAX64
+#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
+#endif
+
+/*
+ * timestamps are computed on 64 bit using fixed point arithmetic.
+ * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
+ * and sum of weights, respectively. FRAC_BITS is the number of
+ * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
+ * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
+ * using an unsigned 32-bit division, and to avoid wraparounds we need
+ * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
+ * As an example
+ * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
+ */
+#ifndef FRAC_BITS
+#define FRAC_BITS 28 /* shift for fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS)
+#endif
+
+/*
+ * Private information for the scheduler instance:
+ * sch_heap (key is Finish time) returns the next queue to serve
+ * ne_heap (key is Start time) stores not-eligible queues
+ * idle_heap (key=start/finish time) stores idle flows. It must
+ * support extract-from-middle.
+ * A flow is only in 1 of the three heaps.
+ * XXX todo: use a more efficient data structure, e.g. a tree sorted
+ * by F with min_subtree(S) in each node
+ */
+struct wf2qp_si {
+ struct dn_heap sch_heap; /* top extract - key Finish time */
+ struct dn_heap ne_heap; /* top extract - key Start time */
+ struct dn_heap idle_heap; /* random extract - key Start=Finish time */
+ uint64_t V; /* virtual time */
+ uint32_t inv_wsum; /* inverse of sum of weights */
+ uint32_t wsum; /* sum of weights */
+};
+
+struct wf2qp_queue {
+ struct dn_queue _q;
+ uint64_t S, F; /* start time, finish time */
+ uint32_t inv_w; /* ONE_FP / weight */
+ int32_t heap_pos; /* position (index) of struct in heap */
+};
+
+/*
+ * This file implements a WF2Q+ scheduler as it has been in dummynet
+ * since 2000.
+ * The scheduler supports per-flow queues and has O(log N) complexity.
+ *
+ * WF2Q+ needs to drain entries from the idle heap so that we
+ * can keep the sum of weights up to date. We can do it whenever
+ * we get a chance, or periodically, or following some other
+ * strategy. The function idle_check() drains at most N elements
+ * from the idle heap.
+ */
+static void
+idle_check(struct wf2qp_si *si, int n, int force)
+{
+ struct dn_heap *h = &si->idle_heap;
+ while (n-- > 0 && h->elements > 0 &&
+ (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
+ struct dn_queue *q = HEAP_TOP(h)->object;
+ struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+
+ heap_extract(h, NULL);
+ /* XXX to let the flowset delete the queue we should
+ * mark it as 'unused' by the scheduler.
+ */
+ alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
+ si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */
+ if (si->wsum > 0)
+ si->inv_wsum = ONE_FP/si->wsum;
+ }
+}
+
+static int
+wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+ struct dn_fsk *fs = q->fs;
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct wf2qp_queue *alg_fq;
+ uint64_t len = m->m_pkthdr.len;
+
+ if (m != q->mq.head) {
+ if (dn_enqueue(q, m, 0)) /* packet was dropped */
+ return 1;
+ if (m != q->mq.head) /* queue was already busy */
+ return 0;
+ }
+
+ /* If we reach this point, queue q was idle */
+ alg_fq = (struct wf2qp_queue *)q;
+
+ if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
+ /* F<S means timestamps are invalid ->brand new queue. */
+ alg_fq->S = si->V; /* init start time */
+ si->wsum += fs->fs.par[0]; /* add weight of new queue. */
+ si->inv_wsum = ONE_FP/si->wsum;
+ } else { /* if it was idle then it was in the idle heap */
+ heap_extract(&si->idle_heap, q);
+ alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
+ }
+ alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
+
+ /* if nothing is backlogged, make sure this flow is eligible */
+ if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
+ si->V = MAX64(alg_fq->S, si->V);
+
+ /*
+ * Look at eligibility. A flow is not eligible if S>V (when
+ * this happens, it means that there is some other flow already
+ * scheduled for the same pipe, so the sch_heap cannot be
+ * empty). If the flow is not eligible we just store it in the
+ * ne_heap. Otherwise, we store in the sch_heap.
+ * Note that for all flows in sch_heap (SCH), S_i <= V,
+ * and for all flows in ne_heap (NEH), S_i > V.
+ * So when we need to compute max(V, min(S_i)) forall i in
+ * SCH+NEH, we only need to look into NEH.
+ */
+ if (DN_KEY_LT(si->V, alg_fq->S)) {
+ /* S>V means flow Not eligible. */
+ if (si->sch_heap.elements == 0)
+ D("++ ouch! not eligible but empty scheduler!");
+ heap_insert(&si->ne_heap, alg_fq->S, q);
+ } else {
+ heap_insert(&si->sch_heap, alg_fq->F, q);
+ }
+ return 0;
+}
+
+/* XXX invariant: sch > 0 || V >= min(S in neh) */
+static struct mbuf *
+wf2qp_dequeue(struct dn_sch_inst *_si)
+{
+ /* Access scheduler instance private data */
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct mbuf *m;
+ struct dn_queue *q;
+ struct dn_heap *sch = &si->sch_heap;
+ struct dn_heap *neh = &si->ne_heap;
+ struct wf2qp_queue *alg_fq;
+
+ if (sch->elements == 0 && neh->elements == 0) {
+ /* we have nothing to do. We could kill the idle heap
+ * altogether and reset V
+ */
+ idle_check(si, 0x7fffffff, 1);
+ si->V = 0;
+ si->wsum = 0; /* should be set already */
+ return NULL; /* quick return if nothing to do */
+ }
+ idle_check(si, 1, 0); /* drain something from the idle heap */
+
+ /* make sure at least one element is eligible, bumping V
+ * and moving entries that have become eligible.
+ * We need to repeat the first part twice, before and
+ * after extracting the candidate, or enqueue() will
+ * find the data structure in a wrong state.
+ */
+ m = NULL;
+ for(;;) {
+ /*
+ * Compute V = max(V, min(S_i)). Remember that all elements
+ * in sch have by definition S_i <= V so if sch is not empty,
+ * V is surely the max and we must not update it. Conversely,
+ * if sch is empty we only need to look at neh.
+ * We don't need to move the queues, as it will be done at the
+ * next enqueue
+ */
+ if (sch->elements == 0 && neh->elements > 0) {
+ si->V = MAX64(si->V, HEAP_TOP(neh)->key);
+ }
+ while (neh->elements > 0 &&
+ DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
+ q = HEAP_TOP(neh)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ heap_extract(neh, NULL);
+ heap_insert(sch, alg_fq->F, q);
+ }
+ if (m) /* pkt found in previous iteration */
+ break;
+ /* ok we have at least one eligible pkt */
+ q = HEAP_TOP(sch)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ m = dn_dequeue(q);
+ heap_extract(sch, NULL); /* Remove queue from heap. */
+ si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
+ alg_fq->S = alg_fq->F; /* Update start time. */
+ if (q->mq.head == 0) { /* not backlogged any more. */
+ heap_insert(&si->idle_heap, alg_fq->F, q);
+ } else { /* Still backlogged. */
+ /* Update F, store in neh or sch */
+ uint64_t len = q->mq.head->m_pkthdr.len;
+ alg_fq->F += len * alg_fq->inv_w;
+ if (DN_KEY_LEQ(alg_fq->S, si->V)) {
+ heap_insert(sch, alg_fq->F, q);
+ } else {
+ heap_insert(neh, alg_fq->S, q);
+ }
+ }
+ }
+ return m;
+}
+
+static int
+wf2qp_new_sched(struct dn_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ int ofs = offsetof(struct wf2qp_queue, heap_pos);
+
+ /* all heaps support extract from middle */
+ if (heap_init(&si->idle_heap, 16, ofs) ||
+ heap_init(&si->sch_heap, 16, ofs) ||
+ heap_init(&si->ne_heap, 16, ofs)) {
+ heap_free(&si->ne_heap);
+ heap_free(&si->sch_heap);
+ heap_free(&si->idle_heap);
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static int
+wf2qp_free_sched(struct dn_sch_inst *_si)
+{
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+
+ heap_free(&si->sch_heap);
+ heap_free(&si->ne_heap);
+ heap_free(&si->idle_heap);
+
+ return 0;
+}
+
+static int
+wf2qp_new_fsk(struct dn_fsk *fs)
+{
+ ipdn_bound_var(&fs->fs.par[0], 1,
+ 1, 100, "WF2Q+ weight");
+ return 0;
+}
+
+static int
+wf2qp_new_queue(struct dn_queue *_q)
+{
+ struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
+
+ _q->ni.oid.subtype = DN_SCHED_WF2QP;
+ q->F = 0; /* not strictly necessary */
+ q->S = q->F + 1; /* mark timestamp as invalid. */
+ q->inv_w = ONE_FP / _q->fs->fs.par[0];
+ if (_q->mq.head != NULL) {
+ wf2qp_enqueue(_q->_si, _q, _q->mq.head);
+ }
+ return 0;
+}
+
+/*
+ * Called when the infrastructure removes a queue (e.g. flowset
+ * is reconfigured). Nothing to do if we did not 'own' the queue,
+ * otherwise remove it from the right heap and adjust the sum
+ * of weights.
+ */
+static int
+wf2qp_free_queue(struct dn_queue *q)
+{
+ struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+ struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
+
+ if (alg_fq->S >= alg_fq->F + 1)
+ return 0; /* nothing to do, not in any heap */
+ si->wsum -= q->fs->fs.par[0];
+ if (si->wsum > 0)
+ si->inv_wsum = ONE_FP/si->wsum;
+
+ /* extract from the heap. XXX TODO we may need to adjust V
+ * to make sure the invariants hold.
+ */
+ if (q->mq.head == NULL) {
+ heap_extract(&si->idle_heap, q);
+ } else if (DN_KEY_LT(si->V, alg_fq->S)) {
+ heap_extract(&si->ne_heap, q);
+ } else {
+ heap_extract(&si->sch_heap, q);
+ }
+ return 0;
+}
+
+/*
+ * WF2Q+ scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * structures and function pointers.
+ */
+static struct dn_alg wf2qp_desc = {
+ _SI( .type = ) DN_SCHED_WF2QP,
+ _SI( .name = ) "WF2Q+",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ /* we need extra space in the si and the queue */
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct wf2qp_si),
+ _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
+ sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) wf2qp_enqueue,
+ _SI( .dequeue = ) wf2qp_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) wf2qp_new_sched,
+ _SI( .free_sched = ) wf2qp_free_sched,
+
+ _SI( .new_fsk = ) wf2qp_new_fsk,
+ _SI( .free_fsk = ) NULL,
+
+ _SI( .new_queue = ) wf2qp_new_queue,
+ _SI( .free_queue = ) wf2qp_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_glue.c b/freebsd/sys/netpfil/ipfw/ip_dn_glue.c
new file mode 100644
index 00000000..92da3b1a
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_dn_glue.c
@@ -0,0 +1,848 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ *
+ * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
+ */
+
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <rtems/bsd/sys/time.h>
+#include <sys/taskqueue.h>
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+/* FREEBSD7.2 ip_dummynet.h r191715*/
+
+struct dn_heap_entry7 {
+ int64_t key; /* sorting key. Topmost element is smallest one */
+ void *object; /* object pointer */
+};
+
+struct dn_heap7 {
+ int size;
+ int elements;
+ int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
+ struct dn_heap_entry7 *p; /* really an array of "size" entries */
+};
+
+/* Common to 7.2 and 8 */
+struct dn_flow_set {
+ SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
+
+ u_short fs_nr ; /* flow_set number */
+ u_short flags_fs;
+#define DNOLD_HAVE_FLOW_MASK 0x0001
+#define DNOLD_IS_RED 0x0002
+#define DNOLD_IS_GENTLE_RED 0x0004
+#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
+#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
+#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
+#define DNOLD_IS_PIPE 0x4000
+#define DNOLD_IS_QUEUE 0x8000
+
+ struct dn_pipe7 *pipe ; /* pointer to parent pipe */
+ u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
+
+ int weight ; /* WFQ queue weight */
+ int qsize ; /* queue size in slots or bytes */
+ int plr ; /* pkt loss rate (2^31-1 means 100%) */
+
+ struct ipfw_flow_id flow_mask ;
+
+ /* hash table of queues onto this flow_set */
+ int rq_size ; /* number of slots */
+ int rq_elements ; /* active elements */
+ struct dn_flow_queue7 **rq; /* array of rq_size entries */
+
+ u_int32_t last_expired ; /* do not expire too frequently */
+ int backlogged ; /* #active queues for this flowset */
+
+ /* RED parameters */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
+ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
+ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
+ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
+ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
+ u_int lookup_depth ; /* depth of lookup table */
+ int lookup_step ; /* granularity inside the lookup table */
+ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+ int avg_pkt_size ; /* medium packet size */
+ int max_pkt_size ; /* max packet size */
+};
+SLIST_HEAD(dn_flow_set_head, dn_flow_set);
+
+#define DN_IS_PIPE 0x4000
+#define DN_IS_QUEUE 0x8000
+struct dn_flow_queue7 {
+ struct dn_flow_queue7 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ u_long numbytes;
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ u_int32_t q_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe7 { /* a pipe */
+ SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ int numbytes;
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+
+ /*
+ * When the tx clock come from an interface (if_name[0] != '\0'), its name
+ * is stored below, whereas the ifp is filled when the rule is configured.
+ */
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+};
+SLIST_HEAD(dn_pipe_head7, dn_pipe7);
+
+
+/* FREEBSD8 ip_dummynet.h r196045 */
+struct dn_flow_queue8 {
+ struct dn_flow_queue8 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ uint64_t numbytes ; /* credit for transmission (dynamic queues) */
+ int64_t extra_bits; /* extra bits simulating unavailable channel */
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ int64_t idle_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe8 { /* a pipe */
+ SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ /* Same as in dn_flow_queue, numbytes can become large */
+ int64_t numbytes; /* bits I can transmit (more or less). */
+ uint64_t burst; /* burst size, scaled: bits * hz */
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+ int64_t idle_time; /* start of pipe idle time */
+
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int loss_level;
+ int samples_no;
+ int *samples;
+};
+
+#define ED_MAX_SAMPLES_NO 1024
+struct dn_pipe_max8 {
+ struct dn_pipe8 pipe;
+ int samples[ED_MAX_SAMPLES_NO];
+};
+SLIST_HEAD(dn_pipe_head8, dn_pipe8);
+
+/*
+ * Changes from 7.2 to 8:
+ * dn_pipe:
+ * numbytes from int to int64_t
+ * add burst (int64_t)
+ * add idle_time (int64_t)
+ * add profile
+ * add struct dn_pipe_max
+ * add flag DN_HAS_PROFILE
+ *
+ * dn_flow_queue
+ * numbytes from u_long to int64_t
+ * add extra_bits (int64_t)
+ * q_time from u_int32_t to int64_t and name idle_time
+ *
+ * dn_flow_set unchanged
+ *
+ */
+
+/* NOTE:XXX copied from dummynet.c */
+#define O_NEXT(p, len) ((void *)((char *)p + len))
+static void
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
+{
+ oid->len = len;
+ oid->type = type;
+ oid->subtype = 0;
+ oid->id = id;
+}
+/* make room in the buffer and move the pointer forward */
+static void *
+o_next(struct dn_id **o, int len, int type)
+{
+ struct dn_id *ret = *o;
+ oid_fill(ret, len, type, 0);
+ *o = O_NEXT(*o, len);
+ return ret;
+}
+
+
+static size_t pipesize7 = sizeof(struct dn_pipe7);
+static size_t pipesize8 = sizeof(struct dn_pipe8);
+static size_t pipesizemax8 = sizeof(struct dn_pipe_max8);
+
+/* Indicate 'ipfw' version
+ * 1: from FreeBSD 7.2
+ * 0: from FreeBSD 8
+ * -1: unknown (for now it is unused)
+ *
+ * It is updated when an IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives.
+ * NOTE: if an IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
+ * it is supposed to be the FreeBSD 8 version.
+ */
+static int is7 = 0;
+
+static int
+convertflags2new(int src)
+{
+ int dst = 0;
+
+ if (src & DNOLD_HAVE_FLOW_MASK)
+ dst |= DN_HAVE_MASK;
+ if (src & DNOLD_QSIZE_IS_BYTES)
+ dst |= DN_QSIZE_BYTES;
+ if (src & DNOLD_NOERROR)
+ dst |= DN_NOERROR;
+ if (src & DNOLD_IS_RED)
+ dst |= DN_IS_RED;
+ if (src & DNOLD_IS_GENTLE_RED)
+ dst |= DN_IS_GENTLE_RED;
+ if (src & DNOLD_HAS_PROFILE)
+ dst |= DN_HAS_PROFILE;
+
+ return dst;
+}
+
+static int
+convertflags2old(int src)
+{
+ int dst = 0;
+
+ if (src & DN_HAVE_MASK)
+ dst |= DNOLD_HAVE_FLOW_MASK;
+ if (src & DN_IS_RED)
+ dst |= DNOLD_IS_RED;
+ if (src & DN_IS_GENTLE_RED)
+ dst |= DNOLD_IS_GENTLE_RED;
+ if (src & DN_NOERROR)
+ dst |= DNOLD_NOERROR;
+ if (src & DN_HAS_PROFILE)
+ dst |= DNOLD_HAS_PROFILE;
+ if (src & DN_QSIZE_BYTES)
+ dst |= DNOLD_QSIZE_IS_BYTES;
+
+ return dst;
+}
+
+static int
+dn_compat_del(void *v)
+{
+ struct dn_pipe7 *p = (struct dn_pipe7 *) v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *) v;
+ struct {
+ struct dn_id oid;
+ uintptr_t a[1]; /* add more if we want a list */
+ } cmd;
+
+ /* XXX DN_API_VERSION ??? */
+ oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+
+ if (is7) {
+ if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
+ return EINVAL;
+ if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
+ return EINVAL;
+ } else {
+ if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0)
+ return EINVAL;
+ if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0)
+ return EINVAL;
+ }
+
+ if (p->pipe_nr != 0) { /* pipe x delete */
+ cmd.a[0] = p->pipe_nr;
+ cmd.oid.subtype = DN_LINK;
+ } else { /* queue x delete */
+ cmd.oid.subtype = DN_FS;
+ cmd.a[0] = (is7) ? p->fs.fs_nr : p8->fs.fs_nr;
+ }
+
+ return do_config(&cmd, cmd.oid.len);
+}
+
+static int
+dn_compat_config_queue(struct dn_fs *fs, void* v)
+{
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+ struct dn_flow_set *f;
+
+ if (is7)
+ f = &p7->fs;
+ else
+ f = &p8->fs;
+
+ fs->fs_nr = f->fs_nr;
+ fs->sched_nr = f->parent_nr;
+ fs->flow_mask = f->flow_mask;
+ fs->buckets = f->rq_size;
+ fs->qsize = f->qsize;
+ fs->plr = f->plr;
+ fs->par[0] = f->weight;
+ fs->flags = convertflags2new(f->flags_fs);
+ if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) {
+ fs->w_q = f->w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->max_p;
+ }
+
+ return 0;
+}
+
+static int
+dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p,
+ struct dn_fs *fs, void* v)
+{
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+ int i = p7->pipe_nr;
+
+ sch->sched_nr = i;
+ sch->oid.subtype = 0;
+ p->link_nr = i;
+ fs->fs_nr = i + 2*DN_MAX_ID;
+ fs->sched_nr = i + DN_MAX_ID;
+
+ /* Common to 7 and 8 */
+ p->bandwidth = p7->bandwidth;
+ p->delay = p7->delay;
+ if (!is7) {
+ /* FreeBSD 8 has burst */
+ p->burst = p8->burst;
+ }
+
+ /* fill the fifo flowset */
+ dn_compat_config_queue(fs, v);
+ fs->fs_nr = i + 2*DN_MAX_ID;
+ fs->sched_nr = i + DN_MAX_ID;
+
+ /* Move scheduler related parameter from fs to sch */
+ sch->buckets = fs->buckets; /*XXX*/
+ fs->buckets = 0;
+ if (fs->flags & DN_HAVE_MASK) {
+ sch->flags |= DN_HAVE_MASK;
+ fs->flags &= ~DN_HAVE_MASK;
+ sch->sched_mask = fs->flow_mask;
+ bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id));
+ }
+
+ return 0;
+}
+
+static int
+dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p,
+ void *v)
+{
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+ p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]);
+
+ pf->link_nr = p->link_nr;
+ pf->loss_level = p8->loss_level;
+// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant?
+ pf->samples_no = p8->samples_no;
+ strncpy(pf->name, p8->name,sizeof(pf->name));
+ bcopy(p8->samples, pf->samples, sizeof(pf->samples));
+
+ return 0;
+}
+
+/*
+ * If p->pipe_nr != 0 the command is 'pipe x config', so need to create
+ * the three main struct, else only a flowset is created
+ */
+static int
+dn_compat_configure(void *v)
+{
+ struct dn_id *buf = NULL, *base;
+ struct dn_sch *sch = NULL;
+ struct dn_link *p = NULL;
+ struct dn_fs *fs = NULL;
+ struct dn_profile *pf = NULL;
+ int lmax;
+ int error;
+
+ struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+ struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+ int i; /* number of object to configure */
+
+ lmax = sizeof(struct dn_id); /* command header */
+ lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
+ sizeof(struct dn_fs) + sizeof(struct dn_profile);
+
+ base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
+ o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
+ base->id = DN_API_VERSION;
+
+ /* pipe_nr is the same in p7 and p8 */
+ i = p7->pipe_nr;
+ if (i != 0) { /* pipe config */
+ sch = o_next(&buf, sizeof(*sch), DN_SCH);
+ p = o_next(&buf, sizeof(*p), DN_LINK);
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+
+ error = dn_compat_config_pipe(sch, p, fs, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ if (!is7 && p8->samples_no > 0) {
+ /* Add profiles*/
+ pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
+ error = dn_compat_config_profile(pf, p, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ }
+ } else { /* queue config */
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+ error = dn_compat_config_queue(fs, v);
+ if (error) {
+ free(buf, M_DUMMYNET);
+ return error;
+ }
+ }
+ error = do_config(base, (char *)buf - (char *)base);
+
+ if (buf)
+ free(buf, M_DUMMYNET);
+ return error;
+}
+
+int
+dn_compat_calc_size(void)
+{
+ int need = 0;
+ /* XXX use FreeBSD 8 struct size */
+ /* NOTE:
+ * - half scheduler: schk_count/2
+ * - all flowset: fsk_count
+ * - all flowset queues: queue_count
+ * - all pipe queue: si_count
+ */
+ need += dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
+ need += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
+ need += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
+ need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
+
+ return need;
+}
+
+int
+dn_c_copy_q (void *_ni, void *arg)
+{
+ struct copy_args *a = arg;
+ struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start;
+ struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start;
+ struct dn_flow *ni = (struct dn_flow *)_ni;
+ int size = 0;
+
+ /* XXX hash slot not set */
+ /* No difference between 7.2/8 */
+ fq7->len = ni->length;
+ fq7->len_bytes = ni->len_bytes;
+ fq7->id = ni->fid;
+
+ if (is7) {
+ size = sizeof(struct dn_flow_queue7);
+ fq7->tot_pkts = ni->tot_pkts;
+ fq7->tot_bytes = ni->tot_bytes;
+ fq7->drops = ni->drops;
+ } else {
+ size = sizeof(struct dn_flow_queue8);
+ fq8->tot_pkts = ni->tot_pkts;
+ fq8->tot_bytes = ni->tot_bytes;
+ fq8->drops = ni->drops;
+ }
+
+ *a->start += size;
+ return 0;
+}
+
+int
+dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq)
+{
+ struct dn_link *l = &s->link;
+ struct dn_fsk *f = s->fs;
+
+ struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start;
+ struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start;
+ struct dn_flow_set *fs;
+ int size = 0;
+
+ if (is7) {
+ fs = &pipe7->fs;
+ size = sizeof(struct dn_pipe7);
+ } else {
+ fs = &pipe8->fs;
+ size = sizeof(struct dn_pipe8);
+ }
+
+ /* These 4 fields are the same in pipe7 and pipe8 */
+ pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE;
+ pipe7->bandwidth = l->bandwidth;
+ pipe7->delay = l->delay * 1000 / hz;
+ pipe7->pipe_nr = l->link_nr - DN_MAX_ID;
+
+ if (!is7) {
+ if (s->profile) {
+ struct dn_profile *pf = s->profile;
+ strncpy(pipe8->name, pf->name, sizeof(pf->name));
+ pipe8->loss_level = pf->loss_level;
+ pipe8->samples_no = pf->samples_no;
+ }
+ pipe8->burst = div64(l->burst , 8 * hz);
+ }
+
+ fs->flow_mask = s->sch.sched_mask;
+ fs->rq_size = s->sch.buckets ? s->sch.buckets : 1;
+
+ fs->parent_nr = l->link_nr - DN_MAX_ID;
+ fs->qsize = f->fs.qsize;
+ fs->plr = f->fs.plr;
+ fs->w_q = f->fs.w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->fs.max_p;
+ fs->rq_elements = nq;
+
+ fs->flags_fs = convertflags2old(f->fs.flags);
+
+ *a->start += size;
+ return 0;
+}
+
+
+int
+dn_compat_copy_pipe(struct copy_args *a, void *_o)
+{
+ int have = a->end - *a->start;
+ int need = 0;
+ int pipe_size = sizeof(struct dn_pipe8);
+ int queue_size = sizeof(struct dn_flow_queue8);
+ int n_queue = 0; /* number of queues */
+
+ struct dn_schk *s = (struct dn_schk *)_o;
+ /* calculate needed space:
+ * - struct dn_pipe
+ * - if there are instances, dn_queue * n_instances
+ */
+ n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) :
+ (s->siht ? 1 : 0));
+ need = pipe_size + queue_size * n_queue;
+ if (have < need) {
+ D("have %d < need %d", have, need);
+ return 1;
+ }
+ /* copy pipe */
+ dn_c_copy_pipe(s, a, n_queue);
+
+ /* copy queues */
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, dn_c_copy_q, a);
+ else if (s->siht)
+ dn_c_copy_q(s->siht, a);
+ return 0;
+}
+
+int
+dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq)
+{
+ struct dn_flow_set *fs = (struct dn_flow_set *)*a->start;
+
+ fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
+ fs->fs_nr = f->fs.fs_nr;
+ fs->qsize = f->fs.qsize;
+ fs->plr = f->fs.plr;
+ fs->w_q = f->fs.w_q;
+ fs->max_th = f->max_th;
+ fs->min_th = f->min_th;
+ fs->max_p = f->fs.max_p;
+ fs->flow_mask = f->fs.flow_mask;
+ fs->rq_elements = nq;
+ fs->rq_size = (f->fs.buckets ? f->fs.buckets : 1);
+ fs->parent_nr = f->fs.sched_nr;
+ fs->weight = f->fs.par[0];
+
+ fs->flags_fs = convertflags2old(f->fs.flags);
+ *a->start += sizeof(struct dn_flow_set);
+ return 0;
+}
+
+int
+dn_compat_copy_queue(struct copy_args *a, void *_o)
+{
+ int have = a->end - *a->start;
+ int need = 0;
+ int fs_size = sizeof(struct dn_flow_set);
+ int queue_size = sizeof(struct dn_flow_queue8);
+
+ struct dn_fsk *fs = (struct dn_fsk *)_o;
+ int n_queue = 0; /* number of queues */
+
+ n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) :
+ (fs->qht ? 1 : 0));
+
+ need = fs_size + queue_size * n_queue;
+ if (have < need) {
+ D("have < need");
+ return 1;
+ }
+
+ /* copy flowset */
+ dn_c_copy_fs(fs, a, n_queue);
+
+ /* copy queues */
+ if (fs->fs.flags & DN_HAVE_MASK)
+ dn_ht_scan(fs->qht, dn_c_copy_q, a);
+ else if (fs->qht)
+ dn_c_copy_q(fs->qht, a);
+
+ return 0;
+}
+
+int
+copy_data_helper_compat(void *_o, void *_arg)
+{
+ struct copy_args *a = _arg;
+
+ if (a->type == DN_COMPAT_PIPE) {
+ struct dn_schk *s = _o;
+ if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) {
+ return 0; /* not old type */
+ }
+ /* copy pipe parameters, and if instance exists, copy
+ * other parameters and eventually queues.
+ */
+ if(dn_compat_copy_pipe(a, _o))
+ return DNHT_SCAN_END;
+ } else if (a->type == DN_COMPAT_QUEUE) {
+ struct dn_fsk *fs = _o;
+ if (fs->fs.fs_nr >= DN_MAX_ID)
+ return 0;
+ if (dn_compat_copy_queue(a, _o))
+ return DNHT_SCAN_END;
+ }
+ return 0;
+}
+
+/* Main function to manage old requests */
+int
+ip_dummynet_compat(struct sockopt *sopt)
+{
+ int error=0;
+ void *v = NULL;
+ struct dn_id oid;
+
+ /* Length of data, used to find the ipfw version... */
+ int len = sopt->sopt_valsize;
+
+ /* len can be 0 if command was dummynet_flush */
+ if (len == pipesize7) {
+ D("setting compatibility with FreeBSD 7.2");
+ is7 = 1;
+ }
+ else if (len == pipesize8 || len == pipesizemax8) {
+ D("setting compatibility with FreeBSD 8");
+ is7 = 0;
+ }
+
+ switch (sopt->sopt_name) {
+ default:
+ printf("dummynet: -- unknown option %d", sopt->sopt_name);
+ error = EINVAL;
+ break;
+
+ case IP_DUMMYNET_FLUSH:
+ oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
+ do_config(&oid, oid.len);
+ break;
+
+ case IP_DUMMYNET_DEL:
+ v = malloc(len, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, v, len, len);
+ if (error)
+ break;
+ error = dn_compat_del(v);
+ free(v, M_TEMP);
+ break;
+
+ case IP_DUMMYNET_CONFIGURE:
+ v = malloc(len, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, v, len, len);
+ if (error)
+ break;
+ error = dn_compat_configure(v);
+ free(v, M_TEMP);
+ break;
+
+ case IP_DUMMYNET_GET: {
+ void *buf;
+ int ret;
+ int original_size = sopt->sopt_valsize;
+ int size;
+
+ ret = dummynet_get(sopt, &buf);
+ if (ret)
+ return 0;//XXX ?
+ size = sopt->sopt_valsize;
+ sopt->sopt_valsize = original_size;
+ D("size=%d, buf=%p", size, buf);
+ ret = sooptcopyout(sopt, buf, size);
+ if (ret)
+ printf(" %s ERROR sooptcopyout\n", __FUNCTION__);
+ if (buf)
+ free(buf, M_DUMMYNET);
+ }
+ }
+
+ return error;
+}
+
+
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_io.c b/freebsd/sys/netpfil/ipfw/ip_dn_io.c
new file mode 100644
index 00000000..6aaf73c1
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_dn_io.c
@@ -0,0 +1,852 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Dummynet portions related to packet handling.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <rtems/bsd/sys/time.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/if_ether.h> /* various ether_* routines */
+#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+/*
+ * We keep a private variable for the simulation time, but we could
+ * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
+ * instead of dn_cfg.curr_time
+ */
+
+struct dn_parms dn_cfg;
+//VNET_DEFINE(struct dn_parms, _base_dn_cfg);
+
+static long tick_last; /* Last tick duration (usec). */
+static long tick_delta; /* Last vs standard tick diff (usec). */
+static long tick_delta_sum; /* Accumulated tick difference (usec).*/
+static long tick_adjustment; /* Tick adjustments done. */
+static long tick_lost; /* Lost(coalesced) ticks number. */
+/* Adjusted vs non-adjusted curr_time difference (ticks). */
+static long tick_diff;
+
+static unsigned long io_pkt;
+static unsigned long io_pkt_fast;
+static unsigned long io_pkt_drop;
+
+/*
+ * We use a heap to store entities for which we have pending timer events.
+ * The heap is checked at every tick and all entities with expired events
+ * are extracted.
+ */
+
+MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
+
+extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+
+#ifdef SYSCTL_NODE
+
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+
+/* wrapper to pass dn_cfg fields to SYSCTL_* */
+//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
+#define DC(x) (&(dn_cfg.x))
+/* parameters */
+
+/* Sysctl handler: validate and apply net.inet.ip.dummynet.hash_size. */
+static int
+sysctl_hash_size(SYSCTL_HANDLER_ARGS)
+{
+	int err;
+	int v = dn_cfg.hash_size;
+
+	err = sysctl_handle_int(oidp, &v, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	/* only accept sane hash table sizes */
+	if (v < 16 || v > 65536)
+		return (EINVAL);
+	dn_cfg.hash_size = v;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+ CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
+ "I", "Default hash table size");
+
+/*
+ * Sysctl handler for the pipe queue limits. arg2 != 0 selects the
+ * slot (packet) limit, arg2 == 0 the byte limit.
+ */
+static int
+sysctl_limits(SYSCTL_HANDLER_ARGS)
+{
+	long v;
+	int err;
+	const int is_slots = (arg2 != 0);
+
+	v = is_slots ? dn_cfg.slot_limit : dn_cfg.byte_limit;
+	err = sysctl_handle_long(oidp, &v, 0, req);
+
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	if (is_slots) {
+		if (v < 1)
+			return (EINVAL);
+		dn_cfg.slot_limit = v;
+	} else {
+		/* a byte limit below one MTU makes no sense */
+		if (v < 1500)
+			return (EINVAL);
+		dn_cfg.byte_limit = v;
+	}
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
+ CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
+ "L", "Upper limit in slots for pipe queue.");
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
+ CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits,
+ "L", "Upper limit in bytes for pipe queue.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
+ CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
+ CTLFLAG_RW, DC(debug), 0, "Dummynet debug level");
+
+/* RED parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
+ CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
+ CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
+ CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size");
+
+/* time adjustment */
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+ CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+ CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+ CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+ CTLFLAG_RD, &tick_diff, 0,
+ "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+ CTLFLAG_RD, &tick_lost, 0,
+ "Number of ticks coalesced by dummynet taskqueue.");
+
+/* Drain parameters */
+SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire,
+ CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes");
+SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
+ CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes");
+
+/* statistics */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
+ CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
+ CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
+ CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
+ CTLFLAG_RD, DC(queue_count), 0, "Number of queues");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
+ CTLFLAG_RD, &io_pkt, 0,
+ "Number of packets passed to dummynet.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
+ CTLFLAG_RD, &io_pkt_fast, 0,
+ "Number of packets bypassed dummynet scheduler.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
+ CTLFLAG_RD, &io_pkt_drop, 0,
+ "Number of packets dropped by dummynet.");
+#undef DC
+SYSEND
+
+#endif
+
+static void dummynet_send(struct mbuf *);
+
+/*
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
+ */
+struct dn_pkt_tag {
+ struct ipfw_rule_ref rule; /* matching rule */
+
+ /* second part, dummynet specific */
+ int dn_dir; /* action when packet comes out.*/
+ /* see ip_fw_private.h */
+ uint64_t output_time; /* when the pkt is due for delivery*/
+ struct ifnet *ifp; /* interface, for ip_output */
+ struct _ip6dn_args ip6opt; /* XXX ipv6 options */
+};
+
+/*
+ * Return the mbuf tag holding the dummynet state (it should
+ * be the first one on the list).
+ * Panics (KASSERT) if the packet carries no dummynet tag, so
+ * callers may rely on a non-NULL result.
+ */
+static struct dn_pkt_tag *
+dn_tag_get(struct mbuf *m)
+{
+	struct m_tag *mtag = m_tag_first(m);
+	KASSERT(mtag != NULL &&
+	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
+	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
+	    ("packet on dummynet queue w/o dummynet tag!"));
+	/* payload lives immediately after the m_tag header */
+	return (struct dn_pkt_tag *)(mtag+1);
+}
+
+/* Append mbuf m at the tail of queue q. */
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+	struct mbuf *t = q->tail;
+
+	/* chain after the current tail, or start a new list */
+	if (q->head != NULL)
+		t->m_nextpkt = m;
+	else
+		q->head = m;
+	q->tail = m;
+	m->m_nextpkt = NULL;
+}
+
+/*
+ * Dispose of a chain of packets. Kept as a function so that any
+ * extra teardown work has a single central place to live.
+ */
+void dn_free_pkts(struct mbuf *mnext)
+{
+	struct mbuf *cur = mnext;
+
+	while (cur != NULL) {
+		struct mbuf *next = cur->m_nextpkt;
+
+		FREE_PKT(cur);
+		cur = next;
+	}
+}
+
+/* Return 1 if the packet should be dropped per RED/GRED, 0 to accept. */
+static int
+red_drops (struct dn_queue *q, int len)
+{
+	/*
+	 * RED algorithm
+	 *
+	 * RED calculates the average queue size (avg) using a low-pass filter
+	 * with an exponential weighted (w_q) moving average:
+	 * 	avg  <-  (1-w_q) * avg + w_q * q_size
+	 * where q_size is the queue length (measured in bytes or packets).
+	 *
+	 * If q_size == 0, we compute the idle time for the link, and set
+	 *	avg = (1 - w_q)^(idle/s)
+	 * where s is the time needed for transmitting a medium-sized packet.
+	 *
+	 * Now, if avg < min_th the packet is enqueued.
+	 * If avg > max_th the packet is dropped. Otherwise, the packet is
+	 * dropped with probability P function of avg.
+	 */
+
+	struct dn_fsk *fs = q->fs;
+	int64_t p_b = 0;
+
+	/* Queue in bytes or packets? */
+	uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
+	    q->ni.len_bytes : q->ni.length;
+
+	/* Average queue size estimation. */
+	if (q_size != 0) {
+		/* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+		/* all values are kept in fixed-point (SCALE) form */
+		int diff = SCALE(q_size) - q->avg;
+		int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
+
+		q->avg += (int)v;
+	} else {
+		/*
+		 * Queue is empty, find for how long the queue has been
+		 * empty and use a lookup table for computing
+		 * (1 - w_q)^(idle_time/s) where s is the time to send a
+		 * (small) packet.
+		 * XXX check wraps...
+		 */
+		if (q->avg) {
+			u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
+
+			q->avg = (t < fs->lookup_depth) ?
+			    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+		}
+	}
+
+	/* Should i drop? */
+	if (q->avg < fs->min_th) {
+		q->count = -1;
+		return (0); /* accept packet */
+	}
+	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
+		if (fs->fs.flags & DN_IS_GENTLE_RED) {
+			/*
+			 * According to Gentle-RED, if avg is greater than
+			 * max_th the packet is dropped with a probability
+			 *	 p_b = c_3 * avg - c_4
+			 * where c_3 = (1 - max_p) / max_th
+			 *       c_4 = 1 - 2 * max_p
+			 */
+			p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+			    fs->c_4;
+		} else {
+			/* plain RED: hard drop above max_th */
+			q->count = -1;
+			return (1);
+		}
+	} else if (q->avg > fs->min_th) {
+		/*
+		 * We compute p_b using the linear dropping function
+		 *	 p_b = c_1 * avg - c_2
+		 * where c_1 = max_p / (max_th - min_th)
+		 *       c_2 = max_p * min_th / (max_th - min_th)
+		 */
+		p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
+	}
+
+	/* scale drop probability by packet size for byte-based queues */
+	if (fs->fs.flags & DN_QSIZE_BYTES)
+		p_b = div64((p_b * len) , fs->max_pkt_size);
+	if (++q->count == 0)
+		q->random = random() & 0xffff;
+	else {
+		/*
+		 * q->count counts packets arrived since last drop, so a greater
+		 * value of q->count means a greater packet drop probability.
+		 */
+		if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+			q->count = 0;
+			/* After a drop we calculate a new random value. */
+			q->random = random() & 0xffff;
+			return (1);	/* drop */
+		}
+	}
+	/* End of RED algorithm. */
+
+	return (0); /* accept */
+
+}
+
+/*
+ * Enqueue a packet in q, subject to space and queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyways.
+ * Note: total byte/packet counters are updated even for dropped
+ * packets; queue-length counters only on successful enqueue.
+ */
+int
+dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
+{
+	struct dn_fs *f;
+	struct dn_flow *ni;	/* stats for scheduler instance */
+	uint64_t len;
+
+	if (q->fs == NULL || q->_si == NULL) {
+		/* detached queue, should not normally happen */
+		printf("%s fs %p si %p, dropping\n",
+			__FUNCTION__, q->fs, q->_si);
+		FREE_PKT(m);
+		return 1;
+	}
+	f = &(q->fs->fs);
+	ni = &q->_si->ni;
+	len = m->m_pkthdr.len;
+	/* Update statistics, then check reasons to drop pkt. */
+	q->ni.tot_bytes += len;
+	q->ni.tot_pkts++;
+	ni->tot_bytes += len;
+	ni->tot_pkts++;
+	if (drop)
+		goto drop;
+	/* random loss emulation: plr is the drop probability, scaled */
+	if (f->plr && random() < f->plr)
+		goto drop;
+	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
+		goto drop;
+	/* tail-drop when the configured queue size is exceeded */
+	if (f->flags & DN_QSIZE_BYTES) {
+		if (q->ni.len_bytes > f->qsize)
+			goto drop;
+	} else if (q->ni.length >= f->qsize) {
+		goto drop;
+	}
+	mq_append(&q->mq, m);
+	q->ni.length++;
+	q->ni.len_bytes += len;
+	ni->length++;
+	ni->len_bytes += len;
+	return 0;
+
+drop:
+	io_pkt_drop++;
+	q->ni.drops++;
+	ni->drops++;
+	FREE_PKT(m);
+	return 1;
+}
+
+/*
+ * Fetch packets from the delay line which are due now. If there are
+ * leftover packets, reinsert the delay line in the heap.
+ * Runs under scheduler lock.
+ */
+static void
+transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
+{
+	struct mbuf *m;
+	struct dn_pkt_tag *pkt = NULL;
+
+	dline->oid.subtype = 0; /* not in heap */
+	/* drain all packets whose output_time is <= now */
+	while ((m = dline->mq.head) != NULL) {
+		pkt = dn_tag_get(m);
+		if (!DN_KEY_LEQ(pkt->output_time, now))
+			break;
+		dline->mq.head = m->m_nextpkt;
+		mq_append(q, m);
+	}
+	if (m != NULL) {
+		/* packets remain; reschedule at the next due time
+		 * (pkt still points at the first not-yet-due packet)
+		 */
+		dline->oid.subtype = 1; /* in heap */
+		heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
+	}
+}
+
+/*
+ * Convert the additional MAC overheads/delays into an equivalent
+ * number of bits for the given data rate. The samples are
+ * in milliseconds so we need to divide by 1000.
+ * Side effect: if the randomly picked sample falls in the loss
+ * region of the profile, the packet is marked DIR_DROP.
+ */
+static uint64_t
+extra_bits(struct mbuf *m, struct dn_schk *s)
+{
+	int index;
+	uint64_t bits;
+	struct dn_profile *pf = s->profile;
+
+	if (!pf || pf->samples_no == 0)
+		return 0;
+	index  = random() % pf->samples_no;
+	bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000);
+	if (index >= pf->loss_level) {
+		struct dn_pkt_tag *dt = dn_tag_get(m);
+		/* dn_tag_get() asserts non-NULL, so this check is
+		 * defensive only */
+		if (dt)
+			dt->dn_dir = DIR_DROP;
+	}
+	return bits;
+}
+
+/*
+ * Send traffic from a scheduler instance due by 'now'.
+ * Return a pointer to the head of the queue.
+ * If q is NULL a local queue is used and only the head pointer
+ * survives the call (the caller consumes the chain immediately).
+ */
+static struct mbuf *
+serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
+{
+	struct mq def_q;
+	struct dn_schk *s = si->sched;
+	struct mbuf *m = NULL;
+	int delay_line_idle = (si->dline.mq.head == NULL);
+	int done, bw;
+
+	if (q == NULL) {
+		q = &def_q;
+		q->head = NULL;
+	}
+
+	bw = s->link.bandwidth;
+	si->kflags &= ~DN_ACTIVE;	/* set again below if rescheduled */
+
+	/* accrue transmission credit for the elapsed time */
+	if (bw > 0)
+		si->credit += (now - si->sched_time) * bw;
+	else
+		si->credit = 0;
+	si->sched_time = now;
+	done = 0;
+	/* dequeue as long as we have credit; cost is in scaled bits */
+	while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
+		uint64_t len_scaled;
+
+		done++;
+		len_scaled = (bw == 0) ? 0 : hz *
+			(m->m_pkthdr.len * 8 + extra_bits(m, s));
+		si->credit -= len_scaled;
+		/* Move packet in the delay line */
+		dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ;
+		mq_append(&si->dline.mq, m);
+	}
+
+	/*
+	 * If credit >= 0 the instance is idle, mark time.
+	 * Otherwise put back in the heap, and adjust the output
+	 * time of the last inserted packet, m, which was too early.
+	 */
+	if (si->credit >= 0) {
+		si->idle_time = now;
+	} else {
+		uint64_t t;
+		KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
+		/* ticks needed to pay back the credit deficit */
+		t = div64(bw - 1 - si->credit, bw);
+		if (m)
+			dn_tag_get(m)->output_time += t;
+		si->kflags |= DN_ACTIVE;
+		heap_insert(&dn_cfg.evheap, now + t, si);
+	}
+	if (delay_line_idle && done)
+		transmit_event(q, &si->dline, now);
+	return q->head;
+}
+
+/*
+ * The timer handler for dummynet. Time is computed in ticks, but
+ * but the code is tolerant to the actual rate at which this is called.
+ * Once complete, the function reschedules itself for the next tick.
+ * 'pending' is the number of taskqueue invocations coalesced into
+ * this call.
+ */
+void
+dummynet_task(void *context, int pending)
+{
+	struct timeval t;
+	struct mq q = { NULL, NULL }; /* queue to accumulate results */
+
+	CURVNET_SET((struct vnet *)context);
+
+	DN_BH_WLOCK();
+
+	/* Update number of lost(coalesced) ticks. */
+	tick_lost += pending - 1;
+
+	getmicrouptime(&t);
+	/* Last tick duration (usec). */
+	tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
+	(t.tv_usec - dn_cfg.prev_t.tv_usec);
+	/* Last tick vs standard tick difference (usec). */
+	tick_delta = (tick_last * hz - 1000000) / hz;
+	/* Accumulated tick difference (usec). */
+	tick_delta_sum += tick_delta;
+
+	dn_cfg.prev_t = t;
+
+	/*
+	 * Adjust curr_time if the accumulated tick difference is
+	 * greater than the 'standard' tick. Since curr_time should
+	 * be monotonically increasing, we do positive adjustments
+	 * as required, and throttle curr_time in case of negative
+	 * adjustment.
+	 */
+	dn_cfg.curr_time++;
+	if (tick_delta_sum - tick >= 0) {
+		int diff = tick_delta_sum / tick;
+
+		dn_cfg.curr_time += diff;
+		tick_diff += diff;
+		tick_delta_sum %= tick;
+		tick_adjustment++;
+	} else if (tick_delta_sum + tick <= 0) {
+		/* running fast: hold curr_time back by one tick */
+		dn_cfg.curr_time--;
+		tick_diff--;
+		tick_delta_sum += tick;
+		tick_adjustment++;
+	}
+
+	/* serve pending events, accumulate in q */
+	for (;;) {
+		struct dn_id *p;    /* generic parameter to handler */
+
+		if (dn_cfg.evheap.elements == 0 ||
+		    DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
+			break;
+		p = HEAP_TOP(&dn_cfg.evheap)->object;
+		heap_extract(&dn_cfg.evheap, NULL);
+
+		if (p->type == DN_SCH_I) {
+			serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
+		} else { /* extracted a delay line */
+			transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
+		}
+	}
+	/* periodically reclaim empty queues/scheduler instances */
+	if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
+		dn_cfg.expire_cycle = 0;
+		dn_drain_scheduler();
+		dn_drain_queue();
+	}
+
+	DN_BH_WUNLOCK();
+	dn_reschedule();
+	/* deliver accumulated packets outside the lock */
+	if (q.head != NULL)
+		dummynet_send(q.head);
+	CURVNET_RESTORE();
+}
+
+/*
+ * forward a chain of packets to the proper destination.
+ * This runs outside the dummynet lock.
+ * The destination (dn_dir stored at tag time) selects the
+ * reinjection point: ip_input/ip_output, their IPv6 twins,
+ * layer-2 paths, or drop.
+ */
+static void
+dummynet_send(struct mbuf *m)
+{
+	struct mbuf *n;
+
+	for (; m != NULL; m = n) {
+		struct ifnet *ifp = NULL;	/* gcc 3.4.6 complains */
+        	struct m_tag *tag;
+		int dst;
+
+		n = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		tag = m_tag_first(m);
+		if (tag == NULL) { /* should not happen */
+			dst = DIR_DROP;
+		} else {
+			struct dn_pkt_tag *pkt = dn_tag_get(m);
+			/* extract the dummynet info, rename the tag
+			 * to carry reinject info.
+			 */
+			dst = pkt->dn_dir;
+			ifp = pkt->ifp;
+			tag->m_tag_cookie = MTAG_IPFW_RULE;
+			tag->m_tag_id = 0;
+		}
+
+		switch (dst) {
+		case DIR_OUT:
+			/* ip_output() expects the length in host order */
+			SET_HOST_IPLEN(mtod(m, struct ip *));
+			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+			break ;
+
+		case DIR_IN :
+			/* put header in network format for ip_input() */
+			//SET_NET_IPLEN(mtod(m, struct ip *));
+			netisr_dispatch(NETISR_IP, m);
+			break;
+
+#ifdef INET6
+		case DIR_IN | PROTO_IPV6:
+			netisr_dispatch(NETISR_IPV6, m);
+			break;
+
+		case DIR_OUT | PROTO_IPV6:
+			ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
+			break;
+#endif
+
+		case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
+			if (bridge_dn_p != NULL)
+				((*bridge_dn_p)(m, ifp));
+			else
+				printf("dummynet: if_bridge not loaded\n");
+
+			break;
+
+		case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
+			/*
+			 * The Ethernet code assumes the Ethernet header is
+			 * contiguous in the first mbuf header.
+			 * Ensure this is true.
+			 */
+			if (m->m_len < ETHER_HDR_LEN &&
+			    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+				printf("dummynet/ether: pullup failed, "
+				    "dropping packet\n");
+				break;
+			}
+			ether_demux(m->m_pkthdr.rcvif, m);
+			break;
+
+		case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */
+			ether_output_frame(ifp, m);
+			break;
+
+		case DIR_DROP:
+			/* drop the packet after some time */
+			FREE_PKT(m);
+			break;
+
+		default:
+			printf("dummynet: bad switch %d!\n", dst);
+			FREE_PKT(m);
+			break;
+		}
+	}
+}
+
+/*
+ * Attach a dummynet state tag to mbuf m, recording the post-dummynet
+ * direction and the matching rule from fwa.
+ * Return 0 on success, 1 if the tag could not be allocated.
+ */
+static inline int
+tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
+{
+	struct dn_pkt_tag *dt;
+	struct m_tag *mtag;
+
+	mtag = m_tag_get(PACKET_TAG_DUMMYNET,
+		    sizeof(*dt), M_NOWAIT | M_ZERO);
+	if (mtag == NULL)
+		return 1;		/* Cannot allocate packet header. */
+	m_tag_prepend(m, mtag);		/* Attach to mbuf chain. */
+	dt = (struct dn_pkt_tag *)(mtag + 1);
+	dt->rule = fwa->rule;
+	dt->rule.info &= IPFW_ONEPASS;	/* only keep this info */
+	dt->dn_dir = dir;
+	dt->ifp = fwa->oif;
+	/* dt->output_time is updated as we move through */
+	dt->output_time = dn_cfg.curr_time;
+	return 0;
+}
+
+
+/*
+ * dummynet hook for packets.
+ * We use the argument to locate the flowset fs and the sched_set sch
+ * associated to it. The we apply flow_mask and sched_mask to
+ * determine the queue and scheduler instances.
+ *
+ * dir		where shall we send the packet after dummynet.
+ * *m0		the mbuf with the packet
+ * ifp		the 'ifp' parameter from the caller.
+ *		NULL in ip_input, destination interface in ip_output,
+ *
+ * Returns 0 on success (or silent drop when DN_NOERROR is set),
+ * ENOBUFS on drop. On return *m0 is NULL whenever the packet was
+ * consumed; if non-NULL, ownership stays with the caller (fast path).
+ */
+int
+dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
+{
+	struct mbuf *m = *m0;
+	struct dn_fsk *fs = NULL;
+	struct dn_sch_inst *si;
+	struct dn_queue *q = NULL;	/* default */
+
+	/* pipes are stored at an offset of 2*DN_MAX_ID in fshash */
+	int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
+		((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
+	DN_BH_WLOCK();
+	io_pkt++;
+	/* we could actually tag outside the lock, but who cares... */
+	if (tag_mbuf(m, dir, fwa))
+		goto dropit;
+	if (dn_cfg.busy) {
+		/* if the upper half is busy doing something expensive,
+		 * lets queue the packet and move forward
+		 */
+		mq_append(&dn_cfg.pending, m);
+		m = *m0 = NULL; /* consumed */
+		goto done; /* already active, nothing to do */
+	}
+	/* XXX locate_flowset could be optimised with a direct ref. */
+	fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
+	if (fs == NULL)
+		goto dropit;	/* This queue/pipe does not exist! */
+	if (fs->sched == NULL)	/* should not happen */
+		goto dropit;
+	/* find scheduler instance, possibly applying sched_mask */
+	si = ipdn_si_find(fs->sched, &(fwa->f_id));
+	if (si == NULL)
+		goto dropit;
+	/*
+	 * If the scheduler supports multiple queues, find the right one
+	 * (otherwise it will be ignored by enqueue).
+	 */
+	if (fs->sched->fp->flags & DN_MULTIQUEUE) {
+		q = ipdn_q_find(fs, si, &(fwa->f_id));
+		if (q == NULL)
+			goto dropit;
+	}
+	if (fs->sched->fp->enqueue(si, q, m)) {
+		/* packet was dropped by enqueue() */
+		m = *m0 = NULL;
+		goto dropit;
+	}
+
+	if (si->kflags & DN_ACTIVE) {
+		m = *m0 = NULL; /* consumed */
+		goto done; /* already active, nothing to do */
+	}
+
+	/* compute the initial allowance */
+	if (si->idle_time < dn_cfg.curr_time) {
+	    /* Do this only on the first packet on an idle pipe */
+	    struct dn_link *p = &fs->sched->link;
+
+	    si->sched_time = dn_cfg.curr_time;
+	    si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+	    if (p->burst) {
+		uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
+		if (burst > p->burst)
+			burst = p->burst;
+		si->credit += burst;
+	    }
+	}
+	/* pass through scheduler and delay line */
+	m = serve_sched(NULL, si, dn_cfg.curr_time);
+
+	/* optimization -- pass it back to ipfw for immediate send */
+	/* XXX Don't call dummynet_send() if scheduler return the packet
+	 * just enqueued. This avoid a lock order reversal.
+	 *
+	 */
+	if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
+		/* fast io, rename the tag * to carry reinject info. */
+		struct m_tag *tag = m_tag_first(m);
+
+		tag->m_tag_cookie = MTAG_IPFW_RULE;
+		tag->m_tag_id = 0;
+		io_pkt_fast++;
+		if (m->m_nextpkt != NULL) {
+			printf("dummynet: fast io: pkt chain detected!\n");
+			m->m_nextpkt = NULL;
+		}
+		m = NULL;
+	} else {
+		*m0 = NULL;
+	}
+done:
+	DN_BH_WUNLOCK();
+	/* send the chain produced by serve_sched(), if any */
+	if (m)
+		dummynet_send(m);
+	return 0;
+
+dropit:
+	io_pkt_drop++;
+	DN_BH_WUNLOCK();
+	if (m)
+		FREE_PKT(m);
+	*m0 = NULL;
+	return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
+}
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_private.h b/freebsd/sys/netpfil/ipfw/ip_dn_private.h
new file mode 100644
index 00000000..159ddc9a
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_dn_private.h
@@ -0,0 +1,403 @@
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * internal dummynet APIs.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_PRIVATE_H
+#define _IP_DN_PRIVATE_H
+
+/* debugging support
+ * use ND() to remove debugging, D() to print a line,
+ * DX(level, ...) to print above a certain level
+ * If you redefine D() you are expected to redefine all.
+ */
+#ifndef D
+#define ND(fmt, ...) do {} while (0)
+#define D1(fmt, ...) do {} while (0)
+#define D(fmt, ...) printf("%-10s " fmt "\n", \
+ __FUNCTION__, ## __VA_ARGS__)
+#define DX(lev, fmt, ...) do { \
+ if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
+#endif
+
+MALLOC_DECLARE(M_DUMMYNET);
+
+#ifndef __linux__
+#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
+#endif
+
+#define DN_LOCK_INIT() do { \
+ mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF); \
+ mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF); \
+ } while (0)
+#define DN_LOCK_DESTROY() do { \
+ mtx_destroy(&dn_cfg.uh_mtx); \
+ mtx_destroy(&dn_cfg.bh_mtx); \
+ } while (0)
+#if 0 /* not used yet */
+#define DN_UH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+#endif
+
+#define DN_BH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+
+SLIST_HEAD(dn_schk_head, dn_schk);
+SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
+SLIST_HEAD(dn_fsk_head, dn_fsk);
+SLIST_HEAD(dn_queue_head, dn_queue);
+SLIST_HEAD(dn_alg_head, dn_alg);
+
+struct mq {	/* a basic queue of packets */
+        struct mbuf *head, *tail;	/* singly linked via m_nextpkt */
+};
+
+/* Initialize the common object header: type, length, subtype = 0. */
+static inline void
+set_oid(struct dn_id *o, int type, int len)
+{
+	o->type = type;
+	o->len = len;
+	o->subtype = 0;
+}
+
+/*
+ * configuration and global data for a dummynet instance
+ *
+ * When a configuration is modified from userland, 'id' is incremented
+ * so we can use the value to check for stale pointers.
+ */
+struct dn_parms {
+	uint32_t	id;		/* configuration version */
+
+	/* defaults (sysctl-accessible) */
+	int	red_lookup_depth;
+	int	red_avg_pkt_size;
+	int	red_max_pkt_size;
+	int	hash_size;
+	int	max_hash_size;
+	long	byte_limit;		/* max queue sizes */
+	long	slot_limit;
+
+	int	io_fast;		/* enable fast (lockless send) path */
+	int	debug;			/* debug verbosity level */
+
+	/* timekeeping */
+	struct timeval prev_t;		/* last time dummynet_tick ran */
+	struct dn_heap	evheap;		/* scheduled events */
+
+	/* counters of objects -- used for reporting space */
+	int	schk_count;
+	int	si_count;
+	int	fsk_count;
+	int	queue_count;
+
+	/* ticks and other stuff */
+	uint64_t	curr_time;
+	/* flowsets and schedulers are in hash tables, with 'hash_size'
+	 * buckets. fshash is looked up at every packet arrival
+	 * so better be generous if we expect many entries.
+	 */
+	struct dn_ht	*fshash;
+	struct dn_ht	*schedhash;
+	/* list of flowsets without a scheduler -- use sch_chain */
+	struct dn_fsk_head	fsu;	/* list of unlinked flowsets */
+	struct dn_alg_head	schedlist;	/* list of algorithms */
+
+	/* Store the fs/sch to scan when draining. The value is the
+	 * bucket number of the hash table. Expire can be disabled
+	 * with net.inet.ip.dummynet.expire=0, or it happens every
+	 * expire ticks.
+	 */
+	int drain_fs;
+	int drain_sch;
+	uint32_t expire;
+	uint32_t expire_cycle;	/* tick count */
+
+	int init_done;		/* set once module init has completed */
+
+	/* if the upper half is busy doing something long,
+	 * can set the busy flag and we will enqueue packets in
+	 * a queue for later processing.
+	 */
+	int	busy;
+	struct	mq	pending;
+
+#ifdef _KERNEL
+	/*
+	 * This file is normally used in the kernel, unless we do
+	 * some userland tests, in which case we do not need a mtx.
+	 * uh_mtx arbitrates between system calls and also
+	 * protects fshash, schedhash and fsunlinked.
+	 * These structures are readonly for the lower half.
+	 * bh_mtx protects all other structures which may be
+	 * modified upon packet arrivals
+	 */
+#if defined( __linux__ ) || defined( _WIN32 )
+	spinlock_t uh_mtx;
+	spinlock_t bh_mtx;
+#else
+	struct mtx uh_mtx;
+	struct mtx bh_mtx;
+#endif
+
+#endif /* _KERNEL */
+};
+
+/*
+ * Delay line, contains all packets on output from a link.
+ * Every scheduler instance has one.
+ */
+struct delay_line {
+	struct dn_id oid;	/* oid.subtype == 1 while in the event heap */
+	struct dn_sch_inst *si;	/* owning scheduler instance */
+	struct mq mq;		/* packets in flight, ordered by due time */
+};
+
+/*
+ * The kernel side of a flowset. It is linked in a hash table
+ * of flowsets, and in a list of children of their parent scheduler.
+ * qht is either the queue or (if HAVE_MASK) a hash table queues.
+ * Note that the mask to use is the (flow_mask|sched_mask), which
+ * changes as we attach/detach schedulers. So we store it here.
+ *
+ * XXX If we want to add scheduler-specific parameters, we need to
+ * put them in external storage because the scheduler may not be
+ * available when the fsk is created.
+ */
+struct dn_fsk { /* kernel side of a flowset */
+	struct dn_fs fs;
+	SLIST_ENTRY(dn_fsk) fsk_next;	/* hash chain for fshash */
+
+	struct ipfw_flow_id fsk_mask;
+
+	/* qht is a hash table of queues, or just a single queue
+	 * a bit in fs.flags tells us which one
+	 */
+	struct dn_ht	*qht;
+	struct dn_schk *sched;		/* Sched we are linked to */
+	SLIST_ENTRY(dn_fsk) sch_chain;	/* list of fsk attached to sched */
+
+	/* bucket index used by drain routine to drain queues for this
+	 * flowset
+	 */
+	int drain_bucket;
+	/* Parameters related to RED / GRED */
+	/* original values are in dn_fs; these are the scaled
+	 * (fixed-point) working copies */
+	int w_q ;		/* queue weight (scaled) */
+	int max_th ;		/* maximum threshold for queue (scaled) */
+	int min_th ;		/* minimum threshold for queue (scaled) */
+	int max_p ;		/* maximum value for p_b (scaled) */
+
+	u_int c_1 ;		/* max_p/(max_th-min_th) (scaled) */
+	u_int c_2 ;		/* max_p*min_th/(max_th-min_th) (scaled) */
+	u_int c_3 ;		/* for GRED, (1-max_p)/max_th (scaled) */
+	u_int c_4 ;		/* for GRED, 1 - 2*max_p (scaled) */
+	u_int * w_q_lookup ;	/* lookup table for computing (1-w_q)^t */
+	u_int lookup_depth ;	/* depth of lookup table */
+	int lookup_step ;	/* granularity inside the lookup table */
+	int lookup_weight ;	/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+	int avg_pkt_size ;	/* medium packet size */
+	int max_pkt_size ;	/* max packet size */
+};
+
+/*
+ * A queue is created as a child of a flowset unless it belongs to
+ * a !MULTIQUEUE scheduler. It is normally in a hash table in the
+ * flowset. fs always points to the parent flowset.
+ * si normally points to the sch_inst, unless the flowset has been
+ * detached from the scheduler -- in this case si == NULL and we
+ * should not enqueue.
+ */
+struct dn_queue {
+ struct dn_flow ni; /* oid, flow_id, stats */
+ struct mq mq; /* packets queue */
+ struct dn_sch_inst *_si; /* owner scheduler instance; NULL once detached */
+ SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */
+ struct dn_fsk *fs; /* parent flowset. */
+
+ /* RED parameters */
+ int avg; /* average queue length est. (scaled) */
+ int count; /* arrivals since last RED drop */
+ int random; /* random value (scaled) */
+ uint64_t q_time; /* start of queue idle time */
+
+};
+
+/*
+ * The kernel side of a scheduler. Contains the userland config,
+ * a link, pointer to extra config arguments from command line,
+ * kernel flags, and a pointer to the scheduler methods.
+ * It is stored in a hash table, and holds a list of all
+ * flowsets and scheduler instances.
+ * XXX sch must be at the beginning, see schk_hash().
+ */
+struct dn_schk {
+ struct dn_sch sch; /* userland config; MUST stay first, see schk_hash() */
+ struct dn_alg *fp; /* Pointer to scheduler functions */
+ struct dn_link link; /* The link, embedded */
+ struct dn_profile *profile; /* delay profile, if any */
+ struct dn_id *cfg; /* extra config arguments */
+
+ SLIST_ENTRY(dn_schk) schk_next; /* hash chain for schedhash */
+
+ struct dn_fsk_head fsk_list; /* all fsk linked to me */
+ struct dn_fsk *fs; /* Flowset for !MULTIQUEUE */
+
+ /* bucket index used by the drain routine to drain the scheduler
+ * instance for this flowset.
+ */
+ int drain_bucket;
+
+ /* Hash table of all instances (through sch.sched_mask)
+ * or single instance if no mask. Always valid.
+ */
+ struct dn_ht *siht;
+};
+
+
+/*
+ * Scheduler instance.
+ * Contains variables and all queues relative to this instance.
+ * This struct is created at runtime.
+ */
+struct dn_sch_inst {
+ struct dn_flow ni; /* oid, flowid and stats */
+ SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */
+ struct delay_line dline; /* delay line for this instance, embedded */
+ struct dn_schk *sched; /* the template */
+ int kflags; /* DN_ACTIVE */
+
+ int64_t credit; /* bits I can transmit (more or less). */
+ uint64_t sched_time; /* time link was scheduled in ready_heap */
+ uint64_t idle_time; /* start of scheduler instance idle time */
+
+ /* q_count is the number of queues that this instance is using.
+ * The counter is incremented or decremented when
+ * a reference from the queue is created or deleted.
+ * It is used to make sure that a scheduler instance can be safely
+ * deleted by the drain routine. See notes below.
+ */
+ int q_count;
+
+};
+
+/*
+ * NOTE about object drain.
+ * The system will automatically (XXX check when) drain queues and
+ * scheduler instances when they are idle.
+ * A queue is idle when it has no packets; an instance is idle when
+ * it is not in the evheap heap, and the corresponding delay line is empty.
+ * A queue can be safely deleted when it is idle because the scheduler
+ * function xxx_free_queue() will remove any references to it.
+ * An instance can be only deleted when no queues reference it. To be sure
+ * of that, a counter (q_count) stores the number of queues that are pointing
+ * to the instance.
+ *
+ * XXX
+ * Order of scan:
+ * - take all flowset in a bucket for the flowset hash table
+ * - take all queues in a bucket for the flowset
+ * - increment the queue bucket
+ * - scan next flowset bucket
+ * Nothing is done if a bucket contains no entries.
+ *
+ * The same scheme is used for scheduler instances
+ */
+
+
+/* kernel-side flags. Linux has DN_DELETE in fcntl.h
+ */
+enum {
+ /* 1 and 2 are reserved for the SCAN flags */
+ DN_DESTROY = 0x0004, /* destroy */
+ DN_DELETE_FS = 0x0008, /* destroy flowset */
+ DN_DETACH = 0x0010, /* detach object from its list */
+ DN_ACTIVE = 0x0020, /* object is in evheap */
+ DN_F_DLINE = 0x0040, /* object is a delay line */
+ DN_DEL_SAFE = 0x0080, /* delete a queue only if no longer needed
+ * by scheduler */
+ DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
+};
+
+extern struct dn_parms dn_cfg;
+//VNET_DECLARE(struct dn_parms, _base_dn_cfg);
+//#define dn_cfg VNET(_base_dn_cfg)
+
+int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+void dummynet_task(void *context, int pending);
+void dn_reschedule(void);
+
+struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
+ struct ipfw_flow_id *);
+struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
+
+/*
+ * copy_range is a template for requests for ranges of pipes/queues/scheds.
+ * The number of ranges is variable and can be derived by o.len.
+ * As a default, we use a small number of entries so that the struct
+ * fits easily on the stack and is sufficient for most common requests.
+ */
+#define DEFAULT_RANGES 5
+struct copy_range {
+ struct dn_id o;
+ uint32_t r[ 2 * DEFAULT_RANGES ]; /* (lo, hi) pairs, bounds inclusive */
+};
+
+struct copy_args {
+ char **start; /* in/out: write cursor into the destination buffer */
+ char *end; /* first byte past the destination buffer */
+ int flags; /* DN_C_* selection of what to copy */
+ int type; /* DN_LINK / DN_SCH / DN_FS object class requested */
+ struct copy_range *extra; /* extra filtering */
+};
+
+struct sockopt;
+int ip_dummynet_compat(struct sockopt *sopt);
+int dummynet_get(struct sockopt *sopt, void **compat);
+int dn_c_copy_q (void *_ni, void *arg);
+int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq);
+int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq);
+int dn_compat_copy_queue(struct copy_args *a, void *_o);
+int dn_compat_copy_pipe(struct copy_args *a, void *_o);
+int copy_data_helper_compat(void *_o, void *_arg);
+int dn_compat_calc_size(void);
+int do_config(void *p, int l);
+
+/* functions to drain idle objects */
+void dn_drain_scheduler(void);
+void dn_drain_queue(void);
+
+#endif /* _IP_DN_PRIVATE_H */
diff --git a/freebsd/sys/netpfil/ipfw/ip_dummynet.c b/freebsd/sys/netpfil/ipfw/ip_dummynet.c
new file mode 100644
index 00000000..bd7e3c0b
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_dummynet.c
@@ -0,0 +1,2309 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Configuration and internal object management for dummynet.
+ */
+
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <rtems/bsd/sys/time.h>
+#include <sys/taskqueue.h>
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+/* which objects to copy */
+#define DN_C_LINK 0x01
+#define DN_C_SCH 0x02
+#define DN_C_FLOW 0x04
+#define DN_C_FS 0x08
+#define DN_C_QUEUE 0x10
+
+/* we use this argument in case of a schk_new */
+struct schk_new_arg {
+ struct dn_alg *fp;
+ struct dn_sch *sch;
+};
+
+/*---- callout hooks. ----*/
+static struct callout dn_timeout;
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
+
+/* Callout handler: defer all tick processing to the taskqueue. */
+static void
+dummynet(void * __unused unused)
+{
+
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
+
+/* Arm the one-tick callout that fires dummynet() above. */
+void
+dn_reschedule(void)
+{
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+/*----- end of callout hooks -----*/
+
+/* Return a scheduler descriptor given the type or name. */
+static struct dn_alg *
+find_sched_type(int type, char *name)
+{
+ struct dn_alg *d;
+
+ SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+ /* match by numeric type, or case-insensitively by name if given */
+ if (d->type == type || (name && !strcasecmp(d->name, name)))
+ return d;
+ }
+ return NULL; /* not found */
+}
+
+/*
+ * Bound *v into [lo, hi]. Note the asymmetry: a value below lo is
+ * bumped to dflt (itself first clamped into the range), while a value
+ * above hi is clamped to hi. The adjustment is logged when msg is
+ * non-NULL. Returns the (possibly updated) value of *v.
+ */
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+ int oldv = *v;
+ const char *op = NULL;
+ if (dflt < lo)
+ dflt = lo;
+ if (dflt > hi)
+ dflt = hi;
+ if (oldv < lo) {
+ *v = dflt;
+ op = "Bump";
+ } else if (oldv > hi) {
+ *v = hi;
+ op = "Clamp";
+ } else
+ return *v;
+ if (op && msg)
+ printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ return *v;
+}
+
+/*---- flow_id mask, hash and compare functions ---*/
+/*
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
+ */
+/* Apply mask to id in place; returns id for call chaining. */
+static struct ipfw_flow_id *
+flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id);
+
+ id->dst_port &= mask->dst_port;
+ id->src_port &= mask->src_port;
+ id->proto &= mask->proto;
+ id->extra &= mask->extra;
+ if (is_v6) {
+ APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+ APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+ id->flow_id6 &= mask->flow_id6;
+ } else {
+ id->dst_ip &= mask->dst_ip;
+ id->src_ip &= mask->src_ip;
+ }
+ return id;
+}
+
+/* computes an OR of two masks, result in dst and also returned */
+/* computes an OR of two masks, result in dst and also returned */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+ /* address family is taken from dst; src is assumed to match */
+ int is_v6 = IS_IP6_FLOW_ID(dst);
+
+ dst->dst_port |= src->dst_port;
+ dst->src_port |= src->src_port;
+ dst->proto |= src->proto;
+ dst->extra |= src->extra;
+ if (is_v6) {
+#define OR_MASK(_d, _s) \
+ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+ (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+ (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+ (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+ OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+ OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+ dst->flow_id6 |= src->flow_id6;
+ } else {
+ dst->dst_ip |= src->dst_ip;
+ dst->src_ip |= src->src_ip;
+ }
+ return dst;
+}
+
+/* Returns 1 if any field of the mask is set, 0 if the mask is all-zero. */
+static int
+nonzero_mask(struct ipfw_flow_id *m)
+{
+ if (m->dst_port || m->src_port || m->proto || m->extra)
+ return 1;
+ if (IS_IP6_FLOW_ID(m)) {
+ return
+ m->dst_ip6.__u6_addr.__u6_addr32[0] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[1] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[2] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[3] ||
+ m->src_ip6.__u6_addr.__u6_addr32[0] ||
+ m->src_ip6.__u6_addr.__u6_addr32[1] ||
+ m->src_ip6.__u6_addr.__u6_addr32[2] ||
+ m->src_ip6.__u6_addr.__u6_addr32[3] ||
+ m->flow_id6;
+ } else {
+ return m->dst_ip || m->src_ip;
+ }
+}
+
+/* XXX we may want a better hash function */
+/* Fold a flow id into a 32-bit hash value (shift-and-xor mixing).
+ * XXX we may want a better hash function.
+ */
+static uint32_t
+flow_id_hash(struct ipfw_flow_id *id)
+{
+ uint32_t i;
+
+ if (IS_IP6_FLOW_ID(id)) {
+ /* mix all 4 words of both v6 addresses */
+ uint32_t *d = (uint32_t *)&id->dst_ip6;
+ uint32_t *s = (uint32_t *)&id->src_ip6;
+ i = (d[0] ) ^ (d[1]) ^
+ (d[2] ) ^ (d[3]) ^
+ (d[0] >> 15) ^ (d[1] >> 15) ^
+ (d[2] >> 15) ^ (d[3] >> 15) ^
+ (s[0] << 1) ^ (s[1] << 1) ^
+ (s[2] << 1) ^ (s[3] << 1) ^
+ (s[0] << 16) ^ (s[1] << 16) ^
+ (s[2] << 16) ^ (s[3] << 16) ^
+ (id->dst_port << 1) ^ (id->src_port) ^
+ (id->extra) ^
+ (id->proto ) ^ (id->flow_id6);
+ } else {
+ i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
+ (id->src_ip << 1) ^ (id->src_ip >> 16) ^
+ (id->extra) ^
+ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
+ }
+ return i;
+}
+
+/* Like bcmp, returns 0 if ids match, 1 otherwise. */
+/* Like bcmp, returns 0 if ids match, 1 otherwise. */
+static int
+flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id1);
+
+ if (!is_v6) {
+ if (IS_IP6_FLOW_ID(id2))
+ return 1; /* different address families */
+
+ return (id1->dst_ip == id2->dst_ip &&
+ id1->src_ip == id2->src_ip &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra) ? 0 : 1;
+ }
+ /* the ipv6 case
+ * NOTE(review): id2's family is not re-checked here; presumably
+ * hash-bucket collisions between families are acceptable because
+ * the address words are compared anyway -- confirm.
+ */
+ return (
+ !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
+ !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra &&
+ id1->flow_id6 == id2->flow_id6) ? 0 : 1;
+}
+/*--------- end of flow-id mask, hash and compare ---------*/
+
+/*--- support functions for the qht hashtable ----
+ * Entries are hashed by flow-id
+ */
+static uint32_t
+q_hash(uintptr_t key, int flags, void *arg)
+{
+ /* compute the hash slot from the flow id; the key is either the
+ * queue object itself or a bare flow id, depending on flags */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_queue *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+
+ return flow_id_hash(id);
+}
+
+/* Match callback for qht: compare a stored queue's flow id with the key. */
+static int
+q_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *o = (struct dn_queue *)obj;
+ struct ipfw_flow_id *id2;
+
+ if (flags & DNHT_KEY_IS_OBJ) {
+ /* compare pointers */
+ id2 = &((struct dn_queue *)key)->ni.fid;
+ } else {
+ id2 = (struct ipfw_flow_id *)key;
+ }
+ return (0 == flow_id_cmp(&o->ni.fid, id2));
+}
+
+/*
+ * create a new queue instance for the given 'key'.
+ */
+static void *
+q_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *q, *template = arg;
+ struct dn_fsk *fs = template->fs;
+ /* room for the queue plus the scheduler-private per-queue data */
+ int size = sizeof(*q) + fs->sched->fp->q_datalen;
+
+ q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (q == NULL) {
+ D("no memory for new queue");
+ return NULL;
+ }
+
+ set_oid(&q->ni.oid, DN_QUEUE, size);
+ if (fs->fs.flags & DN_QHT_HASH)
+ q->ni.fid = *(struct ipfw_flow_id *)key;
+ q->fs = fs;
+ q->_si = template->_si;
+ q->_si->q_count++; /* take a reference on the owner instance */
+
+ if (fs->sched->fp->new_queue)
+ fs->sched->fp->new_queue(q);
+ dn_cfg.queue_count++;
+ return q;
+}
+
+/*
+ * Notify schedulers that a queue is going away.
+ * If (flags & DN_DESTROY), also free the packets.
+ * The version for callbacks is called q_delete_cb().
+ */
+static void
+dn_delete_queue(struct dn_queue *q, int flags)
+{
+ struct dn_fsk *fs = q->fs;
+
+ // D("fs %p si %p\n", fs, q->_si);
+ /* notify the parent scheduler that the queue is going away */
+ if (fs && fs->sched->fp->free_queue)
+ fs->sched->fp->free_queue(q);
+ /* drop our reference on the owner instance and detach from it */
+ q->_si->q_count--;
+ q->_si = NULL;
+ if (flags & DN_DESTROY) {
+ if (q->mq.head)
+ dn_free_pkts(q->mq.head);
+ bzero(q, sizeof(*q)); // safety
+ free(q, M_DUMMYNET);
+ dn_cfg.queue_count--;
+ }
+}
+
+/* Hash-table scan callback wrapper around dn_delete_queue(). */
+static int
+q_delete_cb(void *q, void *arg)
+{
+ int flags = (int)(uintptr_t)arg;
+ dn_delete_queue(q, flags);
+ /* only unlink the entry from the table when it was freed */
+ return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
+}
+
+/*
+ * calls dn_delete_queue/q_delete_cb on all queues,
+ * which notifies the parent scheduler and possibly drains packets.
+ * flags & DN_DESTROY: drains queues and destroy qht;
+ */
+static void
+qht_delete(struct dn_fsk *fs, int flags)
+{
+ ND("fs %d start flags %d qht %p",
+ fs->fs.fs_nr, flags, fs->qht);
+ if (!fs->qht)
+ return;
+ /* qht is either a hash table of queues or a single queue,
+ * depending on DN_QHT_HASH (see struct dn_fsk) */
+ if (fs->fs.flags & DN_QHT_HASH) {
+ dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
+ if (flags & DN_DESTROY) {
+ dn_ht_free(fs->qht, 0);
+ fs->qht = NULL;
+ }
+ } else {
+ dn_delete_queue((struct dn_queue *)(fs->qht), flags);
+ if (flags & DN_DESTROY)
+ fs->qht = NULL;
+ }
+}
+
+/*
+ * Find and possibly create the queue for a MULTIQUEUE scheduler.
+ * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
+ */
+struct dn_queue *
+ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
+ struct ipfw_flow_id *id)
+{
+ /* template carries fs/si for q_new() when the lookup inserts */
+ struct dn_queue template;
+
+ template._si = si;
+ template.fs = fs;
+
+ if (fs->fs.flags & DN_QHT_HASH) {
+ struct ipfw_flow_id masked_id;
+ if (fs->qht == NULL) {
+ /* lazily create the per-flowset queue hash table */
+ fs->qht = dn_ht_init(NULL, fs->fs.buckets,
+ offsetof(struct dn_queue, q_next),
+ q_hash, q_match, q_new);
+ if (fs->qht == NULL)
+ return NULL;
+ }
+ /* mask a local copy so the caller's id is preserved */
+ masked_id = *id;
+ flow_id_mask(&fs->fsk_mask, &masked_id);
+ return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
+ DNHT_INSERT, &template);
+ } else {
+ if (fs->qht == NULL)
+ fs->qht = q_new(0, 0, &template);
+ return (struct dn_queue *)fs->qht;
+ }
+}
+/*--- end of queue hash table ---*/
+
+/*--- support functions for the sch_inst hashtable ----
+ *
+ * These are hashed by flow-id
+ */
+static uint32_t
+si_hash(uintptr_t key, int flags, void *arg)
+{
+ /* compute the hash slot from the flow id; key is either the
+ * instance object or a bare flow id, depending on flags */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+
+ return flow_id_hash(id);
+}
+
+/* Match callback for siht: compare an instance's flow id with the key. */
+static int
+si_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_sch_inst *o = obj;
+ struct ipfw_flow_id *id2;
+
+ id2 = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+ return flow_id_cmp(&o->ni.fid, id2) == 0;
+}
+
+/*
+ * create a new instance for the given 'key'
+ * Allocate memory for instance, delay line and scheduler private data.
+ */
+static void *
+si_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_schk *s = arg;
+ struct dn_sch_inst *si;
+ /* room for the instance plus scheduler-private per-instance data */
+ int l = sizeof(*si) + s->fp->si_datalen;
+
+ si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si == NULL)
+ goto error;
+
+ /* Set length only for the part passed up to userland. */
+ set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
+ set_oid(&(si->dline.oid), DN_DELAY_LINE,
+ sizeof(struct delay_line));
+ /* mark si and dline as outside the event queue */
+ si->ni.oid.id = si->dline.oid.id = -1;
+
+ si->sched = s;
+ si->dline.si = si; /* back pointer from delay line to instance */
+
+ if (s->fp->new_sched && s->fp->new_sched(si)) {
+ D("new_sched error");
+ goto error;
+ }
+ if (s->sch.flags & DN_HAVE_MASK)
+ si->ni.fid = *(struct ipfw_flow_id *)key;
+
+ dn_cfg.si_count++;
+ return si;
+
+error:
+ if (si) {
+ bzero(si, sizeof(*si)); // safety
+ free(si, M_DUMMYNET);
+ }
+ return NULL;
+}
+
+/*
+ * Callback from siht to delete all scheduler instances. Remove
+ * si and delay line from the system heap, destroy all queues.
+ * We assume that all flowset have been notified and do not
+ * point to us anymore.
+ */
+static int
+si_destroy(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+ struct dn_schk *s = si->sched;
+ struct delay_line *dl = &si->dline;
+
+ if (dl->oid.subtype) /* remove delay line from event heap */
+ heap_extract(&dn_cfg.evheap, dl);
+ dn_free_pkts(dl->mq.head); /* drain delay line */
+ if (si->kflags & DN_ACTIVE) /* remove si from event heap */
+ heap_extract(&dn_cfg.evheap, si);
+ /* give the scheduler a chance to release its private state */
+ if (s->fp->free_sched)
+ s->fp->free_sched(si);
+ bzero(si, sizeof(*si)); /* safety */
+ free(si, M_DUMMYNET);
+ dn_cfg.si_count--;
+ return DNHT_SCAN_DEL;
+}
+
+/*
+ * Find the scheduler instance for this packet. If we need to apply
+ * a mask, do on a local copy of the flow_id to preserve the original.
+ * Assume siht is always initialized if we have a mask.
+ */
+struct dn_sch_inst *
+ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
+{
+
+ if (s->sch.flags & DN_HAVE_MASK) {
+ /* mask a local copy so the caller's id is preserved */
+ struct ipfw_flow_id id_t = *id;
+ flow_id_mask(&s->sch.sched_mask, &id_t);
+ return dn_ht_find(s->siht, (uintptr_t)&id_t,
+ DNHT_INSERT, s);
+ }
+ /* no mask: siht degenerates to a single instance, created lazily */
+ if (!s->siht)
+ s->siht = si_new(0, 0, s);
+ return (struct dn_sch_inst *)s->siht;
+}
+
+/* callback to flush credit for the scheduler instance */
+/* callback to flush credit for the scheduler instance */
+static int
+si_reset_credit(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+ struct dn_link *p = &si->sched->link;
+
+ /* refill to the burst size, plus one tick worth if io_fast */
+ si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
+ return 0;
+}
+
+/* Reset credit on every instance of a scheduler (one or hashed many). */
+static void
+schk_reset_credit(struct dn_schk *s)
+{
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, si_reset_credit, NULL);
+ else if (s->siht)
+ si_reset_credit(s->siht, NULL);
+}
+/*---- end of sch_inst hashtable ---------------------*/
+
+/*-------------------------------------------------------
+ * flowset hash (fshash) support. Entries are hashed by fs_nr.
+ * New allocations are put in the fsunlinked list, from which
+ * they are removed when they point to a specific scheduler.
+ */
+/* Hash a flowset by its number (key may be the number or the object). */
+static uint32_t
+fsk_hash(uintptr_t key, int flags, void *arg)
+{
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
+
+ return ( (i>>8)^(i>>4)^i );
+}
+
+/* Match callback for fshash: compare flowset numbers. */
+static int
+fsk_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs = obj;
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
+
+ return (fs->fs.fs_nr == i);
+}
+
+/* Allocate a new flowset; it starts on the unlinked (dn_cfg.fsu) list
+ * until it is attached to a scheduler.
+ */
+static void *
+fsk_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs;
+
+ fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (fs) {
+ set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
+ dn_cfg.fsk_count++;
+ fs->drain_bucket = 0;
+ SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+ }
+ return fs;
+}
+
+/*
+ * detach flowset from its current scheduler. Flags as follows:
+ * DN_DETACH removes from the fsk_list
+ * DN_DESTROY deletes individual queues
+ * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
+ */
+/*
+ * Detach a flowset from its current scheduler. Flags as follows:
+ * DN_DETACH removes from the fsk_list
+ * DN_DESTROY deletes individual queues
+ * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
+ * DN_DELETE_FS implies DN_DESTROY.
+ */
+static void
+fsk_detach(struct dn_fsk *fs, int flags)
+{
+	if (flags & DN_DELETE_FS)
+		flags |= DN_DESTROY;
+	ND("fs %d from sched %d flags %s %s %s",
+		fs->fs.fs_nr, fs->fs.sched_nr,
+		(flags & DN_DELETE_FS) ? "DEL_FS":"",
+		(flags & DN_DESTROY) ? "DEL":"",
+		(flags & DN_DETACH) ? "DET":"");
+	if (flags & DN_DETACH) { /* detach from the list */
+		struct dn_fsk_head *h;
+		h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
+		SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
+	}
+	/* Free the RED parameters, they will be recomputed on
+	 * subsequent attach if needed.
+	 */
+	if (fs->w_q_lookup)
+		free(fs->w_q_lookup, M_DUMMYNET);
+	fs->w_q_lookup = NULL;
+	qht_delete(fs, flags);
+	/* let the scheduler release its per-flowset state */
+	if (fs->sched && fs->sched->fp->free_fsk)
+		fs->sched->fp->free_fsk(fs);
+	fs->sched = NULL;
+	if (flags & DN_DELETE_FS) {
+		/* was sizeof(fs), which only cleared pointer-size bytes;
+		 * zero the whole struct before freeing */
+		bzero(fs, sizeof(*fs));	/* safety */
+		free(fs, M_DUMMYNET);
+		dn_cfg.fsk_count--;
+	} else {
+		/* not deleted: park it on the unlinked list */
+		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+	}
+}
+
+/*
+ * Detach or destroy all flowsets in a list.
+ * flags specifies what to do:
+ * DN_DESTROY: flush all queues
+ * DN_DELETE_FS: DN_DESTROY + destroy flowset
+ * DN_DELETE_FS implies DN_DESTROY
+ */
+static void
+fsk_detach_list(struct dn_fsk_head *h, int flags)
+{
+ struct dn_fsk *fs;
+ int n = 0; /* only for stats */
+
+ ND("head %p flags %x", h, flags);
+ /* pop entries one by one; fsk_detach() may free or relink each */
+ while ((fs = SLIST_FIRST(h))) {
+ SLIST_REMOVE_HEAD(h, sch_chain);
+ n++;
+ fsk_detach(fs, flags);
+ }
+ ND("done %d flowsets", n);
+}
+
+/*
+ * called on 'queue X delete' -- removes the flowset from fshash,
+ * deletes all queues for the flowset, and removes the flowset.
+ */
+static int
+delete_fs(int i, int locked)
+{
+ struct dn_fsk *fs;
+ int err = 0;
+
+ /* 'locked' tells whether the caller already holds the BH lock */
+ if (!locked)
+ DN_BH_WLOCK();
+ fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
+ ND("fs %d found %p", i, fs);
+ if (fs) {
+ fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
+ err = 0;
+ } else
+ err = EINVAL;
+ if (!locked)
+ DN_BH_WUNLOCK();
+ return err;
+}
+
+/*----- end of flowset hashtable support -------------*/
+
+/*------------------------------------------------------------
+ * Scheduler hash. When searching by index we pass sched_nr,
+ * otherwise we pass struct dn_sch * which is the first field in
+ * struct dn_schk so we can cast between the two. We use this trick
+ * because in the create phase only the struct dn_sch part is
+ * available (but it should be fixed).
+ */
+/* Hash a scheduler by its number (key may be the number or the object). */
+static uint32_t
+schk_hash(uintptr_t key, int flags, void *_arg)
+{
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_schk *)key)->sch.sched_nr;
+ return ( (i>>8)^(i>>4)^i );
+}
+
+/* Match callback for schedhash: compare scheduler numbers. */
+static int
+schk_match(void *obj, uintptr_t key, int flags, void *_arg)
+{
+ struct dn_schk *s = (struct dn_schk *)obj;
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_schk *)key)->sch.sched_nr;
+ return (s->sch.sched_nr == i);
+}
+
+/*
+ * Create the entry and initialize it with the sched hash if needed.
+ * Leave s->fp unset so we can tell whether a dn_ht_find() returns
+ * a new object or a previously existing one.
+ */
+static void *
+schk_new(uintptr_t key, int flags, void *arg)
+{
+ struct schk_new_arg *a = arg;
+ struct dn_schk *s;
+ /* room for the schk plus scheduler-private config data */
+ int l = sizeof(*s) +a->fp->schk_datalen;
+
+ s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s == NULL)
+ return NULL;
+ set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
+ s->sch = *a->sch; // copy initial values
+ s->link.link_nr = s->sch.sched_nr;
+ SLIST_INIT(&s->fsk_list);
+ /* initialize the hash table or create the single instance */
+ s->fp = a->fp; /* si_new needs this */
+ s->drain_bucket = 0;
+ if (s->sch.flags & DN_HAVE_MASK) {
+ s->siht = dn_ht_init(NULL, s->sch.buckets,
+ offsetof(struct dn_sch_inst, si_next),
+ si_hash, si_match, si_new);
+ if (s->siht == NULL) {
+ free(s, M_DUMMYNET);
+ return NULL;
+ }
+ }
+ s->fp = NULL; /* mark as a new scheduler */
+ dn_cfg.schk_count++;
+ return s;
+}
+
+/*
+ * Callback for sched delete. Notify all attached flowsets to
+ * detach from the scheduler, destroy the internal flowset, and
+ * all instances. The scheduler goes away too.
+ * arg is 0 (only detach flowsets and destroy instances)
+ * DN_DESTROY (detach & delete queues, delete schk)
+ * or DN_DELETE_FS (delete queues and flowsets, delete schk)
+ */
+static int
+schk_delete_cb(void *obj, void *arg)
+{
+ struct dn_schk *s = obj;
+#if 0
+ int a = (int)arg;
+ ND("sched %d arg %s%s",
+ s->sch.sched_nr,
+ a&DN_DESTROY ? "DEL ":"",
+ a&DN_DELETE_FS ? "DEL_FS":"");
+#endif
+ /* first unhook all flowsets (non-NULL arg also drains their queues) */
+ fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
+ /* no more flowset pointing to us now */
+ if (s->sch.flags & DN_HAVE_MASK) {
+ dn_ht_scan(s->siht, si_destroy, NULL);
+ dn_ht_free(s->siht, 0);
+ } else if (s->siht)
+ si_destroy(s->siht, NULL);
+ if (s->profile) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ s->siht = NULL;
+ /* let the scheduler release its global private state */
+ if (s->fp->destroy)
+ s->fp->destroy(s);
+ bzero(s, sizeof(*s)); // safety
+ free(obj, M_DUMMYNET);
+ dn_cfg.schk_count--;
+ return DNHT_SCAN_DEL;
+}
+
+/*
+ * called on a 'sched X delete' command. Deletes a single scheduler.
+ * This is done by removing from the schedhash, unlinking all
+ * flowsets and deleting their traffic.
+ */
+static int
+delete_schk(int i)
+{
+ struct dn_schk *s;
+
+ s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+ ND("%d %p", i, s);
+ if (!s)
+ return EINVAL;
+ /* the internal flowset is numbered i + DN_MAX_ID */
+ delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
+ /* then detach flowsets, delete traffic */
+ schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
+ return 0;
+}
+/*--- end of schk hashtable support ---*/
+
+/*
+ * Copy object _o (prefixed by a struct dn_id) into the buffer at *start,
+ * bounded by 'end'. Advances *start and returns 0 on success, 1 if the
+ * buffer is too small or the header is invalid. Unit conversions for
+ * userland are applied to the copy, never to the kernel object.
+ */
+static int
+copy_obj(char **start, char *end, void *_o, const char *msg, int i)
+{
+ struct dn_id *o = _o;
+ int have = end - *start;
+
+ if (have < o->len || o->len == 0 || o->type == 0) {
+ D("(WARN) type %d %s %d have %d need %d",
+ o->type, msg, i, have, o->len);
+ return 1;
+ }
+ ND("type %d %s %d len %d", o->type, msg, i, o->len);
+ bcopy(_o, *start, o->len);
+ if (o->type == DN_LINK) {
+ /* Adjust burst parameter for link */
+ struct dn_link *l = (struct dn_link *)*start;
+ l->burst = div64(l->burst, 8 * hz);
+ l->delay = l->delay * 1000 / hz; /* ticks -> ms */
+ } else if (o->type == DN_SCH) {
+ /* Set id->id to the number of instances */
+ struct dn_schk *s = _o;
+ struct dn_id *id = (struct dn_id *)(*start);
+ id->id = (s->sch.flags & DN_HAVE_MASK) ?
+ dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
+ }
+ *start += o->len;
+ return 0;
+}
+
+/* Specific function to copy a queue.
+ * Copies only the user-visible part of a queue (which is in
+ * a struct dn_flow), and sets len accordingly.
+ */
+static int
+copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
+{
+ struct dn_id *o = _o;
+ int have = end - *start;
+ int len = sizeof(struct dn_flow); /* see above comment */
+
+ if (have < len || o->len == 0 || o->type != DN_QUEUE) {
+ D("ERROR type %d %s %d have %d need %d",
+ o->type, msg, i, have, len);
+ return 1;
+ }
+ ND("type %d %s %d len %d", o->type, msg, i, len);
+ bcopy(_o, *start, len);
+ /* fix up the copied header so userland sees the truncated length */
+ ((struct dn_id*)(*start))->len = len;
+ *start += len;
+ return 0;
+}
+
+/* Scan callback: copy one queue up to userland, relabelled as DN_FLOW. */
+static int
+copy_q_cb(void *obj, void *arg)
+{
+ struct dn_queue *q = obj;
+ struct copy_args *a = arg;
+ /* ni points at the copy's destination; only valid if the copy succeeds */
+ struct dn_flow *ni = (struct dn_flow *)(*a->start);
+ if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
+ return DNHT_SCAN_END;
+ ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
+ ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
+ return 0;
+}
+
+/* Copy all queues of a flowset (one or a hash table, per DN_QHT_HASH). */
+static int
+copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
+{
+ if (!fs->qht)
+ return 0;
+ if (fs->fs.flags & DN_QHT_HASH)
+ dn_ht_scan(fs->qht, copy_q_cb, a);
+ else
+ copy_q_cb(fs->qht, a);
+ return 0;
+}
+
+/*
+ * This routine only copies the initial part of a profile ? XXX
+ */
+static int
+copy_profile(struct copy_args *a, struct dn_profile *p)
+{
+ int have = a->end - *a->start;
+ /* XXX here we check for max length */
+ /* copy only the fixed header, not the full samples array */
+ int profile_len = sizeof(struct dn_profile) -
+ ED_MAX_SAMPLES_NO*sizeof(int);
+
+ if (p == NULL)
+ return 0;
+ if (have < profile_len) {
+ D("error have %d need %d", have, profile_len);
+ return 1;
+ }
+ bcopy(p, *a->start, profile_len);
+ ((struct dn_id *)(*a->start))->len = profile_len;
+ *a->start += profile_len;
+ return 0;
+}
+
+static int
+copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
+{
+ /* ufs points at the copy's destination; only valid if the copy succeeds */
+ struct dn_fs *ufs = (struct dn_fs *)(*a->start);
+ if (!fs)
+ return 0;
+ ND("flowset %d", fs->fs.fs_nr);
+ if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
+ return DNHT_SCAN_END;
+ /* report the number of queues in oid.id */
+ ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
+ dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
+ if (flags) { /* copy queues */
+ copy_q(a, fs, 0);
+ }
+ return 0;
+}
+
+/* Scan callback: copy one scheduler instance up, relabelled as DN_FLOW. */
+static int
+copy_si_cb(void *obj, void *arg)
+{
+ struct dn_sch_inst *si = obj;
+ struct copy_args *a = arg;
+ /* ni points at the copy's destination; only valid if the copy succeeds */
+ struct dn_flow *ni = (struct dn_flow *)(*a->start);
+ if (copy_obj(a->start, a->end, &si->ni, "inst",
+ si->sched->sch.sched_nr))
+ return DNHT_SCAN_END;
+ ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
+ ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
+ return 0;
+}
+
+/* Copy all instances of a scheduler (one or a hash table, per mask). */
+static int
+copy_si(struct copy_args *a, struct dn_schk *s, int flags)
+{
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, copy_si_cb, a);
+ else if (s->siht)
+ copy_si_cb(s->siht, a);
+ return 0;
+}
+
+/*
+ * compute a list of children of a scheduler and copy up
+ */
+static int
+copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
+{
+ struct dn_fsk *fs;
+ struct dn_id *o;
+ uint32_t *p;
+
+ /* first pass: count user-visible flowsets to size the record */
+ int n = 0, space = sizeof(*o);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ n++;
+ }
+ space += n * sizeof(uint32_t);
+ DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
+ if (a->end - *(a->start) < space)
+ return DNHT_SCAN_END;
+ o = (struct dn_id *)(*(a->start));
+ o->len = space;
+ *a->start += o->len;
+ o->type = DN_TEXT;
+ /* second pass: emit the flowset numbers right after the header */
+ p = (uint32_t *)(o+1);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ *p++ = fs->fs.fs_nr;
+ return 0;
+}
+
+/*
+ * Generic hash-scan callback used by the "show" requests: copies the
+ * object (_o) to userland if its number falls inside one of the
+ * requested ranges in a->extra. Ranges are (lo, hi) pairs, inclusive.
+ */
+static int
+copy_data_helper(void *_o, void *_arg)
+{
+ struct copy_args *a = _arg;
+ uint32_t *r = a->extra->r; /* start of first range */
+ uint32_t *lim; /* first invalid pointer */
+ int n;
+
+ lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);
+
+ if (a->type == DN_LINK || a->type == DN_SCH) {
+ /* pipe|sched show, we receive a dn_schk */
+ struct dn_schk *s = _o;
+
+ n = s->sch.sched_nr;
+ if (a->type == DN_SCH && n >= DN_MAX_ID)
+ return 0; /* not a scheduler */
+ if (a->type == DN_LINK && n <= DN_MAX_ID)
+ return 0; /* not a pipe */
+
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r += 2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ /* Found a valid entry, copy and we are done */
+ if (a->flags & DN_C_LINK) {
+ if (copy_obj(a->start, a->end,
+ &s->link, "link", n))
+ return DNHT_SCAN_END;
+ if (copy_profile(a, s->profile))
+ return DNHT_SCAN_END;
+ if (copy_flowset(a, s->fs, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_SCH) {
+ if (copy_obj(a->start, a->end,
+ &s->sch, "sched", n))
+ return DNHT_SCAN_END;
+ /* list all attached flowsets */
+ if (copy_fsk_list(a, s, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_FLOW)
+ copy_si(a, s, 0);
+ break;
+ }
+ } else if (a->type == DN_FS) {
+ /* queue show, skip internal flowsets */
+ struct dn_fsk *fs = _o;
+
+ n = fs->fs.fs_nr;
+ if (n >= DN_MAX_ID)
+ return 0;
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r += 2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ if (copy_flowset(a, fs, 0))
+ return DNHT_SCAN_END;
+ copy_q(a, fs, 0);
+ break; /* we are done */
+ }
+ }
+ return 0;
+}
+
+/* Look up scheduler number 'i' in the scheduler hash; NULL if absent. */
+static inline struct dn_schk *
+locate_scheduler(int i)
+{
+	struct dn_schk *s;
+
+	s = dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
+	return (s);
+}
+
+/*
+ * red parameters are in fixed point arithmetic.
+ * Compute the kernel-side RED/GRED state of flowset 'fs' from the
+ * (scaled) user parameters in fs->fs: the drop-probability slope
+ * coefficients c_1..c_4, the scaled min/max thresholds, and the
+ * lookup table of (1 - w_q)^t used to decay the average queue
+ * length across idle periods.
+ * Returns 0 on success; EINVAL/ENOSPC on bad configuration or
+ * allocation failure, in which case the RED flags are cleared.
+ */
+static int
+config_red(struct dn_fsk *fs)
+{
+	int64_t s, idle, weight, w0;
+	int t, i;
+
+	fs->w_q = fs->fs.w_q;
+	fs->max_p = fs->fs.max_p;
+	ND("called");
+	/* Doing stuff that was in userland */
+	/* s = scaled transmission time of an average packet;
+	 * 0 when the link has no bandwidth limit */
+	i = fs->sched->link.bandwidth;
+	s = (i <= 0) ? 0 :
+		hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
+
+	idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
+	fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
+	/* fs->lookup_step not scaled, */
+	if (!fs->lookup_step)
+		fs->lookup_step = 1;
+	w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled
+
+	/* lookup_weight = (1 - w_q)^lookup_step, scaled */
+	for (t = fs->lookup_step; t > 1; --t)
+		weight = SCALE_MUL(weight, w0);
+	fs->lookup_weight = (int)(weight); // scaled
+
+	/* Now doing stuff that was in kerneland */
+	fs->min_th = SCALE(fs->fs.min_th);
+	fs->max_th = SCALE(fs->fs.max_th);
+
+	fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+	fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
+
+	if (fs->fs.flags & DN_IS_GENTLE_RED) {
+		fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
+		fs->c_4 = SCALE(1) - 2 * fs->max_p;
+	}
+
+	/* If the lookup table already exist, free and create it again. */
+	if (fs->w_q_lookup) {
+		free(fs->w_q_lookup, M_DUMMYNET);
+		fs->w_q_lookup = NULL;
+	}
+	if (dn_cfg.red_lookup_depth == 0) {
+		printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
+		    "must be > 0\n");
+		fs->fs.flags &= ~DN_IS_RED;
+		fs->fs.flags &= ~DN_IS_GENTLE_RED;
+		return (EINVAL);
+	}
+	fs->lookup_depth = dn_cfg.red_lookup_depth;
+	fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
+	    M_DUMMYNET, M_NOWAIT);
+	if (fs->w_q_lookup == NULL) {
+		printf("dummynet: sorry, cannot allocate red lookup table\n");
+		fs->fs.flags &= ~DN_IS_RED;
+		fs->fs.flags &= ~DN_IS_GENTLE_RED;
+		return(ENOSPC);
+	}
+
+	/* Fill the lookup table with (1 - w_q)^x */
+	fs->w_q_lookup[0] = SCALE(1) - fs->w_q;
+
+	for (i = 1; i < fs->lookup_depth; i++)
+		fs->w_q_lookup[i] =
+		    SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
+
+	/* sanitize global packet-size defaults before caching them */
+	if (dn_cfg.red_avg_pkt_size < 1)
+		dn_cfg.red_avg_pkt_size = 512;
+	fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
+	if (dn_cfg.red_max_pkt_size < 1)
+		dn_cfg.red_max_pkt_size = 1500;
+	fs->max_pkt_size = dn_cfg.red_max_pkt_size;
+	ND("exit");
+	return 0;
+}
+
+/*
+ * Scan every flowset attached to scheduler 's' and recompute the
+ * RED parameters of the ones that have RED enabled (needed e.g.
+ * after the link bandwidth changes).
+ */
+static void
+update_red(struct dn_schk *s)
+{
+	struct dn_fsk *fs;
+
+	SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
+		if (fs == NULL || (fs->fs.flags & DN_IS_RED) == 0)
+			continue;
+		config_red(fs);
+	}
+}
+
+/* attach flowset to scheduler s, possibly requeue.
+ * Moves 'fs' off the unlinked list (dn_cfg.fsu), links it to 's',
+ * computes the effective classifier mask (the flowset mask, OR'ed
+ * with the scheduler mask when the scheduler has DN_HAVE_MASK) and
+ * sets/clears DN_QHT_HASH accordingly. Existing queues are dropped
+ * instead of being requeued (see XXX below). Re-runs config_red()
+ * when RED is enabled.
+ */
+static void
+fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
+{
+	ND("remove fs %d from fsunlinked, link to sched %d",
+		fs->fs.fs_nr, s->sch.sched_nr);
+	SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
+	fs->sched = s;
+	SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
+	if (s->fp->new_fsk)
+		s->fp->new_fsk(fs);
+	/* XXX compute fsk_mask */
+	fs->fsk_mask = fs->fs.flow_mask;
+	if (fs->sched->sch.flags & DN_HAVE_MASK)
+		flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
+	if (fs->qht) {
+		/*
+		 * we must drain qht according to the old
+		 * type, and reinsert according to the new one.
+		 * The requeue is complex -- in general we need to
+		 * reclassify every single packet.
+		 * For the time being, let's hope qht is never set
+		 * when we reach this point.
+		 */
+		D("XXX TODO requeue from fs %d to sch %d",
+			fs->fs.fs_nr, s->sch.sched_nr);
+		fs->qht = NULL;
+	}
+	/* set the new type for qht */
+	if (nonzero_mask(&fs->fsk_mask))
+		fs->fs.flags |= DN_QHT_HASH;
+	else
+		fs->fs.flags &= ~DN_QHT_HASH;
+
+	/* XXX config_red() can fail... */
+	if (fs->fs.flags & DN_IS_RED)
+		config_red(fs);
+}
+
+/* update all flowsets which may refer to this scheduler.
+ * Walks the unlinked-flowset list and attaches to 's' every flowset
+ * whose sched_nr matches; safe iteration since fsk_attach() removes
+ * the entry from the list.
+ */
+static void
+update_fs(struct dn_schk *s)
+{
+	struct dn_fsk *fs, *tmp;
+
+	SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
+		if (s->sch.sched_nr != fs->fs.sched_nr) {
+			D("fs %d for sch %d not %d still unlinked",
+				fs->fs.fs_nr, fs->fs.sched_nr,
+				s->sch.sched_nr);
+			continue;
+		}
+		fsk_attach(fs, s);
+	}
+}
+
+/*
+ * Configuration -- to preserve backward compatibility we use
+ * the following scheme (N is 65536)
+ * NUMBER SCHED LINK FLOWSET
+ * 1 .. N-1 (1)WFQ (2)WFQ (3)queue
+ * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1
+ * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1
+ *
+ * "pipe i config" configures #1, #2 and #3
+ * "sched i config" configures #1 and possibly #6
+ * "queue i config" configures #3
+ * #1 is configured with 'pipe i config' or 'sched i config'
+ * #2 is configured with 'pipe i config', and created if not
+ * existing with 'sched i config'
+ * #3 is configured with 'queue i config'
+ * #4 is automatically configured after #1, can only be FIFO
+ * #5 is automatically configured after #2
+ * #6 is automatically created when #1 is !MULTIQUEUE,
+ * and can be updated.
+ * #7 is automatically configured after #2
+ */
+
+/*
+ * configure a link (and its FIFO instance).
+ * Validates the request, converts user units (delay ms -> ticks,
+ * burst bytes -> bits*hz) and applies the parameters to both the
+ * base scheduler 'i' and its FIFO companion 'i + DN_MAX_ID',
+ * under the dummynet write lock. Returns 0 or EINVAL.
+ */
+static int
+config_link(struct dn_link *p, struct dn_id *arg)
+{
+	int i;
+
+	if (p->oid.len != sizeof(*p)) {
+		D("invalid pipe len %d", p->oid.len);
+		return EINVAL;
+	}
+	i = p->link_nr;
+	if (i <= 0 || i >= DN_MAX_ID)
+		return EINVAL;
+	/*
+	 * The config program passes parameters as follows:
+	 * bw = bits/second (0 means no limits),
+	 * delay = ms, must be translated into ticks.
+	 * qsize = slots/bytes
+	 * burst ???
+	 */
+	p->delay = (p->delay * hz) / 1000;
+	/* Scale burst size: bytes -> bits * hz */
+	p->burst *= 8 * hz;
+
+	DN_BH_WLOCK();
+	/* do it twice, base link and FIFO link */
+	for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+		struct dn_schk *s = locate_scheduler(i);
+		if (s == NULL) {
+			DN_BH_WUNLOCK();
+			D("sched %d not found", i);
+			return EINVAL;
+		}
+		/* remove profile if exists */
+		if (s->profile) {
+			free(s->profile, M_DUMMYNET);
+			s->profile = NULL;
+		}
+		/* copy all parameters */
+		s->link.oid = p->oid;
+		s->link.link_nr = i;
+		s->link.delay = p->delay;
+		if (s->link.bandwidth != p->bandwidth) {
+			/* XXX bandwidth changes, need to update red params */
+			s->link.bandwidth = p->bandwidth;
+			update_red(s);
+		}
+		s->link.burst = p->burst;
+		schk_reset_credit(s);
+	}
+	dn_cfg.id++;	/* bump config generation number */
+	DN_BH_WUNLOCK();
+	return 0;
+}
+
+/*
+ * configure a flowset. Can be called from inside with locked=1,
+ * in which case the dummynet write lock is already held.
+ * Bounds the queue size and bucket count, then looks up (and,
+ * if a scheduler number is given, creates) the flowset, inherits
+ * unspecified parameters from the previous configuration, and
+ * re-attaches it to its scheduler when the config changed.
+ * Returns the flowset, or NULL on error.
+ */
+static struct dn_fsk *
+config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
+{
+	int i;
+	struct dn_fsk *fs;
+
+	if (nfs->oid.len != sizeof(*nfs)) {
+		D("invalid flowset len %d", nfs->oid.len);
+		return NULL;
+	}
+	i = nfs->fs_nr;
+	if (i <= 0 || i >= 3*DN_MAX_ID)
+		return NULL;
+	ND("flowset %d", i);
+	/* XXX other sanity checks */
+	if (nfs->flags & DN_QSIZE_BYTES) {
+		ipdn_bound_var(&nfs->qsize, 16384,
+		    1500, dn_cfg.byte_limit, NULL); // "queue byte size");
+	} else {
+		ipdn_bound_var(&nfs->qsize, 50,
+		    1, dn_cfg.slot_limit, NULL); // "queue slot size");
+	}
+	if (nfs->flags & DN_HAVE_MASK) {
+		/* make sure we have some buckets */
+		ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
+			1, dn_cfg.max_hash_size, "flowset buckets");
+	} else {
+		nfs->buckets = 1;	/* we only need 1 */
+	}
+	if (!locked)
+		DN_BH_WLOCK();
+	do { /* exit with break when done */
+	    struct dn_schk *s;
+	    int flags = nfs->sched_nr ? DNHT_INSERT : 0;
+	    int j;
+	    int oldc = dn_cfg.fsk_count;
+	    fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
+	    if (fs == NULL) {
+		D("missing sched for flowset %d", i);
+	        break;
+	    }
+	    /* grab some defaults from the existing one */
+	    if (nfs->sched_nr == 0) /* reuse */
+		nfs->sched_nr = fs->fs.sched_nr;
+	    for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
+		if (nfs->par[j] == -1) /* reuse */
+		    nfs->par[j] = fs->fs.par[j];
+	    }
+	    if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
+		ND("flowset %d unchanged", i);
+		break; /* no change, nothing to do */
+	    }
+	    if (oldc != dn_cfg.fsk_count)	/* new item */
+		dn_cfg.id++;
+	    s = locate_scheduler(nfs->sched_nr);
+	    /* detach from old scheduler if needed, preserving
+	     * queues if we need to reattach. Then update the
+	     * configuration, and possibly attach to the new sched.
+	     */
+	    DX(2, "fs %d changed sched %d@%p to %d@%p",
+		fs->fs.fs_nr,
+		fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
+	    if (fs->sched) {
+		int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
+		flags |= DN_DESTROY; /* XXX temporary */
+		fsk_detach(fs, flags);
+	    }
+	    fs->fs = *nfs; /* copy configuration */
+	    if (s != NULL)
+		fsk_attach(fs, s);
+	} while (0);
+	if (!locked)
+		DN_BH_WUNLOCK();
+	return fs;
+}
+
+/*
+ * config/reconfig a scheduler and its FIFO variant.
+ * For !MULTIQUEUE schedulers, also set up the flowset.
+ *
+ * On reconfigurations (detected because s->fp is set),
+ * detach existing flowsets preserving traffic, preserve link,
+ * and delete the old scheduler creating a new one.
+ *
+ * The body runs twice via the 'again' label: first for the base
+ * (WFQ) scheduler number i, then for its FIFO companion at
+ * i + DN_MAX_ID (see the numbering scheme documented above).
+ * Returns 0 or an errno value.
+ */
+static int
+config_sched(struct dn_sch *_nsch, struct dn_id *arg)
+{
+	struct dn_schk *s;
+	struct schk_new_arg a; /* argument for schk_new */
+	int i;
+	struct dn_link p;	/* copy of oldlink */
+	struct dn_profile *pf = NULL;	/* copy of old link profile */
+	/* Used to preserv mask parameter */
+	struct ipfw_flow_id new_mask;
+	int new_buckets = 0;
+	int new_flags = 0;
+	int pipe_cmd;
+	int err = ENOMEM;
+
+	a.sch = _nsch;
+	if (a.sch->oid.len != sizeof(*a.sch)) {
+		D("bad sched len %d", a.sch->oid.len);
+		return EINVAL;
+	}
+	i = a.sch->sched_nr;
+	if (i <= 0 || i >= DN_MAX_ID)
+		return EINVAL;
+	/* make sure we have some buckets */
+	if (a.sch->flags & DN_HAVE_MASK)
+		ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
+			1, dn_cfg.max_hash_size, "sched buckets");
+	/* XXX other sanity checks */
+	bzero(&p, sizeof(p));
+
+	pipe_cmd = a.sch->flags & DN_PIPE_CMD;
+	a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set?
+	if (pipe_cmd) {
+		/* Copy mask parameter */
+		new_mask = a.sch->sched_mask;
+		new_buckets = a.sch->buckets;
+		new_flags = a.sch->flags;
+	}
+	DN_BH_WLOCK();
+again: /* run twice, for wfq and fifo */
+	/*
+	 * lookup the type. If not supplied, use the previous one
+	 * or default to WF2Q+. Otherwise, return an error.
+	 */
+	dn_cfg.id++;
+	a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
+	if (a.fp != NULL) {
+		/* found. Lookup or create entry */
+		s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
+	} else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
+		/* No type. search existing s* or retry with WF2Q+ */
+		s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
+		if (s != NULL) {
+			a.fp = s->fp;
+			/* Scheduler exists, skip to FIFO scheduler
+			 * if command was pipe config...
+			 */
+			if (pipe_cmd)
+				goto next;
+		} else {
+			/* New scheduler, create a wf2q+ with no mask
+			 * if command was pipe config...
+			 */
+			if (pipe_cmd) {
+				/* clear mask parameter */
+				bzero(&a.sch->sched_mask, sizeof(new_mask));
+				a.sch->buckets = 0;
+				a.sch->flags &= ~DN_HAVE_MASK;
+			}
+			a.sch->oid.subtype = DN_SCHED_WF2QP;
+			goto again;
+		}
+	} else {
+		D("invalid scheduler type %d %s",
+			a.sch->oid.subtype, a.sch->name);
+		err = EINVAL;
+		goto error;
+	}
+	/* normalize name and subtype */
+	a.sch->oid.subtype = a.fp->type;
+	bzero(a.sch->name, sizeof(a.sch->name));
+	strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
+	if (s == NULL) {
+		D("cannot allocate scheduler %d", i);
+		goto error;
+	}
+	/* restore existing link if any */
+	if (p.link_nr) {
+		s->link = p;
+		if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
+			s->profile = NULL; /* XXX maybe not needed */
+		} else {
+			s->profile = malloc(sizeof(struct dn_profile),
+					     M_DUMMYNET, M_NOWAIT | M_ZERO);
+			if (s->profile == NULL) {
+				D("cannot allocate profile");
+				goto error; //XXX
+			}
+			bcopy(pf, s->profile, sizeof(*pf));
+		}
+	}
+	/* clear the saved link so it is restored at most once */
+	p.link_nr = 0;
+	if (s->fp == NULL) {
+		DX(2, "sched %d new type %s", i, a.fp->name);
+	} else if (s->fp != a.fp ||
+			bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
+		/* already existing. */
+		DX(2, "sched %d type changed from %s to %s",
+			i, s->fp->name, a.fp->name);
+		DX(4, "   type/sub %d/%d -> %d/%d",
+			s->sch.oid.type, s->sch.oid.subtype,
+			a.sch->oid.type, a.sch->oid.subtype);
+		if (s->link.link_nr == 0)
+			D("XXX WARNING link 0 for sched %d", i);
+		p = s->link;	/* preserve link */
+		if (s->profile) {/* preserve profile */
+			if (!pf)
+				pf = malloc(sizeof(*pf),
+				    M_DUMMYNET, M_NOWAIT | M_ZERO);
+			if (pf)	/* XXX should issue a warning otherwise */
+				bcopy(s->profile, pf, sizeof(*pf));
+		}
+		/* remove from the hash */
+		dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+		/* Detach flowsets, preserve queues. */
+		// schk_delete_cb(s, NULL);
+		// XXX temporarily, kill queues
+		schk_delete_cb(s, (void *)DN_DESTROY);
+		goto again;
+	} else {
+		DX(4, "sched %d unchanged type %s", i, a.fp->name);
+	}
+	/* complete initialization */
+	s->sch = *a.sch;
+	s->fp = a.fp;
+	s->cfg = arg;
+	// XXX schk_reset_credit(s);
+	/* create the internal flowset if needed,
+	 * trying to reuse existing ones if available
+	 */
+	if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
+	        s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
+		if (!s->fs) {
+			struct dn_fs fs;
+			bzero(&fs, sizeof(fs));
+			set_oid(&fs.oid, DN_FS, sizeof(fs));
+			fs.fs_nr = i + DN_MAX_ID;
+			fs.sched_nr = i;
+			s->fs = config_fs(&fs, NULL, 1 /* locked */);
+		}
+		if (!s->fs) {
+			schk_delete_cb(s, (void *)DN_DESTROY);
+			D("error creating internal fs for %d", i);
+			goto error;
+		}
+	}
+	/* call init function after the flowset is created */
+	if (s->fp->config)
+		s->fp->config(s);
+	update_fs(s);
+next:
+	if (i < DN_MAX_ID) { /* now configure the FIFO instance */
+		i += DN_MAX_ID;
+		if (pipe_cmd) {
+			/* Restore mask parameter for FIFO */
+			a.sch->sched_mask = new_mask;
+			a.sch->buckets = new_buckets;
+			a.sch->flags = new_flags;
+		} else {
+			/* sched config shouldn't modify the FIFO scheduler */
+			if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
+				/* FIFO already exist, don't touch it */
+				err = 0; /* and this is not an error */
+				goto error;
+			}
+		}
+		a.sch->sched_nr = i;
+		a.sch->oid.subtype = DN_SCHED_FIFO;
+		bzero(a.sch->name, sizeof(a.sch->name));
+		goto again;
+	}
+	err = 0;
+error:
+	DN_BH_WUNLOCK();
+	if (pf)
+		free(pf, M_DUMMYNET);
+	return err;
+}
+
+/*
+ * attach a profile to a link.
+ * Applies the profile to both the base scheduler 'i' and its FIFO
+ * companion 'i + DN_MAX_ID'. A profile with samples_no == 0 deletes
+ * the existing one. Returns 0 or an errno value.
+ */
+static int
+config_profile(struct dn_profile *pf, struct dn_id *arg)
+{
+	struct dn_schk *s;
+	int i, olen, err = 0;
+
+	if (pf->oid.len < sizeof(*pf)) {
+		D("short profile len %d", pf->oid.len);
+		return EINVAL;
+	}
+	i = pf->link_nr;
+	if (i <= 0 || i >= DN_MAX_ID)
+		return EINVAL;
+	/* XXX other sanity checks */
+	DN_BH_WLOCK();
+	for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+		s = locate_scheduler(i);
+
+		if (s == NULL) {
+			err = EINVAL;
+			break;
+		}
+		dn_cfg.id++;
+		/*
+		 * If we had a profile and the new one does not fit,
+		 * or it is deleted, then we need to free memory.
+		 */
+		if (s->profile && (pf->samples_no == 0 ||
+		    s->profile->oid.len < pf->oid.len)) {
+			free(s->profile, M_DUMMYNET);
+			s->profile = NULL;
+		}
+		if (pf->samples_no == 0)
+			continue;
+		/*
+		 * new profile, possibly allocate memory
+		 * and copy data.
+		 */
+		if (s->profile == NULL)
+			s->profile = malloc(pf->oid.len,
+				    M_DUMMYNET, M_NOWAIT | M_ZERO);
+		if (s->profile == NULL) {
+			D("no memory for profile %d", i);
+			err = ENOMEM;
+			break;
+		}
+		/* preserve larger length XXX double check */
+		olen = s->profile->oid.len;
+		if (olen < pf->oid.len)
+			olen = pf->oid.len;
+		bcopy(pf, s->profile, pf->oid.len);
+		s->profile->oid.len = olen;
+	}
+	DN_BH_WUNLOCK();
+	return err;
+}
+
+/*
+ * Delete all objects:
+ * schedulers (with their links, instances and attached flowsets)
+ * first, then any flowset still unlinked, finally reset the event
+ * heap. Caller must hold the dummynet write lock.
+ */
+static void
+dummynet_flush(void)
+{
+
+	/* delete all schedulers and related links/queues/flowsets */
+	dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
+		(void *)(uintptr_t)DN_DELETE_FS);
+	/* delete all remaining (unlinked) flowsets */
+	DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
+	dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
+	fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
+	/* Reinitialize system heap... */
+	heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
+}
+
+/*
+ * Main handler for configuration. We are guaranteed to be called
+ * with an oid which is at least a dn_id.
+ * - the first object is the command (config, delete, flush, ...)
+ * - config_link must be issued after the corresponding config_sched
+ * - parameters (DN_TEXT) for an object must precede the object
+ * processed on a config_sched.
+ */
+int
+do_config(void *p, int l)
+{
+	struct dn_id *next, *o;
+	int err = 0, err2 = 0;
+	/* 'arg' holds a DN_TEXT parameter block that applies to the
+	 * next object only; 'prev' (below) is used to clear it after
+	 * one use. */
+	struct dn_id *arg = NULL;
+	uintptr_t *a;
+
+	o = p;
+	if (o->id != DN_API_VERSION) {
+		D("invalid api version got %d need %d",
+			o->id, DN_API_VERSION);
+		return EINVAL;
+	}
+	/* walk the chain of objects; each carries its own length */
+	for (; l >= sizeof(*o); o = next) {
+		struct dn_id *prev = arg;
+		if (o->len < sizeof(*o) || l < o->len) {
+			D("bad len o->len %d len %d", o->len, l);
+			err = EINVAL;
+			break;
+		}
+		l -= o->len;
+		next = (struct dn_id *)((char *)o + o->len);
+		err = 0;
+		switch (o->type) {
+		default:
+			D("cmd %d not implemented", o->type);
+			break;
+
+#ifdef EMULATE_SYSCTL
+		/* sysctl emulation.
+		 * if we recognize the command, jump to the correct
+		 * handler and return
+		 */
+		case DN_SYSCTL_SET:
+			err = kesysctl_emu_set(p, l);
+			return err;
+#endif
+
+		case DN_CMD_CONFIG: /* simply a header */
+			break;
+
+		case DN_CMD_DELETE:
+			/* the argument is in the first uintptr_t after o */
+			a = (uintptr_t *)(o+1);
+			if (o->len < sizeof(*o) + sizeof(*a)) {
+				err = EINVAL;
+				break;
+			}
+			switch (o->subtype) {
+			case DN_LINK:
+				/* delete base and derived schedulers */
+				DN_BH_WLOCK();
+				err = delete_schk(*a);
+				err2 = delete_schk(*a + DN_MAX_ID);
+				DN_BH_WUNLOCK();
+				if (!err)
+					err = err2;
+				break;
+
+			default:
+				D("invalid delete type %d",
+					o->subtype);
+				err = EINVAL;
+				break;
+
+			case DN_FS:
+				err = (*a <1 || *a >= DN_MAX_ID) ?
+					EINVAL : delete_fs(*a, 0) ;
+				break;
+			}
+			break;
+
+		case DN_CMD_FLUSH:
+			DN_BH_WLOCK();
+			dummynet_flush();
+			DN_BH_WUNLOCK();
+			break;
+		case DN_TEXT:	/* store argument the next block */
+			prev = NULL;	/* keep 'arg' for the next object */
+			arg = o;
+			break;
+		case DN_LINK:
+			err = config_link((struct dn_link *)o, arg);
+			break;
+		case DN_PROFILE:
+			err = config_profile((struct dn_profile *)o, arg);
+			break;
+		case DN_SCH:
+			err = config_sched((struct dn_sch *)o, arg);
+			break;
+		case DN_FS:
+			err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
+			break;
+		}
+		/* a stored DN_TEXT argument only survives one command */
+		if (prev)
+			arg = NULL;
+		if (err != 0)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Estimate the buffer space needed to export the objects selected
+ * by cmd->subtype, and set a->flags to the matching set of DN_C_*
+ * copy flags. Returns the byte count, or -1 for an unknown subtype.
+ */
+static int
+compute_space(struct dn_id *cmd, struct copy_args *a)
+{
+	int x = 0, need = 0;
+	int profile_size = sizeof(struct dn_profile) - 
+		ED_MAX_SAMPLES_NO*sizeof(int);
+
+	/* NOTE about compute space:
+	 * NP 	= dn_cfg.schk_count
+	 * NSI 	= dn_cfg.si_count
+	 * NF 	= dn_cfg.fsk_count
+	 * NQ 	= dn_cfg.queue_count
+	 * - ipfw pipe show
+	 *   (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+	 *                             link, scheduler template, flowset
+	 *                             integrated in scheduler and header
+	 *                             for flowset list
+	 *   (NSI)*(dn_flow) all scheduler instance (includes
+	 *                              the queue instance)
+	 * - ipfw sched show
+	 *   (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+	 *                             link, scheduler template, flowset
+	 *                             integrated in scheduler and header
+	 *                             for flowset list
+	 *   (NSI * dn_flow) all scheduler instances
+	 *   (NF * sizeof(uint_32)) space for flowset list linked to scheduler
+	 *   (NQ * dn_queue) all queue [XXXfor now not listed]
+	 * - ipfw queue show
+	 *   (NF * dn_fs) all flowset
+	 *   (NQ * dn_queue) all queues
+	 */
+	switch (cmd->subtype) {
+	default:
+		return -1;
+	/* XXX where do LINK and SCH differ ? */
+	/* 'ipfw sched show' could list all queues associated to
+	 * a scheduler. This feature for now is disabled
+	 */
+	case DN_LINK:	/* pipe show */
+		x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
+		need += dn_cfg.schk_count *
+			(sizeof(struct dn_fs) + profile_size) / 2;
+		need += dn_cfg.fsk_count * sizeof(uint32_t);
+		break;
+	case DN_SCH:	/* sched show */
+		need += dn_cfg.schk_count *
+			(sizeof(struct dn_fs) + profile_size) / 2;
+		need += dn_cfg.fsk_count * sizeof(uint32_t);
+		x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
+		break;
+	case DN_FS:	/* queue show */
+		x = DN_C_FS | DN_C_QUEUE;
+		break;
+	case DN_GET_COMPAT:	/* compatibility mode */
+		need =  dn_compat_calc_size(); 
+		break;
+	}
+	a->flags = x;
+	/* divide schk_count by 2 because base and FIFO schedulers
+	 * are stored in pairs */
+	if (x & DN_C_SCH) {
+		need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
+		/* NOT also, each fs might be attached to a sched */
+		need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
+	}
+	if (x & DN_C_FS)
+		need += dn_cfg.fsk_count * sizeof(struct dn_fs);
+	if (x & DN_C_LINK) {
+		need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
+	}
+	/*
+	 * When exporting a queue to userland, only pass up the
+	 * struct dn_flow, which is the only visible part.
+	 */
+
+	if (x & DN_C_QUEUE)
+		need += dn_cfg.queue_count * sizeof(struct dn_flow);
+	if (x & DN_C_FLOW)
+		need += dn_cfg.si_count * (sizeof(struct dn_flow));
+	return need;
+}
+
+/*
+ * If compat != NULL dummynet_get is called in compatibility mode.
+ * *compat will be the pointer to the buffer to pass to ipfw.
+ *
+ * The request (a dn_id, optionally followed by [min,max] ranges)
+ * is read via sooptcopyin(); the space needed is computed under
+ * the dummynet lock, the buffer allocated outside it, and the copy
+ * retried a few times in case objects appear in between. The
+ * result is returned via sooptcopyout() (or *compat).
+ */
+int
+dummynet_get(struct sockopt *sopt, void **compat)
+{
+	int have, i, need, error;
+	char *start = NULL, *buf;
+	size_t sopt_valsize;
+	struct dn_id *cmd;
+	struct copy_args a;
+	struct copy_range r;
+	int l = sizeof(struct dn_id);
+
+	bzero(&a, sizeof(a));
+	bzero(&r, sizeof(r));
+
+	/* save and restore original sopt_valsize around copyin */
+	sopt_valsize = sopt->sopt_valsize;
+
+	cmd = &r.o;
+
+	if (!compat) {
+		/* copy at least an oid, and possibly a full object */
+		error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
+		sopt->sopt_valsize = sopt_valsize;
+		if (error)
+			goto done;
+		l = cmd->len;
+#ifdef EMULATE_SYSCTL
+		/* sysctl emulation. */
+		if (cmd->type == DN_SYSCTL_GET)
+			return kesysctl_emu_get(sopt);
+#endif
+		if (l > sizeof(r)) {
+			/* request larger than default, allocate buffer */
+			cmd = malloc(l,  M_DUMMYNET, M_WAITOK);
+			error = sooptcopyin(sopt, cmd, l, l);
+			sopt->sopt_valsize = sopt_valsize;
+			if (error)
+				goto done;
+		}
+	} else { /* compatibility */
+		error = 0;
+		cmd->type = DN_CMD_GET;
+		cmd->len = sizeof(struct dn_id);
+		cmd->subtype = DN_GET_COMPAT;
+		// cmd->id = sopt_valsize;
+		D("compatibility mode");
+	}
+	a.extra = (struct copy_range *)cmd;
+	if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
+		uint32_t *rp = (uint32_t *)(cmd + 1);
+		cmd->len += 2* sizeof(uint32_t);
+		rp[0] = 1;
+		rp[1] = DN_MAX_ID - 1;
+		if (cmd->subtype == DN_LINK) {
+			rp[0] += DN_MAX_ID;
+			rp[1] += DN_MAX_ID;
+		}
+	}
+	/* Count space (under lock) and allocate (outside lock).
+	 * Exit with lock held if we manage to get enough buffer.
+	 * Try a few times then give up.
+	 */
+	for (have = 0, i = 0; i < 10; i++) {
+		DN_BH_WLOCK();
+		need = compute_space(cmd, &a);
+
+		/* if there is a range, ignore value from compute_space() */
+		if (l > sizeof(*cmd))
+			need = sopt_valsize - sizeof(*cmd);
+
+		if (need < 0) {
+			DN_BH_WUNLOCK();
+			error = EINVAL;
+			goto done;
+		}
+		need += sizeof(*cmd);
+		cmd->id = need;
+		if (have >= need)
+			break;	/* buffer is big enough, keep the lock */
+
+		DN_BH_WUNLOCK();
+		if (start)
+			free(start, M_DUMMYNET);
+		start = NULL;
+		if (need > sopt_valsize)
+			break;	/* userland buffer too small, report size */
+
+		have = need;
+		start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
+	}
+
+	if (start == NULL) {
+		if (compat) {
+			*compat = NULL;
+			error =  1; // XXX
+		} else {
+			/* return just the header carrying the needed size */
+			error = sooptcopyout(sopt, cmd, sizeof(*cmd));
+		}
+		goto done;
+	}
+	ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
+		"%d:%d si %d, %d:%d queues %d",
+		dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
+		dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
+		dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
+		dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
+		dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
+	sopt->sopt_valsize = sopt_valsize;
+	a.type = cmd->subtype;
+
+	if (compat == NULL) {
+		bcopy(cmd, start, sizeof(*cmd));
+		((struct dn_id*)(start))->len = sizeof(struct dn_id);
+		buf = start + sizeof(*cmd);
+	} else
+		buf = start;
+	a.start = &buf;
+	a.end = start + have;
+	/* start copying other objects */
+	if (compat) {
+		a.type = DN_COMPAT_PIPE;
+		dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
+		a.type = DN_COMPAT_QUEUE;
+		dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
+	} else if (a.type == DN_FS) {
+		dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
+	} else {
+		dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
+	}
+	DN_BH_WUNLOCK();
+
+	if (compat) {
+		*compat = start;
+		sopt->sopt_valsize = buf - start;
+		/* free() is done by ip_dummynet_compat() */
+		start = NULL; //XXX hack
+	} else {
+		error = sooptcopyout(sopt, start, buf - start);
+	}
+done:
+	if (cmd && cmd != &r.o)
+		free(cmd, M_DUMMYNET);
+	if (start)
+		free(start, M_DUMMYNET);
+	return error;
+}
+
+/*
+ * Callback called on scheduler instance to delete it if idle.
+ * An instance is idle when it is not on the active list, its delay
+ * line is empty, and it holds no traffic: no queues at all for
+ * DN_MULTIQUEUE schedulers, an empty embedded queue otherwise
+ * (the queue is allocated right after the instance, hence si+1).
+ * Returns the si_destroy() result when deleted, 0 otherwise.
+ */
+static int
+drain_scheduler_cb(void *_si, void *arg)
+{
+	struct dn_sch_inst *si = _si;
+
+	if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
+		return 0;
+
+	if (si->sched->fp->flags & DN_MULTIQUEUE) {
+		if (si->q_count == 0)
+			return si_destroy(si, NULL);
+	} else { /* !DN_MULTIQUEUE, queue embedded after the instance */
+		if ((si+1)->ni.length == 0)
+			return si_destroy(si, NULL);
+	}
+	return 0;
+}
+
+/* Callback called on scheduler to check if it has instances.
+ * With a mask, instances live in a hash table: drain one bucket
+ * per call using the per-scheduler drain_bucket cursor, so repeated
+ * ticks eventually cover the whole table. Without a mask there is
+ * at most one instance, pointed to directly by s->siht; clear the
+ * pointer when the instance is destroyed.
+ */
+static int
+drain_scheduler_sch_cb(void *_s, void *arg)
+{
+	struct dn_schk *s = _s;
+
+	if (s->sch.flags & DN_HAVE_MASK) {
+		dn_ht_scan_bucket(s->siht, &s->drain_bucket,
+				drain_scheduler_cb, NULL);
+		s->drain_bucket++;
+	} else {
+		if (s->siht) {
+			if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
+				s->siht = NULL;
+			}
+	}
+	return 0;
+}
+
+/* Called every tick, try to delete a 'bucket' of scheduler.
+ * The global drain_sch cursor advances one bucket per call so the
+ * whole scheduler hash is swept incrementally across ticks.
+ */
+void
+dn_drain_scheduler(void)
+{
+	dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
+			   drain_scheduler_sch_cb, NULL);
+	dn_cfg.drain_sch++;
+}
+
+/*
+ * Callback called on a queue to delete it if idle: an empty queue
+ * is destroyed and DNHT_SCAN_DEL is returned so the scan removes
+ * the hash entry; otherwise the queue is kept and 0 is returned.
+ */
+static int
+drain_queue_cb(void *_q, void *arg)
+{
+	struct dn_queue *q = _q;
+
+	if (q->ni.length != 0)
+		return 0;	/* still has packets, keep the queue */
+	dn_delete_queue(q, DN_DESTROY);
+	return DNHT_SCAN_DEL;	/* queue is deleted */
+}
+
+/* Callback called on flowset used to check if it has queues.
+ * Hashed flowsets are drained one bucket per call via the
+ * per-flowset drain_bucket cursor; non-hashed ones hold at most a
+ * single queue pointed to directly by fs->qht.
+ */
+static int
+drain_queue_fs_cb(void *_fs, void *arg)
+{
+	struct dn_fsk *fs = _fs;
+
+	if (fs->fs.flags & DN_QHT_HASH) {
+		/* Flowset has a hash table for queues */
+		dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
+				drain_queue_cb, NULL);
+		fs->drain_bucket++;
+	} else {
+		/* No hash table for this flowset, null the pointer 
+		 * if the queue is deleted
+		 */
+		if (fs->qht) {
+			if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
+				fs->qht = NULL;
+		}
+	}
+	return 0;
+}
+
+/* Called every tick, try to delete a 'bucket' of queue.
+ * The global drain_fs cursor advances one flowset-hash bucket per
+ * call so the whole table is swept incrementally across ticks.
+ */
+void
+dn_drain_queue(void)
+{
+	/* scan a bucket of flowset */
+	dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
+                               drain_queue_fs_cb, NULL);
+	dn_cfg.drain_fs++;
+}
+
+/*
+ * Handler for the various dummynet socket options.
+ * Checks PRIV_NETINET_DUMMYNET and, for sets, securelevel >= 3.
+ * Old-style options are routed to ip_dummynet_compat(); IP_DUMMYNET3
+ * GET goes to dummynet_get(), SET copies in the request (capped at
+ * 12000 bytes) and runs do_config().
+ */
+static int
+ip_dn_ctl(struct sockopt *sopt)
+{
+	void *p = NULL;
+	int error, l;
+
+	error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
+	if (error)
+		return (error);
+
+	/* Disallow sets in really-really secure mode. */
+	if (sopt->sopt_dir == SOPT_SET) {
+		error =  securelevel_ge(sopt->sopt_td->td_ucred, 3);
+		if (error)
+			return (error);
+	}
+
+	switch (sopt->sopt_name) {
+	default :
+		D("dummynet: unknown option %d", sopt->sopt_name);
+		error = EINVAL;
+		break;
+
+	case IP_DUMMYNET_FLUSH:
+	case IP_DUMMYNET_CONFIGURE:
+	case IP_DUMMYNET_DEL:	/* remove a pipe or queue */
+	case IP_DUMMYNET_GET:
+		D("dummynet: compat option %d", sopt->sopt_name);
+		error = ip_dummynet_compat(sopt);
+		break;
+
+	case IP_DUMMYNET3 :
+		if (sopt->sopt_dir == SOPT_GET) {
+			error = dummynet_get(sopt, NULL);
+			break;
+		}
+		l = sopt->sopt_valsize;
+		if (l < sizeof(struct dn_id) || l > 12000) {
+			D("argument len %d invalid", l);
+			/* NOTE(review): error is left at 0 here, so an
+			 * invalid length is reported as success to the
+			 * caller — confirm whether EINVAL was intended. */
+			break;
+		}
+		p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ?
+		error = sooptcopyin(sopt, p, l, l);
+		if (error)
+			break ;
+		error = do_config(p, l);
+		break;
+	}
+
+	if (p != NULL)
+		free(p, M_TEMP);
+
+	return error ;
+}
+
+
+/*
+ * One-time initialization of the dummynet subsystem (guarded by
+ * dn_cfg.init_done): set default limits and RED parameters, create
+ * the scheduler/flowset hash tables and the event heap, init the
+ * lock, and start the taskqueue and the periodic timer.
+ */
+static void
+ip_dn_init(void)
+{
+	if (dn_cfg.init_done)
+		return;
+	printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet);
+	dn_cfg.init_done = 1;
+	/* Set defaults here. MSVC does not accept initializers,
+	 * and this is also useful for vimages
+	 */
+	/* queue limits */
+	dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
+	dn_cfg.byte_limit = 1024 * 1024;
+	dn_cfg.expire = 1;
+
+	/* RED parameters */
+	dn_cfg.red_lookup_depth = 256;	/* default lookup table depth */
+	dn_cfg.red_avg_pkt_size = 512;	/* default medium packet size */
+	dn_cfg.red_max_pkt_size = 1500;	/* default max packet size */
+
+	/* hash tables */
+	dn_cfg.max_hash_size = 65536;	/* max in the hash tables */
+	dn_cfg.hash_size = 64;		/* default hash size */
+
+	/* create hash tables for schedulers and flowsets.
+	 * In both we search by key and by pointer.
+	 */
+	dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
+		offsetof(struct dn_schk, schk_next),
+		schk_hash, schk_match, schk_new);
+	dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
+		offsetof(struct dn_fsk, fsk_next),
+		fsk_hash, fsk_match, fsk_new);
+
+	/* bucket index to drain object */
+	dn_cfg.drain_fs = 0;
+	dn_cfg.drain_sch = 0;
+
+	heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
+	SLIST_INIT(&dn_cfg.fsu);
+	SLIST_INIT(&dn_cfg.schedlist);
+
+	DN_LOCK_INIT();
+
+	TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
+	dn_tq = taskqueue_create("dummynet", M_WAITOK,
+	    taskqueue_thread_enqueue, &dn_tq);
+	taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
+
+	callout_init(&dn_timeout, CALLOUT_MPSAFE);
+	callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+	/* Initialize curr_time adjustment mechanics. */
+	getmicrouptime(&dn_cfg.prev_t);
+}
+
+/*
+ * Tear down the dummynet subsystem: stop the timer, detach the
+ * hooks (when this is the last instance), flush all objects, drain
+ * and free the taskqueue, free the hash tables and the event heap,
+ * and destroy the lock.
+ */
+static void
+ip_dn_destroy(int last)
+{
+	callout_drain(&dn_timeout);
+
+	DN_BH_WLOCK();
+	if (last) {
+		ND("removing last instance\n");
+		ip_dn_ctl_ptr = NULL;
+		ip_dn_io_ptr = NULL;
+	}
+
+	dummynet_flush();
+	DN_BH_WUNLOCK();
+	taskqueue_drain(dn_tq, &dn_task);
+	taskqueue_free(dn_tq);
+
+	dn_ht_free(dn_cfg.schedhash, 0);
+	dn_ht_free(dn_cfg.fshash, 0);
+	heap_free(&dn_cfg.evheap);
+
+	DN_LOCK_DESTROY();
+}
+
+/*
+ * Module event handler for dummynet itself: initialize the
+ * subsystem and install the ipfw hooks on load, tear everything
+ * down on unload.
+ */
+static int
+dummynet_modevent(module_t mod, int type, void *data)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		if (ip_dn_io_ptr) {
+			printf("DUMMYNET already loaded\n");
+			return EEXIST;
+		}
+		ip_dn_init();
+		ip_dn_ctl_ptr = ip_dn_ctl;
+		ip_dn_io_ptr = dummynet_io;
+		return 0;
+	case MOD_UNLOAD:
+		ip_dn_destroy(1 /* last */);
+		return 0;
+	default:
+		return EOPNOTSUPP;
+	}
+}
+
+/* modevent helpers for the modules */
+/*
+ * Register scheduler algorithm 'd' in dn_cfg.schedlist.
+ * Verifies the mandatory enqueue/dequeue methods and rejects
+ * duplicate names. Returns 0 on success, 1 on error.
+ */
+static int
+load_dn_sched(struct dn_alg *d)
+{
+	struct dn_alg *s;
+
+	if (d == NULL)
+		return 1; /* error */
+	ip_dn_init();	/* just in case, we need the lock */
+
+	/* Check that mandatory funcs exists */
+	if (d->enqueue == NULL || d->dequeue == NULL) {
+		D("missing enqueue or dequeue for %s", d->name);
+		return 1;
+	}
+
+	/* Search if scheduler already exists */
+	DN_BH_WLOCK();
+	SLIST_FOREACH(s, &dn_cfg.schedlist, next) {
+		if (strcmp(s->name, d->name) == 0) {
+			D("%s already loaded", d->name);
+			break; /* scheduler already exists */
+		}
+	}
+	if (s == NULL)
+		SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
+	DN_BH_WUNLOCK();
+	D("dn_sched %s %sloaded", d->name, s ? "not ":"");
+	return s ? 1 : 0;
+}
+
+/*
+ * Unregister scheduler algorithm 's' from dn_cfg.schedlist.
+ * Returns 0 on success, EBUSY if the algorithm is still referenced
+ * (ref_count != 0), EINVAL if it was not found.
+ */
+static int
+unload_dn_sched(struct dn_alg *s)
+{
+	struct dn_alg *tmp, *r;
+	int err = EINVAL;
+
+	ND("called for %s", s->name);
+
+	DN_BH_WLOCK();
+	SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
+		if (strcmp(s->name, r->name) != 0)
+			continue;
+		ND("ref_count = %d", r->ref_count);
+		err = (r->ref_count != 0) ? EBUSY : 0;
+		if (err == 0)
+			SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
+		break;
+	}
+	DN_BH_WUNLOCK();
+	D("dn_sched %s %sunloaded", s->name, err ? "not ":"");
+	return err;
+}
+
+/*
+ * Generic module event handler shared by dummynet scheduler
+ * modules: dispatch MOD_LOAD/MOD_UNLOAD to the (un)register
+ * helpers; any other event is EINVAL.
+ */
+int
+dn_sched_modevent(module_t mod, int cmd, void *arg)
+{
+	struct dn_alg *sch = arg;
+
+	switch (cmd) {
+	case MOD_LOAD:
+		return load_dn_sched(sch);
+	case MOD_UNLOAD:
+		return unload_dn_sched(sch);
+	default:
+		return EINVAL;
+	}
+}
+
+static moduledata_t dummynet_mod = {
+ "dummynet", dummynet_modevent, NULL
+};
+
+#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN
+#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */
+DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD);
+MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
+MODULE_VERSION(dummynet, 3);
+
+/*
+ * Starting up. Done in order after dummynet_modevent() has been called.
+ * VNET_SYSINIT is also called for each existing vnet and each new vnet.
+ */
+//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL);
+
+/*
+ * Shutdown handlers close up shop. These are done in REVERSE ORDER, but still
+ * after dummynet_modevent() has been called. Not called on reboot.
+ * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
+ * or when the module is unloaded.
+ */
+//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
+
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw2.c b/freebsd/sys/netpfil/ipfw/ip_fw2.c
new file mode 100644
index 00000000..1bd1b6fc
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw2.c
@@ -0,0 +1,2825 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * The FreeBSD IP packet firewall, main file
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_ipdivert.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_ipsec.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/condvar.h>
+#include <sys/eventhandler.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/jail.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/ucred.h>
+#include <net/ethernet.h> /* for ETHERTYPE_IP */
+#include <net/if.h>
+#include <net/route.h>
+#include <net/pf_mtag.h>
+#include <net/pfil.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_carp.h>
+#include <netinet/pim.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/sctp.h>
+
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#include <netinet6/scope6_var.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <machine/in_cksum.h> /* XXX for in_cksum */
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+/*
+ * static variables followed by global ones.
+ * All ipfw global variables are here.
+ */
+
+/* ipfw_vnet_ready controls when we are open for business */
+static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
+#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
+
+static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
+#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
+
+static VNET_DEFINE(int, fw_permit_single_frag6) = 1;
+#define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6)
+
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+static int default_to_accept = 1;
+#else
+static int default_to_accept;
+#endif
+
+VNET_DEFINE(int, autoinc_step);
+VNET_DEFINE(int, fw_one_pass) = 1;
+
+VNET_DEFINE(unsigned int, fw_tables_max);
+/* Use 128 tables by default */
+static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
+
+/*
+ * Each rule belongs to one of 32 different sets (0..31).
+ * The variable set_disable contains one bit per set.
+ * If the bit is set, all rules in the corresponding set
+ * are disabled. Set RESVD_SET(31) is reserved for the default rule
+ * and rules that are not deleted by the flush command,
+ * and CANNOT be disabled.
+ * Rules in set RESVD_SET can only be deleted individually.
+ */
+VNET_DEFINE(u_int32_t, set_disable);
+#define V_set_disable VNET(set_disable)
+
+VNET_DEFINE(int, fw_verbose);
+/* counter for ipfw_log(NULL...) */
+VNET_DEFINE(u_int64_t, norule_counter);
+VNET_DEFINE(int, verbose_limit);
+
+/* layer3_chain contains the list of rules for layer 3 */
+VNET_DEFINE(struct ip_fw_chain, layer3_chain);
+
+ipfw_nat_t *ipfw_nat_ptr = NULL;
+struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
+ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
+ipfw_nat_cfg_t *ipfw_nat_del_ptr;
+ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
+ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
+
+#ifdef SYSCTL_NODE
+uint32_t dummy_def = IPFW_DEFAULT_RULE;
+static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
+
+SYSBEGIN(f3)
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
+ CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
+ "Only do a single pass through ipfw when using dummynet(4)");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
+ CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
+ "Rule number auto-increment step");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
+ CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
+ "Log matches to ipfw rules");
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
+ CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
+ "Set upper limit of matches of ipfw rules logged");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
+ &dummy_def, 0,
+ "The default/max possible rule number.");
+SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
+ CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
+ "Maximum number of tables");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
+ &default_to_accept, 0,
+ "Make the default rule accept all packets.");
+TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
+TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables);
+SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
+ CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
+ "Number of static rules");
+
+#ifdef INET6
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
+ CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
+ "Deny packets with unknown IPv6 Extension Headers");
+SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
+ CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0,
+ "Permit single packet IPv6 fragments");
+#endif /* INET6 */
+
+SYSEND
+
+#endif /* SYSCTL_NODE */
+
+
+/*
+ * Some macros used in the various matching options.
+ * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
+ * Other macros just cast void * into the appropriate type
+ */
+#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+static __inline int
+icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
+{
+ int type = icmp->icmp_type;
+
+ return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
+}
+
+#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
+ (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
+
+static int
+is_icmp_query(struct icmphdr *icmp)
+{
+ int type = icmp->icmp_type;
+
+ return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
+}
+#undef TT
+
+/*
+ * The following checks use two arrays of 8 or 16 bits to store the
+ * bits that we want set or clear, respectively. They are in the
+ * low and high half of cmd->arg1 or cmd->d[0].
+ *
+ * We scan options and store the bits we find set. We succeed if
+ *
+ * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
+ *
+ * The code is sometimes optimized not to store additional variables.
+ */
+
+static int
+flags_match(ipfw_insn *cmd, u_int8_t bits)
+{
+ u_char want_clear;
+ bits = ~bits;
+
+ if ( ((cmd->arg1 & 0xff) & bits) != 0)
+ return 0; /* some bits we want set were clear */
+ want_clear = (cmd->arg1 >> 8) & 0xff;
+ if ( (want_clear & bits) != want_clear)
+ return 0; /* some bits we want clear were set */
+ return 1;
+}
+
+static int
+ipopts_match(struct ip *ip, ipfw_insn *cmd)
+{
+ int optlen, bits = 0;
+ u_char *cp = (u_char *)(ip + 1);
+ int x = (ip->ip_hl << 2) - sizeof (struct ip);
+
+ for (; x > 0; x -= optlen, cp += optlen) {
+ int opt = cp[IPOPT_OPTVAL];
+
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[IPOPT_OLEN];
+ if (optlen <= 0 || optlen > x)
+ return 0; /* invalid or truncated */
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ bits |= IP_FW_IPOPT_LSRR;
+ break;
+
+ case IPOPT_SSRR:
+ bits |= IP_FW_IPOPT_SSRR;
+ break;
+
+ case IPOPT_RR:
+ bits |= IP_FW_IPOPT_RR;
+ break;
+
+ case IPOPT_TS:
+ bits |= IP_FW_IPOPT_TS;
+ break;
+ }
+ }
+ return (flags_match(cmd, bits));
+}
+
+static int
+tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
+{
+ int optlen, bits = 0;
+ u_char *cp = (u_char *)(tcp + 1);
+ int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
+
+ for (; x > 0; x -= optlen, cp += optlen) {
+ int opt = cp[0];
+ if (opt == TCPOPT_EOL)
+ break;
+ if (opt == TCPOPT_NOP)
+ optlen = 1;
+ else {
+ optlen = cp[1];
+ if (optlen <= 0)
+ break;
+ }
+
+ switch (opt) {
+
+ default:
+ break;
+
+ case TCPOPT_MAXSEG:
+ bits |= IP_FW_TCPOPT_MSS;
+ break;
+
+ case TCPOPT_WINDOW:
+ bits |= IP_FW_TCPOPT_WINDOW;
+ break;
+
+ case TCPOPT_SACK_PERMITTED:
+ case TCPOPT_SACK:
+ bits |= IP_FW_TCPOPT_SACK;
+ break;
+
+ case TCPOPT_TIMESTAMP:
+ bits |= IP_FW_TCPOPT_TS;
+ break;
+
+ }
+ }
+ return (flags_match(cmd, bits));
+}
+
+static int
+iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg)
+{
+ if (ifp == NULL) /* no iface with this packet, match fails */
+ return 0;
+ /* Check by name or by IP address */
+ if (cmd->name[0] != '\0') { /* match by name */
+ if (cmd->name[0] == '\1') /* use tablearg to match */
+ return ipfw_lookup_table_extended(chain, cmd->p.glob,
+ ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
+ /* Check name */
+ if (cmd->p.glob) {
+ if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
+ return(1);
+ } else {
+ if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
+ return(1);
+ }
+ } else {
+#ifdef __FreeBSD__ /* and OSX too ? */
+ struct ifaddr *ia;
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
+ if (ia->ifa_addr->sa_family != AF_INET)
+ continue;
+ if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
+ (ia->ifa_addr))->sin_addr.s_addr) {
+ if_addr_runlock(ifp);
+ return(1); /* match */
+ }
+ }
+ if_addr_runlock(ifp);
+#endif /* __FreeBSD__ */
+ }
+ return(0); /* no match, fail ... */
+}
+
+/*
+ * The verify_path function checks if a route to the src exists and
+ * if it is reachable via ifp (when provided).
+ *
+ * The 'verrevpath' option checks that the interface that an IP packet
+ * arrives on is the same interface that traffic destined for the
+ * packet's source address would be routed out of.
+ * The 'versrcreach' option just checks that the source address is
+ * reachable via any route (except default) in the routing table.
+ * These two are a measure to block forged packets. This is also
+ * commonly known as "anti-spoofing" or Unicast Reverse Path
+ * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
+ * is purposely reminiscent of the Cisco IOS command,
+ *
+ * ip verify unicast reverse-path
+ * ip verify unicast source reachable-via any
+ *
+ * which implements the same functionality. But note that the syntax
+ * is misleading, and the check may be performed on all IP packets
+ * whether unicast, multicast, or broadcast.
+ */
+static int
+verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
+{
+#ifndef __FreeBSD__
+ return 0;
+#else
+ struct route ro;
+ struct sockaddr_in *dst;
+
+ bzero(&ro, sizeof(ro));
+
+ dst = (struct sockaddr_in *)&(ro.ro_dst);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = src;
+ in_rtalloc_ign(&ro, 0, fib);
+
+ if (ro.ro_rt == NULL)
+ return 0;
+
+ /*
+ * If ifp is provided, check for equality with rtentry.
+ * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
+ * in order to pass packets injected back by if_simloop():
+ * if useloopback == 1 routing entry (via lo0) for our own address
+ * may exist, so we need to handle routing asymmetry.
+ */
+ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* if no ifp provided, check if rtentry is not default route */
+ if (ifp == NULL &&
+ satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* or if this is a blackhole/reject route */
+ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* found valid route */
+ RTFREE(ro.ro_rt);
+ return 1;
+#endif /* __FreeBSD__ */
+}
+
+#ifdef INET6
+/*
+ * ipv6 specific rules here...
+ */
+static __inline int
+icmp6type_match (int type, ipfw_insn_u32 *cmd)
+{
+ return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
+}
+
+static int
+flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
+{
+ int i;
+ for (i=0; i <= cmd->o.arg1; ++i )
+ if (curr_flow == cmd->d[i] )
+ return 1;
+ return 0;
+}
+
+/* support for IP6_*_ME opcodes */
+static int
+search_ip6_addr_net (struct in6_addr * ip6_addr)
+{
+ struct ifnet *mdc;
+ struct ifaddr *mdc2;
+ struct in6_ifaddr *fdm;
+ struct in6_addr copia;
+
+ TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
+ if_addr_rlock(mdc);
+ TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
+ if (mdc2->ifa_addr->sa_family == AF_INET6) {
+ fdm = (struct in6_ifaddr *)mdc2;
+ copia = fdm->ia_addr.sin6_addr;
+ /* need for leaving scope_id in the sock_addr */
+ in6_clearscope(&copia);
+ if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
+ if_addr_runlock(mdc);
+ return 1;
+ }
+ }
+ }
+ if_addr_runlock(mdc);
+ }
+ return 0;
+}
+
+static int
+verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
+{
+ struct route_in6 ro;
+ struct sockaddr_in6 *dst;
+
+ bzero(&ro, sizeof(ro));
+
+ dst = (struct sockaddr_in6 * )&(ro.ro_dst);
+ dst->sin6_family = AF_INET6;
+ dst->sin6_len = sizeof(*dst);
+ dst->sin6_addr = *src;
+
+ in6_rtalloc_ign(&ro, 0, fib);
+ if (ro.ro_rt == NULL)
+ return 0;
+
+ /*
+ * if ifp is provided, check for equality with rtentry
+ * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
+ * to support the case of sending packets to an address of our own.
+ * (where the former interface is the first argument of if_simloop()
+ * (=ifp), the latter is lo0)
+ */
+ if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* if no ifp provided, check if rtentry is not default route */
+ if (ifp == NULL &&
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* or if this is a blackhole/reject route */
+ if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ RTFREE(ro.ro_rt);
+ return 0;
+ }
+
+ /* found valid route */
+ RTFREE(ro.ro_rt);
+ return 1;
+
+}
+
+static int
+is_icmp6_query(int icmp6_type)
+{
+ if ((icmp6_type <= ICMP6_MAXTYPE) &&
+ (icmp6_type == ICMP6_ECHO_REQUEST ||
+ icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
+ icmp6_type == ICMP6_WRUREQUEST ||
+ icmp6_type == ICMP6_FQDN_QUERY ||
+ icmp6_type == ICMP6_NI_QUERY))
+ return (1);
+
+ return (0);
+}
+
+static void
+send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
+{
+ struct mbuf *m;
+
+ m = args->m;
+ if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
+ struct tcphdr *tcp;
+ tcp = (struct tcphdr *)((char *)ip6 + hlen);
+
+ if ((tcp->th_flags & TH_RST) == 0) {
+ struct mbuf *m0;
+ m0 = ipfw_send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
+ tcp->th_flags | TH_RST);
+ if (m0 != NULL)
+ ip6_output(m0, NULL, NULL, 0, NULL, NULL,
+ NULL);
+ }
+ FREE_PKT(m);
+ } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
+#if 0
+ /*
+ * Unlike above, the mbufs need to line up with the ip6 hdr,
+ * as the contents are read. We need to m_adj() the
+ * needed amount.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
+ } else
+ FREE_PKT(m);
+
+ args->m = NULL;
+}
+
+#endif /* INET6 */
+
+
+/*
+ * sends a reject message, consuming the mbuf passed as an argument.
+ */
+static void
+send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
+{
+
+#if 0
+	/* XXX When ip is not guaranteed to be at mtod() we will
+	 * need to account for this.
+	 * The mbuf will however be thrown away so we can adjust it.
+	 * Remember we did an m_pullup on it already so we
+	 * can make some assumptions about contiguousness.
+	 */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
+ /* We need the IP header in host order for icmp_error(). */
+ SET_HOST_IPLEN(ip);
+ icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
+ } else if (args->f_id.proto == IPPROTO_TCP) {
+ struct tcphdr *const tcp =
+ L3HDR(struct tcphdr, mtod(args->m, struct ip *));
+ if ( (tcp->th_flags & TH_RST) == 0) {
+ struct mbuf *m;
+ m = ipfw_send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
+ tcp->th_flags | TH_RST);
+ if (m != NULL)
+ ip_output(m, NULL, NULL, 0, NULL, NULL);
+ }
+ FREE_PKT(args->m);
+ } else
+ FREE_PKT(args->m);
+ args->m = NULL;
+}
+
+/*
+ * Support for uid/gid/jail lookup. These tests are expensive
+ * (because we may need to look into the list of active sockets)
+ * so we cache the results. ugid_lookupp is 0 if we have not
+ * yet done a lookup, 1 if we succeeded, and -1 if we tried
+ * and failed. The function always returns the match value.
+ * We could actually spare the variable and use *uc, setting
+ * it to '(void *)check_uidgid' if we have no info, NULL if
+ * we tried and failed, or any other value if successful.
+ */
+static int
+check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
+ struct ucred **uc)
+{
+#ifndef __FreeBSD__
+ /* XXX */
+ return cred_check(insn, proto, oif,
+ dst_ip, dst_port, src_ip, src_port,
+ (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
+#else /* FreeBSD */
+ struct in_addr src_ip, dst_ip;
+ struct inpcbinfo *pi;
+ struct ipfw_flow_id *id;
+ struct inpcb *pcb, *inp;
+ struct ifnet *oif;
+ int lookupflags;
+ int match;
+
+ id = &args->f_id;
+ inp = args->inp;
+ oif = args->oif;
+
+ /*
+ * Check to see if the UDP or TCP stack supplied us with
+	 * the PCB. If so, rather than holding a lock and looking
+ * up the PCB, we can use the one that was supplied.
+ */
+ if (inp && *ugid_lookupp == 0) {
+ INP_LOCK_ASSERT(inp);
+ if (inp->inp_socket != NULL) {
+ *uc = crhold(inp->inp_cred);
+ *ugid_lookupp = 1;
+ } else
+ *ugid_lookupp = -1;
+ }
+ /*
+ * If we have already been here and the packet has no
+ * PCB entry associated with it, then we can safely
+ * assume that this is a no match.
+ */
+ if (*ugid_lookupp == -1)
+ return (0);
+ if (id->proto == IPPROTO_TCP) {
+ lookupflags = 0;
+ pi = &V_tcbinfo;
+ } else if (id->proto == IPPROTO_UDP) {
+ lookupflags = INPLOOKUP_WILDCARD;
+ pi = &V_udbinfo;
+ } else
+ return 0;
+ lookupflags |= INPLOOKUP_RLOCKPCB;
+ match = 0;
+ if (*ugid_lookupp == 0) {
+ if (id->addr_type == 6) {
+#ifdef INET6
+ if (oif == NULL)
+ pcb = in6_pcblookup_mbuf(pi,
+ &id->src_ip6, htons(id->src_port),
+ &id->dst_ip6, htons(id->dst_port),
+ lookupflags, oif, args->m);
+ else
+ pcb = in6_pcblookup_mbuf(pi,
+ &id->dst_ip6, htons(id->dst_port),
+ &id->src_ip6, htons(id->src_port),
+ lookupflags, oif, args->m);
+#else
+ *ugid_lookupp = -1;
+ return (0);
+#endif
+ } else {
+ src_ip.s_addr = htonl(id->src_ip);
+ dst_ip.s_addr = htonl(id->dst_ip);
+ if (oif == NULL)
+ pcb = in_pcblookup_mbuf(pi,
+ src_ip, htons(id->src_port),
+ dst_ip, htons(id->dst_port),
+ lookupflags, oif, args->m);
+ else
+ pcb = in_pcblookup_mbuf(pi,
+ dst_ip, htons(id->dst_port),
+ src_ip, htons(id->src_port),
+ lookupflags, oif, args->m);
+ }
+ if (pcb != NULL) {
+ INP_RLOCK_ASSERT(pcb);
+ *uc = crhold(pcb->inp_cred);
+ *ugid_lookupp = 1;
+ INP_RUNLOCK(pcb);
+ }
+ if (*ugid_lookupp == 0) {
+ /*
+ * We tried and failed, set the variable to -1
+ * so we will not try again on this packet.
+ */
+ *ugid_lookupp = -1;
+ return (0);
+ }
+ }
+ if (insn->o.opcode == O_UID)
+#ifndef __rtems__
+ match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
+#else /* __rtems__ */
+ match = (BSD_DEFAULT_UID == (uid_t)insn->d[0]);
+#endif /* __rtems__ */
+ else if (insn->o.opcode == O_GID)
+ match = groupmember((gid_t)insn->d[0], *uc);
+ else if (insn->o.opcode == O_JAIL)
+#ifndef __rtems__
+ match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
+#else /* __rtems__ */
+ match = (BSD_DEFAULT_PRISON->pr_id == (int)insn->d[0]);
+#endif /* __rtems__ */
+ return (match);
+#endif /* __FreeBSD__ */
+}
+
+/*
+ * Helper function to set args with info on the rule after the matching
+ * one. slot is precise, whereas we guess rule_id as they are
+ * assigned sequentially.
+ */
+static inline void
+set_match(struct ip_fw_args *args, int slot,
+ struct ip_fw_chain *chain)
+{
+ args->rule.chain_id = chain->id;
+ args->rule.slot = slot + 1; /* we use 0 as a marker */
+ args->rule.rule_id = 1 + chain->map[slot]->id;
+ args->rule.rulenum = chain->map[slot]->rulenum;
+}
+
+/*
+ * Helper function to enable cached rule lookups using
+ * x_next and next_rule fields in ipfw rule.
+ */
+static int
+jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
+ int tablearg, int jump_backwards)
+{
+ int f_pos;
+
+ /* If possible use cached f_pos (in f->next_rule),
+ * whose version is written in f->next_rule
+ * (horrible hacks to avoid changing the ABI).
+ */
+ if (num != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id)
+ f_pos = (uintptr_t)f->next_rule;
+ else {
+ int i = IP_FW_ARG_TABLEARG(num);
+ /* make sure we do not jump backward */
+ if (jump_backwards == 0 && i <= f->rulenum)
+ i = f->rulenum + 1;
+ f_pos = ipfw_find_rule(chain, i, 0);
+ /* update the cache */
+ if (num != IP_FW_TABLEARG) {
+ f->next_rule = (void *)(uintptr_t)f_pos;
+ f->x_next = (void *)(uintptr_t)chain->id;
+ }
+ }
+
+ return (f_pos);
+}
+
+/*
+ * The main check routine for the firewall.
+ *
+ * All arguments are in args so we can modify them and return them
+ * back to the caller.
+ *
+ * Parameters:
+ *
+ * args->m (in/out) The packet; we set to NULL when/if we nuke it.
+ * Starts with the IP header.
+ * args->eh (in) Mac header if present, NULL for layer3 packet.
+ * args->L3offset Number of bytes bypassed if we came from L2.
+ * e.g. often sizeof(eh) ** NOTYET **
+ * args->oif Outgoing interface, NULL if packet is incoming.
+ * The incoming interface is in the mbuf. (in)
+ * args->divert_rule (in/out)
+ * Skip up to the first rule past this rule number;
+ * upon return, non-zero port number for divert or tee.
+ *
+ * args->rule Pointer to the last matching rule (in/out)
+ * args->next_hop Socket we are forwarding to (out).
+ * args->next_hop6 IPv6 next hop we are forwarding to (out).
+ * args->f_id Addresses grabbed from the packet (out)
+ * args->rule.info a cookie depending on rule action
+ *
+ * Return value:
+ *
+ * IP_FW_PASS the packet must be accepted
+ * IP_FW_DENY the packet must be dropped
+ * IP_FW_DIVERT divert packet, port in m_tag
+ * IP_FW_TEE tee packet, port in m_tag
+ * IP_FW_DUMMYNET to dummynet, pipe in args->cookie
+ * IP_FW_NETGRAPH into netgraph, cookie args->cookie
+ * args->rule contains the matching rule,
+ * args->rule.info has additional information.
+ *
+ */
+int
+ipfw_chk(struct ip_fw_args *args)
+{
+
+ /*
+ * Local variables holding state while processing a packet:
+ *
+ * IMPORTANT NOTE: to speed up the processing of rules, there
+	 * are some assumptions on the values of the variables, which
+ * are documented here. Should you change them, please check
+ * the implementation of the various instructions to make sure
+ * that they still work.
+ *
+ * args->eh The MAC header. It is non-null for a layer2
+ * packet, it is NULL for a layer-3 packet.
+ * **notyet**
+ * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
+ *
+ * m | args->m Pointer to the mbuf, as received from the caller.
+ * It may change if ipfw_chk() does an m_pullup, or if it
+ * consumes the packet because it calls send_reject().
+ * XXX This has to change, so that ipfw_chk() never modifies
+ * or consumes the buffer.
+ * ip is the beginning of the ip(4 or 6) header.
+ * Calculated by adding the L3offset to the start of data.
+ * (Until we start using L3offset, the packet is
+ * supposed to start with the ip header).
+ */
+ struct mbuf *m = args->m;
+ struct ip *ip = mtod(m, struct ip *);
+
+ /*
+ * For rules which contain uid/gid or jail constraints, cache
+ * a copy of the users credentials after the pcb lookup has been
+ * executed. This will speed up the processing of rules with
+ * these types of constraints, as well as decrease contention
+ * on pcb related locks.
+ */
+#ifndef __FreeBSD__
+ struct bsd_ucred ucred_cache;
+#else
+ struct ucred *ucred_cache = NULL;
+#endif
+ int ucred_lookup = 0;
+
+ /*
+ * oif | args->oif If NULL, ipfw_chk has been called on the
+ * inbound path (ether_input, ip_input).
+ * If non-NULL, ipfw_chk has been called on the outbound path
+ * (ether_output, ip_output).
+ */
+ struct ifnet *oif = args->oif;
+
+ int f_pos = 0; /* index of current rule in the array */
+ int retval = 0;
+
+ /*
+ * hlen The length of the IP header.
+ */
+ u_int hlen = 0; /* hlen >0 means we have an IP pkt */
+
+ /*
+ * offset The offset of a fragment. offset != 0 means that
+ * we have a fragment at this offset of an IPv4 packet.
+ * offset == 0 means that (if this is an IPv4 packet)
+ * this is the first or only fragment.
+ * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
+	 * or there is a single packet fragment (fragment header added
+	 * without being needed). We will treat a single packet fragment as if
+ * there was no fragment header (or log/block depending on the
+ * V_fw_permit_single_frag6 sysctl setting).
+ */
+ u_short offset = 0;
+ u_short ip6f_mf = 0;
+
+ /*
+ * Local copies of addresses. They are only valid if we have
+ * an IP packet.
+ *
+ * proto The protocol. Set to 0 for non-ip packets,
+ * or to the protocol read from the packet otherwise.
+ * proto != 0 means that we have an IPv4 packet.
+ *
+ * src_port, dst_port port numbers, in HOST format. Only
+ * valid for TCP and UDP packets.
+ *
+ * src_ip, dst_ip ip addresses, in NETWORK format.
+ * Only valid for IPv4 packets.
+ */
+ uint8_t proto;
+ uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
+ struct in_addr src_ip, dst_ip; /* NOTE: network format */
+ uint16_t iplen=0;
+ int pktlen;
+ uint16_t etype = 0; /* Host order stored ether type */
+
+ /*
+ * dyn_dir = MATCH_UNKNOWN when rules unchecked,
+ * MATCH_NONE when checked and not matched (q = NULL),
+ * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
+ */
+ int dyn_dir = MATCH_UNKNOWN;
+ ipfw_dyn_rule *q = NULL;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+
+ /*
+ * We store in ulp a pointer to the upper layer protocol header.
+ * In the ipv4 case this is easy to determine from the header,
+ * but for ipv6 we might have some additional headers in the middle.
+ * ulp is NULL if not found.
+ */
+ void *ulp = NULL; /* upper layer protocol pointer. */
+
+ /* XXX ipv6 variables */
+ int is_ipv6 = 0;
+ uint8_t icmp6_type = 0;
+ uint16_t ext_hd = 0; /* bits vector for extension header filtering */
+ /* end of ipv6 variables */
+
+ int is_ipv4 = 0;
+
+ int done = 0; /* flag to exit the outer loop */
+
+ if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
+ return (IP_FW_PASS); /* accept */
+
+ dst_ip.s_addr = 0; /* make sure it is initialized */
+ src_ip.s_addr = 0; /* make sure it is initialized */
+ pktlen = m->m_pkthdr.len;
+	args->f_id.fib = M_GETFIB(m); /* note mbuf not altered */
+ proto = args->f_id.proto = 0; /* mark f_id invalid */
+ /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
+
+/*
+ * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
+ * then it sets p to point at the offset "len" in the mbuf. WARNING: the
+ * pointer might become stale after other pullups (but we never use it
+ * this way).
+ */
+#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
+#define PULLUP_LEN(_len, p, T) \
+do { \
+ int x = (_len) + T; \
+ if ((m)->m_len < x) { \
+ args->m = m = m_pullup(m, x); \
+ if (m == NULL) \
+ goto pullup_failed; \
+ } \
+ p = (mtod(m, char *) + (_len)); \
+} while (0)
+
+ /*
+ * if we have an ether header,
+ */
+ if (args->eh)
+ etype = ntohs(args->eh->ether_type);
+
+ /* Identify IP packets and fill up variables. */
+ if (pktlen >= sizeof(struct ip6_hdr) &&
+ (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ is_ipv6 = 1;
+ args->f_id.addr_type = 6;
+ hlen = sizeof(struct ip6_hdr);
+ proto = ip6->ip6_nxt;
+
+ /* Search extension headers to find upper layer protocols */
+ while (ulp == NULL && offset == 0) {
+ switch (proto) {
+ case IPPROTO_ICMPV6:
+ PULLUP_TO(hlen, ulp, struct icmp6_hdr);
+ icmp6_type = ICMP6(ulp)->icmp6_type;
+ break;
+
+ case IPPROTO_TCP:
+ PULLUP_TO(hlen, ulp, struct tcphdr);
+ dst_port = TCP(ulp)->th_dport;
+ src_port = TCP(ulp)->th_sport;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
+ break;
+
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+
+ case IPPROTO_UDP:
+ PULLUP_TO(hlen, ulp, struct udphdr);
+ dst_port = UDP(ulp)->uh_dport;
+ src_port = UDP(ulp)->uh_sport;
+ break;
+
+ case IPPROTO_HOPOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ ext_hd |= EXT_HOPOPTS;
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_ROUTING: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_rthdr);
+ switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
+ case 0:
+ ext_hd |= EXT_RTHDR0;
+ break;
+ case 2:
+ ext_hd |= EXT_RTHDR2;
+ break;
+ default:
+ if (V_fw_verbose)
+ printf("IPFW2: IPV6 - Unknown "
+ "Routing Header type(%d)\n",
+ ((struct ip6_rthdr *)
+ ulp)->ip6r_type);
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ break;
+ }
+ ext_hd |= EXT_ROUTING;
+ hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
+ proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_FRAGMENT: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_frag);
+ ext_hd |= EXT_FRAGMENT;
+ hlen += sizeof (struct ip6_frag);
+ proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
+ offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
+ IP6F_OFF_MASK;
+ ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
+ IP6F_MORE_FRAG;
+ if (V_fw_permit_single_frag6 == 0 &&
+ offset == 0 && ip6f_mf == 0) {
+ if (V_fw_verbose)
+ printf("IPFW2: IPV6 - Invalid "
+ "Fragment Header\n");
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ break;
+ }
+ args->f_id.extra =
+ ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
+ ulp = NULL;
+ break;
+
+ case IPPROTO_DSTOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ ext_hd |= EXT_DSTOPTS;
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_AH: /* RFC 2402 */
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ ext_hd |= EXT_AH;
+ hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
+ proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
+ ulp = NULL;
+ break;
+
+ case IPPROTO_ESP: /* RFC 2406 */
+ PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
+ /* Anything past Seq# is variable length and
+ * data past this ext. header is encrypted. */
+ ext_hd |= EXT_ESP;
+ break;
+
+ case IPPROTO_NONE: /* RFC 2460 */
+ /*
+ * Packet ends here, and IPv6 header has
+ * already been pulled up. If ip6e_len!=0
+ * then octets must be ignored.
+ */
+ ulp = ip; /* non-NULL to get out of loop. */
+ break;
+
+ case IPPROTO_OSPFIGP:
+ /* XXX OSPF header check? */
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ break;
+
+ case IPPROTO_PIM:
+ /* XXX PIM header check? */
+ PULLUP_TO(hlen, ulp, struct pim);
+ break;
+
+ case IPPROTO_CARP:
+ PULLUP_TO(hlen, ulp, struct carp_header);
+ if (((struct carp_header *)ulp)->carp_version !=
+ CARP_VERSION)
+ return (IP_FW_DENY);
+ if (((struct carp_header *)ulp)->carp_type !=
+ CARP_ADVERTISEMENT)
+ return (IP_FW_DENY);
+ break;
+
+ case IPPROTO_IPV6: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip6_hdr);
+ break;
+
+ case IPPROTO_IPV4: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip);
+ break;
+
+ default:
+ if (V_fw_verbose)
+ printf("IPFW2: IPV6 - Unknown "
+ "Extension Header(%d), ext_hd=%x\n",
+ proto, ext_hd);
+ if (V_fw_deny_unknown_exthdrs)
+ return (IP_FW_DENY);
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ break;
+ } /*switch */
+ }
+ ip = mtod(m, struct ip *);
+ ip6 = (struct ip6_hdr *)ip;
+ args->f_id.src_ip6 = ip6->ip6_src;
+ args->f_id.dst_ip6 = ip6->ip6_dst;
+ args->f_id.src_ip = 0;
+ args->f_id.dst_ip = 0;
+ args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
+ } else if (pktlen >= sizeof(struct ip) &&
+ (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
+ is_ipv4 = 1;
+ hlen = ip->ip_hl << 2;
+ args->f_id.addr_type = 4;
+
+ /*
+ * Collect parameters into local variables for faster matching.
+ */
+ proto = ip->ip_p;
+ src_ip = ip->ip_src;
+ dst_ip = ip->ip_dst;
+ offset = ntohs(ip->ip_off) & IP_OFFMASK;
+ iplen = ntohs(ip->ip_len);
+ pktlen = iplen < pktlen ? iplen : pktlen;
+
+ if (offset == 0) {
+ switch (proto) {
+ case IPPROTO_TCP:
+ PULLUP_TO(hlen, ulp, struct tcphdr);
+ dst_port = TCP(ulp)->th_dport;
+ src_port = TCP(ulp)->th_sport;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
+ break;
+
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+
+ case IPPROTO_UDP:
+ PULLUP_TO(hlen, ulp, struct udphdr);
+ dst_port = UDP(ulp)->uh_dport;
+ src_port = UDP(ulp)->uh_sport;
+ break;
+
+ case IPPROTO_ICMP:
+ PULLUP_TO(hlen, ulp, struct icmphdr);
+ //args->f_id.flags = ICMP(ulp)->icmp_type;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ ip = mtod(m, struct ip *);
+ args->f_id.src_ip = ntohl(src_ip.s_addr);
+ args->f_id.dst_ip = ntohl(dst_ip.s_addr);
+ }
+#undef PULLUP_TO
+ if (proto) { /* we may have port numbers, store them */
+ args->f_id.proto = proto;
+ args->f_id.src_port = src_port = ntohs(src_port);
+ args->f_id.dst_port = dst_port = ntohs(dst_port);
+ }
+
+ IPFW_RLOCK(chain);
+ if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
+ IPFW_RUNLOCK(chain);
+ return (IP_FW_PASS); /* accept */
+ }
+ if (args->rule.slot) {
+ /*
+ * Packet has already been tagged as a result of a previous
+ * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
+ * REASS, NETGRAPH, DIVERT/TEE...)
+ * Validate the slot and continue from the next one
+ * if still present, otherwise do a lookup.
+ */
+ f_pos = (args->rule.chain_id == chain->id) ?
+ args->rule.slot :
+ ipfw_find_rule(chain, args->rule.rulenum,
+ args->rule.rule_id);
+ } else {
+ f_pos = 0;
+ }
+
+ /*
+ * Now scan the rules, and parse microinstructions for each rule.
+ * We have two nested loops and an inner switch. Sometimes we
+ * need to break out of one or both loops, or re-enter one of
+ * the loops with updated variables. Loop variables are:
+ *
+ * f_pos (outer loop) points to the current rule.
+ * On output it points to the matching rule.
+ * done (outer loop) is used as a flag to break the loop.
+ * l (inner loop) residual length of current rule.
+ * cmd points to the current microinstruction.
+ *
+ * We break the inner loop by setting l=0 and possibly
+ * cmdlen=0 if we don't want to advance cmd.
+ * We break the outer loop by setting done=1
+ * We can restart the inner loop by setting l>0 and f_pos, f, cmd
+ * as needed.
+ */
+ for (; f_pos < chain->n_rules; f_pos++) {
+ ipfw_insn *cmd;
+ uint32_t tablearg = 0;
+ int l, cmdlen, skip_or; /* skip rest of OR block */
+ struct ip_fw *f;
+
+ f = chain->map[f_pos];
+ if (V_set_disable & (1 << f->set) )
+ continue;
+
+ skip_or = 0;
+ for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
+ l -= cmdlen, cmd += cmdlen) {
+ int match;
+
+ /*
+ * check_body is a jump target used when we find a
+ * CHECK_STATE, and need to jump to the body of
+ * the target rule.
+ */
+
+/* check_body: */
+ cmdlen = F_LEN(cmd);
+ /*
+ * An OR block (insn_1 || .. || insn_n) has the
+ * F_OR bit set in all but the last instruction.
+ * The first match will set "skip_or", and cause
+ * the following instructions to be skipped until
+ * past the one with the F_OR bit clear.
+ */
+ if (skip_or) { /* skip this instruction */
+ if ((cmd->len & F_OR) == 0)
+ skip_or = 0; /* next one is good */
+ continue;
+ }
+ match = 0; /* set to 1 if we succeed */
+
+ switch (cmd->opcode) {
+ /*
+ * The first set of opcodes compares the packet's
+ * fields with some pattern, setting 'match' if a
+ * match is found. At the end of the loop there is
+ * logic to deal with F_NOT and F_OR flags associated
+ * with the opcode.
+ */
+ case O_NOP:
+ match = 1;
+ break;
+
+ case O_FORWARD_MAC:
+ printf("ipfw: opcode %d unimplemented\n",
+ cmd->opcode);
+ break;
+
+ case O_GID:
+ case O_UID:
+ case O_JAIL:
+ /*
+ * We only check offset == 0 && proto != 0,
+ * as this ensures that we have a
+ * packet with the ports info.
+ */
+ if (offset != 0)
+ break;
+ if (proto == IPPROTO_TCP ||
+ proto == IPPROTO_UDP)
+ match = check_uidgid(
+ (ipfw_insn_u32 *)cmd,
+ args, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache);
+#else
+ (void *)&ucred_cache);
+#endif
+ break;
+
+ case O_RECV:
+ match = iface_match(m->m_pkthdr.rcvif,
+ (ipfw_insn_if *)cmd, chain, &tablearg);
+ break;
+
+ case O_XMIT:
+ match = iface_match(oif, (ipfw_insn_if *)cmd,
+ chain, &tablearg);
+ break;
+
+ case O_VIA:
+ match = iface_match(oif ? oif :
+ m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd,
+ chain, &tablearg);
+ break;
+
+ case O_MACADDR2:
+ if (args->eh != NULL) { /* have MAC header */
+ u_int32_t *want = (u_int32_t *)
+ ((ipfw_insn_mac *)cmd)->addr;
+ u_int32_t *mask = (u_int32_t *)
+ ((ipfw_insn_mac *)cmd)->mask;
+ u_int32_t *hdr = (u_int32_t *)args->eh;
+
+ match =
+ ( want[0] == (hdr[0] & mask[0]) &&
+ want[1] == (hdr[1] & mask[1]) &&
+ want[2] == (hdr[2] & mask[2]) );
+ }
+ break;
+
+ case O_MAC_TYPE:
+ if (args->eh != NULL) {
+ u_int16_t *p =
+ ((ipfw_insn_u16 *)cmd)->ports;
+ int i;
+
+ for (i = cmdlen - 1; !match && i>0;
+ i--, p += 2)
+ match = (etype >= p[0] &&
+ etype <= p[1]);
+ }
+ break;
+
+ case O_FRAG:
+ match = (offset != 0);
+ break;
+
+ case O_IN: /* "out" is "not in" */
+ match = (oif == NULL);
+ break;
+
+ case O_LAYER2:
+ match = (args->eh != NULL);
+ break;
+
+ case O_DIVERTED:
+ {
+ /* For diverted packets, args->rule.info
+ * contains the divert port (in host format)
+ * reason and direction.
+ */
+ uint32_t i = args->rule.info;
+ match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
+ cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
+ }
+ break;
+
+ case O_PROTO:
+ /*
+ * We do not allow an arg of 0 so the
+ * check of "proto" only suffices.
+ */
+ match = (proto == cmd->arg1);
+ break;
+
+ case O_IP_SRC:
+ match = is_ipv4 &&
+ (((ipfw_insn_ip *)cmd)->addr.s_addr ==
+ src_ip.s_addr);
+ break;
+
+ case O_IP_SRC_LOOKUP:
+ case O_IP_DST_LOOKUP:
+ if (is_ipv4) {
+ uint32_t key =
+ (cmd->opcode == O_IP_DST_LOOKUP) ?
+ dst_ip.s_addr : src_ip.s_addr;
+ uint32_t v = 0;
+
+ if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
+ /* generic lookup. The key must be
+ * in 32bit big-endian format.
+ */
+ v = ((ipfw_insn_u32 *)cmd)->d[1];
+ if (v == 0)
+ key = dst_ip.s_addr;
+ else if (v == 1)
+ key = src_ip.s_addr;
+ else if (v == 6) /* dscp */
+ key = (ip->ip_tos >> 2) & 0x3f;
+ else if (offset != 0)
+ break;
+ else if (proto != IPPROTO_TCP &&
+ proto != IPPROTO_UDP)
+ break;
+ else if (v == 2)
+ key = htonl(dst_port);
+ else if (v == 3)
+ key = htonl(src_port);
+ else if (v == 4 || v == 5) {
+ check_uidgid(
+ (ipfw_insn_u32 *)cmd,
+ args, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache);
+ if (v == 4 /* O_UID */)
+#ifndef __rtems__
+ key = ucred_cache->cr_uid;
+#else /* __rtems__ */
+ key = BSD_DEFAULT_UID;
+#endif /* __rtems__ */
+ else if (v == 5 /* O_JAIL */)
+#ifndef __rtems__
+ key = ucred_cache->cr_prison->pr_id;
+#else /* __rtems__ */
+ key = BSD_DEFAULT_PRISON->pr_id;
+#endif /* __rtems__ */
+#else /* !__FreeBSD__ */
+ (void *)&ucred_cache);
+ if (v ==4 /* O_UID */)
+ key = ucred_cache.uid;
+ else if (v == 5 /* O_JAIL */)
+ key = ucred_cache.xid;
+#endif /* !__FreeBSD__ */
+ key = htonl(key);
+ } else
+ break;
+ }
+ match = ipfw_lookup_table(chain,
+ cmd->arg1, key, &v);
+ if (!match)
+ break;
+ if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
+ match =
+ ((ipfw_insn_u32 *)cmd)->d[0] == v;
+ else
+ tablearg = v;
+ } else if (is_ipv6) {
+ uint32_t v = 0;
+ void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ?
+ &args->f_id.dst_ip6: &args->f_id.src_ip6;
+ match = ipfw_lookup_table_extended(chain,
+ cmd->arg1, pkey, &v,
+ IPFW_TABLE_CIDR);
+ if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
+ match = ((ipfw_insn_u32 *)cmd)->d[0] == v;
+ if (match)
+ tablearg = v;
+ }
+ break;
+
+ case O_IP_SRC_MASK:
+ case O_IP_DST_MASK:
+ if (is_ipv4) {
+ uint32_t a =
+ (cmd->opcode == O_IP_DST_MASK) ?
+ dst_ip.s_addr : src_ip.s_addr;
+ uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
+ int i = cmdlen-1;
+
+ for (; !match && i>0; i-= 2, p+= 2)
+ match = (p[0] == (a & p[1]));
+ }
+ break;
+
+ case O_IP_SRC_ME:
+ if (is_ipv4) {
+ struct ifnet *tif;
+
+ INADDR_TO_IFP(src_ip, tif);
+ match = (tif != NULL);
+ break;
+ }
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_SRC_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
+#endif
+ break;
+
+ case O_IP_DST_SET:
+ case O_IP_SRC_SET:
+ if (is_ipv4) {
+ u_int32_t *d = (u_int32_t *)(cmd+1);
+ u_int32_t addr =
+ cmd->opcode == O_IP_DST_SET ?
+ args->f_id.dst_ip :
+ args->f_id.src_ip;
+
+ if (addr < d[0])
+ break;
+ addr -= d[0]; /* subtract base */
+ match = (addr < cmd->arg1) &&
+ ( d[ 1 + (addr>>5)] &
+ (1<<(addr & 0x1f)) );
+ }
+ break;
+
+ case O_IP_DST:
+ match = is_ipv4 &&
+ (((ipfw_insn_ip *)cmd)->addr.s_addr ==
+ dst_ip.s_addr);
+ break;
+
+ case O_IP_DST_ME:
+ if (is_ipv4) {
+ struct ifnet *tif;
+
+ INADDR_TO_IFP(dst_ip, tif);
+ match = (tif != NULL);
+ break;
+ }
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_DST_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
+#endif
+ break;
+
+
+ case O_IP_SRCPORT:
+ case O_IP_DSTPORT:
+ /*
+ * offset == 0 && proto != 0 is enough
+ * to guarantee that we have a
+ * packet with port info.
+ */
+ if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
+ && offset == 0) {
+ u_int16_t x =
+ (cmd->opcode == O_IP_SRCPORT) ?
+ src_port : dst_port ;
+ u_int16_t *p =
+ ((ipfw_insn_u16 *)cmd)->ports;
+ int i;
+
+ for (i = cmdlen - 1; !match && i>0;
+ i--, p += 2)
+ match = (x>=p[0] && x<=p[1]);
+ }
+ break;
+
+ case O_ICMPTYPE:
+ match = (offset == 0 && proto==IPPROTO_ICMP &&
+ icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
+ break;
+
+#ifdef INET6
+ case O_ICMP6TYPE:
+ match = is_ipv6 && offset == 0 &&
+ proto==IPPROTO_ICMPV6 &&
+ icmp6type_match(
+ ICMP6(ulp)->icmp6_type,
+ (ipfw_insn_u32 *)cmd);
+ break;
+#endif /* INET6 */
+
+ case O_IPOPT:
+ match = (is_ipv4 &&
+ ipopts_match(ip, cmd) );
+ break;
+
+ case O_IPVER:
+ match = (is_ipv4 &&
+ cmd->arg1 == ip->ip_v);
+ break;
+
+ case O_IPID:
+ case O_IPLEN:
+ case O_IPTTL:
+ if (is_ipv4) { /* only for IP packets */
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ if (cmd->opcode == O_IPLEN)
+ x = iplen;
+ else if (cmd->opcode == O_IPTTL)
+ x = ip->ip_ttl;
+ else /* must be IPID */
+ x = ntohs(ip->ip_id);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* otherwise we have ranges */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i>0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
+ case O_IPPRECEDENCE:
+ match = (is_ipv4 &&
+ (cmd->arg1 == (ip->ip_tos & 0xe0)) );
+ break;
+
+ case O_IPTOS:
+ match = (is_ipv4 &&
+ flags_match(cmd, ip->ip_tos));
+ break;
+
+ case O_DSCP:
+ {
+ uint32_t *p;
+ uint16_t x;
+
+ p = ((ipfw_insn_u32 *)cmd)->d;
+
+ if (is_ipv4)
+ x = ip->ip_tos >> 2;
+ else if (is_ipv6) {
+ uint8_t *v;
+ v = &((struct ip6_hdr *)ip)->ip6_vfc;
+ x = (*v & 0x0F) << 2;
+ v++;
+ x |= *v >> 6;
+ } else
+ break;
+
+ /* DSCP bitmask is stored as low_u32 high_u32 */
+ if (x > 32)
+ match = *(p + 1) & (1 << (x - 32));
+ else
+ match = *p & (1 << x);
+ }
+ break;
+
+ case O_TCPDATALEN:
+ if (proto == IPPROTO_TCP && offset == 0) {
+ struct tcphdr *tcp;
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ tcp = TCP(ulp);
+ x = iplen -
+ ((ip->ip_hl + tcp->th_off) << 2);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* otherwise we have ranges */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i>0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
+ case O_TCPFLAGS:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ flags_match(cmd, TCP(ulp)->th_flags));
+ break;
+
+ case O_TCPOPTS:
+ PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ tcpopts_match(TCP(ulp), cmd));
+ break;
+
+ case O_TCPSEQ:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ ((ipfw_insn_u32 *)cmd)->d[0] ==
+ TCP(ulp)->th_seq);
+ break;
+
+ case O_TCPACK:
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ ((ipfw_insn_u32 *)cmd)->d[0] ==
+ TCP(ulp)->th_ack);
+ break;
+
+ case O_TCPWIN:
+ if (proto == IPPROTO_TCP && offset == 0) {
+ uint16_t x;
+ uint16_t *p;
+ int i;
+
+ x = ntohs(TCP(ulp)->th_win);
+ if (cmdlen == 1) {
+ match = (cmd->arg1 == x);
+ break;
+ }
+ /* Otherwise we have ranges. */
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for (; !match && i > 0; i--, p += 2)
+ match = (x >= p[0] && x <= p[1]);
+ }
+ break;
+
+ case O_ESTAB:
+ /* reject packets which have SYN only */
+ /* XXX should i also check for TH_ACK ? */
+ match = (proto == IPPROTO_TCP && offset == 0 &&
+ (TCP(ulp)->th_flags &
+ (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
+ break;
+
+ case O_ALTQ: {
+ struct pf_mtag *at;
+ ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+ match = 1;
+ at = pf_find_mtag(m);
+ if (at != NULL && at->qid != 0)
+ break;
+ at = pf_get_mtag(m);
+ if (at == NULL) {
+ /*
+ * Let the packet fall back to the
+ * default ALTQ.
+ */
+ break;
+ }
+ at->qid = altq->qid;
+ at->hdr = ip;
+ break;
+ }
+
+ case O_LOG:
+ ipfw_log(f, hlen, args, m,
+ oif, offset | ip6f_mf, tablearg, ip);
+ match = 1;
+ break;
+
+ case O_PROB:
+ match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
+ break;
+
+ case O_VERREVPATH:
+ /* Outgoing packets automatically pass/match */
+ match = ((oif != NULL) ||
+ (m->m_pkthdr.rcvif == NULL) ||
+ (
+#ifdef INET6
+ is_ipv6 ?
+ verify_path6(&(args->f_id.src_ip6),
+ m->m_pkthdr.rcvif, args->f_id.fib) :
+#endif
+ verify_path(src_ip, m->m_pkthdr.rcvif,
+ args->f_id.fib)));
+ break;
+
+ case O_VERSRCREACH:
+ /* Outgoing packets automatically pass/match */
+ match = (hlen > 0 && ((oif != NULL) ||
+#ifdef INET6
+ is_ipv6 ?
+ verify_path6(&(args->f_id.src_ip6),
+ NULL, args->f_id.fib) :
+#endif
+ verify_path(src_ip, NULL, args->f_id.fib)));
+ break;
+
+ case O_ANTISPOOF:
+ /* Outgoing packets automatically pass/match */
+ if (oif == NULL && hlen > 0 &&
+ ( (is_ipv4 && in_localaddr(src_ip))
+#ifdef INET6
+ || (is_ipv6 &&
+ in6_localaddr(&(args->f_id.src_ip6)))
+#endif
+ ))
+ match =
+#ifdef INET6
+ is_ipv6 ? verify_path6(
+ &(args->f_id.src_ip6),
+ m->m_pkthdr.rcvif,
+ args->f_id.fib) :
+#endif
+ verify_path(src_ip,
+ m->m_pkthdr.rcvif,
+ args->f_id.fib);
+ else
+ match = 1;
+ break;
+
+ case O_IPSEC:
+#ifdef IPSEC
+ match = (m_tag_find(m,
+ PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
+#endif
+ /* otherwise no match */
+ break;
+
+#ifdef INET6
+ case O_IP6_SRC:
+ match = is_ipv6 &&
+ IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
+ &((ipfw_insn_ip6 *)cmd)->addr6);
+ break;
+
+ case O_IP6_DST:
+ match = is_ipv6 &&
+ IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
+ &((ipfw_insn_ip6 *)cmd)->addr6);
+ break;
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ if (is_ipv6) {
+ int i = cmdlen - 1;
+ struct in6_addr p;
+ struct in6_addr *d =
+ &((ipfw_insn_ip6 *)cmd)->addr6;
+
+ for (; !match && i > 0; d += 2,
+ i -= F_INSN_SIZE(struct in6_addr)
+ * 2) {
+ p = (cmd->opcode ==
+ O_IP6_SRC_MASK) ?
+ args->f_id.src_ip6:
+ args->f_id.dst_ip6;
+ APPLY_MASK(&p, &d[1]);
+ match =
+ IN6_ARE_ADDR_EQUAL(&d[0],
+ &p);
+ }
+ }
+ break;
+
+ case O_FLOW6ID:
+ match = is_ipv6 &&
+ flow6id_match(args->f_id.flow_id6,
+ (ipfw_insn_u32 *) cmd);
+ break;
+
+ case O_EXT_HDR:
+ match = is_ipv6 &&
+ (ext_hd & ((ipfw_insn *) cmd)->arg1);
+ break;
+
+ case O_IP6:
+ match = is_ipv6;
+ break;
+#endif
+
+ case O_IP4:
+ match = is_ipv4;
+ break;
+
+ case O_TAG: {
+ struct m_tag *mtag;
+ uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
+
+ /* Packet is already tagged with this tag? */
+ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
+
+ /* We have `untag' action when F_NOT flag is
+ * present. And we must remove this mtag from
+				 * mbuf and reset `match' to zero (`match' will
+				 * be inverted later).
+ * Otherwise we should allocate new mtag and
+ * push it into mbuf.
+ */
+ if (cmd->len & F_NOT) { /* `untag' action */
+ if (mtag != NULL)
+ m_tag_delete(m, mtag);
+ match = 0;
+ } else {
+ if (mtag == NULL) {
+ mtag = m_tag_alloc( MTAG_IPFW,
+ tag, 0, M_NOWAIT);
+ if (mtag != NULL)
+ m_tag_prepend(m, mtag);
+ }
+ match = 1;
+ }
+ break;
+ }
+
+ case O_FIB: /* try match the specified fib */
+ if (args->f_id.fib == cmd->arg1)
+ match = 1;
+ break;
+
+ case O_SOCKARG: {
+ struct inpcb *inp = args->inp;
+ struct inpcbinfo *pi;
+
+ if (is_ipv6) /* XXX can we remove this ? */
+ break;
+
+ if (proto == IPPROTO_TCP)
+ pi = &V_tcbinfo;
+ else if (proto == IPPROTO_UDP)
+ pi = &V_udbinfo;
+ else
+ break;
+
+ /*
+ * XXXRW: so_user_cookie should almost
+ * certainly be inp_user_cookie?
+ */
+
+			/* For incoming packets, look up the
+			   inpcb using the src/dest ip/port tuple */
+ if (inp == NULL) {
+ inp = in_pcblookup(pi,
+ src_ip, htons(src_port),
+ dst_ip, htons(dst_port),
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp != NULL) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ INP_RUNLOCK(inp);
+ }
+ } else {
+ if (inp->inp_socket) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ }
+ }
+ break;
+ }
+
+ case O_TAGGED: {
+ struct m_tag *mtag;
+ uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
+
+ if (cmdlen == 1) {
+ match = m_tag_locate(m, MTAG_IPFW,
+ tag, NULL) != NULL;
+ break;
+ }
+
+ /* we have ranges */
+ for (mtag = m_tag_first(m);
+ mtag != NULL && !match;
+ mtag = m_tag_next(m, mtag)) {
+ uint16_t *p;
+ int i;
+
+ if (mtag->m_tag_cookie != MTAG_IPFW)
+ continue;
+
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for(; !match && i > 0; i--, p += 2)
+ match =
+ mtag->m_tag_id >= p[0] &&
+ mtag->m_tag_id <= p[1];
+ }
+ break;
+ }
+
+ /*
+ * The second set of opcodes represents 'actions',
+ * i.e. the terminal part of a rule once the packet
+ * matches all previous patterns.
+ * Typically there is only one action for each rule,
+ * and the opcode is stored at the end of the rule
+ * (but there are exceptions -- see below).
+ *
+ * In general, here we set retval and terminate the
+ * outer loop (would be a 'break 3' in some language,
+ * but we need to set l=0, done=1)
+ *
+ * Exceptions:
+ * O_COUNT and O_SKIPTO actions:
+ * instead of terminating, we jump to the next rule
+ * (setting l=0), or to the SKIPTO target (setting
+ * f/f_len, cmd and l as needed), respectively.
+ *
+ * O_TAG, O_LOG and O_ALTQ action parameters:
+ * perform some action and set match = 1;
+ *
+ * O_LIMIT and O_KEEP_STATE: these opcodes are
+ * not real 'actions', and are stored right
+ * before the 'action' part of the rule.
+ * These opcodes try to install an entry in the
+ * state tables; if successful, we continue with
+ * the next opcode (match=1; break;), otherwise
+ * the packet must be dropped (set retval,
+ * break loops with l=0, done=1)
+ *
+ * O_PROBE_STATE and O_CHECK_STATE: these opcodes
+ * cause a lookup of the state table, and a jump
+ * to the 'action' part of the parent rule
+ * if an entry is found, or
+ * (CHECK_STATE only) a jump to the next rule if
+ * the entry is not found.
+ * The result of the lookup is cached so that
+ * further instances of these opcodes become NOPs.
+ * The jump to the next rule is done by setting
+ * l=0, cmdlen=0.
+ */
+ case O_LIMIT:
+ case O_KEEP_STATE:
+ if (ipfw_install_state(f,
+ (ipfw_insn_limit *)cmd, args, tablearg)) {
+ /* error or limit violation */
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ }
+ match = 1;
+ break;
+
+ case O_PROBE_STATE:
+ case O_CHECK_STATE:
+ /*
+ * dynamic rules are checked at the first
+ * keep-state or check-state occurrence,
+ * with the result being stored in dyn_dir.
+ * The compiler introduces a PROBE_STATE
+ * instruction for us when we have a
+ * KEEP_STATE (because PROBE_STATE needs
+ * to be run first).
+ */
+ if (dyn_dir == MATCH_UNKNOWN &&
+ (q = ipfw_lookup_dyn_rule(&args->f_id,
+ &dyn_dir, proto == IPPROTO_TCP ?
+ TCP(ulp) : NULL))
+ != NULL) {
+ /*
+ * Found dynamic entry, update stats
+ * and jump to the 'action' part of
+ * the parent rule by setting
+ * f, cmd, l and clearing cmdlen.
+ */
+ IPFW_INC_DYN_COUNTER(q, pktlen);
+ /* XXX we would like to have f_pos
+ * readily accessible in the dynamic
+ * rule, instead of having to
+ * lookup q->rule.
+ */
+ f = q->rule;
+ f_pos = ipfw_find_rule(chain,
+ f->rulenum, f->id);
+ cmd = ACTION_PTR(f);
+ l = f->cmd_len - f->act_ofs;
+ ipfw_dyn_unlock(q);
+ cmdlen = 0;
+ match = 1;
+ break;
+ }
+ /*
+ * Dynamic entry not found. If CHECK_STATE,
+ * skip to next rule, if PROBE_STATE just
+ * ignore and continue with next opcode.
+ */
+ if (cmd->opcode == O_CHECK_STATE)
+ l = 0; /* exit inner loop */
+ match = 1;
+ break;
+
+ case O_ACCEPT:
+ retval = 0; /* accept */
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_PIPE:
+ case O_QUEUE:
+ set_match(args, f_pos, chain);
+ args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ if (cmd->opcode == O_PIPE)
+ args->rule.info |= IPFW_IS_PIPE;
+ if (V_fw_one_pass)
+ args->rule.info |= IPFW_ONEPASS;
+ retval = IP_FW_DUMMYNET;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_DIVERT:
+ case O_TEE:
+ if (args->eh) /* not on layer 2 */
+ break;
+ /* otherwise this is terminal */
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ retval = (cmd->opcode == O_DIVERT) ?
+ IP_FW_DIVERT : IP_FW_TEE;
+ set_match(args, f_pos, chain);
+ args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ break;
+
+ case O_COUNT:
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ l = 0; /* exit inner loop */
+ break;
+
+ case O_SKIPTO:
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ f_pos = jump_fast(chain, f, cmd->arg1, tablearg, 0);
+ /*
+ * Skip disabled rules, and re-enter
+ * the inner loop with the correct
+ * f_pos, f, l and cmd.
+ * Also clear cmdlen and skip_or
+ */
+ for (; f_pos < chain->n_rules - 1 &&
+ (V_set_disable &
+ (1 << chain->map[f_pos]->set));
+ f_pos++)
+ ;
+ /* Re-enter the inner loop at the skipto rule. */
+ f = chain->map[f_pos];
+ l = f->cmd_len;
+ cmd = f->cmd;
+ match = 1;
+ cmdlen = 0;
+ skip_or = 0;
+ continue;
+ break; /* not reached */
+
+ case O_CALLRETURN: {
+ /*
+ * Implementation of `subroutine' call/return,
+ * in the stack carried in an mbuf tag. This
+ * is different from `skipto' in that any call
+ * address is possible (`skipto' must prevent
+ * backward jumps to avoid endless loops).
+ * We have `return' action when F_NOT flag is
+ * present. The `m_tag_id' field is used as
+ * stack pointer.
+ */
+ struct m_tag *mtag;
+ uint16_t jmpto, *stack;
+
+#define IS_CALL ((cmd->len & F_NOT) == 0)
+#define IS_RETURN ((cmd->len & F_NOT) != 0)
+ /*
+ * Hand-rolled version of m_tag_locate() with
+ * wildcard `type'.
+ * If not already tagged, allocate new tag.
+ */
+ mtag = m_tag_first(m);
+ while (mtag != NULL) {
+ if (mtag->m_tag_cookie ==
+ MTAG_IPFW_CALL)
+ break;
+ mtag = m_tag_next(m, mtag);
+ }
+ if (mtag == NULL && IS_CALL) {
+ mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
+ IPFW_CALLSTACK_SIZE *
+ sizeof(uint16_t), M_NOWAIT);
+ if (mtag != NULL)
+ m_tag_prepend(m, mtag);
+ }
+
+ /*
+ * On error both `call' and `return' just
+ * continue with next rule.
+ */
+ if (IS_RETURN && (mtag == NULL ||
+ mtag->m_tag_id == 0)) {
+ l = 0; /* exit inner loop */
+ break;
+ }
+ if (IS_CALL && (mtag == NULL ||
+ mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
+ printf("ipfw: call stack error, "
+ "go to next rule\n");
+ l = 0; /* exit inner loop */
+ break;
+ }
+
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ stack = (uint16_t *)(mtag + 1);
+
+ /*
+ * The `call' action may use cached f_pos
+ * (in f->next_rule), whose version is written
+ * in f->next_rule.
+ * The `return' action, however, doesn't have
+ * fixed jump address in cmd->arg1 and can't use
+ * cache.
+ */
+ if (IS_CALL) {
+ stack[mtag->m_tag_id] = f->rulenum;
+ mtag->m_tag_id++;
+ f_pos = jump_fast(chain, f, cmd->arg1,
+ tablearg, 1);
+ } else { /* `return' action */
+ mtag->m_tag_id--;
+ jmpto = stack[mtag->m_tag_id] + 1;
+ f_pos = ipfw_find_rule(chain, jmpto, 0);
+ }
+
+ /*
+ * Skip disabled rules, and re-enter
+ * the inner loop with the correct
+ * f_pos, f, l and cmd.
+ * Also clear cmdlen and skip_or
+ */
+ for (; f_pos < chain->n_rules - 1 &&
+ (V_set_disable &
+ (1 << chain->map[f_pos]->set)); f_pos++)
+ ;
+ /* Re-enter the inner loop at the dest rule. */
+ f = chain->map[f_pos];
+ l = f->cmd_len;
+ cmd = f->cmd;
+ cmdlen = 0;
+ skip_or = 0;
+ continue;
+ break; /* NOTREACHED */
+ }
+#undef IS_CALL
+#undef IS_RETURN
+
+ case O_REJECT:
+ /*
+ * Drop the packet and send a reject notice
+ * if the packet is not ICMP (or is an ICMP
+ * query), and it is not multicast/broadcast.
+ */
+ if (hlen > 0 && is_ipv4 && offset == 0 &&
+ (proto != IPPROTO_ICMP ||
+ is_icmp_query(ICMP(ulp))) &&
+ !(m->m_flags & (M_BCAST|M_MCAST)) &&
+ !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
+ send_reject(args, cmd->arg1, iplen, ip);
+ m = args->m;
+ }
+ /* FALLTHROUGH */
+#ifdef INET6
+ case O_UNREACH6:
+ if (hlen > 0 && is_ipv6 &&
+ ((offset & IP6F_OFF_MASK) == 0) &&
+ (proto != IPPROTO_ICMPV6 ||
+ (is_icmp6_query(icmp6_type) == 1)) &&
+ !(m->m_flags & (M_BCAST|M_MCAST)) &&
+ !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
+ send_reject6(
+ args, cmd->arg1, hlen,
+ (struct ip6_hdr *)ip);
+ m = args->m;
+ }
+ /* FALLTHROUGH */
+#endif
+ case O_DENY:
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_FORWARD_IP:
+ if (args->eh) /* not valid on layer2 pkts */
+ break;
+ if (q == NULL || q->rule != f ||
+ dyn_dir == MATCH_FORWARD) {
+ struct sockaddr_in *sa;
+ sa = &(((ipfw_insn_sa *)cmd)->sa);
+ if (sa->sin_addr.s_addr == INADDR_ANY) {
+ bcopy(sa, &args->hopstore,
+ sizeof(*sa));
+ args->hopstore.sin_addr.s_addr =
+ htonl(tablearg);
+ args->next_hop = &args->hopstore;
+ } else {
+ args->next_hop = sa;
+ }
+ }
+ retval = IP_FW_PASS;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+#ifdef INET6
+ case O_FORWARD_IP6:
+ if (args->eh) /* not valid on layer2 pkts */
+ break;
+ if (q == NULL || q->rule != f ||
+ dyn_dir == MATCH_FORWARD) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
+ args->next_hop6 = sin6;
+ }
+ retval = IP_FW_PASS;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+#endif
+
+ case O_NETGRAPH:
+ case O_NGTEE:
+ set_match(args, f_pos, chain);
+ args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ if (V_fw_one_pass)
+ args->rule.info |= IPFW_ONEPASS;
+ retval = (cmd->opcode == O_NETGRAPH) ?
+ IP_FW_NETGRAPH : IP_FW_NGTEE;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_SETFIB: {
+ uint32_t fib;
+
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ fib = IP_FW_ARG_TABLEARG(cmd->arg1);
+ if (fib >= rt_numfibs)
+ fib = 0;
+ M_SETFIB(m, fib);
+ args->f_id.fib = fib;
+ l = 0; /* exit inner loop */
+ break;
+ }
+
+ case O_SETDSCP: {
+ uint16_t code;
+
+ code = IP_FW_ARG_TABLEARG(cmd->arg1) & 0x3F;
+ l = 0; /* exit inner loop */
+ if (is_ipv4) {
+ uint16_t a;
+
+ a = ip->ip_tos;
+ ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03);
+ a += ntohs(ip->ip_sum) - ip->ip_tos;
+ ip->ip_sum = htons(a);
+ } else if (is_ipv6) {
+ uint8_t *v;
+
+ v = &((struct ip6_hdr *)ip)->ip6_vfc;
+ *v = (*v & 0xF0) | (code >> 2);
+ v++;
+ *v = (*v & 0x3F) | ((code & 0x03) << 6);
+ } else
+ break;
+
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ break;
+ }
+
+ case O_NAT:
+ if (!IPFW_NAT_LOADED) {
+ retval = IP_FW_DENY;
+ } else {
+ struct cfg_nat *t;
+ int nat_id;
+
+ set_match(args, f_pos, chain);
+ /* Check if this is 'global' nat rule */
+ if (cmd->arg1 == 0) {
+ retval = ipfw_nat_ptr(args, NULL, m);
+ l = 0;
+ done = 1;
+ break;
+ }
+ t = ((ipfw_insn_nat *)cmd)->nat;
+ if (t == NULL) {
+ nat_id = IP_FW_ARG_TABLEARG(cmd->arg1);
+ t = (*lookup_nat_ptr)(&chain->nat, nat_id);
+
+ if (t == NULL) {
+ retval = IP_FW_DENY;
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+ }
+ if (cmd->arg1 != IP_FW_TABLEARG)
+ ((ipfw_insn_nat *)cmd)->nat = t;
+ }
+ retval = ipfw_nat_ptr(args, t, m);
+ }
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
+ break;
+
+ case O_REASS: {
+ int ip_off;
+
+ IPFW_INC_RULE_COUNTER(f, pktlen);
+ l = 0; /* in any case exit inner loop */
+ ip_off = ntohs(ip->ip_off);
+
+ /* if not fragmented, go to next rule */
+ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
+ break;
+ /*
+ * ip_reass() expects len & off in host
+ * byte order.
+ */
+ SET_HOST_IPLEN(ip);
+
+ args->m = m = ip_reass(m);
+
+ /*
+ * do IP header checksum fixup.
+ */
+ if (m == NULL) { /* fragment got swallowed */
+ retval = IP_FW_DENY;
+ } else { /* good, packet complete */
+ int hlen;
+
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+ SET_NET_IPLEN(ip);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip))
+ ip->ip_sum = in_cksum_hdr(ip);
+ else
+ ip->ip_sum = in_cksum(m, hlen);
+ retval = IP_FW_REASS;
+ set_match(args, f_pos, chain);
+ }
+ done = 1; /* exit outer loop */
+ break;
+ }
+
+ default:
+ panic("-- unknown opcode %d\n", cmd->opcode);
+ } /* end of switch() on opcodes */
+ /*
+ * if we get here with l=0, then match is irrelevant.
+ */
+
+ if (cmd->len & F_NOT)
+ match = !match;
+
+ if (match) {
+ if (cmd->len & F_OR)
+ skip_or = 1;
+ } else {
+ if (!(cmd->len & F_OR)) /* not an OR block, */
+ break; /* try next rule */
+ }
+
+ } /* end of inner loop, scan opcodes */
+#undef PULLUP_LEN
+
+ if (done)
+ break;
+
+/* next_rule:; */ /* try next rule */
+
+ } /* end of outer for, scan rules */
+
+ if (done) {
+ struct ip_fw *rule = chain->map[f_pos];
+ /* Update statistics */
+ IPFW_INC_RULE_COUNTER(rule, pktlen);
+ } else {
+ retval = IP_FW_DENY;
+ printf("ipfw: ouch!, skip past end of rules, denying packet\n");
+ }
+ IPFW_RUNLOCK(chain);
+#ifdef __FreeBSD__
+ if (ucred_cache != NULL)
+ crfree(ucred_cache);
+#endif
+ return (retval);
+
+pullup_failed:
+ if (V_fw_verbose)
+ printf("ipfw: pullup failed\n");
+ return (IP_FW_DENY);
+}
+
+/*
+ * Set maximum number of tables that can be used in given VNET ipfw instance.
+ */
+#ifdef SYSCTL_NODE
+/*
+ * sysctl handler for net.inet.ip.fw.tables_max: reads report the current
+ * per-VNET limit; writes resize the table set via ipfw_resize_tables().
+ */
+static int
+sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int ntables;
+
+	/* Snapshot the current per-VNET table limit for the handler. */
+	ntables = V_fw_tables_max;
+
+	error = sysctl_handle_int(oidp, &ntables, 0, req);
+	/* Read operation or some error */
+	if ((error != 0) || (req->newptr == NULL))
+		return (error);
+
+	/* Write: grow/shrink the table set to the requested count. */
+	return (ipfw_resize_tables(&V_layer3_chain, ntables));
+}
+#endif
+/*
+ * Module and VNET glue
+ */
+
+/*
+ * Stuff that must be initialised only on boot or module load
+ */
+/*
+ * One-time module initialization: announce compile-time configuration,
+ * clamp the user-supplied table count and set up BPF logging.  Runs once
+ * at boot (SYSINIT) or module load; per-VNET state is set up separately
+ * in vnet_ipfw_init().
+ */
+static int
+ipfw_init(void)
+{
+	int error = 0;
+
+	/*
+	 * Only print out this stuff the first time around,
+	 * when called from the sysinit code.
+	 */
+	printf("ipfw2 "
+#ifdef INET6
+		"(+ipv6) "
+#endif
+		"initialized, divert %s, nat %s, "
+		"default to %s, logging ",
+#ifdef IPDIVERT
+		"enabled",
+#else
+		"loadable",
+#endif
+#ifdef IPFIREWALL_NAT
+		"enabled",
+#else
+		"loadable",
+#endif
+		default_to_accept ? "accept" : "deny");
+
+	/*
+	 * Note: V_xxx variables can be accessed here but the vnet specific
+	 * initializer may not have been called yet for the VIMAGE case.
+	 * Tuneables will have been processed. We will print out values for
+	 * the default vnet.
+	 * XXX This should all be rationalized AFTER 8.0
+	 */
+	if (V_fw_verbose == 0)
+		printf("disabled\n");
+	else if (V_verbose_limit == 0)
+		printf("unlimited\n");
+	else
+		printf("limited to %d packets/entry by default\n",
+		    V_verbose_limit);
+
+	/* Check user-supplied table count for validness */
+	if (default_fw_tables > IPFW_TABLES_MAX)
+		default_fw_tables = IPFW_TABLES_MAX;
+
+	ipfw_log_bpf(1); /* init */
+	/* error is always 0 here; kept for interface symmetry with SYSINIT. */
+	return (error);
+}
+
+#ifndef __rtems__
+/*
+ * Called for the removal of the last instance only on module unload.
+ */
+/*
+ * Module-wide teardown, mirror of ipfw_init(): detach the BPF logging
+ * pseudo-interface.  Runs once, on unload of the last instance.
+ */
+static void
+ipfw_destroy(void)
+{
+
+	ipfw_log_bpf(0);	/* uninit */
+	printf("IP firewall unloaded\n");
+}
+#endif /* __rtems__ */
+
+/*
+ * Stuff that must be initialized for every instance
+ * (including the first of course).
+ */
+/*
+ * Per-VNET initialization: set tunable defaults, create the rule map with
+ * the single default rule, initialize tables and dynamic-rule state, then
+ * publish the sockopt/packet-check entry points and attach pfil hooks.
+ * Returns 0 on success or an errno value.
+ */
+static int
+vnet_ipfw_init(const void *unused)
+{
+	int error;
+	struct ip_fw *rule = NULL;
+	struct ip_fw_chain *chain;
+
+	chain = &V_layer3_chain;
+
+	/* First set up some values that are compile time options */
+	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
+	V_fw_deny_unknown_exthdrs = 1;
+#ifdef IPFIREWALL_VERBOSE
+	V_fw_verbose = 1;
+#endif
+#ifdef IPFIREWALL_VERBOSE_LIMIT
+	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
+#endif
+#ifdef IPFIREWALL_NAT
+	LIST_INIT(&chain->nat);
+#endif
+
+	/* insert the default rule and create the initial map */
+	chain->n_rules = 1;
+	chain->static_len = sizeof(struct ip_fw);
+	/*
+	 * NOTE(review): malloc(M_WAITOK) cannot return NULL on FreeBSD, so
+	 * the NULL check below appears vestigial — confirm before removing.
+	 */
+	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO);
+	if (chain->map)
+		rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
+
+	/* Set initial number of tables */
+	V_fw_tables_max = default_fw_tables;
+	error = ipfw_init_tables(chain);
+	if (error) {
+		printf("ipfw2: setting up tables failed\n");
+		free(chain->map, M_IPFW);
+		free(rule, M_IPFW);
+		return (ENOSPC);
+	}
+
+	/* fill and insert the default rule */
+	rule->act_ofs = 0;
+	rule->rulenum = IPFW_DEFAULT_RULE;
+	rule->cmd_len = 1;
+	rule->set = RESVD_SET;
+	rule->cmd[0].len = 1;
+	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
+	chain->rules = chain->default_rule = chain->map[0] = rule;
+	chain->id = rule->id = 1;
+
+	IPFW_LOCK_INIT(chain);
+	ipfw_dyn_init(chain);
+
+	/* First set up some values that are compile time options */
+	V_ipfw_vnet_ready = 1;		/* Open for business */
+
+	/*
+	 * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
+	 * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
+	 * we still keep the module alive because the sockopt and
+	 * layer2 paths are still useful.
+	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
+	 * so we can ignore the exact return value and just set a flag.
+	 *
+	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
+	 * changes in the underlying (per-vnet) variables trigger
+	 * immediate hook()/unhook() calls.
+	 * In layer2 we have the same behaviour, except that V_ether_ipfw
+	 * is checked on each packet because there are no pfil hooks.
+	 */
+	V_ip_fw_ctl_ptr = ipfw_ctl;
+	V_ip_fw_chk_ptr = ipfw_chk;
+	error = ipfw_attach_hooks(1);
+	return (error);
+}
+
+#ifndef __rtems__
+/*
+ * Called for the removal of each instance.
+ */
+/*
+ * Per-VNET teardown, reverse of vnet_ipfw_init(): unpublish the entry
+ * points, drain in-flight callers via the lock dance, then free tables,
+ * rules and dynamic state.  Always returns 0.
+ */
+static int
+vnet_ipfw_uninit(const void *unused)
+{
+	struct ip_fw *reap, *rule;
+	struct ip_fw_chain *chain = &V_layer3_chain;
+	int i;
+
+	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
+	/*
+	 * disconnect from ipv4, ipv6, layer2 and sockopt.
+	 * Then grab, release and grab again the WLOCK so we make
+	 * sure the update is propagated and nobody will be in.
+	 */
+	(void)ipfw_attach_hooks(0 /* detach */);
+	V_ip_fw_chk_ptr = NULL;
+	V_ip_fw_ctl_ptr = NULL;
+	IPFW_UH_WLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	IPFW_UH_WLOCK(chain);
+
+	IPFW_WLOCK(chain);
+	ipfw_dyn_uninit(0);	/* run the callout_drain */
+	IPFW_WUNLOCK(chain);
+
+	ipfw_destroy_tables(chain);
+	reap = NULL;
+	IPFW_WLOCK(chain);
+	/* Chain every rule onto the reap list via x_next, then free the map. */
+	for (i = 0; i < chain->n_rules; i++) {
+		rule = chain->map[i];
+		rule->x_next = reap;
+		reap = rule;
+	}
+	if (chain->map)
+		free(chain->map, M_IPFW);
+	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	/* Free reaped rules outside the locks. */
+	if (reap != NULL)
+		ipfw_reap_rules(reap);
+	IPFW_LOCK_DESTROY(chain);
+	ipfw_dyn_uninit(1);	/* free the remaining parts */
+	return 0;
+}
+#endif /* __rtems__ */
+
+/*
+ * Module event handler.
+ * In general we have the choice of handling most of these events by the
+ * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to
+ * use the SYSINIT handlers as they are more capable of expressing the
+ * flow of control during module and vnet operations, so this is just
+ * a skeleton. Note there is no SYSINIT equivalent of the module
+ * SHUTDOWN handler, but we don't have anything to do in that case anyhow.
+ */
+/*
+ * Module event handler skeleton: all real work is done by the
+ * (VNET_)SYS(UN)INIT handlers above, so every known event is a no-op
+ * and unknown events are rejected with EOPNOTSUPP.
+ */
+static int
+ipfw_modevent(module_t mod, int type, void *unused)
+{
+	int err = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		/* Called once at module load or
+	 	 * system boot if compiled in. */
+		break;
+	case MOD_QUIESCE:
+		/* Called before unload. May veto unloading. */
+		break;
+	case MOD_UNLOAD:
+		/* Called during unload. */
+		break;
+	case MOD_SHUTDOWN:
+		/* Called during system shutdown. */
+		break;
+	default:
+		err = EOPNOTSUPP;
+		break;
+	}
+	return err;
+}
+
+static moduledata_t ipfwmod = {
+ "ipfw",
+ ipfw_modevent,
+ 0
+};
+
+/* Define startup order. */
+#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
+#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
+#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
+
+DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
+MODULE_VERSION(ipfw, 2);
+/* should declare some dependencies here */
+
+/*
+ * Starting up. Done in order after ipfwmod() has been called.
+ * VNET_SYSINIT is also called for each existing vnet and each new vnet.
+ */
+SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
+ ipfw_init, NULL);
+VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
+ vnet_ipfw_init, NULL);
+
+/*
+ * Closing up shop. These are done in REVERSE ORDER, but still
+ * after ipfwmod() has been called. Not called on reboot.
+ * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
+ * or when the module is unloaded.
+ */
+SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
+ ipfw_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
+ vnet_ipfw_uninit, NULL);
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_log.c b/freebsd/sys/netpfil/ipfw/ip_fw_log.c
new file mode 100644
index 00000000..97132257
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_log.c
@@ -0,0 +1,470 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Logging support for ipfw
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <net/ethernet.h> /* for ETHERTYPE_IP */
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/if_types.h> /* for IFT_ETHER */
+#include <net/bpf.h> /* for BPF */
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#ifdef INET6
+#include <netinet6/in6_var.h> /* ip6_sprintf() */
+#endif
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+/*
+ * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
+ * Other macros just cast void * into the appropriate type
+ */
+#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
+#define SNP(buf) buf, sizeof(buf)
+
+#ifdef WITHOUT_BPF
+void
+ipfw_log_bpf(int onoff)
+{
+	/* BPF logging compiled out (WITHOUT_BPF); nothing to set up. */
+}
+#else /* !WITHOUT_BPF */
+static struct ifnet *log_if; /* hook to attach to bpf */
+
+/* we use this dummy function for all ifnet callbacks */
+/* we use this dummy function for all ifnet callbacks */
+static int
+log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+	/* The log pseudo-interface carries no traffic; reject everything. */
+	return EINVAL;
+}
+
+/*
+ * Output routine for the log pseudo-interface: the interface exists only
+ * so BPF listeners can tap it, so any mbuf handed to us is freed and the
+ * send is rejected.
+ */
+static int
+ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
+	struct sockaddr *dst, struct route *ro)
+{
+	if (m != NULL)
+		m_freem(m);
+	return EINVAL;
+}
+
+/*
+ * if_start stub for the log pseudo-interface; nothing should ever queue
+ * packets on it, so reaching this is a programming error.
+ */
+static void
+ipfw_log_start(struct ifnet* ifp)
+{
+	panic("ipfw_log_start() must not be called");
+}
+
+static const u_char ipfwbroadcastaddr[6] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+/*
+ * Create (onoff != 0) or destroy (onoff == 0) the "ipfw0" Ethernet-style
+ * pseudo-interface used solely as a BPF attachment point for logged
+ * packets.  Idempotent: a second create or destroy is a no-op.
+ */
+void
+ipfw_log_bpf(int onoff)
+{
+	struct ifnet *ifp;
+
+	if (onoff) {
+		if (log_if)
+			return;
+		ifp = if_alloc(IFT_ETHER);
+		if (ifp == NULL)
+			return;
+		if_initname(ifp, "ipfw", 0);
+		ifp->if_mtu = 65536;
+		ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+		ifp->if_init = (void *)log_dummy;
+		ifp->if_ioctl = log_dummy;
+		ifp->if_start = ipfw_log_start;
+		ifp->if_output = ipfw_log_output;
+		ifp->if_addrlen = 6;
+		ifp->if_hdrlen = 14;
+		if_attach(ifp);
+		ifp->if_broadcastaddr = ipfwbroadcastaddr;
+		ifp->if_baudrate = IF_Mbps(10);
+		/* Expose as DLT_EN10MB so tcpdump et al. can parse taps. */
+		bpfattach(ifp, DLT_EN10MB, 14);
+		log_if = ifp;
+	} else {
+		if (log_if) {
+			ether_ifdetach(log_if);
+			if_free(log_if);
+		}
+		log_if = NULL;
+	}
+}
+#endif /* !WITHOUT_BPF */
+
+/*
+ * We enter here when we have a rule with O_LOG.
+ * XXX this function alone takes about 2Kbytes of code!
+ */
+/*
+ * We enter here when we have a rule with O_LOG.
+ * XXX this function alone takes about 2Kbytes of code!
+ *
+ * With V_fw_verbose == 0 the packet is only tapped to the BPF log
+ * interface; otherwise a syslog line is formatted describing the matched
+ * action, protocol, addresses/ports and fragmentation state.  'f' may be
+ * NULL for a bogus packet (counted against V_norule_counter); per-rule
+ * log limiting is driven by the O_LOG instruction's max_log/log_left.
+ */
+void
+ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
+    struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+    struct ip *ip)
+{
+	char *action;
+	int limit_reached = 0;
+	char action2[92], proto[128], fragment[32];
+
+	if (V_fw_verbose == 0) {
+#ifndef WITHOUT_BPF
+
+		if (log_if == NULL || log_if->if_bpf == NULL)
+			return;
+
+		if (args->eh) /* layer2, use orig hdr */
+			BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
+		else
+			/* Add fake header. Later we will store
+			 * more info in the header.
+			 */
+			BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+#endif /* !WITHOUT_BPF */
+		return;
+	}
+	/* the old 'log' function */
+	fragment[0] = '\0';
+	proto[0] = '\0';
+
+	if (f == NULL) {	/* bogus pkt */
+		if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit)
+			return;
+		V_norule_counter++;
+		if (V_norule_counter == V_verbose_limit)
+			limit_reached = V_verbose_limit;
+		action = "Refuse";
+	} else {	/* O_LOG is the first action, find the real one */
+		ipfw_insn *cmd = ACTION_PTR(f);
+		ipfw_insn_log *l = (ipfw_insn_log *)cmd;
+
+		if (l->max_log != 0 && l->log_left == 0)
+			return;
+		l->log_left--;
+		if (l->log_left == 0)
+			limit_reached = l->max_log;
+		cmd += F_LEN(cmd);	/* point to first action */
+		if (cmd->opcode == O_ALTQ) {
+			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+			snprintf(SNPARGS(action2, 0), "Altq %d",
+				altq->qid);
+			cmd += F_LEN(cmd);
+		}
+		/* Skip non-terminal modifiers to reach the real action. */
+		if (cmd->opcode == O_PROB || cmd->opcode == O_TAG ||
+		    cmd->opcode == O_SETDSCP)
+			cmd += F_LEN(cmd);
+
+		action = action2;
+		switch (cmd->opcode) {
+		case O_DENY:
+			action = "Deny";
+			break;
+
+		case O_REJECT:
+			if (cmd->arg1==ICMP_REJECT_RST)
+				action = "Reset";
+			else if (cmd->arg1==ICMP_UNREACH_HOST)
+				action = "Reject";
+			else
+				snprintf(SNPARGS(action2, 0), "Unreach %d",
+					cmd->arg1);
+			break;
+
+		case O_UNREACH6:
+			if (cmd->arg1==ICMP6_UNREACH_RST)
+				action = "Reset";
+			else
+				snprintf(SNPARGS(action2, 0), "Unreach %d",
+					cmd->arg1);
+			break;
+
+		case O_ACCEPT:
+			action = "Accept";
+			break;
+		case O_COUNT:
+			action = "Count";
+			break;
+		case O_DIVERT:
+			snprintf(SNPARGS(action2, 0), "Divert %d",
+				cmd->arg1);
+			break;
+		case O_TEE:
+			snprintf(SNPARGS(action2, 0), "Tee %d",
+				cmd->arg1);
+			break;
+		case O_SETFIB:
+			snprintf(SNPARGS(action2, 0), "SetFib %d",
+				IP_FW_ARG_TABLEARG(cmd->arg1));
+			break;
+		case O_SKIPTO:
+			snprintf(SNPARGS(action2, 0), "SkipTo %d",
+				IP_FW_ARG_TABLEARG(cmd->arg1));
+			break;
+		case O_PIPE:
+			snprintf(SNPARGS(action2, 0), "Pipe %d",
+				IP_FW_ARG_TABLEARG(cmd->arg1));
+			break;
+		case O_QUEUE:
+			snprintf(SNPARGS(action2, 0), "Queue %d",
+				IP_FW_ARG_TABLEARG(cmd->arg1));
+			break;
+		case O_FORWARD_IP: {
+			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
+			int len;
+			struct in_addr dummyaddr;
+			/* INADDR_ANY means "forward to tablearg address". */
+			if (sa->sa.sin_addr.s_addr == INADDR_ANY)
+				dummyaddr.s_addr = htonl(tablearg);
+			else
+				dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
+
+			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
+				inet_ntoa(dummyaddr));
+
+			if (sa->sa.sin_port)
+				snprintf(SNPARGS(action2, len), ":%d",
+				    sa->sa.sin_port);
+			}
+			break;
+#ifdef INET6
+		case O_FORWARD_IP6: {
+			char buf[INET6_ADDRSTRLEN];
+			ipfw_insn_sa6 *sa = (ipfw_insn_sa6 *)cmd;
+			int len;
+
+			len = snprintf(SNPARGS(action2, 0), "Forward to [%s]",
+			    ip6_sprintf(buf, &sa->sa.sin6_addr));
+
+			if (sa->sa.sin6_port)
+				snprintf(SNPARGS(action2, len), ":%u",
+				    sa->sa.sin6_port);
+			}
+			break;
+#endif
+		case O_NETGRAPH:
+			snprintf(SNPARGS(action2, 0), "Netgraph %d",
+				cmd->arg1);
+			break;
+		case O_NGTEE:
+			snprintf(SNPARGS(action2, 0), "Ngtee %d",
+				cmd->arg1);
+			break;
+		case O_NAT:
+			action = "Nat";
+ 			break;
+		case O_REASS:
+			action = "Reass";
+			break;
+		case O_CALLRETURN:
+			if (cmd->len & F_NOT)
+				action = "Return";
+			else
+				snprintf(SNPARGS(action2, 0), "Call %d",
+				    cmd->arg1);
+			break;
+		default:
+			action = "UNKNOWN";
+			break;
+		}
+	}
+
+	if (hlen == 0) {	/* non-ip */
+		snprintf(SNPARGS(proto, 0), "MAC");
+
+	} else {
+		int len;
+#ifdef INET6
+		char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2];
+#else
+		char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
+#endif
+		struct icmphdr *icmp;
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+#ifdef INET6
+		struct ip6_hdr *ip6 = NULL;
+		struct icmp6_hdr *icmp6;
+		u_short ip6f_mf;
+#endif
+		src[0] = '\0';
+		dst[0] = '\0';
+#ifdef INET6
+		ip6f_mf = offset & IP6F_MORE_FRAG;
+		offset &= IP6F_OFF_MASK;
+
+		if (IS_IP6_FLOW_ID(&(args->f_id))) {
+			char ip6buf[INET6_ADDRSTRLEN];
+			snprintf(src, sizeof(src), "[%s]",
+			    ip6_sprintf(ip6buf, &args->f_id.src_ip6));
+			snprintf(dst, sizeof(dst), "[%s]",
+			    ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
+
+			ip6 = (struct ip6_hdr *)ip;
+			tcp = (struct tcphdr *)(((char *)ip) + hlen);
+			udp = (struct udphdr *)(((char *)ip) + hlen);
+		} else
+#endif
+		{
+			tcp = L3HDR(struct tcphdr, ip);
+			udp = L3HDR(struct udphdr, ip);
+
+			inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));
+			inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
+		}
+
+		switch (args->f_id.proto) {
+		case IPPROTO_TCP:
+			len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
+			if (offset == 0)
+				snprintf(SNPARGS(proto, len), ":%d %s:%d",
+				    ntohs(tcp->th_sport),
+				    dst,
+				    ntohs(tcp->th_dport));
+			else
+				snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+
+		case IPPROTO_UDP:
+			len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
+			if (offset == 0)
+				snprintf(SNPARGS(proto, len), ":%d %s:%d",
+				    ntohs(udp->uh_sport),
+				    dst,
+				    ntohs(udp->uh_dport));
+			else
+				snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+
+		case IPPROTO_ICMP:
+			icmp = L3HDR(struct icmphdr, ip);
+			if (offset == 0)
+				len = snprintf(SNPARGS(proto, 0),
+				    "ICMP:%u.%u ",
+				    icmp->icmp_type, icmp->icmp_code);
+			else
+				len = snprintf(SNPARGS(proto, 0), "ICMP ");
+			len += snprintf(SNPARGS(proto, len), "%s", src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+#ifdef INET6
+		case IPPROTO_ICMPV6:
+			icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
+			if (offset == 0)
+				len = snprintf(SNPARGS(proto, 0),
+				    "ICMPv6:%u.%u ",
+				    icmp6->icmp6_type, icmp6->icmp6_code);
+			else
+				len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
+			len += snprintf(SNPARGS(proto, len), "%s", src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+#endif
+		default:
+			len = snprintf(SNPARGS(proto, 0), "P:%d %s",
+			    args->f_id.proto, src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+		}
+
+#ifdef INET6
+		if (IS_IP6_FLOW_ID(&(args->f_id))) {
+			if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
+				snprintf(SNPARGS(fragment, 0),
+				    " (frag %08x:%d@%d%s)",
+				    args->f_id.extra,
+				    ntohs(ip6->ip6_plen) - hlen,
+				    ntohs(offset) << 3, ip6f_mf ? "+" : "");
+		} else
+#endif
+		{
+			int ipoff, iplen;
+			ipoff = ntohs(ip->ip_off);
+			iplen = ntohs(ip->ip_len);
+			if (ipoff & (IP_MF | IP_OFFMASK))
+				snprintf(SNPARGS(fragment, 0),
+				    " (frag %d:%d@%d%s)",
+				    ntohs(ip->ip_id), iplen - (ip->ip_hl << 2),
+				    offset << 3,
+				    (ipoff & IP_MF) ? "+" : "");
+		}
+	}
+#ifdef __FreeBSD__
+	if (oif || m->m_pkthdr.rcvif)
+		log(LOG_SECURITY | LOG_INFO,
+		    "ipfw: %d %s %s %s via %s%s\n",
+		    f ? f->rulenum : -1,
+		    action, proto, oif ? "out" : "in",
+		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
+		    fragment);
+	else
+#endif
+		log(LOG_SECURITY | LOG_INFO,
+		    "ipfw: %d %s %s [no if info]%s\n",
+		    f ? f->rulenum : -1,
+		    action, proto, fragment);
+	if (limit_reached)
+		log(LOG_SECURITY | LOG_NOTICE,
+		    "ipfw: limit %d reached on entry %d\n",
+		    limit_reached, f ? f->rulenum : -1);
+}
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c
new file mode 100644
index 00000000..142c46c5
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c
@@ -0,0 +1,670 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2008 Paolo Pisati
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+
+#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
+
+#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <machine/in_cksum.h> /* XXX for in_cksum */
+
+static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
+#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
+
+/*
+ * ifaddr event handler: when addresses on interface 'ifp' change, update
+ * every nat instance that uses that interface as its dynamic alias
+ * address, pushing the first AF_INET address into libalias.
+ */
+static void
+ifaddr_change(void *arg __unused, struct ifnet *ifp)
+{
+	struct cfg_nat *ptr;
+	struct ifaddr *ifa;
+	struct ip_fw_chain *chain;
+
+	chain = &V_layer3_chain;
+	IPFW_WLOCK(chain);
+	/* Check every nat entry... */
+	LIST_FOREACH(ptr, &chain->nat, _next) {
+		/* ...using nic 'ifp->if_xname' as dynamic alias address. */
+		if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
+			continue;
+		if_addr_rlock(ifp);
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr == NULL)
+				continue;
+			if (ifa->ifa_addr->sa_family != AF_INET)
+				continue;
+			ptr->ip = ((struct sockaddr_in *)
+			    (ifa->ifa_addr))->sin_addr;
+			LibAliasSetAddress(ptr->lib, ptr->ip);
+		}
+		if_addr_runlock(ifp);
+	}
+	IPFW_WUNLOCK(chain);
+}
+
+/*
+ * delete the pointers for nat entry ix, or all of them if ix < 0
+ */
+/*
+ * delete the pointers for nat entry ix, or all of them if ix < 0
+ *
+ * Walks every rule's action and clears cached cfg_nat pointers so a
+ * deleted/reconfigured instance is re-looked-up on next use.
+ */
+static void
+flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
+{
+	int i;
+	ipfw_insn_nat *cmd;
+
+	IPFW_WLOCK_ASSERT(chain);
+	for (i = 0; i < chain->n_rules; i++) {
+		cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]);
+		/* XXX skip log and the like ? */
+		if (cmd->o.opcode == O_NAT && cmd->nat != NULL &&
+		    (ix < 0 || cmd->nat->id == ix))
+			cmd->nat = NULL;
+	}
+}
+
+/*
+ * Tear down every redirect entry on 'head' belonging to nat instance 'n':
+ * remove the libalias links, free any LSNAT spool entries, then free the
+ * redirect itself.
+ */
+static void
+del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
+{
+	struct cfg_redir *r, *tmp_r;
+	struct cfg_spool *s, *tmp_s;
+	int i, num;
+
+	LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
+		num = 1; /* Number of alias_link to delete. */
+		switch (r->mode) {
+		case REDIR_PORT:
+			num = r->pport_cnt;
+			/* FALLTHROUGH */
+		case REDIR_ADDR:
+		case REDIR_PROTO:
+			/* Delete all libalias redirect entry. */
+			for (i = 0; i < num; i++)
+				LibAliasRedirectDelete(n->lib, r->alink[i]);
+			/* Del spool cfg if any. */
+			LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
+				LIST_REMOVE(s, _next);
+				free(s, M_IPFW);
+			}
+			free(r->alink, M_IPFW);
+			LIST_REMOVE(r, _next);
+			free(r, M_IPFW);
+			break;
+		default:
+			printf("unknown redirect mode: %u\n", r->mode);
+			/* XXX - panic?!?!? */
+			break;
+		}
+	}
+}
+
+/*
+ * Deserialize redirect and LSNAT spool configuration from the userland
+ * buffer 'buf' into nat instance 'ptr', registering each entry with
+ * libalias.  'buf' holds ptr->redir_cnt cfg_redir records, each followed
+ * by its spool_cnt cfg_spool records.
+ */
+static void
+add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
+{
+	struct cfg_redir *r, *ser_r;
+	struct cfg_spool *s, *ser_s;
+	int cnt, off, i;
+
+	for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
+		ser_r = (struct cfg_redir *)&buf[off];
+		r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
+		memcpy(r, ser_r, SOF_REDIR);
+		LIST_INIT(&r->spool_chain);
+		off += SOF_REDIR;
+		r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
+		    M_IPFW, M_WAITOK | M_ZERO);
+		switch (r->mode) {
+		case REDIR_ADDR:
+			r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
+			    r->paddr);
+			break;
+		case REDIR_PORT:
+			for (i = 0 ; i < r->pport_cnt; i++) {
+				/* If remotePort is all ports, set it to 0. */
+				u_short remotePortCopy = r->rport + i;
+				if (r->rport_cnt == 1 && r->rport == 0)
+					remotePortCopy = 0;
+				r->alink[i] = LibAliasRedirectPort(ptr->lib,
+				    r->laddr, htons(r->lport + i), r->raddr,
+				    htons(remotePortCopy), r->paddr,
+				    htons(r->pport + i), r->proto);
+				if (r->alink[i] == NULL) {
+					r->alink[0] = NULL;
+					break;
+				}
+			}
+			break;
+		case REDIR_PROTO:
+			r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
+			    r->raddr, r->paddr, r->proto);
+			break;
+		default:
+			printf("unknown redirect mode: %u\n", r->mode);
+			break;
+		}
+		/* XXX perhaps return an error instead of panic ? */
+		if (r->alink[0] == NULL)
+			panic("LibAliasRedirect* returned NULL");
+		/* LSNAT handling. */
+		for (i = 0; i < r->spool_cnt; i++) {
+			ser_s = (struct cfg_spool *)&buf[off];
+			/*
+			 * Allocate a spool entry with the spool size; the
+			 * original code used SOF_REDIR here, over-allocating
+			 * every LSNAT spool entry.
+			 */
+			s = malloc(SOF_SPOOL, M_IPFW, M_WAITOK | M_ZERO);
+			memcpy(s, ser_s, SOF_SPOOL);
+			LibAliasAddServer(ptr->lib, r->alink[0],
+			    s->addr, htons(s->port));
+			off += SOF_SPOOL;
+			/* Hook spool entry. */
+			LIST_INSERT_HEAD(&r->spool_chain, s, _next);
+		}
+		/* And finally hook this redir entry. */
+		LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
+	}
+}
+
+/*
+ * ipfw_nat - perform mbuf header translation.
+ *
+ * Note V_layer3_chain has to be locked while calling ipfw_nat() in
+ * 'global' operation mode (t == NULL).
+ *
+ */
+/*
+ * ipfw_nat - perform mbuf header translation.
+ *
+ * Note V_layer3_chain has to be locked while calling ipfw_nat() in
+ * 'global' operation mode (t == NULL).
+ *
+ * Returns IP_FW_NAT on success/ignore or IP_FW_DENY when the packet is
+ * dropped; args->m is updated to the (possibly reallocated) mbuf, or
+ * NULL on drop.
+ */
+static int
+ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
+{
+	struct mbuf *mcl;
+	struct ip *ip;
+	/* XXX - libalias duct tape */
+	int ldt, retval, found;
+	struct ip_fw_chain *chain;
+	char *c;
+
+	ldt = 0;
+	retval = 0;
+	/* libalias wants a contiguous buffer; collapse the mbuf chain. */
+	mcl = m_megapullup(m, m->m_pkthdr.len);
+	if (mcl == NULL) {
+		args->m = NULL;
+		return (IP_FW_DENY);
+	}
+	ip = mtod(mcl, struct ip *);
+
+	/*
+	 * XXX - Libalias checksum offload 'duct tape':
+	 *
+	 * locally generated packets have only pseudo-header checksum
+	 * calculated and libalias will break it[1], so mark them for
+	 * later fix. Moreover there are cases when libalias modifies
+	 * tcp packet data[2], mark them for later fix too.
+	 *
+	 * [1] libalias was never meant to run in kernel, so it does
+	 * not have any knowledge about checksum offloading, and
+	 * expects a packet with a full internet checksum.
+	 * Unfortunately, packets generated locally will have just the
+	 * pseudo header calculated, and when libalias tries to adjust
+	 * the checksum it will actually compute a wrong value.
+	 *
+	 * [2] when libalias modifies tcp's data content, full TCP
+	 * checksum has to be recomputed: the problem is that
+	 * libalias does not have any idea about checksum offloading.
+	 * To work around this, we do not do checksumming in LibAlias,
+	 * but only mark the packets in th_x2 field. If we receive a
+	 * marked packet, we calculate correct checksum for it
+	 * aware of offloading. Why such a terrible hack instead of
+	 * recalculating checksum for each packet?
+	 * Because the previous checksum was not checked!
+	 * Recalculating checksums for EVERY packet will hide ALL
+	 * transmission errors. Yes, marked packets still suffer from
+	 * this problem. But, sigh, natd(8) has this problem, too.
+	 *
+	 * TODO: -make libalias mbuf aware (so
+	 * it can handle delayed checksum and tso)
+	 */
+
+	if (mcl->m_pkthdr.rcvif == NULL &&
+	    mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+		ldt = 1;
+
+	c = mtod(mcl, char *);
+
+	/* Check if this is 'global' instance */
+	if (t == NULL) {
+		if (args->oif == NULL) {
+			/* Wrong direction, skip processing */
+			args->m = mcl;
+			return (IP_FW_NAT);
+		}
+
+		found = 0;
+		chain = &V_layer3_chain;
+		IPFW_RLOCK_ASSERT(chain);
+		/* Check every nat entry... */
+		LIST_FOREACH(t, &chain->nat, _next) {
+			if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0)
+				continue;
+			retval = LibAliasOutTry(t->lib, c,
+			    mcl->m_len + M_TRAILINGSPACE(mcl), 0);
+			if (retval == PKT_ALIAS_OK) {
+				/* Nat instance recognises state */
+				found = 1;
+				break;
+			}
+		}
+		if (found != 1) {
+			/* No instance found, return ignore */
+			args->m = mcl;
+			return (IP_FW_NAT);
+		}
+	} else {
+		if (args->oif == NULL)
+			retval = LibAliasIn(t->lib, c,
+			    mcl->m_len + M_TRAILINGSPACE(mcl));
+		else
+			retval = LibAliasOut(t->lib, c,
+			    mcl->m_len + M_TRAILINGSPACE(mcl));
+	}
+
+	/*
+	 * We drop packet when:
+	 * 1. libalias returns PKT_ALIAS_ERROR;
+	 * 2. For incoming packets:
+	 *	a) for unresolved fragments;
+	 *	b) libalias returns PKT_ALIAS_IGNORED and
+	 *		PKT_ALIAS_DENY_INCOMING flag is set.
+	 */
+	if (retval == PKT_ALIAS_ERROR ||
+	    (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+	    (retval == PKT_ALIAS_IGNORED &&
+	    (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
+		/* XXX - should i add some logging? */
+		m_free(mcl);
+		args->m = NULL;
+		return (IP_FW_DENY);
+	}
+
+	if (retval == PKT_ALIAS_RESPOND)
+		mcl->m_flags |= M_SKIP_FIREWALL;
+	/* Translation may have resized the packet; resync mbuf lengths. */
+	mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
+
+	/*
+	 * XXX - libalias checksum offload
+	 * 'duct tape' (see above)
+	 */
+
+	if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
+	    ip->ip_p == IPPROTO_TCP) {
+		struct tcphdr *th;
+
+		th = (struct tcphdr *)(ip + 1);
+		if (th->th_x2)
+			ldt = 1;
+	}
+
+	if (ldt) {
+		struct tcphdr *th;
+		struct udphdr *uh;
+		u_short cksum;
+
+		ip->ip_len = ntohs(ip->ip_len);
+		cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+		    htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+
+		switch (ip->ip_p) {
+		case IPPROTO_TCP:
+			th = (struct tcphdr *)(ip + 1);
+			/*
+			 * Maybe it was set in
+			 * libalias...
+			 */
+			th->th_x2 = 0;
+			th->th_sum = cksum;
+			mcl->m_pkthdr.csum_data =
+			    offsetof(struct tcphdr, th_sum);
+			break;
+		case IPPROTO_UDP:
+			uh = (struct udphdr *)(ip + 1);
+			uh->uh_sum = cksum;
+			mcl->m_pkthdr.csum_data =
+			    offsetof(struct udphdr, uh_sum);
+			break;
+		}
+		/* No hw checksum offloading: do it ourselves */
+		if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
+			in_delayed_cksum(mcl);
+			mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+		}
+		ip->ip_len = htons(ip->ip_len);
+	}
+	args->m = mcl;
+	return (IP_FW_NAT);
+}
+
+/*
+ * Find the nat instance with id 'nat_id' on list 'l'; returns NULL if
+ * no entry matches.  Caller must hold the appropriate chain lock.
+ */
+static struct cfg_nat *
+lookup_nat(struct nat_list *l, int nat_id)
+{
+	struct cfg_nat *res;
+
+	LIST_FOREACH(res, l, _next) {
+		if (res->id == nat_id)
+			break;
+	}
+	return res;
+}
+
+/*
+ * Sockopt handler for IP_FW_NAT_CFG: create a new nat instance or
+ * reconfigure an existing one from the serialized cfg_nat (plus redirect
+ * and spool records) supplied by userland.
+ */
+static int
+ipfw_nat_cfg(struct sockopt *sopt)
+{
+	struct cfg_nat *cfg, *ptr;
+	char *buf;
+	struct ip_fw_chain *chain = &V_layer3_chain;
+	size_t len;
+	int gencnt, error = 0;
+
+	/*
+	 * NOTE(review): len comes straight from userland and is not bounded
+	 * here before the allocation — confirm upper layers validate it.
+	 */
+	len = sopt->sopt_valsize;
+	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
+	if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0)
+		goto out;
+
+	cfg = (struct cfg_nat *)buf;
+	if (cfg->id < 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Find/create nat rule.
+	 */
+	IPFW_WLOCK(chain);
+	/* gencnt detects concurrent reconfiguration while unlocked below. */
+	gencnt = chain->gencnt;
+	ptr = lookup_nat(&chain->nat, cfg->id);
+	if (ptr == NULL) {
+		IPFW_WUNLOCK(chain);
+		/* New rule: allocate and init new instance. */
+		ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
+		ptr->lib = LibAliasInit(NULL);
+		LIST_INIT(&ptr->redir_chain);
+	} else {
+		/* Entry already present: temporarily unhook it. */
+		LIST_REMOVE(ptr, _next);
+		flush_nat_ptrs(chain, cfg->id);
+		IPFW_WUNLOCK(chain);
+	}
+
+	/*
+	 * Basic nat configuration.
+	 */
+	ptr->id = cfg->id;
+	/*
+	 * XXX - what if this rule doesn't nat any ip and just
+	 * redirect?
+	 * do we set aliasaddress to 0.0.0.0?
+	 */
+	ptr->ip = cfg->ip;
+	ptr->redir_cnt = cfg->redir_cnt;
+	ptr->mode = cfg->mode;
+	LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode);
+	LibAliasSetAddress(ptr->lib, ptr->ip);
+	memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
+
+	/*
+	 * Redir and LSNAT configuration.
+	 */
+	/* Delete old cfgs. */
+	del_redir_spool_cfg(ptr, &ptr->redir_chain);
+	/* Add new entries. */
+	add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
+
+	IPFW_WLOCK(chain);
+	/* Extra check to avoid race with another ipfw_nat_cfg() */
+	if (gencnt != chain->gencnt &&
+	    ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL))
+		LIST_REMOVE(cfg, _next);
+	LIST_INSERT_HEAD(&chain->nat, ptr, _next);
+	chain->gencnt++;
+	IPFW_WUNLOCK(chain);
+
+out:
+	free(buf, M_TEMP);
+	return (error);
+}
+
+static int
+ipfw_nat_del(struct sockopt *sopt)
+{
+ struct cfg_nat *ptr;
+ struct ip_fw_chain *chain = &V_layer3_chain;
+ int i;
+
+ sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ /* XXX validate i */
+ IPFW_WLOCK(chain);
+ ptr = lookup_nat(&chain->nat, i);
+ if (ptr == NULL) {
+ IPFW_WUNLOCK(chain);
+ return (EINVAL);
+ }
+ LIST_REMOVE(ptr, _next);
+ flush_nat_ptrs(chain, i);
+ IPFW_WUNLOCK(chain);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ return (0);
+}
+
+static int
+ipfw_nat_get_cfg(struct sockopt *sopt)
+{
+ struct ip_fw_chain *chain = &V_layer3_chain;
+ struct cfg_nat *n;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ char *data;
+ int gencnt, nat_cnt, len, error;
+
+ nat_cnt = 0;
+ len = sizeof(nat_cnt);
+
+ IPFW_RLOCK(chain);
+retry:
+ gencnt = chain->gencnt;
+ /* Estimate memory amount */
+ LIST_FOREACH(n, &chain->nat, _next) {
+ nat_cnt++;
+ len += sizeof(struct cfg_nat);
+ LIST_FOREACH(r, &n->redir_chain, _next) {
+ len += sizeof(struct cfg_redir);
+ LIST_FOREACH(s, &r->spool_chain, _next)
+ len += sizeof(struct cfg_spool);
+ }
+ }
+ IPFW_RUNLOCK(chain);
+
+ data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
+ bcopy(&nat_cnt, data, sizeof(nat_cnt));
+
+ nat_cnt = 0;
+ len = sizeof(nat_cnt);
+
+ IPFW_RLOCK(chain);
+ if (gencnt != chain->gencnt) {
+ free(data, M_TEMP);
+ goto retry;
+ }
+ /* Serialize all the data. */
+ LIST_FOREACH(n, &chain->nat, _next) {
+ bcopy(n, &data[len], sizeof(struct cfg_nat));
+ len += sizeof(struct cfg_nat);
+ LIST_FOREACH(r, &n->redir_chain, _next) {
+ bcopy(r, &data[len], sizeof(struct cfg_redir));
+ len += sizeof(struct cfg_redir);
+ LIST_FOREACH(s, &r->spool_chain, _next) {
+ bcopy(s, &data[len], sizeof(struct cfg_spool));
+ len += sizeof(struct cfg_spool);
+ }
+ }
+ }
+ IPFW_RUNLOCK(chain);
+
+ error = sooptcopyout(sopt, data, len);
+ free(data, M_TEMP);
+
+ return (error);
+}
+
+static int
+ipfw_nat_get_log(struct sockopt *sopt)
+{
+ uint8_t *data;
+ struct cfg_nat *ptr;
+ int i, size;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+
+ IPFW_RLOCK(chain);
+ /* one pass to count, one to copy the data */
+ i = 0;
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ i++;
+ }
+ size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
+ data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
+ if (data == NULL) {
+ IPFW_RUNLOCK(chain);
+ return (ENOSPC);
+ }
+ i = 0;
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ bcopy(&ptr->id, &data[i], sizeof(int));
+ i += sizeof(int);
+ bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
+ i += LIBALIAS_BUF_SIZE;
+ }
+ IPFW_RUNLOCK(chain);
+ sooptcopyout(sopt, data, size);
+ free(data, M_IPFW);
+ return(0);
+}
+
+static void
+ipfw_nat_init(void)
+{
+
+ IPFW_WLOCK(&V_layer3_chain);
+ /* init ipfw hooks */
+ ipfw_nat_ptr = ipfw_nat;
+ lookup_nat_ptr = lookup_nat;
+ ipfw_nat_cfg_ptr = ipfw_nat_cfg;
+ ipfw_nat_del_ptr = ipfw_nat_del;
+ ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
+ ipfw_nat_get_log_ptr = ipfw_nat_get_log;
+ IPFW_WUNLOCK(&V_layer3_chain);
+ V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
+ ifaddr_event, ifaddr_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+}
+
+static void
+ipfw_nat_destroy(void)
+{
+ struct cfg_nat *ptr, *ptr_temp;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+ IPFW_WLOCK(chain);
+ LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
+ flush_nat_ptrs(chain, -1 /* flush all */);
+ /* deregister ipfw_nat */
+ ipfw_nat_ptr = NULL;
+ lookup_nat_ptr = NULL;
+ ipfw_nat_cfg_ptr = NULL;
+ ipfw_nat_del_ptr = NULL;
+ ipfw_nat_get_cfg_ptr = NULL;
+ ipfw_nat_get_log_ptr = NULL;
+ IPFW_WUNLOCK(chain);
+}
+
+static int
+ipfw_nat_modevent(module_t mod, int type, void *unused)
+{
+ int err = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ ipfw_nat_init();
+ break;
+
+ case MOD_UNLOAD:
+ ipfw_nat_destroy();
+ break;
+
+ default:
+ return EOPNOTSUPP;
+ break;
+ }
+ return err;
+}
+
+static moduledata_t ipfw_nat_mod = {
+ "ipfw_nat",
+ ipfw_nat_modevent,
+ 0
+};
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
+MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
+MODULE_VERSION(ipfw_nat, 1);
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
new file mode 100644
index 00000000..d2e1b448
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
@@ -0,0 +1,466 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/pfil.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#include <netgraph/ng_ipfw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <machine/in_cksum.h>
+
+static VNET_DEFINE(int, fw_enable) = 1;
+#define V_fw_enable VNET(fw_enable)
+
+#ifdef INET6
+static VNET_DEFINE(int, fw6_enable) = 1;
+#define V_fw6_enable VNET(fw6_enable)
+#endif
+
+int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
+
+/* Forward declarations. */
+static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
+
+#ifdef SYSCTL_NODE
+
+SYSBEGIN(f1)
+
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
+ ipfw_chg_hook, "I", "Enable ipfw");
+#ifdef INET6
+SYSCTL_DECL(_net_inet6_ip6_fw);
+SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
+ ipfw_chg_hook, "I", "Enable ipfw+6");
+#endif /* INET6 */
+
+SYSEND
+
+#endif /* SYSCTL_NODE */
+
+/*
+ * The pfilter hook to pass packets to ipfw_chk and then to
+ * dummynet, divert, netgraph or other modules.
+ * The packet may be consumed.
+ */
+int
+ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ struct inpcb *inp)
+{
+ struct ip_fw_args args;
+ struct m_tag *tag;
+ int ipfw;
+ int ret;
+
+ /* all the processing now uses ip_len in net format */
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ SET_NET_IPLEN(mtod(*m0, struct ip *));
+
+ /* convert dir to IPFW values */
+ dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
+ bzero(&args, sizeof(args));
+
+again:
+ /*
+ * extract and remove the tag if present. If we are left
+ * with onepass, optimize the outgoing path.
+ */
+ tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+ if (tag != NULL) {
+ args.rule = *((struct ipfw_rule_ref *)(tag+1));
+ m_tag_delete(*m0, tag);
+ if (args.rule.info & IPFW_ONEPASS) {
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ return (0);
+ }
+ }
+
+ args.m = *m0;
+ args.oif = dir == DIR_OUT ? ifp : NULL;
+ args.inp = inp;
+
+ ipfw = ipfw_chk(&args);
+ *m0 = args.m;
+
+ KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL",
+ __func__));
+
+ /* breaking out of the switch means drop */
+ ret = 0; /* default return value for pass */
+ switch (ipfw) {
+ case IP_FW_PASS:
+ /* next_hop may be set by ipfw_chk */
+ if (args.next_hop == NULL && args.next_hop6 == NULL)
+ break; /* pass */
+#if (!defined(INET6) && !defined(INET))
+ ret = EACCES;
+#else
+ {
+ struct m_tag *fwd_tag;
+ size_t len;
+
+ KASSERT(args.next_hop == NULL || args.next_hop6 == NULL,
+ ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__,
+ args.next_hop, args.next_hop6));
+#ifdef INET6
+ if (args.next_hop6 != NULL)
+ len = sizeof(struct sockaddr_in6);
+#endif
+#ifdef INET
+ if (args.next_hop != NULL)
+ len = sizeof(struct sockaddr_in);
+#endif
+
+ /* Incoming packets should not be tagged so we do not
+ * m_tag_find. Outgoing packets may be tagged, so we
+ * reuse the tag if present.
+ */
+ fwd_tag = (dir == DIR_IN) ? NULL :
+ m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
+ if (fwd_tag != NULL) {
+ m_tag_unlink(*m0, fwd_tag);
+ } else {
+ fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len,
+ M_NOWAIT);
+ if (fwd_tag == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ }
+#ifdef INET6
+ if (args.next_hop6 != NULL) {
+ bcopy(args.next_hop6, (fwd_tag+1), len);
+ if (in6_localip(&args.next_hop6->sin6_addr))
+ (*m0)->m_flags |= M_FASTFWD_OURS;
+ (*m0)->m_flags |= M_IP6_NEXTHOP;
+ }
+#endif
+#ifdef INET
+ if (args.next_hop != NULL) {
+ bcopy(args.next_hop, (fwd_tag+1), len);
+ if (in_localip(args.next_hop->sin_addr))
+ (*m0)->m_flags |= M_FASTFWD_OURS;
+ (*m0)->m_flags |= M_IP_NEXTHOP;
+ }
+#endif
+ m_tag_prepend(*m0, fwd_tag);
+ }
+#endif /* INET || INET6 */
+ break;
+
+ case IP_FW_DENY:
+ ret = EACCES;
+ break; /* i.e. drop */
+
+ case IP_FW_DUMMYNET:
+ ret = EACCES;
+ if (ip_dn_io_ptr == NULL)
+ break; /* i.e. drop */
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ ret = ip_dn_io_ptr(m0, dir, &args);
+ else if (mtod(*m0, struct ip *)->ip_v == 6)
+ ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
+ else
+ break; /* drop it */
+ /*
+ * XXX should read the return value.
+ * dummynet normally eats the packet and sets *m0=NULL
+ * unless the packet can be sent immediately. In this
+ * case args is updated and we should re-run the
+ * check without clearing args.
+ */
+ if (*m0 != NULL)
+ goto again;
+ break;
+
+ case IP_FW_TEE:
+ case IP_FW_DIVERT:
+ if (ip_divert_ptr == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ ret = ipfw_divert(m0, dir, &args.rule,
+ (ipfw == IP_FW_TEE) ? 1 : 0);
+ /* continue processing for the original packet (tee). */
+ if (*m0)
+ goto again;
+ break;
+
+ case IP_FW_NGTEE:
+ case IP_FW_NETGRAPH:
+ if (ng_ipfw_input_p == NULL) {
+ ret = EACCES;
+ break; /* i.e. drop */
+ }
+ ret = ng_ipfw_input_p(m0, dir, &args,
+ (ipfw == IP_FW_NGTEE) ? 1 : 0);
+ if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
+ goto again; /* continue with packet */
+ break;
+
+ case IP_FW_NAT:
+ /* honor one-pass in case of successful nat */
+ if (V_fw_one_pass)
+ break; /* ret is already 0 */
+ goto again;
+
+ case IP_FW_REASS:
+ goto again; /* continue with packet */
+
+ default:
+ KASSERT(0, ("%s: unknown retval", __func__));
+ }
+
+ if (ret != 0) {
+ if (*m0)
+ FREE_PKT(*m0);
+ *m0 = NULL;
+ }
+ if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
+ SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ return ret;
+}
+
+/* do the divert, return 1 on error 0 on success */
+static int
+ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
+ int tee)
+{
+ /*
+ * ipfw_chk() has already tagged the packet with the divert tag.
+ * If tee is set, copy packet and return original.
+ * If not tee, consume packet and send it to divert socket.
+ */
+ struct mbuf *clone;
+ struct ip *ip = mtod(*m0, struct ip *);
+ struct m_tag *tag;
+
+ /* Cloning needed for tee? */
+ if (tee == 0) {
+ clone = *m0; /* use the original mbuf */
+ *m0 = NULL;
+ } else {
+ clone = m_dup(*m0, M_DONTWAIT);
+ /* If we cannot duplicate the mbuf, we sacrifice the divert
+ * chain and continue with the tee-ed packet.
+ */
+ if (clone == NULL)
+ return 1;
+ }
+
+ /*
+ * Divert listeners can normally handle non-fragmented packets,
+ * but we can only reass in the non-tee case.
+ * This means that listeners on a tee rule may get fragments,
+ * and have to live with that.
+ * Note that we now have the 'reass' ipfw option so if we care
+ * we can do it before a 'tee'.
+ */
+ if (!tee) switch (ip->ip_v) {
+ case IPVERSION:
+ if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) {
+ int hlen;
+ struct mbuf *reass;
+
+ SET_HOST_IPLEN(ip); /* ip_reass wants host order */
+ reass = ip_reass(clone); /* Reassemble packet. */
+ if (reass == NULL)
+ return 0; /* not an error */
+ /* if reass = NULL then it was consumed by ip_reass */
+ /*
+ * IP header checksum fixup after reassembly and leave header
+ * in network byte order.
+ */
+ ip = mtod(reass, struct ip *);
+ hlen = ip->ip_hl << 2;
+ SET_NET_IPLEN(ip);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip))
+ ip->ip_sum = in_cksum_hdr(ip);
+ else
+ ip->ip_sum = in_cksum(reass, hlen);
+ clone = reass;
+ }
+ break;
+#ifdef INET6
+ case IPV6_VERSION >> 4:
+ {
+ struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *);
+
+ if (ip6->ip6_nxt == IPPROTO_FRAGMENT) {
+ int nxt, off;
+
+ off = sizeof(struct ip6_hdr);
+ nxt = frag6_input(&clone, &off, 0);
+ if (nxt == IPPROTO_DONE)
+ return (0);
+ }
+ break;
+ }
+#endif
+ }
+
+ /* attach a tag to the packet with the reinject info */
+ tag = m_tag_alloc(MTAG_IPFW_RULE, 0,
+ sizeof(struct ipfw_rule_ref), M_NOWAIT);
+ if (tag == NULL) {
+ FREE_PKT(clone);
+ return 1;
+ }
+ *((struct ipfw_rule_ref *)(tag+1)) = *rule;
+ m_tag_prepend(clone, tag);
+
+ /* Do the dirty job... */
+ ip_divert_ptr(clone, incoming);
+ return 0;
+}
+
+/*
+ * attach or detach hooks for a given protocol family
+ */
+static int
+ipfw_hook(int onoff, int pf)
+{
+ struct pfil_head *pfh;
+
+ pfh = pfil_head_get(PFIL_TYPE_AF, pf);
+ if (pfh == NULL)
+ return ENOENT;
+
+ (void) (onoff ? pfil_add_hook : pfil_remove_hook)
+ (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
+
+ return 0;
+}
+
+int
+ipfw_attach_hooks(int arg)
+{
+ int error = 0;
+
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_INET);
+ else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) {
+ error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */
+ printf("ipfw_hook() error\n");
+ }
+#ifdef INET6
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_INET6);
+ else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) {
+ error = ENOENT;
+ printf("ipfw6_hook() error\n");
+ }
+#endif
+ return error;
+}
+
+int
+ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
+{
+ int enable;
+ int oldenable;
+ int error;
+ int af;
+
+ if (arg1 == &VNET_NAME(fw_enable)) {
+ enable = V_fw_enable;
+ af = AF_INET;
+ }
+#ifdef INET6
+ else if (arg1 == &VNET_NAME(fw6_enable)) {
+ enable = V_fw6_enable;
+ af = AF_INET6;
+ }
+#endif
+ else
+ return (EINVAL);
+
+ oldenable = enable;
+
+ error = sysctl_handle_int(oidp, &enable, 0, req);
+
+ if (error)
+ return (error);
+
+ enable = (enable) ? 1 : 0;
+
+ if (enable == oldenable)
+ return (0);
+
+ error = ipfw_hook(enable, af);
+ if (error)
+ return (error);
+ if (af == AF_INET)
+ V_fw_enable = enable;
+#ifdef INET6
+ else if (af == AF_INET6)
+ V_fw6_enable = enable;
+#endif
+
+ return (0);
+}
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
new file mode 100644
index 00000000..869d9721
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
@@ -0,0 +1,341 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IPFW2_PRIVATE_H
+#define _IPFW2_PRIVATE_H
+
+/*
+ * Internal constants and data structures used by ipfw components
+ * and not meant to be exported outside the kernel.
+ */
+
+#ifdef _KERNEL
+
+/*
+ * For platforms that do not have SYSCTL support, we wrap the
+ * SYSCTL_* into a function (one per file) to collect the values
+ * into an array at module initialization. The wrapping macros,
+ * SYSBEGIN() and SYSEND, are empty in the default case.
+ */
+#ifndef SYSBEGIN
+#define SYSBEGIN(x)
+#endif
+#ifndef SYSEND
+#define SYSEND
+#endif
+
+/* Return values from ipfw_chk() */
+enum {
+ IP_FW_PASS = 0,
+ IP_FW_DENY,
+ IP_FW_DIVERT,
+ IP_FW_TEE,
+ IP_FW_DUMMYNET,
+ IP_FW_NETGRAPH,
+ IP_FW_NGTEE,
+ IP_FW_NAT,
+ IP_FW_REASS,
+};
+
+/*
+ * Structure for collecting parameters to dummynet for ip6_output forwarding
+ */
+struct _ip6dn_args {
+ struct ip6_pktopts *opt_or;
+ struct route_in6 ro_or;
+ int flags_or;
+ struct ip6_moptions *im6o_or;
+ struct ifnet *origifp_or;
+ struct ifnet *ifp_or;
+ struct sockaddr_in6 dst_or;
+ u_long mtu_or;
+ struct route_in6 ro_pmtu_or;
+};
+
+
+/*
+ * Arguments for calling ipfw_chk() and dummynet_io(). We put them
+ * all into a structure because this way it is easier and more
+ * efficient to pass variables around and extend the interface.
+ */
+struct ip_fw_args {
+ struct mbuf *m; /* the mbuf chain */
+ struct ifnet *oif; /* output interface */
+ struct sockaddr_in *next_hop; /* forward address */
+ struct sockaddr_in6 *next_hop6; /* ipv6 forward address */
+
+ /*
+ * On return, it points to the matching rule.
+ * On entry, rule.slot > 0 means the info is valid and
+ * contains the starting rule for an ipfw search.
+ * If chain_id == chain->id && slot >0 then jump to that slot.
+ * Otherwise, we locate the first rule >= rulenum:rule_id
+ */
+ struct ipfw_rule_ref rule; /* match/restart info */
+
+ struct ether_header *eh; /* for bridged packets */
+
+ struct ipfw_flow_id f_id; /* grabbed from IP header */
+ //uint32_t cookie; /* a cookie depending on rule action */
+ struct inpcb *inp;
+
+ struct _ip6dn_args dummypar; /* dummynet->ip6_output */
+ struct sockaddr_in hopstore; /* store here if cannot use a pointer */
+};
+
+MALLOC_DECLARE(M_IPFW);
+
+/*
+ * Hooks sometime need to know the direction of the packet
+ * (divert, dummynet, netgraph, ...)
+ * We use a generic definition here, with bit0-1 indicating the
+ * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the
+ * specific protocol
+ * indicating the protocol (if necessary)
+ */
+enum {
+ DIR_MASK = 0x3,
+ DIR_OUT = 0,
+ DIR_IN = 1,
+ DIR_FWD = 2,
+ DIR_DROP = 3,
+ PROTO_LAYER2 = 0x4, /* set for layer 2 */
+ /* PROTO_DEFAULT = 0, */
+ PROTO_IPV4 = 0x08,
+ PROTO_IPV6 = 0x10,
+ PROTO_IFB = 0x0c, /* layer2 + ifbridge */
+ /* PROTO_OLDBDG = 0x14, unused, old bridge */
+};
+
+/* wrapper for freeing a packet, in case we need to do more work */
+#ifndef FREE_PKT
+#if defined(__linux__) || defined(_WIN32)
+#define FREE_PKT(m) netisr_dispatch(-1, m)
+#else
+#define FREE_PKT(m) m_freem(m)
+#endif
+#endif /* !FREE_PKT */
+
+/*
+ * Function definitions.
+ */
+
+/* attach (arg = 1) or detach (arg = 0) hooks */
+int ipfw_attach_hooks(int);
+#ifdef NOTYET
+void ipfw_nat_destroy(void);
+#endif
+
+/* In ip_fw_log.c */
+struct ip;
+void ipfw_log_bpf(int);
+void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
+ struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+ struct ip *ip);
+VNET_DECLARE(u_int64_t, norule_counter);
+#define V_norule_counter VNET(norule_counter)
+VNET_DECLARE(int, verbose_limit);
+#define V_verbose_limit VNET(verbose_limit)
+
+/* In ip_fw_dynamic.c */
+
+enum { /* result for matching dynamic rules */
+ MATCH_REVERSE = 0,
+ MATCH_FORWARD,
+ MATCH_NONE,
+ MATCH_UNKNOWN,
+};
+
+/*
+ * The lock for dynamic rules is only used once outside the file,
+ * and only to release the result of lookup_dyn_rule().
+ * Eventually we may implement it with a callback on the function.
+ */
+struct ip_fw_chain;
+void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int);
+void ipfw_dyn_unlock(ipfw_dyn_rule *q);
+
+struct tcphdr;
+struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
+ u_int32_t, u_int32_t, int);
+int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
+ struct ip_fw_args *args, uint32_t tablearg);
+ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt,
+ int *match_direction, struct tcphdr *tcp);
+void ipfw_remove_dyn_children(struct ip_fw *rule);
+void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep);
+
+void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */
+void ipfw_dyn_uninit(int); /* per-vnet deinitialization */
+int ipfw_dyn_len(void);
+
+/* common variables */
+VNET_DECLARE(int, fw_one_pass);
+#define V_fw_one_pass VNET(fw_one_pass)
+
+VNET_DECLARE(int, fw_verbose);
+#define V_fw_verbose VNET(fw_verbose)
+
+VNET_DECLARE(struct ip_fw_chain, layer3_chain);
+#define V_layer3_chain VNET(layer3_chain)
+
+VNET_DECLARE(u_int32_t, set_disable);
+#define V_set_disable VNET(set_disable)
+
+VNET_DECLARE(int, autoinc_step);
+#define V_autoinc_step VNET(autoinc_step)
+
+VNET_DECLARE(unsigned int, fw_tables_max);
+#define V_fw_tables_max VNET(fw_tables_max)
+
+struct ip_fw_chain {
+ struct ip_fw *rules; /* list of rules */
+ struct ip_fw *reap; /* list of rules to reap */
+ struct ip_fw *default_rule;
+ int n_rules; /* number of static rules */
+ int static_len; /* total len of static rules */
+ struct ip_fw **map; /* array of rule ptrs to ease lookup */
+ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
+ struct radix_node_head **tables; /* IPv4 tables */
+ struct radix_node_head **xtables; /* extended tables */
+ uint8_t *tabletype; /* Array of table types */
+#if defined( __linux__ ) || defined( _WIN32 )
+ spinlock_t rwmtx;
+ spinlock_t uh_lock;
+#else
+ struct rwlock rwmtx;
+ struct rwlock uh_lock; /* lock for upper half */
+#endif
+ uint32_t id; /* ruleset id */
+ uint32_t gencnt; /* generation count */
+};
+
+struct sockopt; /* used by tcp_var.h */
+
+/* Macro for working with various counters */
+#define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \
+ (_cntr)->pcnt++; \
+ (_cntr)->bcnt += _bytes; \
+ (_cntr)->timestamp = time_uptime; \
+ } while (0)
+
+#define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \
+ (_cntr)->pcnt++; \
+ (_cntr)->bcnt += _bytes; \
+ } while (0)
+
+#define IPFW_ZERO_RULE_COUNTER(_cntr) do { \
+ (_cntr)->pcnt = 0; \
+ (_cntr)->bcnt = 0; \
+ (_cntr)->timestamp = 0; \
+ } while (0)
+
+#define IPFW_ZERO_DYN_COUNTER(_cntr) do { \
+ (_cntr)->pcnt = 0; \
+ (_cntr)->bcnt = 0; \
+ } while (0)
+
+#define IP_FW_ARG_TABLEARG(a) ((a) == IP_FW_TABLEARG) ? tablearg : (a)
+/*
+ * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
+ * so the variable and the macros must be here.
+ */
+
+#define IPFW_LOCK_INIT(_chain) do { \
+ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \
+ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
+ } while (0)
+
+#define IPFW_LOCK_DESTROY(_chain) do { \
+ rw_destroy(&(_chain)->rwmtx); \
+ rw_destroy(&(_chain)->uh_lock); \
+ } while (0)
+
+#define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
+#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
+
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+
+#define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
+#define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
+
+#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
+#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
+#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
+#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
+
+/* In ip_fw_sockopt.c */
+int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
+int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
+int ipfw_ctl(struct sockopt *sopt);
+int ipfw_chk(struct ip_fw_args *args);
+void ipfw_reap_rules(struct ip_fw *head);
+
+/* In ip_fw_pfil */
+int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ struct inpcb *inp);
+
+/* In ip_fw_table.c */
+struct radix_node;
+int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+ uint32_t *val);
+int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+ uint32_t *val, int type);
+int ipfw_init_tables(struct ip_fw_chain *ch);
+void ipfw_destroy_tables(struct ip_fw_chain *ch);
+int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl);
+int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+ uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value);
+int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+ uint8_t plen, uint8_t mlen, uint8_t type);
+int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
+int ipfw_dump_table_entry(struct radix_node *rn, void *arg);
+int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl);
+int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
+int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl);
+int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
+
+/* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
+
+extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
+
+typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
+typedef int ipfw_nat_cfg_t(struct sockopt *);
+
+extern ipfw_nat_t *ipfw_nat_ptr;
+#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
+
+extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
+extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
+
+#endif /* _KERNEL */
+#endif /* _IPFW2_PRIVATE_H */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
new file mode 100644
index 00000000..40448a86
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -0,0 +1,1449 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Supported by: Valeria Paoli
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Sockopt support for ipfw. The routines here implement
+ * the upper half of the ipfw code.
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h> /* struct m_tag used by nested headers */
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* hooks */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
+
+/*
+ * static variables followed by global ones (none in this file)
+ */
+
+/*
+ * Binary search over chain->map for the first rule whose
+ * (rulenum, id) pair is >= (key, id).  Returns its index.
+ * Simple enough that open-coding beats bsearch().
+ */
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
+{
+ int lower, upper;
+
+ lower = 0;
+ upper = chain->n_rules - 1;
+ while (lower < upper) {
+ int mid = (lower + upper) / 2;
+ struct ip_fw *r = chain->map[mid];
+
+ if (r->rulenum < key || (r->rulenum == key && r->id < id))
+ lower = mid + 1; /* candidate is strictly below the target */
+ else
+ upper = mid; /* mid itself may be the answer */
+ }
+ return upper;
+}
+
+/*
+ * Allocate a new rule map able to hold n_rules + extra entries
+ * (extra may be negative when deleting rules).  On success the chain is
+ * returned with IPFW_UH_WLOCK held; on failure NULL is returned and the
+ * lock state is unchanged.  If 'locked' is non-zero the caller already
+ * holds the lock, so the allocation uses M_NOWAIT and no locking is
+ * performed here.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
+
+ for (;;) {
+ struct ip_fw **map;
+ int i;
+
+ i = chain->n_rules + extra;
+ map = malloc(i * sizeof(struct ip_fw *), M_IPFW,
+ locked ? M_NOWAIT : M_WAITOK);
+ if (map == NULL) {
+ printf("%s: cannot allocate map\n", __FUNCTION__);
+ return NULL;
+ }
+ if (!locked)
+ IPFW_UH_WLOCK(chain);
+ /* Recheck under the lock: another writer may have grown
+ * the ruleset while we slept in malloc(). */
+ if (i >= chain->n_rules + extra) /* good */
+ return map;
+ /* otherwise we lost the race, free and retry */
+ if (!locked)
+ IPFW_UH_WUNLOCK(chain);
+ free(map, M_IPFW);
+ }
+}
+
+/*
+ * Install a new rule map and length, bumping the chain generation id.
+ * Caller must hold IPFW_UH_WLOCK.  Returns the previous map so the
+ * caller can free it once the unlock is done.
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+ struct ip_fw **prev;
+
+ IPFW_WLOCK(chain);
+ chain->id++; /* new generation of the ruleset */
+ chain->n_rules = new_len;
+ prev = chain->map;
+ chain->map = new_map;
+ IPFW_WUNLOCK(chain);
+ return prev;
+}
+
+/*
+ * Add a new rule to the list. Copy the rule into a malloc'ed area, then
+ * possibly create a rule number and add the rule to the list.
+ * Update the rule_number in the input struct so the caller knows it as well.
+ * Returns 0 on success, EINVAL on a bad rule number, ENOSPC on
+ * allocation failure.
+ * XXX DO NOT USE FOR THE DEFAULT RULE.
+ * Must be called without IPFW_UH held
+ */
+int
+ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
+{
+ struct ip_fw *rule;
+ int i, l, insert_before;
+ struct ip_fw **map; /* the new array of pointers */
+
+ if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
+ return (EINVAL);
+
+ l = RULESIZE(input_rule);
+ rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
+ if (rule == NULL)
+ return (ENOSPC);
+ /* get_map returns with IPFW_UH_WLOCK if successful */
+ map = get_map(chain, 1, 0 /* not locked */);
+ if (map == NULL) {
+ free(rule, M_IPFW);
+ return ENOSPC;
+ }
+
+ bcopy(input_rule, rule, l);
+ /* clear fields not settable from userland */
+ rule->x_next = NULL;
+ rule->next_rule = NULL;
+ IPFW_ZERO_RULE_COUNTER(rule);
+
+ /* Clamp the auto-increment step to a sane [1, 1000] range. */
+ if (V_autoinc_step < 1)
+ V_autoinc_step = 1;
+ else if (V_autoinc_step > 1000)
+ V_autoinc_step = 1000;
+ /* find the insertion point, we will insert before */
+ insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+ i = ipfw_find_rule(chain, insert_before, 0);
+ /* duplicate first part */
+ if (i > 0)
+ bcopy(chain->map, map, i * sizeof(struct ip_fw *));
+ map[i] = rule;
+ /* duplicate remaining part, we always have the default rule */
+ bcopy(chain->map + i, map + i + 1,
+ sizeof(struct ip_fw *) *(chain->n_rules - i));
+ if (rule->rulenum == 0) {
+ /* write back the number: previous rule's number plus the
+ * auto-increment step, unless that would collide with the
+ * default rule */
+ rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
+ if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
+ rule->rulenum += V_autoinc_step;
+ input_rule->rulenum = rule->rulenum;
+ }
+
+ rule->id = chain->id + 1;
+ /* swap_map publishes the new map and returns the old one */
+ map = swap_map(chain, map, chain->n_rules + 1);
+ chain->static_len += l;
+ IPFW_UH_WUNLOCK(chain);
+ /* NOTE(review): swap_map() never returns NULL here; the check is
+ * defensive only. */
+ if (map)
+ free(map, M_IPFW);
+ return (0);
+}
+
+/*
+ * Free an entire list of rules linked through x_next, typically the
+ * list built by del_entry() in chain->reap.  A NULL head is a no-op.
+ */
+void
+ipfw_reap_rules(struct ip_fw *head)
+{
+ struct ip_fw *next;
+
+ for (; head != NULL; head = next) {
+ next = head->x_next;
+ free(head, M_IPFW);
+ }
+}
+
+/*
+ * Used by del_entry() to decide whether a rule survives the operation.
+ * Returns 1 if the rule must be kept, 0 if it is to be deleted.
+ *
+ * Called with cmd in {0,1,5}:
+ * cmd == 0 matches on rule numbers, and excludes rules in RESVD_SET
+ * only for the full-flush case (n == 0);
+ * cmd == 1 matches on set numbers only, rule numbers are ignored;
+ * cmd == 5 matches on both rule and set numbers.
+ *
+ * n == 0 is a wildcard for rule numbers; there is no wildcard for sets.
+ * The default rule is always kept.
+ */
+static int
+keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
+{
+
+ /* The default rule can never be deleted. */
+ if (rule->rulenum == IPFW_DEFAULT_RULE)
+ return 1;
+ /* "ipfw flush" (cmd 0, wildcard number) spares RESVD_SET. */
+ if (cmd == 0 && n == 0 && rule->set == RESVD_SET)
+ return 1;
+ /* Keep when the set does not match (sets ignored for cmd 0). */
+ if (!(cmd == 0 || rule->set == set))
+ return 1;
+ /* Keep when the number does not match (numbers ignored for
+ * cmd 1 and for the n == 0 wildcard). */
+ if (!(cmd == 1 || n == 0 || n == rule->rulenum))
+ return 1;
+ return 0;
+}
+
+/**
+ * Remove all rules with given number, or do set manipulation.
+ * Assumes chain != NULL && *chain != NULL.
+ *
+ * The argument is an uint32_t. The low 16 bit are the rule or set number;
+ * the next 8 bits are the new set; the top 8 bits indicate the command:
+ *
+ * 0 delete rules numbered "rulenum"
+ * 1 delete rules in set "rulenum"
+ * 2 move rules "rulenum" to set "new_set"
+ * 3 move rules from set "rulenum" to set "new_set"
+ * 4 swap sets "rulenum" and "new_set"
+ * 5 delete rules "rulenum" and set "new_set"
+ *
+ * Returns 0 on success, EINVAL on bad arguments, no match for a
+ * specific delete request, or map allocation failure.
+ */
+static int
+del_entry(struct ip_fw_chain *chain, uint32_t arg)
+{
+ struct ip_fw *rule;
+ uint32_t num; /* rule number or old_set */
+ uint8_t cmd, new_set;
+ int start, end, i, ofs, n;
+ struct ip_fw **map = NULL;
+ int error = 0;
+
+ /* Decode the packed argument (see function comment). */
+ num = arg & 0xffff;
+ cmd = (arg >> 24) & 0xff;
+ new_set = (arg >> 16) & 0xff;
+
+ if (cmd > 5 || new_set > RESVD_SET)
+ return EINVAL;
+ if (cmd == 0 || cmd == 2 || cmd == 5) {
+ if (num >= IPFW_DEFAULT_RULE)
+ return EINVAL;
+ } else {
+ if (num > RESVD_SET) /* old_set */
+ return EINVAL;
+ }
+
+ IPFW_UH_WLOCK(chain); /* arbitrate writers */
+ chain->reap = NULL; /* prepare for deletions */
+
+ switch (cmd) {
+ case 0: /* delete rules "num" (num == 0 matches all) */
+ case 1: /* delete all rules in set N */
+ case 5: /* delete rules with number N and set "new_set". */
+
+ /*
+ * Locate first rule to delete (start), the rule after
+ * the last one to delete (end), and count how many
+ * rules to delete (n). Always use keep_rule() to
+ * determine which rules to keep.
+ */
+ n = 0;
+ if (cmd == 1) {
+ /* look for a specific set including RESVD_SET.
+ * Must scan the entire range, ignore num.
+ */
+ new_set = num;
+ for (start = -1, end = i = 0; i < chain->n_rules; i++) {
+ if (keep_rule(chain->map[i], cmd, new_set, 0))
+ continue;
+ if (start < 0)
+ start = i;
+ end = i;
+ n++;
+ }
+ end++; /* first non-matching */
+ } else {
+ /* Optimized search on rule numbers */
+ start = ipfw_find_rule(chain, num, 0);
+ for (end = start; end < chain->n_rules; end++) {
+ rule = chain->map[end];
+ if (num > 0 && rule->rulenum != num)
+ break;
+ if (!keep_rule(rule, cmd, new_set, num))
+ n++;
+ }
+ }
+
+ if (n == 0) {
+ /* A flush request (arg == 0 or cmd == 1) on empty
+ * ruleset returns with no error. On the contrary,
+ * if there is no match on a specific request,
+ * we return EINVAL.
+ */
+ if (arg != 0 && cmd != 1)
+ error = EINVAL;
+ break;
+ }
+
+ /* We have something to delete. Allocate the new map */
+ map = get_map(chain, -n, 1 /* locked */);
+ if (map == NULL) {
+ /* NOTE(review): allocation failure is reported as
+ * EINVAL rather than ENOMEM — behavior preserved. */
+ error = EINVAL;
+ break;
+ }
+
+ /* 1. bcopy the initial part of the map */
+ if (start > 0)
+ bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+ /* 2. copy active rules between start and end */
+ for (i = ofs = start; i < end; i++) {
+ rule = chain->map[i];
+ if (keep_rule(rule, cmd, new_set, num))
+ map[ofs++] = rule;
+ }
+ /* 3. copy the final part of the map */
+ bcopy(chain->map + end, map + ofs,
+ (chain->n_rules - end) * sizeof(struct ip_fw *));
+ /* 4. swap the maps (under BH_LOCK) */
+ map = swap_map(chain, map, chain->n_rules - n);
+ /* 5. now remove the rules deleted from the old map */
+ if (cmd == 1)
+ ipfw_expire_dyn_rules(chain, NULL, new_set);
+ for (i = start; i < end; i++) {
+ rule = map[i];
+ if (keep_rule(rule, cmd, new_set, num))
+ continue;
+ chain->static_len -= RULESIZE(rule);
+ if (cmd != 1)
+ ipfw_expire_dyn_rules(chain, rule, RESVD_SET);
+ /* defer the free: link into the reap list */
+ rule->x_next = chain->reap;
+ chain->reap = rule;
+ }
+ break;
+
+ /*
+ * In the next 3 cases the loop stops at (n_rules - 1)
+ * because the default rule is never eligible..
+ */
+
+ case 2: /* move rules with given RULE number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->rulenum == num)
+ rule->set = new_set;
+ }
+ break;
+
+ case 3: /* move rules with given SET number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->set == num)
+ rule->set = new_set;
+ }
+ break;
+
+ case 4: /* swap two sets */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->set == num)
+ rule->set = new_set;
+ else if (rule->set == new_set)
+ rule->set = num;
+ }
+ break;
+ }
+
+ /* Reap outside the lock; also free the superseded map, if any. */
+ rule = chain->reap;
+ chain->reap = NULL;
+ IPFW_UH_WUNLOCK(chain);
+ ipfw_reap_rules(rule);
+ if (map)
+ free(map, M_IPFW);
+ return error;
+}
+
+/*
+ * Clear counters for a specific rule.
+ * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
+ * so we only care that rules do not disappear.
+ * With log_only != 0 only the logging budget is replenished.
+ */
+static void
+clear_counters(struct ip_fw *rule, int log_only)
+{
+ ipfw_insn_log *logcmd = (ipfw_insn_log *)ACTION_PTR(rule);
+
+ if (logcmd->o.opcode == O_LOG)
+ logcmd->log_left = logcmd->max_log;
+ if (log_only == 0)
+ IPFW_ZERO_RULE_COUNTER(rule);
+}
+
+/**
+ * Reset some or all counters on firewall rules.
+ * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
+ * the next 8 bits are the set number, the top 8 bits are the command:
+ * 0 work with rules from all set's;
+ * 1 work with rules only from specified set.
+ * Specified rule number is zero if we want to clear all entries.
+ * log_only is 1 if we only want to reset logs, zero otherwise.
+ * Returns 0 on success, EINVAL on bad arguments or no matching rule.
+ */
+static int
+zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
+{
+ struct ip_fw *rule;
+ char *msg;
+ int i;
+
+ uint16_t rulenum = arg & 0xffff;
+ uint8_t set = (arg >> 16) & 0xff;
+ uint8_t cmd = (arg >> 24) & 0xff;
+
+ if (cmd > 1)
+ return (EINVAL);
+ if (cmd == 1 && set > RESVD_SET)
+ return (EINVAL);
+
+ /* NOTE(review): counters (and V_norule_counter) are written under
+ * the read lock; assumed safe because the operations are idempotent
+ * (see clear_counters) — confirm against other writers. */
+ IPFW_UH_RLOCK(chain);
+ if (rulenum == 0) {
+ /* Wildcard: clear every rule (optionally filtered by set). */
+ V_norule_counter = 0;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ /* Skip rules not in our set. */
+ if (cmd == 1 && rule->set != set)
+ continue;
+ clear_counters(rule, log_only);
+ }
+ msg = log_only ? "All logging counts reset" :
+ "Accounting cleared";
+ } else {
+ int cleared = 0;
+ /* Specific rule number: the map is sorted by rulenum, so we
+ * can stop as soon as we pass it. */
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ if (rule->rulenum == rulenum) {
+ if (cmd == 0 || rule->set == set)
+ clear_counters(rule, log_only);
+ cleared = 1;
+ }
+ if (rule->rulenum > rulenum)
+ break;
+ }
+ if (!cleared) { /* we did not find any matching rules */
+ IPFW_UH_RUNLOCK(chain);
+ return (EINVAL);
+ }
+ msg = log_only ? "logging count reset" : "cleared";
+ }
+ IPFW_UH_RUNLOCK(chain);
+
+ if (V_fw_verbose) {
+ int lev = LOG_SECURITY | LOG_NOTICE;
+
+ if (rulenum)
+ log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
+ else
+ log(lev, "ipfw: %s.\n", msg);
+ }
+ return (0);
+}
+
+/*
+ * Validate a rule structure coming from userland before insertion.
+ * Mostly checks per-opcode instruction sizes, that exactly one action
+ * opcode is present, and that the action is the last instruction.
+ * Returns 0 if the rule is acceptable; EINVAL otherwise (or
+ * EPROTONOSUPPORT for IPv6 opcodes in a kernel built without INET6).
+ */
+static int
+check_ipfw_struct(struct ip_fw *rule, int size)
+{
+ int l, cmdlen = 0;
+ int have_action=0;
+ ipfw_insn *cmd;
+
+ if (size < sizeof(*rule)) {
+ printf("ipfw: rule too short\n");
+ return (EINVAL);
+ }
+ /* first, check for valid size */
+ l = RULESIZE(rule);
+ if (l != size) {
+ printf("ipfw: size mismatch (have %d want %d)\n", size, l);
+ return (EINVAL);
+ }
+ if (rule->act_ofs >= rule->cmd_len) {
+ printf("ipfw: bogus action offset (%u > %u)\n",
+ rule->act_ofs, rule->cmd_len - 1);
+ return (EINVAL);
+ }
+ /*
+ * Now go for the individual checks. Very simple ones, basically only
+ * instruction sizes.
+ */
+ for (l = rule->cmd_len, cmd = rule->cmd ;
+ l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ if (cmdlen > l) {
+ printf("ipfw: opcode %d size truncated\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ switch (cmd->opcode) {
+ /* Plain match opcodes: exactly one ipfw_insn. */
+ case O_PROBE_STATE:
+ case O_KEEP_STATE:
+ case O_PROTO:
+ case O_IP_SRC_ME:
+ case O_IP_DST_ME:
+ case O_LAYER2:
+ case O_IN:
+ case O_FRAG:
+ case O_DIVERTED:
+ case O_IPOPT:
+ case O_IPTOS:
+ case O_IPPRECEDENCE:
+ case O_IPVER:
+ case O_SOCKARG:
+ case O_TCPFLAGS:
+ case O_TCPOPTS:
+ case O_ESTAB:
+ case O_VERREVPATH:
+ case O_VERSRCREACH:
+ case O_ANTISPOOF:
+ case O_IPSEC:
+#ifdef INET6
+ case O_IP6_SRC_ME:
+ case O_IP6_DST_ME:
+ case O_EXT_HDR:
+ case O_IP6:
+#endif
+ case O_IP4:
+ case O_TAG:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ break;
+
+ case O_FIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if (cmd->arg1 >= rt_numfibs) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ break;
+
+ case O_SETFIB:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ if ((cmd->arg1 != IP_FW_TABLEARG) &&
+ (cmd->arg1 >= rt_numfibs)) {
+ printf("ipfw: invalid fib number %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ goto check_action;
+
+ case O_UID:
+ case O_GID:
+ case O_JAIL:
+ case O_IP_SRC:
+ case O_IP_DST:
+ case O_TCPSEQ:
+ case O_TCPACK:
+ case O_PROB:
+ case O_ICMPTYPE:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+ goto bad_size;
+ break;
+
+ case O_LIMIT:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
+ goto bad_size;
+ break;
+
+ case O_LOG:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
+ goto bad_size;
+
+ /* Start with a full logging budget. */
+ ((ipfw_insn_log *)cmd)->log_left =
+ ((ipfw_insn_log *)cmd)->max_log;
+
+ break;
+
+ case O_IP_SRC_MASK:
+ case O_IP_DST_MASK:
+ /* only odd command lengths */
+ if ( !(cmdlen & 1) || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_IP_SRC_SET:
+ case O_IP_DST_SET:
+ if (cmd->arg1 == 0 || cmd->arg1 > 256) {
+ printf("ipfw: invalid set size %d\n",
+ cmd->arg1);
+ return EINVAL;
+ }
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+ (cmd->arg1+31)/32 )
+ goto bad_size;
+ break;
+
+ case O_IP_SRC_LOOKUP:
+ case O_IP_DST_LOOKUP:
+ if (cmd->arg1 >= IPFW_TABLES_MAX) {
+ printf("ipfw: invalid table number %d\n",
+ cmd->arg1);
+ return (EINVAL);
+ }
+ if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
+ cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
+ cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+ goto bad_size;
+ break;
+ case O_MACADDR2:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
+ goto bad_size;
+ break;
+
+ /* Variable-length match opcodes: 1..31 words. */
+ case O_NOP:
+ case O_IPID:
+ case O_IPTTL:
+ case O_IPLEN:
+ case O_TCPDATALEN:
+ case O_TCPWIN:
+ case O_TAGGED:
+ if (cmdlen < 1 || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_DSCP:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1)
+ goto bad_size;
+ break;
+
+ case O_MAC_TYPE:
+ case O_IP_SRCPORT:
+ case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
+ if (cmdlen < 2 || cmdlen > 31)
+ goto bad_size;
+ break;
+
+ case O_RECV:
+ case O_XMIT:
+ case O_VIA:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
+ goto bad_size;
+ break;
+
+ case O_ALTQ:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
+ goto bad_size;
+ break;
+
+ case O_PIPE:
+ case O_QUEUE:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ goto check_action;
+
+ case O_FORWARD_IP:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
+ goto bad_size;
+ goto check_action;
+#ifdef INET6
+ case O_FORWARD_IP6:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6))
+ goto bad_size;
+ goto check_action;
+#endif /* INET6 */
+
+ /* Actions below require their support module to be loaded. */
+ case O_DIVERT:
+ case O_TEE:
+ if (ip_divert_ptr == NULL)
+ return EINVAL;
+ else
+ goto check_size;
+ case O_NETGRAPH:
+ case O_NGTEE:
+ if (ng_ipfw_input_p == NULL)
+ return EINVAL;
+ else
+ goto check_size;
+ case O_NAT:
+ if (!IPFW_NAT_LOADED)
+ return EINVAL;
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
+ goto bad_size;
+ goto check_action;
+ case O_FORWARD_MAC: /* XXX not implemented yet */
+ case O_CHECK_STATE:
+ case O_COUNT:
+ case O_ACCEPT:
+ case O_DENY:
+ case O_REJECT:
+ case O_SETDSCP:
+#ifdef INET6
+ case O_UNREACH6:
+#endif
+ case O_SKIPTO:
+ case O_REASS:
+ case O_CALLRETURN:
+ /* Common exit: single-word actions funnel here. */
+check_size:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ /* All actions funnel here: exactly one action per rule,
+ * and it must be the last instruction. */
+check_action:
+ if (have_action) {
+ printf("ipfw: opcode %d, multiple actions"
+ " not allowed\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ have_action = 1;
+ if (l != cmdlen) {
+ printf("ipfw: opcode %d, action must be"
+ " last opcode\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ break;
+#ifdef INET6
+ case O_IP6_SRC:
+ case O_IP6_DST:
+ if (cmdlen != F_INSN_SIZE(struct in6_addr) +
+ F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ break;
+
+ case O_FLOW6ID:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+ ((ipfw_insn_u32 *)cmd)->o.arg1)
+ goto bad_size;
+ break;
+
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ if ( !(cmdlen & 1) || cmdlen > 127)
+ goto bad_size;
+ break;
+ case O_ICMP6TYPE:
+ if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
+ goto bad_size;
+ break;
+#endif
+
+ default:
+ switch (cmd->opcode) {
+#ifndef INET6
+ case O_IP6_SRC_ME:
+ case O_IP6_DST_ME:
+ case O_EXT_HDR:
+ case O_IP6:
+ case O_UNREACH6:
+ case O_IP6_SRC:
+ case O_IP6_DST:
+ case O_FLOW6ID:
+ case O_IP6_SRC_MASK:
+ case O_IP6_DST_MASK:
+ case O_ICMP6TYPE:
+ printf("ipfw: no IPv6 support in kernel\n");
+ return EPROTONOSUPPORT;
+#endif
+ default:
+ printf("ipfw: opcode %d, unknown opcode\n",
+ cmd->opcode);
+ return EINVAL;
+ }
+ }
+ }
+ if (have_action == 0) {
+ printf("ipfw: missing action\n");
+ return EINVAL;
+ }
+ return 0;
+
+bad_size:
+ printf("ipfw: opcode %d size %d wrong\n",
+ cmd->opcode, cmdlen);
+ return EINVAL;
+}
+
+
+/*
+ * Translation of requests for compatibility with FreeBSD 7.2/8.
+ * a static variable tells us if we have an old client from userland,
+ * and if necessary we translate requests and responses between the
+ * two formats.
+ */
+/* Non-zero once a FreeBSD 7.2-format client has been detected; sticky
+ * until a new-format IP_FW_ADD arrives (see ipfw_ctl). */
+static int is7 = 0;
+
+/* Layout of a rule as FreeBSD 7.2 userland expects it; differs from
+ * the current struct ip_fw mainly by the missing 'id' field. */
+struct ip_fw7 {
+ struct ip_fw7 *next; /* linked list of rules */
+ struct ip_fw7 *next_rule; /* ptr to next [skipto] rule */
+ /* 'next_rule' is used to pass up 'set_disable' status */
+
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t rulenum; /* rule number */
+ uint8_t set; /* rule set (0..31) */
+ // #define RESVD_SET 31 /* set for default and persistent rules */
+ uint8_t _pad; /* padding */
+ // uint32_t id; /* rule id, only in v.8 */
+ /* These fields are present in all rules. */
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+ uint32_t timestamp; /* tv_sec of last match */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+
+/* Converters between the 7.2 and 8.0 rule layouts (defined below). */
+ int convert_rule_to_7(struct ip_fw *rule);
+int convert_rule_to_8(struct ip_fw *rule);
+
+/* Size in bytes of a rule in the 7.2 layout. */
+#ifndef RULESIZE7
+#define RULESIZE7(rule) (sizeof(struct ip_fw7) + \
+ ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
+#endif
+
+
+/*
+ * Copy the static and dynamic rules to the supplied buffer
+ * and return the amount of space actually used.
+ * Must be run under IPFW_UH_RLOCK.
+ * When is7 is set, each rule is converted in place (inside the output
+ * buffer) to the FreeBSD 7.2 layout before being handed to userland.
+ */
+static size_t
+ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
+{
+ char *bp = buf;
+ char *ep = bp + space;
+ struct ip_fw *rule, *dst;
+ int l, i;
+ time_t boot_seconds;
+
+ /* Timestamps are stored relative to boot; export absolute time. */
+ boot_seconds = boottime.tv_sec;
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+
+ if (is7) {
+ /* Convert rule to FreeBSd 7.2 format */
+ l = RULESIZE7(rule);
+ if (bp + l + sizeof(uint32_t) <= ep) {
+ int error;
+ bcopy(rule, bp, l + sizeof(uint32_t));
+ error = convert_rule_to_7((struct ip_fw *) bp);
+ if (error)
+ return 0; /*XXX correct? */
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
+ bcopy(&V_set_disable,
+ &(((struct ip_fw7 *)bp)->next_rule),
+ sizeof(V_set_disable));
+ if (((struct ip_fw7 *)bp)->timestamp)
+ ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
+ bp += l;
+ }
+ continue; /* go to next rule */
+ }
+
+ /* normal mode, don't touch rules */
+ l = RULESIZE(rule);
+ if (bp + l > ep) { /* should not happen */
+ printf("overflow dumping static rules\n");
+ break;
+ }
+ dst = (struct ip_fw *)bp;
+ bcopy(rule, dst, l);
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
+ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
+ if (dst->timestamp)
+ dst->timestamp += boot_seconds;
+ bp += l;
+ }
+ ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */
+ return (bp - (char *)buf);
+}
+
+
+#define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader))
+/**
+ * {set|get}sockopt parser for all ipfw socket options.
+ * Dispatches on sopt->sopt_name (or, for IP_FW3, on the opcode found in
+ * the ip_fw3_opheader).  Returns 0 or an errno value.
+ *
+ * Fixes over the original: the IP_FW_ADD case used to 'return error'
+ * after a failed convert_rule_to_8()/convert_rule_to_7(), leaking the
+ * M_TEMP rule buffer, and silently ignored a sooptcopyin() failure in
+ * the 7.2-compat path.  All IP_FW_ADD exits now funnel through the
+ * common free(rule, M_TEMP).
+ */
+int
+ipfw_ctl(struct sockopt *sopt)
+{
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+ int error;
+ size_t size, len, valsize;
+ struct ip_fw *buf, *rule;
+ struct ip_fw_chain *chain;
+ u_int32_t rulenum[2];
+ uint32_t opt;
+ char xbuf[128];
+ ip_fw3_opheader *op3 = NULL;
+
+ error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
+ if (error)
+ return (error);
+
+ /*
+ * Disallow modifications in really-really secure mode, but still allow
+ * the logging counters to be reset.
+ */
+ if (sopt->sopt_name == IP_FW_ADD ||
+ (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
+ error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+ if (error)
+ return (error);
+ }
+
+ chain = &V_layer3_chain;
+ error = 0;
+
+ /* Save original valsize before it is altered via sooptcopyin() */
+ valsize = sopt->sopt_valsize;
+ if ((opt = sopt->sopt_name) == IP_FW3) {
+ /*
+ * Copy not less than sizeof(ip_fw3_opheader).
+ * We hope any IP_FW3 command will fit into 128-byte buffer.
+ */
+ if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf),
+ sizeof(ip_fw3_opheader))) != 0)
+ return (error);
+ op3 = (ip_fw3_opheader *)xbuf;
+ opt = op3->opcode;
+ }
+
+ switch (opt) {
+ case IP_FW_GET:
+ /*
+ * pass up a copy of the current rules. Static rules
+ * come first (the last of which has number IPFW_DEFAULT_RULE),
+ * followed by a possibly empty list of dynamic rule.
+ * The last dynamic rule has NULL in the "next" field.
+ *
+ * Note that the calculated size is used to bound the
+ * amount of data returned to the user. The rule set may
+ * change between calculating the size and returning the
+ * data in which case we'll just return what fits.
+ */
+ for (;;) {
+ int len = 0, want;
+
+ size = chain->static_len;
+ size += ipfw_dyn_len();
+ if (size >= sopt->sopt_valsize)
+ break;
+ buf = malloc(size, M_TEMP, M_WAITOK);
+ if (buf == NULL)
+ break;
+ IPFW_UH_RLOCK(chain);
+ /* check again how much space we need */
+ want = chain->static_len + ipfw_dyn_len();
+ if (size >= want)
+ len = ipfw_getrules(chain, buf, size);
+ IPFW_UH_RUNLOCK(chain);
+ if (size >= want)
+ error = sooptcopyout(sopt, buf, len);
+ free(buf, M_TEMP);
+ if (size >= want)
+ break;
+ }
+ break;
+
+ case IP_FW_FLUSH:
+ /* locking is done within del_entry() */
+ error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
+ break;
+
+ case IP_FW_ADD:
+ rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
+ sizeof(struct ip_fw7) );
+
+ /*
+ * If the size of commands equals RULESIZE7 then we assume
+ * a FreeBSD7.2 binary is talking to us (set is7=1).
+ * is7 is persistent so the next 'ipfw list' command
+ * will use this format.
+ * NOTE: If wrong version is guessed (this can happen if
+ * the first ipfw command is 'ipfw [pipe] list')
+ * the ipfw binary may crash or loop infinitly...
+ */
+ if (error == 0) {
+ if (sopt->sopt_valsize == RULESIZE7(rule)) {
+ is7 = 1;
+ error = convert_rule_to_8(rule);
+ if (error == 0)
+ error = check_ipfw_struct(rule, RULESIZE(rule));
+ } else {
+ is7 = 0;
+ error = check_ipfw_struct(rule, sopt->sopt_valsize);
+ }
+ }
+ if (error == 0) {
+ /* locking is done within ipfw_add_rule() */
+ error = ipfw_add_rule(chain, rule);
+ size = RULESIZE(rule);
+ if (!error && sopt->sopt_dir == SOPT_GET) {
+ if (is7) {
+ /* convert back for the old client */
+ error = convert_rule_to_7(rule);
+ size = RULESIZE7(rule);
+ }
+ if (error == 0)
+ error = sooptcopyout(sopt, rule, size);
+ }
+ }
+ /* Single exit point: the temporary buffer is always freed
+ * (the previous code leaked it on conversion errors). */
+ free(rule, M_TEMP);
+ break;
+
+ case IP_FW_DEL:
+ /*
+ * IP_FW_DEL is used for deleting single rules or sets,
+ * and (ab)used to atomically manipulate sets. Argument size
+ * is used to distinguish between the two:
+ * sizeof(u_int32_t)
+ * delete single rule or set of rules,
+ * or reassign rules (or sets) to a different set.
+ * 2*sizeof(u_int32_t)
+ * atomic disable/enable sets.
+ * first u_int32_t contains sets to be disabled,
+ * second u_int32_t contains sets to be enabled.
+ */
+ error = sooptcopyin(sopt, rulenum,
+ 2*sizeof(u_int32_t), sizeof(u_int32_t));
+ if (error)
+ break;
+ size = sopt->sopt_valsize;
+ if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
+ /* delete or reassign, locking done in del_entry() */
+ error = del_entry(chain, rulenum[0]);
+ } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
+ IPFW_UH_WLOCK(chain);
+ V_set_disable =
+ (V_set_disable | rulenum[0]) & ~rulenum[1] &
+ ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
+ IPFW_UH_WUNLOCK(chain);
+ } else
+ error = EINVAL;
+ break;
+
+ case IP_FW_ZERO:
+ case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
+ rulenum[0] = 0;
+ if (sopt->sopt_val != 0) {
+ error = sooptcopyin(sopt, rulenum,
+ sizeof(u_int32_t), sizeof(u_int32_t));
+ if (error)
+ break;
+ }
+ error = zero_entry(chain, rulenum[0],
+ sopt->sopt_name == IP_FW_RESETLOG);
+ break;
+
+ /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
+ case IP_FW_TABLE_ADD:
+ {
+ ipfw_table_entry ent;
+
+ error = sooptcopyin(sopt, &ent,
+ sizeof(ent), sizeof(ent));
+ if (error)
+ break;
+ error = ipfw_add_table_entry(chain, ent.tbl,
+ &ent.addr, sizeof(ent.addr), ent.masklen,
+ IPFW_TABLE_CIDR, ent.value);
+ }
+ break;
+
+ case IP_FW_TABLE_DEL:
+ {
+ ipfw_table_entry ent;
+
+ error = sooptcopyin(sopt, &ent,
+ sizeof(ent), sizeof(ent));
+ if (error)
+ break;
+ error = ipfw_del_table_entry(chain, ent.tbl,
+ &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR);
+ }
+ break;
+
+ case IP_FW_TABLE_XADD: /* IP_FW3 */
+ case IP_FW_TABLE_XDEL: /* IP_FW3 */
+ {
+ ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1);
+
+ /* Check minimum header size */
+ if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) {
+ error = EINVAL;
+ break;
+ }
+
+ /* Check if len field is valid */
+ if (xent->len > sizeof(ipfw_table_xentry)) {
+ error = EINVAL;
+ break;
+ }
+
+ len = xent->len - offsetof(ipfw_table_xentry, k);
+
+ error = (opt == IP_FW_TABLE_XADD) ?
+ ipfw_add_table_entry(chain, xent->tbl, &xent->k,
+ len, xent->masklen, xent->type, xent->value) :
+ ipfw_del_table_entry(chain, xent->tbl, &xent->k,
+ len, xent->masklen, xent->type);
+ }
+ break;
+
+ case IP_FW_TABLE_FLUSH:
+ {
+ u_int16_t tbl;
+
+ error = sooptcopyin(sopt, &tbl,
+ sizeof(tbl), sizeof(tbl));
+ if (error)
+ break;
+ error = ipfw_flush_table(chain, tbl);
+ }
+ break;
+
+ case IP_FW_TABLE_GETSIZE:
+ {
+ u_int32_t tbl, cnt;
+
+ if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
+ sizeof(tbl))))
+ break;
+ IPFW_RLOCK(chain);
+ error = ipfw_count_table(chain, tbl, &cnt);
+ IPFW_RUNLOCK(chain);
+ if (error)
+ break;
+ error = sooptcopyout(sopt, &cnt, sizeof(cnt));
+ }
+ break;
+
+ case IP_FW_TABLE_LIST:
+ {
+ ipfw_table *tbl;
+
+ if (sopt->sopt_valsize < sizeof(*tbl)) {
+ error = EINVAL;
+ break;
+ }
+ size = sopt->sopt_valsize;
+ tbl = malloc(size, M_TEMP, M_WAITOK);
+ error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
+ if (error) {
+ free(tbl, M_TEMP);
+ break;
+ }
+ tbl->size = (size - sizeof(*tbl)) /
+ sizeof(ipfw_table_entry);
+ IPFW_RLOCK(chain);
+ error = ipfw_dump_table(chain, tbl);
+ IPFW_RUNLOCK(chain);
+ if (error) {
+ free(tbl, M_TEMP);
+ break;
+ }
+ error = sooptcopyout(sopt, tbl, size);
+ free(tbl, M_TEMP);
+ }
+ break;
+
+ case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */
+ {
+ uint32_t *tbl;
+
+ if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) {
+ error = EINVAL;
+ break;
+ }
+
+ tbl = (uint32_t *)(op3 + 1);
+
+ IPFW_RLOCK(chain);
+ error = ipfw_count_xtable(chain, *tbl, tbl);
+ IPFW_RUNLOCK(chain);
+ if (error)
+ break;
+ error = sooptcopyout(sopt, op3, sopt->sopt_valsize);
+ }
+ break;
+
+ case IP_FW_TABLE_XLIST: /* IP_FW3 */
+ {
+ ipfw_xtable *tbl;
+
+ if ((size = valsize) < sizeof(ipfw_xtable)) {
+ error = EINVAL;
+ break;
+ }
+
+ tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
+ memcpy(tbl, op3, sizeof(ipfw_xtable));
+
+ /* Get maximum number of entries we can store */
+ tbl->size = (size - sizeof(ipfw_xtable)) /
+ sizeof(ipfw_table_xentry);
+ IPFW_RLOCK(chain);
+ error = ipfw_dump_xtable(chain, tbl);
+ IPFW_RUNLOCK(chain);
+ if (error) {
+ free(tbl, M_TEMP);
+ break;
+ }
+
+ /* Revert size field back to bytes */
+ tbl->size = tbl->size * sizeof(ipfw_table_xentry) +
+ sizeof(ipfw_table);
+ /*
+ * Since we call sooptcopyin() with small buffer, sopt_valsize is
+ * decreased to reflect supplied buffer size. Set it back to original value
+ */
+ sopt->sopt_valsize = valsize;
+ error = sooptcopyout(sopt, tbl, size);
+ free(tbl, M_TEMP);
+ }
+ break;
+
+ /*--- NAT operations are protected by the IPFW_LOCK ---*/
+ case IP_FW_NAT_CFG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_cfg_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_CFG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_DEL:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_del_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_DEL: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_GET_CONFIG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_get_cfg_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_GET_CFG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ case IP_FW_NAT_GET_LOG:
+ if (IPFW_NAT_LOADED)
+ error = ipfw_nat_get_log_ptr(sopt);
+ else {
+ printf("IP_FW_NAT_GET_LOG: %s\n",
+ "ipfw_nat not present, please load it");
+ error = EINVAL;
+ }
+ break;
+
+ default:
+ printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
+ error = EINVAL;
+ }
+
+ return (error);
+#undef RULE_MAXSIZE
+}
+
+
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+
+/* Functions to convert rules 7.2 <==> 8.0 */
+/*
+ * Convert a version 8 rule to the 7.2 layout, in place.
+ *
+ * 'rule' is rewritten through the overlapping 'rule7' view, so a
+ * scratch copy of the original is taken first.  Opcodes numbered after
+ * O_NAT are decremented because O_REASS does not exist in 7.2.
+ * Returns 0 on success, 1 if the scratch buffer cannot be allocated
+ * (XXX should be a proper errno) and EINVAL on a truncated opcode.
+ */
+int
+convert_rule_to_7(struct ip_fw *rule)
+{
+	/* Used to modify original rule */
+	struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
+	/* copy of original rule, version 8 */
+	struct ip_fw *tmp;
+
+	/* Used to copy commands */
+	ipfw_insn *ccmd, *dst;
+	int ll = 0, ccmdlen = 0;
+
+	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+	if (tmp == NULL) {
+		return 1; //XXX error
+	}
+	bcopy(rule, tmp, RULE_MAXSIZE);
+
+	/* Copy fields */
+	rule7->_pad = tmp->_pad;
+	rule7->set = tmp->set;
+	rule7->rulenum = tmp->rulenum;
+	rule7->cmd_len = tmp->cmd_len;
+	rule7->act_ofs = tmp->act_ofs;
+	rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
+	rule7->next = (struct ip_fw7 *)tmp->x_next;
+	rule7->pcnt = tmp->pcnt;
+	rule7->bcnt = tmp->bcnt;
+	rule7->timestamp = tmp->timestamp;
+
+	/* Copy commands */
+	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ;
+			ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+		ccmdlen = F_LEN(ccmd);
+
+		bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t));
+
+		if (dst->opcode > O_NAT)
+			/* O_REASS doesn't exists in 7.2 version, so
+			 * decrement opcode if it is after O_REASS
+			 */
+			dst->opcode--;
+
+		if (ccmdlen > ll) {
+			printf("ipfw: opcode %d size truncated\n",
+			    ccmd->opcode);
+			/* Previously leaked: release the scratch copy */
+			free(tmp, M_TEMP);
+			return EINVAL;
+		}
+	}
+	free(tmp, M_TEMP);
+
+	return 0;
+}
+
+/*
+ * Convert a version 7.2 rule to the version 8 layout, in place.
+ *
+ * Mirror image of convert_rule_to_7(): a scratch copy of the 7.2 rule
+ * is taken, the opcode list is copied back with opcodes after O_NAT
+ * incremented (O_REASS was inserted there in version 8), and the fixed
+ * fields are rewritten last because the two layouts overlap.
+ * Returns 0 on success, 1 on allocation failure (XXX should be a
+ * proper errno) and EINVAL on a truncated opcode.
+ */
+int
+convert_rule_to_8(struct ip_fw *rule)
+{
+	/* Used to modify original rule */
+	struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
+
+	/* Used to copy commands */
+	ipfw_insn *ccmd, *dst;
+	int ll = 0, ccmdlen = 0;
+
+	/* Copy of original rule */
+	struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+	if (tmp == NULL) {
+		return 1; //XXX error
+	}
+
+	bcopy(rule7, tmp, RULE_MAXSIZE);
+
+	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ;
+			ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+		ccmdlen = F_LEN(ccmd);
+
+		bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t));
+
+		if (dst->opcode > O_NAT)
+			/* O_REASS doesn't exists in 7.2 version, so
+			 * increment opcode if it is after O_REASS
+			 */
+			dst->opcode++;
+
+		if (ccmdlen > ll) {
+			printf("ipfw: opcode %d size truncated\n",
+			    ccmd->opcode);
+			/* Previously leaked: release the scratch copy */
+			free(tmp, M_TEMP);
+			return EINVAL;
+		}
+	}
+
+	rule->_pad = tmp->_pad;
+	rule->set = tmp->set;
+	rule->rulenum = tmp->rulenum;
+	rule->cmd_len = tmp->cmd_len;
+	rule->act_ofs = tmp->act_ofs;
+	rule->next_rule = (struct ip_fw *)tmp->next_rule;
+	rule->x_next = (struct ip_fw *)tmp->next;
+	rule->id = 0; /* XXX see if is ok = 0 */
+	rule->pcnt = tmp->pcnt;
+	rule->bcnt = tmp->bcnt;
+	rule->timestamp = tmp->timestamp;
+
+	free (tmp, M_TEMP);
+	return 0;
+}
+
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.c b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
new file mode 100644
index 00000000..58ee16e9
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
@@ -0,0 +1,764 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Lookup table support for ipfw
+ *
+ * Lookup tables are implemented (at the moment) using the radix
+ * tree used for routing tables. Tables store key-value entries, where
+ * keys are network prefixes (addr/masklen), and values are integers.
+ * As a degenerate case we can interpret keys as 32-bit integers
+ * (with a /32 mask).
+ *
+ * The table is protected by the IPFW lock even for manipulation coming
+ * from userland, because operations are typically fast.
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
+#include <net/radix.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+
+/* IPv4 CIDR table entry: radix glue, masked key/mask pair, and value. */
+struct table_entry {
+	struct radix_node rn[2];
+	struct sockaddr_in addr, mask;
+	u_int32_t value;
+};
+
+/* Radix key wrapper used for interface-name table entries. */
+struct xaddr_iface {
+	uint8_t		if_len;		/* length of this struct */
+	uint8_t		pad[7];		/* Align name */
+	char 		ifname[IF_NAMESIZE];	/* Interface name */
+};
+
+/*
+ * Extended table entry: either an IPv6 prefix or an interface name,
+ * with the matching mask in the parallel union member.
+ */
+struct table_xentry {
+	struct radix_node rn[2];
+	union {
+#ifdef INET6
+		struct sockaddr_in6 addr6;
+#endif
+		struct xaddr_iface iface;
+	} a;
+	union {
+#ifdef INET6
+		struct sockaddr_in6 mask6;
+#endif
+		struct xaddr_iface ifmask;
+	} m;
+	u_int32_t value;
+};
+
+/*
+ * The radix code expects addr and mask to be array of bytes,
+ * with the first byte being the length of the array. rn_inithead
+ * is called with the offset in bits of the lookup key within the
+ * array. If we use a sockaddr_in as the underlying type,
+ * sin_len is conveniently located at offset 0, sin_addr is at
+ * offset 4 and normally aligned.
+ * But for portability, let's avoid assumption and make the code explicit
+ */
+#define KEY_LEN(v) *((uint8_t *)&(v))
+#define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
+/*
+ * Do not require radix to compare more than actual IPv4/IPv6 address
+ */
+#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
+#define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
+#define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname))
+
+#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
+#define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr))
+#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
+
+
+/*
+ * Write a contiguous IPv6 netmask of 'mask' (0..128) bits into *addr6.
+ *
+ * Whole 32-bit words are set to all-ones; the final partial word, if
+ * any, gets the remaining high bits.  The tail store is skipped when no
+ * bits remain: the old unconditional store ran one word past the end of
+ * the address for mask == 128, and both callers in this file pre-zero
+ * the buffer (M_ZERO / memset), so skipping the zero store for
+ * exact-word masks is equivalent.
+ */
+static inline void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+	uint32_t *cp;
+
+	for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
+		*cp++ = 0xFFFFFFFF;
+	if (mask > 0)
+		*cp = htonl(~((1 << (32 - mask)) - 1));
+}
+
+/*
+ * Add one entry to lookup table 'tbl' of kind 'type'.
+ *
+ * IPFW_TABLE_CIDR: 'paddr' holds an IPv4 (plen == 4) or IPv6
+ * (plen == 16) address and 'mlen' the prefix length.
+ * IPFW_TABLE_INTERFACE: 'paddr' holds an interface name.
+ * 'value' is the datum later returned by lookups.
+ *
+ * The entry is built outside the chain lock; the radix head for the
+ * table is created on first use, with a re-check after re-locking in
+ * case another thread attached one concurrently.
+ * Returns 0, or EINVAL/ENOMEM/EEXIST on failure.
+ */
+int
+ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+    uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
+{
+	struct radix_node_head *rnh, **rnh_ptr;
+	struct table_entry *ent;
+	struct table_xentry *xent;
+	struct radix_node *rn;
+	in_addr_t addr;
+	int offset;
+	void *ent_ptr;
+	struct sockaddr *addr_ptr, *mask_ptr;
+	char c;
+
+	if (tbl >= V_fw_tables_max)
+		return (EINVAL);
+
+	switch (type) {
+	case IPFW_TABLE_CIDR:
+		if (plen == sizeof(in_addr_t)) {
+#ifdef INET
+			/* IPv4 case */
+			if (mlen > 32)
+				return (EINVAL);
+			ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+			ent->value = value;
+			/* Set 'total' structure length */
+			KEY_LEN(ent->addr) = KEY_LEN_INET;
+			KEY_LEN(ent->mask) = KEY_LEN_INET;
+			/* Set offset of IPv4 address in bits */
+			offset = OFF_LEN_INET;
+			ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+			addr = *((in_addr_t *)paddr);
+			/* Store the key pre-masked so lookups need no masking */
+			ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
+			/* Set pointers */
+			rnh_ptr = &ch->tables[tbl];
+			ent_ptr = ent;
+			addr_ptr = (struct sockaddr *)&ent->addr;
+			mask_ptr = (struct sockaddr *)&ent->mask;
+#endif
+#ifdef INET6
+		} else if (plen == sizeof(struct in6_addr)) {
+			/* IPv6 case */
+			if (mlen > 128)
+				return (EINVAL);
+			xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+			xent->value = value;
+			/* Set 'total' structure length */
+			KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
+			KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
+			/* Set offset of IPv6 address in bits */
+			offset = OFF_LEN_INET6;
+			ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
+			memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
+			APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
+			/* Set pointers */
+			rnh_ptr = &ch->xtables[tbl];
+			ent_ptr = xent;
+			addr_ptr = (struct sockaddr *)&xent->a.addr6;
+			mask_ptr = (struct sockaddr *)&xent->m.mask6;
+#endif
+		} else {
+			/* Unknown CIDR type */
+			return (EINVAL);
+		}
+		break;
+
+	case IPFW_TABLE_INTERFACE:
+		/* Check if string is terminated */
+		/* NB: the saved byte 'c' is not written back afterwards */
+		c = ((char *)paddr)[IF_NAMESIZE - 1];
+		((char *)paddr)[IF_NAMESIZE - 1] = '\0';
+		/* NB: mlen is reused as the name length from here on */
+		if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
+			return (EINVAL);
+
+		/* Include last \0 into comparison */
+		mlen++;
+
+		xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+		xent->value = value;
+		/* Set 'total' structure length */
+		KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
+		KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
+		/* Set offset of interface name in bits */
+		offset = OFF_LEN_IFACE;
+		memcpy(xent->a.iface.ifname, paddr, mlen);
+		/* Assume direct match */
+		/* TODO: Add interface pattern matching */
+#if 0
+		memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
+		mask_ptr = (struct sockaddr *)&xent->m.ifmask;
+#endif
+		/* Set pointers */
+		rnh_ptr = &ch->xtables[tbl];
+		ent_ptr = xent;
+		addr_ptr = (struct sockaddr *)&xent->a.iface;
+		mask_ptr = NULL;
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	IPFW_WLOCK(ch);
+
+	/* Check if tabletype is valid */
+	if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
+		IPFW_WUNLOCK(ch);
+		free(ent_ptr, M_IPFW_TBL);
+		return (EINVAL);
+	}
+
+	/* Check if radix tree exists */
+	if ((rnh = *rnh_ptr) == NULL) {
+		IPFW_WUNLOCK(ch);
+		/* Create radix for a new table */
+		/* (done unlocked: rn_inithead() allocates) */
+		if (!rn_inithead((void **)&rnh, offset)) {
+			free(ent_ptr, M_IPFW_TBL);
+			return (ENOMEM);
+		}
+
+		IPFW_WLOCK(ch);
+		if (*rnh_ptr != NULL) {
+			/* Tree is already attached by other thread */
+			rn_detachhead((void **)&rnh);
+			rnh = *rnh_ptr;
+			/* Check table type another time */
+			if (ch->tabletype[tbl] != type) {
+				IPFW_WUNLOCK(ch);
+				free(ent_ptr, M_IPFW_TBL);
+				return (EINVAL);
+			}
+		} else {
+			*rnh_ptr = rnh;
+			/*
+			 * Set table type. It can be set already
+			 * (if we have IPv6-only table) but setting
+			 * it another time does not hurt
+			 */
+			ch->tabletype[tbl] = type;
+		}
+	}
+
+	rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
+	IPFW_WUNLOCK(ch);
+
+	if (rn == NULL) {
+		/* NULL from rnh_addaddr(): duplicate key (or radix failure) */
+		free(ent_ptr, M_IPFW_TBL);
+		return (EEXIST);
+	}
+	return (0);
+}
+
+/*
+ * Remove one entry from lookup table 'tbl'.
+ *
+ * The search key is rebuilt on the stack exactly the way
+ * ipfw_add_table_entry() built the stored key, then handed to
+ * rnh_deladdr().  Returns 0 on success, EINVAL on bad arguments or a
+ * table-type mismatch, and ESRCH if the table or entry does not exist.
+ */
+int
+ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+    uint8_t plen, uint8_t mlen, uint8_t type)
+{
+	struct radix_node_head *rnh, **rnh_ptr;
+	struct table_entry *ent;
+	in_addr_t addr;
+	struct sockaddr_in sa, mask;
+	struct sockaddr *sa_ptr, *mask_ptr;
+	char c;
+
+	if (tbl >= V_fw_tables_max)
+		return (EINVAL);
+
+	switch (type) {
+	case IPFW_TABLE_CIDR:
+		if (plen == sizeof(in_addr_t)) {
+			/* Set 'total' structure length */
+			KEY_LEN(sa) = KEY_LEN_INET;
+			KEY_LEN(mask) = KEY_LEN_INET;
+			/* NOTE(review): unlike the add path there is no
+			 * mlen > 32 check here; presumably the sockopt
+			 * handler validates masklen -- confirm */
+			mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+			addr = *((in_addr_t *)paddr);
+			sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
+			rnh_ptr = &ch->tables[tbl];
+			sa_ptr = (struct sockaddr *)&sa;
+			mask_ptr = (struct sockaddr *)&mask;
+#ifdef INET6
+		} else if (plen == sizeof(struct in6_addr)) {
+			/* IPv6 case */
+			if (mlen > 128)
+				return (EINVAL);
+			struct sockaddr_in6 sa6, mask6;
+			memset(&sa6, 0, sizeof(struct sockaddr_in6));
+			memset(&mask6, 0, sizeof(struct sockaddr_in6));
+			/* Set 'total' structure length */
+			KEY_LEN(sa6) = KEY_LEN_INET6;
+			KEY_LEN(mask6) = KEY_LEN_INET6;
+			ipv6_writemask(&mask6.sin6_addr, mlen);
+			memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
+			APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
+			rnh_ptr = &ch->xtables[tbl];
+			sa_ptr = (struct sockaddr *)&sa6;
+			mask_ptr = (struct sockaddr *)&mask6;
+#endif
+		} else {
+			/* Unknown CIDR type */
+			return (EINVAL);
+		}
+		break;
+
+	case IPFW_TABLE_INTERFACE:
+		/* Check if string is terminated */
+		c = ((char *)paddr)[IF_NAMESIZE - 1];
+		((char *)paddr)[IF_NAMESIZE - 1] = '\0';
+		if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
+			return (EINVAL);
+
+		struct xaddr_iface ifname, ifmask;
+		memset(&ifname, 0, sizeof(ifname));
+
+		/* Include last \0 into comparison */
+		mlen++;
+
+		/* Set 'total' structure length */
+		KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
+		KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
+		/* Assume direct match */
+		/* FIXME: Add interface pattern matching */
+#if 0
+		memset(ifmask.ifname, 0xFF, IF_NAMESIZE);
+		mask_ptr = (struct sockaddr *)&ifmask;
+#endif
+		mask_ptr = NULL;
+		memcpy(ifname.ifname, paddr, mlen);
+		/* Set pointers */
+		rnh_ptr = &ch->xtables[tbl];
+		sa_ptr = (struct sockaddr *)&ifname;
+
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	IPFW_WLOCK(ch);
+	if ((rnh = *rnh_ptr) == NULL) {
+		IPFW_WUNLOCK(ch);
+		return (ESRCH);
+	}
+
+	if (ch->tabletype[tbl] != type) {
+		IPFW_WUNLOCK(ch);
+		return (EINVAL);
+	}
+
+	ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
+	IPFW_WUNLOCK(ch);
+
+	if (ent == NULL)
+		return (ESRCH);
+
+	/* Entry is unlinked from the tree; free it outside the lock */
+	free(ent, M_IPFW_TBL);
+	return (0);
+}
+
+/*
+ * rnh_walktree() callback: unlink one radix node from the tree passed
+ * in 'arg' and release its backing storage.  Always returns 0 so the
+ * walk visits every entry.
+ */
+static int
+flush_table_entry(struct radix_node *rn, void *arg)
+{
+	struct radix_node_head * const head = arg;
+	struct table_entry *victim;
+
+	victim = (struct table_entry *)
+	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
+	if (victim != NULL)
+		free(victim, M_IPFW_TBL);
+	return (0);
+}
+
+/*
+ * Destroy both radix trees of table 'tbl' and reset its type.
+ *
+ * The head pointers are detached under the chain write lock; the
+ * (potentially slow) tree walks and frees then run unlocked on the
+ * detached heads.  Returns 0 or EINVAL for a bad index.
+ */
+int
+ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
+{
+	struct radix_node_head *rnh, *xrnh;
+
+	if (tbl >= V_fw_tables_max)
+		return (EINVAL);
+
+	/*
+	 * We free both (IPv4 and extended) radix trees and
+	 * clear table type here to permit table to be reused
+	 * for different type without module reload
+	 */
+
+	IPFW_WLOCK(ch);
+	/* Set IPv4 table pointer to zero */
+	if ((rnh = ch->tables[tbl]) != NULL)
+		ch->tables[tbl] = NULL;
+	/* Set extended table pointer to zero */
+	if ((xrnh = ch->xtables[tbl]) != NULL)
+		ch->xtables[tbl] = NULL;
+	/* Zero table type */
+	ch->tabletype[tbl] = 0;
+	IPFW_WUNLOCK(ch);
+
+	if (rnh != NULL) {
+		rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+		rn_detachhead((void **)&rnh);
+	}
+
+	if (xrnh != NULL) {
+		xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh);
+		rn_detachhead((void **)&xrnh);
+	}
+
+	return (0);
+}
+
+/*
+ * Tear down all lookup tables of a chain: flush every table, then
+ * release the per-chain pointer and type arrays themselves.
+ */
+void
+ipfw_destroy_tables(struct ip_fw_chain *ch)
+{
+	uint16_t i;
+
+	/* Flush every table, destroying its radix trees */
+	for (i = 0; i < V_fw_tables_max; i++)
+		ipfw_flush_table(ch, i);
+
+	/* Release the arrays allocated by ipfw_init_tables() */
+	free(ch->tables, M_IPFW);
+	free(ch->xtables, M_IPFW);
+	free(ch->tabletype, M_IPFW);
+}
+
+/*
+ * Allocate the per-chain arrays of table heads (IPv4 and extended) and
+ * table types, sized by the current V_fw_tables_max.  Cannot fail
+ * (M_WAITOK); always returns 0.
+ */
+int
+ipfw_init_tables(struct ip_fw_chain *ch)
+{
+	/* Allocate pointers */
+	ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
+	ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
+	ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
+	return (0);
+}
+
+/*
+ * Grow or shrink the number of tables to 'ntables' (clamped to
+ * IPFW_TABLES_MAX).
+ *
+ * New pointer arrays are allocated up front (M_WAITOK, unlocked), the
+ * live arrays are swapped under the write lock, and any table dropped
+ * by a shrink is destroyed after the lock is released.  Returns 0.
+ */
+int
+ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
+{
+	struct radix_node_head **tables, **xtables, *rnh;
+	struct radix_node_head **tables_old, **xtables_old;
+	uint8_t *tabletype, *tabletype_old;
+	unsigned int ntables_old, tbl;
+
+	/* Check new value for validity */
+	if (ntables > IPFW_TABLES_MAX)
+		ntables = IPFW_TABLES_MAX;
+
+	/* Allocate new pointers */
+	tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
+	xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
+	tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
+
+	IPFW_WLOCK(ch);
+
+	/* Copy only the entries surviving the resize */
+	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
+
+	/* Copy old table pointers */
+	memcpy(tables, ch->tables, sizeof(void *) * tbl);
+	memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
+	memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
+
+	/* Change pointers and number of tables */
+	tables_old = ch->tables;
+	xtables_old = ch->xtables;
+	tabletype_old = ch->tabletype;
+	ch->tables = tables;
+	ch->xtables = xtables;
+	ch->tabletype = tabletype;
+
+	ntables_old = V_fw_tables_max;
+	V_fw_tables_max = ntables;
+
+	IPFW_WUNLOCK(ch);
+
+	/* Check if we need to destroy radix trees */
+	if (ntables < ntables_old) {
+		for (tbl = ntables; tbl < ntables_old; tbl++) {
+			if ((rnh = tables_old[tbl]) != NULL) {
+				rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+				rn_detachhead((void **)&rnh);
+			}
+
+			if ((rnh = xtables_old[tbl]) != NULL) {
+				rnh->rnh_walktree(rnh, flush_table_entry, rnh);
+				rn_detachhead((void **)&rnh);
+			}
+		}
+	}
+
+	/* Free old pointers */
+	free(tables_old, M_IPFW);
+	free(xtables_old, M_IPFW);
+	free(tabletype_old, M_IPFW);
+
+	return (0);
+}
+
+/*
+ * Fast-path lookup of IPv4 address 'addr' (network byte order) in
+ * table 'tbl'.  On a match, store the entry value in *val and return
+ * 1; return 0 on no match, a missing table or an out-of-range index.
+ *
+ * NOTE(review): no chain lock is taken here; presumably the caller
+ * holds IPFW_RLOCK -- confirm at the call sites.
+ */
+int
+ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+    uint32_t *val)
+{
+	struct radix_node_head *rnh;
+	struct table_entry *ent;
+	struct sockaddr_in sa;
+
+	if (tbl >= V_fw_tables_max)
+		return (0);
+	if ((rnh = ch->tables[tbl]) == NULL)
+		return (0);
+	/* Only sa_len and sin_addr are filled in: the radix head was
+	 * initialized to start comparing at the sin_addr offset */
+	KEY_LEN(sa) = KEY_LEN_INET;
+	sa.sin_addr.s_addr = addr;
+	ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+	if (ent != NULL) {
+		*val = ent->value;
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Look up a key in extended table 'tbl': an IPv6 address for
+ * IPFW_TABLE_CIDR or an interface name for IPFW_TABLE_INTERFACE.
+ * On a match, store the entry value in *val and return 1; return 0
+ * otherwise (including unknown 'type' or absent table).
+ */
+int
+ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
+    uint32_t *val, int type)
+{
+	struct radix_node_head *rnh;
+	struct table_xentry *xent;
+	struct sockaddr_in6 sa6;
+	struct xaddr_iface iface;
+
+	if (tbl >= V_fw_tables_max)
+		return (0);
+	if ((rnh = ch->xtables[tbl]) == NULL)
+		return (0);
+
+	switch (type) {
+	case IPFW_TABLE_CIDR:
+		KEY_LEN(sa6) = KEY_LEN_INET6;
+		memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
+		xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh));
+		break;
+
+	case IPFW_TABLE_INTERFACE:
+		/* Key length covers the name including its NUL terminator,
+		 * matching how ipfw_add_table_entry() stored it */
+		KEY_LEN(iface) = KEY_LEN_IFACE +
+		    strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
+		/* Assume direct match */
+		/* FIXME: Add interface pattern matching */
+		xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
+		break;
+
+	default:
+		return (0);
+	}
+
+	if (xent != NULL) {
+		*val = xent->value;
+		return (1);
+	}
+	return (0);
+}
+
+/* rnh_walktree() callback: bump the entry counter passed via 'arg'. */
+static int
+count_table_entry(struct radix_node *rn, void *arg)
+{
+	u_int32_t * const counter = arg;
+
+	*counter += 1;
+	return (0);
+}
+
+/*
+ * Store in *cnt the number of entries in IPv4 table 'tbl'.
+ * An absent table counts as empty.  Returns 0, or EINVAL for a bad
+ * index.
+ */
+int
+ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+{
+	struct radix_node_head *rnh;
+
+	if (tbl >= V_fw_tables_max)
+		return (EINVAL);
+	*cnt = 0;
+	if ((rnh = ch->tables[tbl]) == NULL)
+		return (0);
+	rnh->rnh_walktree(rnh, count_table_entry, cnt);
+	return (0);
+}
+
+/*
+ * rnh_walktree() callback: export one IPv4 entry into the ipfw_table
+ * buffer carried in 'arg'.  Returns 1 to abort the walk once tbl->size
+ * entries have been written.
+ */
+static int
+dump_table_entry(struct radix_node *rn, void *arg)
+{
+	struct table_entry * const n = (struct table_entry *)rn;
+	ipfw_table * const tbl = arg;
+	ipfw_table_entry *ent;
+
+	if (tbl->cnt == tbl->size)
+		return (1);
+	ent = &tbl->ent[tbl->cnt];
+	ent->tbl = tbl->tbl;
+	if (in_nullhost(n->mask.sin_addr))
+		ent->masklen = 0;
+	else
+		/* Contiguous mask: 33 - position of lowest set bit gives
+		 * the prefix length */
+		ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
+	ent->addr = n->addr.sin_addr.s_addr;
+	ent->value = n->value;
+	tbl->cnt++;
+	return (0);
+}
+
+/*
+ * Fill the caller-provided buffer (capacity tbl->size entries) with the
+ * contents of IPv4 table tbl->tbl; tbl->cnt receives the number of
+ * entries written.  Returns 0, or EINVAL for a bad table index.
+ */
+int
+ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
+{
+	struct radix_node_head *rnh;
+
+	if (tbl->tbl >= V_fw_tables_max)
+		return (EINVAL);
+	tbl->cnt = 0;
+	if ((rnh = ch->tables[tbl->tbl]) == NULL)
+		return (0);
+	rnh->rnh_walktree(rnh, dump_table_entry, tbl);
+	return (0);
+}
+
+/*
+ * rnh_walktree() callback: add the exported size of one entry to the
+ * byte counter passed via 'arg'.
+ */
+static int
+count_table_xentry(struct radix_node *rn, void *arg)
+{
+	uint32_t * const nbytes = arg;
+
+	*nbytes += sizeof(ipfw_table_xentry);
+	return (0);
+}
+
+/*
+ * Compute, in *cnt, the number of BYTES needed to export table 'tbl'
+ * in extended format: one ipfw_table_xentry per entry in either radix
+ * tree (IPv4 and extended) plus one ipfw_xtable header -- or 0 if the
+ * table is empty.  Returns 0, or EINVAL for a bad index.
+ */
+int
+ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+{
+	struct radix_node_head *rnh;
+
+	if (tbl >= V_fw_tables_max)
+		return (EINVAL);
+	*cnt = 0;
+	if ((rnh = ch->tables[tbl]) != NULL)
+		rnh->rnh_walktree(rnh, count_table_xentry, cnt);
+	if ((rnh = ch->xtables[tbl]) != NULL)
+		rnh->rnh_walktree(rnh, count_table_xentry, cnt);
+	/* Return zero if table is empty */
+	if (*cnt > 0)
+		(*cnt) += sizeof(ipfw_xtable);
+	return (0);
+}
+
+
+/*
+ * rnh_walktree() callback: export one IPv4 entry in the extended
+ * (ipfw_table_xentry) format.  Returns 1 to abort the walk when the
+ * buffer in 'arg' is full.
+ */
+static int
+dump_table_xentry_base(struct radix_node *rn, void *arg)
+{
+	struct table_entry * const n = (struct table_entry *)rn;
+	ipfw_xtable * const tbl = arg;
+	ipfw_table_xentry *xent;
+
+	/* Out of memory, returning */
+	if (tbl->cnt == tbl->size)
+		return (1);
+	xent = &tbl->xent[tbl->cnt];
+	xent->len = sizeof(ipfw_table_xentry);
+	xent->tbl = tbl->tbl;
+	if (in_nullhost(n->mask.sin_addr))
+		xent->masklen = 0;
+	else
+		/* Contiguous mask: 33 - lowest set bit = prefix length */
+		xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
+	/* Save IPv4 address as deprecated IPv6 compatible */
+	xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
+	xent->value = n->value;
+	tbl->cnt++;
+	return (0);
+}
+
+/*
+ * rnh_walktree() callback: export one extended (IPv6 or interface)
+ * entry in ipfw_table_xentry format.  Returns 1 to abort the walk when
+ * the buffer in 'arg' is full; unknown entry kinds are skipped.
+ */
+static int
+dump_table_xentry_extended(struct radix_node *rn, void *arg)
+{
+	struct table_xentry * const n = (struct table_xentry *)rn;
+	ipfw_xtable * const tbl = arg;
+	ipfw_table_xentry *xent;
+#ifdef INET6
+	int i;
+	uint32_t *v;
+#endif
+	/* Out of memory, returning */
+	if (tbl->cnt == tbl->size)
+		return (1);
+	xent = &tbl->xent[tbl->cnt];
+	xent->len = sizeof(ipfw_table_xentry);
+	xent->tbl = tbl->tbl;
+
+	switch (tbl->type) {
+#ifdef INET6
+	case IPFW_TABLE_CIDR:
+		/* Count IPv6 mask */
+		/* NOTE(review): masklen is only accumulated, never reset;
+		 * assumes the caller supplied a zeroed buffer -- confirm */
+		v = (uint32_t *)&n->m.mask6.sin6_addr;
+		for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
+			xent->masklen += bitcount32(*v);
+		memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
+		break;
+#endif
+	case IPFW_TABLE_INTERFACE:
+		/* Assume exact mask */
+		xent->masklen = 8 * IF_NAMESIZE;
+		memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
+		break;
+
+	default:
+		/* unknown, skip entry */
+		return (0);
+	}
+
+	xent->value = n->value;
+	tbl->cnt++;
+	return (0);
+}
+
+/*
+ * Fill the caller-provided buffer (capacity tbl->size entries) with
+ * the contents of table tbl->tbl in extended format, walking the IPv4
+ * tree first and then the extended tree.  tbl->cnt receives the entry
+ * count and tbl->type the table's kind.  Returns 0 or EINVAL.
+ */
+int
+ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
+{
+	struct radix_node_head *rnh;
+
+	if (tbl->tbl >= V_fw_tables_max)
+		return (EINVAL);
+	tbl->cnt = 0;
+	tbl->type = ch->tabletype[tbl->tbl];
+	if ((rnh = ch->tables[tbl->tbl]) != NULL)
+		rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
+	if ((rnh = ch->xtables[tbl->tbl]) != NULL)
+		rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);
+	return (0);
+}
+
+/* end of file */
diff --git a/freebsd/sys/nios2/include/machine/in_cksum.h b/freebsd/sys/nios2/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/nios2/include/machine/in_cksum.h
+++ b/freebsd/sys/nios2/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/nios2/include/machine/pci_cfgreg.h b/freebsd/sys/nios2/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/nios2/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/nios2/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/nios2/nios2/legacy.c b/freebsd/sys/nios2/nios2/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/nios2/nios2/legacy.c
+++ b/freebsd/sys/nios2/nios2/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/nios2/pci/pci_bus.c b/freebsd/sys/nios2/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/nios2/pci/pci_bus.c
+++ b/freebsd/sys/nios2/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+/*
+ * Route a PCI interrupt pin for 'dev' via the PIR BIOS table when the
+ * platform provides one (__HAVE_PIR); otherwise report that no routing
+ * is possible by returning PCI_INVALID_IRQ.
+ */
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+	return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+	    pci_get_function(dev), pin));
+#else
+	/* No routing possible */
+	return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+	/* Only adjust wildcard requests: an exact request has a range
+	 * exactly as wide as the count */
+	if (start + count - 1 != end) {
+		if (type == SYS_RES_MEMORY && start < host_mem_start)
+			start = host_mem_start;
+		if (type == SYS_RES_IOPORT && start < 0x1000)
+			start = 0x1000;
+	}
+	return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/opencrypto/cryptodev.c b/freebsd/sys/opencrypto/cryptodev.c
deleted file mode 100644
index 441a6829..00000000
--- a/freebsd/sys/opencrypto/cryptodev.c
+++ /dev/null
@@ -1,1178 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $OpenBSD: cryptodev.c,v 1.52 2002/06/19 07:22:46 deraadt Exp $ */
-
-/*-
- * Copyright (c) 2001 Theo de Raadt
- * Copyright (c) 2002-2006 Sam Leffler, Errno Consulting
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Effort sponsored in part by the Defense Advanced Research Projects
- * Agency (DARPA) and Air Force Research Laboratory, Air Force
- * Materiel Command, USAF, under agreement number F30602-01-2-0537.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <rtems/bsd/local/opt_compat.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/sysctl.h>
-#include <sys/file.h>
-#include <sys/filedesc.h>
-#include <rtems/bsd/sys/errno.h>
-#include <sys/uio.h>
-#include <sys/random.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/fcntl.h>
-#include <sys/bus.h>
-
-#include <opencrypto/cryptodev.h>
-#include <opencrypto/xform.h>
-
-#ifdef COMPAT_FREEBSD32
-#include <sys/mount.h>
-#include <compat/freebsd32/freebsd32.h>
-
-struct session_op32 {
- u_int32_t cipher;
- u_int32_t mac;
- u_int32_t keylen;
- u_int32_t key;
- int mackeylen;
- u_int32_t mackey;
- u_int32_t ses;
-};
-
-struct session2_op32 {
- u_int32_t cipher;
- u_int32_t mac;
- u_int32_t keylen;
- u_int32_t key;
- int mackeylen;
- u_int32_t mackey;
- u_int32_t ses;
- int crid;
- int pad[4];
-};
-
-struct crypt_op32 {
- u_int32_t ses;
- u_int16_t op;
- u_int16_t flags;
- u_int len;
- u_int32_t src, dst;
- u_int32_t mac;
- u_int32_t iv;
-};
-
-struct crparam32 {
- u_int32_t crp_p;
- u_int crp_nbits;
-};
-
-struct crypt_kop32 {
- u_int crk_op;
- u_int crk_status;
- u_short crk_iparams;
- u_short crk_oparams;
- u_int crk_crid;
- struct crparam32 crk_param[CRK_MAXPARAM];
-};
-
-struct cryptotstat32 {
- struct timespec32 acc;
- struct timespec32 min;
- struct timespec32 max;
- u_int32_t count;
-};
-
-struct cryptostats32 {
- u_int32_t cs_ops;
- u_int32_t cs_errs;
- u_int32_t cs_kops;
- u_int32_t cs_kerrs;
- u_int32_t cs_intrs;
- u_int32_t cs_rets;
- u_int32_t cs_blocks;
- u_int32_t cs_kblocks;
- struct cryptotstat32 cs_invoke;
- struct cryptotstat32 cs_done;
- struct cryptotstat32 cs_cb;
- struct cryptotstat32 cs_finis;
-};
-
-#define CIOCGSESSION32 _IOWR('c', 101, struct session_op32)
-#define CIOCCRYPT32 _IOWR('c', 103, struct crypt_op32)
-#define CIOCKEY32 _IOWR('c', 104, struct crypt_kop32)
-#define CIOCGSESSION232 _IOWR('c', 106, struct session2_op32)
-#define CIOCKEY232 _IOWR('c', 107, struct crypt_kop32)
-
-static void
-session_op_from_32(const struct session_op32 *from, struct session_op *to)
-{
-
- CP(*from, *to, cipher);
- CP(*from, *to, mac);
- CP(*from, *to, keylen);
- PTRIN_CP(*from, *to, key);
- CP(*from, *to, mackeylen);
- PTRIN_CP(*from, *to, mackey);
- CP(*from, *to, ses);
-}
-
-static void
-session2_op_from_32(const struct session2_op32 *from, struct session2_op *to)
-{
-
- session_op_from_32((const struct session_op32 *)from,
- (struct session_op *)to);
- CP(*from, *to, crid);
-}
-
-static void
-session_op_to_32(const struct session_op *from, struct session_op32 *to)
-{
-
- CP(*from, *to, cipher);
- CP(*from, *to, mac);
- CP(*from, *to, keylen);
- PTROUT_CP(*from, *to, key);
- CP(*from, *to, mackeylen);
- PTROUT_CP(*from, *to, mackey);
- CP(*from, *to, ses);
-}
-
-static void
-session2_op_to_32(const struct session2_op *from, struct session2_op32 *to)
-{
-
- session_op_to_32((const struct session_op *)from,
- (struct session_op32 *)to);
- CP(*from, *to, crid);
-}
-
-static void
-crypt_op_from_32(const struct crypt_op32 *from, struct crypt_op *to)
-{
-
- CP(*from, *to, ses);
- CP(*from, *to, op);
- CP(*from, *to, flags);
- CP(*from, *to, len);
- PTRIN_CP(*from, *to, src);
- PTRIN_CP(*from, *to, dst);
- PTRIN_CP(*from, *to, mac);
- PTRIN_CP(*from, *to, iv);
-}
-
-static void
-crypt_op_to_32(const struct crypt_op *from, struct crypt_op32 *to)
-{
-
- CP(*from, *to, ses);
- CP(*from, *to, op);
- CP(*from, *to, flags);
- CP(*from, *to, len);
- PTROUT_CP(*from, *to, src);
- PTROUT_CP(*from, *to, dst);
- PTROUT_CP(*from, *to, mac);
- PTROUT_CP(*from, *to, iv);
-}
-
-static void
-crparam_from_32(const struct crparam32 *from, struct crparam *to)
-{
-
- PTRIN_CP(*from, *to, crp_p);
- CP(*from, *to, crp_nbits);
-}
-
-static void
-crparam_to_32(const struct crparam *from, struct crparam32 *to)
-{
-
- PTROUT_CP(*from, *to, crp_p);
- CP(*from, *to, crp_nbits);
-}
-
-static void
-crypt_kop_from_32(const struct crypt_kop32 *from, struct crypt_kop *to)
-{
- int i;
-
- CP(*from, *to, crk_op);
- CP(*from, *to, crk_status);
- CP(*from, *to, crk_iparams);
- CP(*from, *to, crk_oparams);
- CP(*from, *to, crk_crid);
- for (i = 0; i < CRK_MAXPARAM; i++)
- crparam_from_32(&from->crk_param[i], &to->crk_param[i]);
-}
-
-static void
-crypt_kop_to_32(const struct crypt_kop *from, struct crypt_kop32 *to)
-{
- int i;
-
- CP(*from, *to, crk_op);
- CP(*from, *to, crk_status);
- CP(*from, *to, crk_iparams);
- CP(*from, *to, crk_oparams);
- CP(*from, *to, crk_crid);
- for (i = 0; i < CRK_MAXPARAM; i++)
- crparam_to_32(&from->crk_param[i], &to->crk_param[i]);
-}
-#endif
-
-struct csession {
- TAILQ_ENTRY(csession) next;
- u_int64_t sid;
- u_int32_t ses;
- struct mtx lock; /* for op submission */
-
- u_int32_t cipher;
- struct enc_xform *txform;
- u_int32_t mac;
- struct auth_hash *thash;
-
- caddr_t key;
- int keylen;
- u_char tmp_iv[EALG_MAX_BLOCK_LEN];
-
- caddr_t mackey;
- int mackeylen;
-
- struct iovec iovec;
- struct uio uio;
- int error;
-};
-
-struct fcrypt {
- TAILQ_HEAD(csessionlist, csession) csessions;
- int sesn;
-};
-
-static int cryptof_rw(struct file *fp, struct uio *uio,
- struct ucred *cred, int flags, struct thread *);
-static int cryptof_truncate(struct file *, off_t, struct ucred *,
- struct thread *);
-static int cryptof_ioctl(struct file *, u_long, void *,
- struct ucred *, struct thread *);
-static int cryptof_poll(struct file *, int, struct ucred *, struct thread *);
-static int cryptof_kqfilter(struct file *, struct knote *);
-static int cryptof_stat(struct file *, struct stat *,
- struct ucred *, struct thread *);
-static int cryptof_close(struct file *, struct thread *);
-
-static struct fileops cryptofops = {
- .fo_read = cryptof_rw,
- .fo_write = cryptof_rw,
- .fo_truncate = cryptof_truncate,
- .fo_ioctl = cryptof_ioctl,
- .fo_poll = cryptof_poll,
- .fo_kqfilter = cryptof_kqfilter,
- .fo_stat = cryptof_stat,
- .fo_close = cryptof_close
-};
-
-static struct csession *csefind(struct fcrypt *, u_int);
-static int csedelete(struct fcrypt *, struct csession *);
-static struct csession *cseadd(struct fcrypt *, struct csession *);
-static struct csession *csecreate(struct fcrypt *, u_int64_t, caddr_t,
- u_int64_t, caddr_t, u_int64_t, u_int32_t, u_int32_t, struct enc_xform *,
- struct auth_hash *);
-static int csefree(struct csession *);
-
-static int cryptodev_op(struct csession *, struct crypt_op *,
- struct ucred *, struct thread *td);
-static int cryptodev_key(struct crypt_kop *);
-static int cryptodev_find(struct crypt_find_op *);
-
-static int
-cryptof_rw(
- struct file *fp,
- struct uio *uio,
- struct ucred *active_cred,
- int flags,
- struct thread *td)
-{
-
- return (EIO);
-}
-
-static int
-cryptof_truncate(
- struct file *fp,
- off_t length,
- struct ucred *active_cred,
- struct thread *td)
-{
-
- return (EINVAL);
-}
-
-/*
- * Check a crypto identifier to see if it requested
- * a software device/driver. This can be done either
- * by device name/class or through search constraints.
- */
-static int
-checkforsoftware(int crid)
-{
- if (crid & CRYPTOCAP_F_SOFTWARE)
- return EINVAL; /* XXX */
- if ((crid & CRYPTOCAP_F_HARDWARE) == 0 &&
- (crypto_getcaps(crid) & CRYPTOCAP_F_HARDWARE) == 0)
- return EINVAL; /* XXX */
- return 0;
-}
-
-/* ARGSUSED */
-static int
-cryptof_ioctl(
- struct file *fp,
- u_long cmd,
- void *data,
- struct ucred *active_cred,
- struct thread *td)
-{
-#define SES2(p) ((struct session2_op *)p)
- struct cryptoini cria, crie;
- struct fcrypt *fcr = fp->f_data;
- struct csession *cse;
- struct session_op *sop;
- struct crypt_op *cop;
- struct enc_xform *txform = NULL;
- struct auth_hash *thash = NULL;
- struct crypt_kop *kop;
- u_int64_t sid;
- u_int32_t ses;
- int error = 0, crid;
-#ifdef COMPAT_FREEBSD32
- struct session2_op sopc;
- struct crypt_op copc;
- struct crypt_kop kopc;
-#endif
-
- switch (cmd) {
- case CIOCGSESSION:
- case CIOCGSESSION2:
-#ifdef COMPAT_FREEBSD32
- case CIOCGSESSION32:
- case CIOCGSESSION232:
- if (cmd == CIOCGSESSION32) {
- session_op_from_32(data, (struct session_op *)&sopc);
- sop = (struct session_op *)&sopc;
- } else if (cmd == CIOCGSESSION232) {
- session2_op_from_32(data, &sopc);
- sop = (struct session_op *)&sopc;
- } else
-#endif
- sop = (struct session_op *)data;
- switch (sop->cipher) {
- case 0:
- break;
- case CRYPTO_DES_CBC:
- txform = &enc_xform_des;
- break;
- case CRYPTO_3DES_CBC:
- txform = &enc_xform_3des;
- break;
- case CRYPTO_BLF_CBC:
- txform = &enc_xform_blf;
- break;
- case CRYPTO_CAST_CBC:
- txform = &enc_xform_cast5;
- break;
- case CRYPTO_SKIPJACK_CBC:
- txform = &enc_xform_skipjack;
- break;
- case CRYPTO_AES_CBC:
- txform = &enc_xform_rijndael128;
- break;
- case CRYPTO_AES_XTS:
- txform = &enc_xform_aes_xts;
- break;
- case CRYPTO_NULL_CBC:
- txform = &enc_xform_null;
- break;
- case CRYPTO_ARC4:
- txform = &enc_xform_arc4;
- break;
- case CRYPTO_CAMELLIA_CBC:
- txform = &enc_xform_camellia;
- break;
- default:
- return (EINVAL);
- }
-
- switch (sop->mac) {
- case 0:
- break;
- case CRYPTO_MD5_HMAC:
- thash = &auth_hash_hmac_md5;
- break;
- case CRYPTO_SHA1_HMAC:
- thash = &auth_hash_hmac_sha1;
- break;
- case CRYPTO_SHA2_256_HMAC:
- thash = &auth_hash_hmac_sha2_256;
- break;
- case CRYPTO_SHA2_384_HMAC:
- thash = &auth_hash_hmac_sha2_384;
- break;
- case CRYPTO_SHA2_512_HMAC:
- thash = &auth_hash_hmac_sha2_512;
- break;
- case CRYPTO_RIPEMD160_HMAC:
- thash = &auth_hash_hmac_ripemd_160;
- break;
-#ifdef notdef
- case CRYPTO_MD5:
- thash = &auth_hash_md5;
- break;
- case CRYPTO_SHA1:
- thash = &auth_hash_sha1;
- break;
-#endif
- case CRYPTO_NULL_HMAC:
- thash = &auth_hash_null;
- break;
- default:
- return (EINVAL);
- }
-
- bzero(&crie, sizeof(crie));
- bzero(&cria, sizeof(cria));
-
- if (txform) {
- crie.cri_alg = txform->type;
- crie.cri_klen = sop->keylen * 8;
- if (sop->keylen > txform->maxkey ||
- sop->keylen < txform->minkey) {
- error = EINVAL;
- goto bail;
- }
-
- crie.cri_key = malloc(crie.cri_klen / 8,
- M_XDATA, M_WAITOK);
- if ((error = copyin(sop->key, crie.cri_key,
- crie.cri_klen / 8)))
- goto bail;
- if (thash)
- crie.cri_next = &cria;
- }
-
- if (thash) {
- cria.cri_alg = thash->type;
- cria.cri_klen = sop->mackeylen * 8;
- if (sop->mackeylen != thash->keysize) {
- error = EINVAL;
- goto bail;
- }
-
- if (cria.cri_klen) {
- cria.cri_key = malloc(cria.cri_klen / 8,
- M_XDATA, M_WAITOK);
- if ((error = copyin(sop->mackey, cria.cri_key,
- cria.cri_klen / 8)))
- goto bail;
- }
- }
-
- /* NB: CIOGSESSION2 has the crid */
- if (cmd == CIOCGSESSION2
-#ifdef COMPAT_FREEBSD32
- || cmd == CIOCGSESSION232
-#endif
- ) {
- crid = SES2(sop)->crid;
- error = checkforsoftware(crid);
- if (error)
- goto bail;
- } else
- crid = CRYPTOCAP_F_HARDWARE;
- error = crypto_newsession(&sid, (txform ? &crie : &cria), crid);
- if (error)
- goto bail;
-
- cse = csecreate(fcr, sid, crie.cri_key, crie.cri_klen,
- cria.cri_key, cria.cri_klen, sop->cipher, sop->mac, txform,
- thash);
-
- if (cse == NULL) {
- crypto_freesession(sid);
- error = EINVAL;
- goto bail;
- }
- sop->ses = cse->ses;
- if (cmd == CIOCGSESSION2
-#ifdef COMPAT_FREEBSD32
- || cmd == CIOCGSESSION232
-#endif
- ) {
- /* return hardware/driver id */
- SES2(sop)->crid = CRYPTO_SESID2HID(cse->sid);
- }
-bail:
- if (error) {
- if (crie.cri_key)
- free(crie.cri_key, M_XDATA);
- if (cria.cri_key)
- free(cria.cri_key, M_XDATA);
- }
-#ifdef COMPAT_FREEBSD32
- else {
- if (cmd == CIOCGSESSION32)
- session_op_to_32(sop, data);
- else if (cmd == CIOCGSESSION232)
- session2_op_to_32((struct session2_op *)sop,
- data);
- }
-#endif
- break;
- case CIOCFSESSION:
- ses = *(u_int32_t *)data;
- cse = csefind(fcr, ses);
- if (cse == NULL)
- return (EINVAL);
- csedelete(fcr, cse);
- error = csefree(cse);
- break;
- case CIOCCRYPT:
-#ifdef COMPAT_FREEBSD32
- case CIOCCRYPT32:
- if (cmd == CIOCCRYPT32) {
- cop = &copc;
- crypt_op_from_32(data, cop);
- } else
-#endif
- cop = (struct crypt_op *)data;
- cse = csefind(fcr, cop->ses);
- if (cse == NULL)
- return (EINVAL);
- error = cryptodev_op(cse, cop, active_cred, td);
-#ifdef COMPAT_FREEBSD32
- if (error == 0 && cmd == CIOCCRYPT32)
- crypt_op_to_32(cop, data);
-#endif
- break;
- case CIOCKEY:
- case CIOCKEY2:
-#ifdef COMPAT_FREEBSD32
- case CIOCKEY32:
- case CIOCKEY232:
-#endif
- if (!crypto_userasymcrypto)
- return (EPERM); /* XXX compat? */
-#ifdef COMPAT_FREEBSD32
- if (cmd == CIOCKEY32 || cmd == CIOCKEY232) {
- kop = &kopc;
- crypt_kop_from_32(data, kop);
- } else
-#endif
- kop = (struct crypt_kop *)data;
- if (cmd == CIOCKEY
-#ifdef COMPAT_FREEBSD32
- || cmd == CIOCKEY32
-#endif
- ) {
- /* NB: crypto core enforces s/w driver use */
- kop->crk_crid =
- CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE;
- }
- mtx_lock(&Giant);
- error = cryptodev_key(kop);
- mtx_unlock(&Giant);
-#ifdef COMPAT_FREEBSD32
- if (cmd == CIOCKEY32 || cmd == CIOCKEY232)
- crypt_kop_to_32(kop, data);
-#endif
- break;
- case CIOCASYMFEAT:
- if (!crypto_userasymcrypto) {
- /*
- * NB: if user asym crypto operations are
- * not permitted return "no algorithms"
- * so well-behaved applications will just
- * fallback to doing them in software.
- */
- *(int *)data = 0;
- } else
- error = crypto_getfeat((int *)data);
- break;
- case CIOCFINDDEV:
- error = cryptodev_find((struct crypt_find_op *)data);
- break;
- default:
- error = EINVAL;
- break;
- }
- return (error);
-#undef SES2
-}
-
-static int cryptodev_cb(void *);
-
-
-static int
-cryptodev_op(
- struct csession *cse,
- struct crypt_op *cop,
- struct ucred *active_cred,
- struct thread *td)
-{
- struct cryptop *crp = NULL;
- struct cryptodesc *crde = NULL, *crda = NULL;
- int error;
-
- if (cop->len > 256*1024-4)
- return (E2BIG);
-
- if (cse->txform) {
- if (cop->len == 0 || (cop->len % cse->txform->blocksize) != 0)
- return (EINVAL);
- }
-
- cse->uio.uio_iov = &cse->iovec;
- cse->uio.uio_iovcnt = 1;
- cse->uio.uio_offset = 0;
- cse->uio.uio_resid = cop->len;
- cse->uio.uio_segflg = UIO_SYSSPACE;
- cse->uio.uio_rw = UIO_WRITE;
- cse->uio.uio_td = td;
- cse->uio.uio_iov[0].iov_len = cop->len;
- if (cse->thash) {
- cse->uio.uio_iov[0].iov_len += cse->thash->hashsize;
- cse->uio.uio_resid += cse->thash->hashsize;
- }
- cse->uio.uio_iov[0].iov_base = malloc(cse->uio.uio_iov[0].iov_len,
- M_XDATA, M_WAITOK);
-
- crp = crypto_getreq((cse->txform != NULL) + (cse->thash != NULL));
- if (crp == NULL) {
- error = ENOMEM;
- goto bail;
- }
-
- if (cse->thash) {
- crda = crp->crp_desc;
- if (cse->txform)
- crde = crda->crd_next;
- } else {
- if (cse->txform)
- crde = crp->crp_desc;
- else {
- error = EINVAL;
- goto bail;
- }
- }
-
- if ((error = copyin(cop->src, cse->uio.uio_iov[0].iov_base, cop->len)))
- goto bail;
-
- if (crda) {
- crda->crd_skip = 0;
- crda->crd_len = cop->len;
- crda->crd_inject = cop->len;
-
- crda->crd_alg = cse->mac;
- crda->crd_key = cse->mackey;
- crda->crd_klen = cse->mackeylen * 8;
- }
-
- if (crde) {
- if (cop->op == COP_ENCRYPT)
- crde->crd_flags |= CRD_F_ENCRYPT;
- else
- crde->crd_flags &= ~CRD_F_ENCRYPT;
- crde->crd_len = cop->len;
- crde->crd_inject = 0;
-
- crde->crd_alg = cse->cipher;
- crde->crd_key = cse->key;
- crde->crd_klen = cse->keylen * 8;
- }
-
- crp->crp_ilen = cop->len;
- crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIMM
- | (cop->flags & COP_F_BATCH);
- crp->crp_buf = (caddr_t)&cse->uio;
- crp->crp_callback = (int (*) (struct cryptop *)) cryptodev_cb;
- crp->crp_sid = cse->sid;
- crp->crp_opaque = (void *)cse;
-
- if (cop->iv) {
- if (crde == NULL) {
- error = EINVAL;
- goto bail;
- }
- if (cse->cipher == CRYPTO_ARC4) { /* XXX use flag? */
- error = EINVAL;
- goto bail;
- }
- if ((error = copyin(cop->iv, cse->tmp_iv, cse->txform->blocksize)))
- goto bail;
- bcopy(cse->tmp_iv, crde->crd_iv, cse->txform->blocksize);
- crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
- crde->crd_skip = 0;
- } else if (cse->cipher == CRYPTO_ARC4) { /* XXX use flag? */
- crde->crd_skip = 0;
- } else if (crde) {
- crde->crd_flags |= CRD_F_IV_PRESENT;
- crde->crd_skip = cse->txform->blocksize;
- crde->crd_len -= cse->txform->blocksize;
- }
-
- if (cop->mac && crda == NULL) {
- error = EINVAL;
- goto bail;
- }
-
- /*
- * Let the dispatch run unlocked, then, interlock against the
- * callback before checking if the operation completed and going
- * to sleep. This insures drivers don't inherit our lock which
- * results in a lock order reversal between crypto_dispatch forced
- * entry and the crypto_done callback into us.
- */
- error = crypto_dispatch(crp);
- mtx_lock(&cse->lock);
- if (error == 0 && (crp->crp_flags & CRYPTO_F_DONE) == 0)
- error = msleep(crp, &cse->lock, PWAIT, "crydev", 0);
- mtx_unlock(&cse->lock);
-
- if (error != 0)
- goto bail;
-
- if (crp->crp_etype != 0) {
- error = crp->crp_etype;
- goto bail;
- }
-
- if (cse->error) {
- error = cse->error;
- goto bail;
- }
-
- if (cop->dst &&
- (error = copyout(cse->uio.uio_iov[0].iov_base, cop->dst, cop->len)))
- goto bail;
-
- if (cop->mac &&
- (error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + cop->len,
- cop->mac, cse->thash->hashsize)))
- goto bail;
-
-bail:
- if (crp)
- crypto_freereq(crp);
- if (cse->uio.uio_iov[0].iov_base)
- free(cse->uio.uio_iov[0].iov_base, M_XDATA);
-
- return (error);
-}
-
-static int
-cryptodev_cb(void *op)
-{
- struct cryptop *crp = (struct cryptop *) op;
- struct csession *cse = (struct csession *)crp->crp_opaque;
- int error;
-
- error = crp->crp_etype;
- if (error == EAGAIN)
- error = crypto_dispatch(crp);
- mtx_lock(&cse->lock);
- if (error != 0 || (crp->crp_flags & CRYPTO_F_DONE)) {
- cse->error = error;
- wakeup_one(crp);
- }
- mtx_unlock(&cse->lock);
- return (0);
-}
-
-static int
-cryptodevkey_cb(void *op)
-{
- struct cryptkop *krp = (struct cryptkop *) op;
-
- wakeup_one(krp);
- return (0);
-}
-
-static int
-cryptodev_key(struct crypt_kop *kop)
-{
- struct cryptkop *krp = NULL;
- int error = EINVAL;
- int in, out, size, i;
-
- if (kop->crk_iparams + kop->crk_oparams > CRK_MAXPARAM) {
- return (EFBIG);
- }
-
- in = kop->crk_iparams;
- out = kop->crk_oparams;
- switch (kop->crk_op) {
- case CRK_MOD_EXP:
- if (in == 3 && out == 1)
- break;
- return (EINVAL);
- case CRK_MOD_EXP_CRT:
- if (in == 6 && out == 1)
- break;
- return (EINVAL);
- case CRK_DSA_SIGN:
- if (in == 5 && out == 2)
- break;
- return (EINVAL);
- case CRK_DSA_VERIFY:
- if (in == 7 && out == 0)
- break;
- return (EINVAL);
- case CRK_DH_COMPUTE_KEY:
- if (in == 3 && out == 1)
- break;
- return (EINVAL);
- default:
- return (EINVAL);
- }
-
- krp = (struct cryptkop *)malloc(sizeof *krp, M_XDATA, M_WAITOK|M_ZERO);
- if (!krp)
- return (ENOMEM);
- krp->krp_op = kop->crk_op;
- krp->krp_status = kop->crk_status;
- krp->krp_iparams = kop->crk_iparams;
- krp->krp_oparams = kop->crk_oparams;
- krp->krp_crid = kop->crk_crid;
- krp->krp_status = 0;
- krp->krp_callback = (int (*) (struct cryptkop *)) cryptodevkey_cb;
-
- for (i = 0; i < CRK_MAXPARAM; i++) {
- if (kop->crk_param[i].crp_nbits > 65536)
- /* Limit is the same as in OpenBSD */
- goto fail;
- krp->krp_param[i].crp_nbits = kop->crk_param[i].crp_nbits;
- }
- for (i = 0; i < krp->krp_iparams + krp->krp_oparams; i++) {
- size = (krp->krp_param[i].crp_nbits + 7) / 8;
- if (size == 0)
- continue;
- krp->krp_param[i].crp_p = malloc(size, M_XDATA, M_WAITOK);
- if (i >= krp->krp_iparams)
- continue;
- error = copyin(kop->crk_param[i].crp_p, krp->krp_param[i].crp_p, size);
- if (error)
- goto fail;
- }
-
- error = crypto_kdispatch(krp);
- if (error)
- goto fail;
- error = tsleep(krp, PSOCK, "crydev", 0);
- if (error) {
- /* XXX can this happen? if so, how do we recover? */
- goto fail;
- }
-
- kop->crk_crid = krp->krp_crid; /* device that did the work */
- if (krp->krp_status != 0) {
- error = krp->krp_status;
- goto fail;
- }
-
- for (i = krp->krp_iparams; i < krp->krp_iparams + krp->krp_oparams; i++) {
- size = (krp->krp_param[i].crp_nbits + 7) / 8;
- if (size == 0)
- continue;
- error = copyout(krp->krp_param[i].crp_p, kop->crk_param[i].crp_p, size);
- if (error)
- goto fail;
- }
-
-fail:
- if (krp) {
- kop->crk_status = krp->krp_status;
- for (i = 0; i < CRK_MAXPARAM; i++) {
- if (krp->krp_param[i].crp_p)
- free(krp->krp_param[i].crp_p, M_XDATA);
- }
- free(krp, M_XDATA);
- }
- return (error);
-}
-
-static int
-cryptodev_find(struct crypt_find_op *find)
-{
- device_t dev;
-
- if (find->crid != -1) {
- dev = crypto_find_device_byhid(find->crid);
- if (dev == NULL)
- return (ENOENT);
- strlcpy(find->name, device_get_nameunit(dev),
- sizeof(find->name));
- } else {
- find->crid = crypto_find_driver(find->name);
- if (find->crid == -1)
- return (ENOENT);
- }
- return (0);
-}
-
-/* ARGSUSED */
-static int
-cryptof_poll(
- struct file *fp,
- int events,
- struct ucred *active_cred,
- struct thread *td)
-{
-
- return (0);
-}
-
-/* ARGSUSED */
-static int
-cryptof_kqfilter(struct file *fp, struct knote *kn)
-{
-
- return (0);
-}
-
-/* ARGSUSED */
-static int
-cryptof_stat(
- struct file *fp,
- struct stat *sb,
- struct ucred *active_cred,
- struct thread *td)
-{
-
- return (EOPNOTSUPP);
-}
-
-/* ARGSUSED */
-static int
-cryptof_close(struct file *fp, struct thread *td)
-{
- struct fcrypt *fcr = fp->f_data;
- struct csession *cse;
-
- while ((cse = TAILQ_FIRST(&fcr->csessions))) {
- TAILQ_REMOVE(&fcr->csessions, cse, next);
- (void)csefree(cse);
- }
- free(fcr, M_XDATA);
- fp->f_data = NULL;
- return 0;
-}
-
-static struct csession *
-csefind(struct fcrypt *fcr, u_int ses)
-{
- struct csession *cse;
-
- TAILQ_FOREACH(cse, &fcr->csessions, next)
- if (cse->ses == ses)
- return (cse);
- return (NULL);
-}
-
-static int
-csedelete(struct fcrypt *fcr, struct csession *cse_del)
-{
- struct csession *cse;
-
- TAILQ_FOREACH(cse, &fcr->csessions, next) {
- if (cse == cse_del) {
- TAILQ_REMOVE(&fcr->csessions, cse, next);
- return (1);
- }
- }
- return (0);
-}
-
-static struct csession *
-cseadd(struct fcrypt *fcr, struct csession *cse)
-{
- TAILQ_INSERT_TAIL(&fcr->csessions, cse, next);
- cse->ses = fcr->sesn++;
- return (cse);
-}
-
-struct csession *
-csecreate(struct fcrypt *fcr, u_int64_t sid, caddr_t key, u_int64_t keylen,
- caddr_t mackey, u_int64_t mackeylen, u_int32_t cipher, u_int32_t mac,
- struct enc_xform *txform, struct auth_hash *thash)
-{
- struct csession *cse;
-
-#ifdef INVARIANTS
- /* NB: required when mtx_init is built with INVARIANTS */
- cse = malloc(sizeof(struct csession), M_XDATA, M_NOWAIT | M_ZERO);
-#else
- cse = malloc(sizeof(struct csession), M_XDATA, M_NOWAIT);
-#endif
- if (cse == NULL)
- return NULL;
- mtx_init(&cse->lock, "cryptodev", "crypto session lock", MTX_DEF);
- cse->key = key;
- cse->keylen = keylen/8;
- cse->mackey = mackey;
- cse->mackeylen = mackeylen/8;
- cse->sid = sid;
- cse->cipher = cipher;
- cse->mac = mac;
- cse->txform = txform;
- cse->thash = thash;
- cseadd(fcr, cse);
- return (cse);
-}
-
-static int
-csefree(struct csession *cse)
-{
- int error;
-
- error = crypto_freesession(cse->sid);
- mtx_destroy(&cse->lock);
- if (cse->key)
- free(cse->key, M_XDATA);
- if (cse->mackey)
- free(cse->mackey, M_XDATA);
- free(cse, M_XDATA);
- return (error);
-}
-
-static int
-cryptoopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- return (0);
-}
-
-static int
-cryptoread(struct cdev *dev, struct uio *uio, int ioflag)
-{
- return (EIO);
-}
-
-static int
-cryptowrite(struct cdev *dev, struct uio *uio, int ioflag)
-{
- return (EIO);
-}
-
-static int
-cryptoioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
-{
- struct file *f;
- struct fcrypt *fcr;
- int fd, error;
-
- switch (cmd) {
- case CRIOGET:
- fcr = malloc(sizeof(struct fcrypt), M_XDATA, M_WAITOK);
- TAILQ_INIT(&fcr->csessions);
- fcr->sesn = 0;
-
- error = falloc(td, &f, &fd);
-
- if (error) {
- free(fcr, M_XDATA);
- return (error);
- }
- /* falloc automatically provides an extra reference to 'f'. */
- finit(f, FREAD | FWRITE, DTYPE_CRYPTO, fcr, &cryptofops);
- *(u_int32_t *)data = fd;
- fdrop(f, td);
- break;
- case CRIOFINDDEV:
- error = cryptodev_find((struct crypt_find_op *)data);
- break;
- case CRIOASYMFEAT:
- error = crypto_getfeat((int *)data);
- break;
- default:
- error = EINVAL;
- break;
- }
- return (error);
-}
-
-static struct cdevsw crypto_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDGIANT,
- .d_open = cryptoopen,
- .d_read = cryptoread,
- .d_write = cryptowrite,
- .d_ioctl = cryptoioctl,
- .d_name = "crypto",
-};
-static struct cdev *crypto_dev;
-
-/*
- * Initialization code, both for static and dynamic loading.
- */
-static int
-cryptodev_modevent(module_t mod, int type, void *unused)
-{
- switch (type) {
- case MOD_LOAD:
- if (bootverbose)
- printf("crypto: <crypto device>\n");
- crypto_dev = make_dev(&crypto_cdevsw, 0,
- UID_ROOT, GID_WHEEL, 0666,
- "crypto");
- return 0;
- case MOD_UNLOAD:
- /*XXX disallow if active sessions */
- destroy_dev(crypto_dev);
- return 0;
- }
- return EINVAL;
-}
-
-static moduledata_t cryptodev_mod = {
- "cryptodev",
- cryptodev_modevent,
- 0
-};
-MODULE_VERSION(cryptodev, 1);
-DECLARE_MODULE(cryptodev, cryptodev_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_DEPEND(cryptodev, crypto, 1, 1, 1);
-MODULE_DEPEND(cryptodev, zlib, 1, 1, 1);
diff --git a/freebsd/sys/powerpc/include/machine/cpufunc.h b/freebsd/sys/powerpc/include/machine/cpufunc.h
index f028b62c..425fd1ca 100644
--- a/freebsd/sys/powerpc/include/machine/cpufunc.h
+++ b/freebsd/sys/powerpc/include/machine/cpufunc.h
@@ -68,6 +68,15 @@ mtmsr(register_t value)
__asm __volatile ("mtmsr %0; isync" :: "r"(value));
}
+#ifdef __powerpc64__
+static __inline void
+mtmsrd(register_t value)
+{
+
+ __asm __volatile ("mtmsrd %0; isync" :: "r"(value));
+}
+#endif
+
static __inline register_t
mfmsr(void)
{
@@ -78,6 +87,7 @@ mfmsr(void)
return (value);
}
+#ifndef __powerpc64__
static __inline void
mtsrin(vm_offset_t va, register_t value)
{
@@ -94,6 +104,18 @@ mfsrin(vm_offset_t va)
return (value);
}
+#endif
+
+static __inline register_t
+mfctrl(void)
+{
+ register_t value;
+
+ __asm __volatile ("mfspr %0,136" : "=r"(value));
+
+ return (value);
+}
+
static __inline void
mtdec(register_t value)
@@ -126,6 +148,9 @@ static __inline u_quad_t
mftb(void)
{
u_quad_t tb;
+ #ifdef __powerpc64__
+ __asm __volatile ("mftb %0" : "=r"(tb));
+ #else
uint32_t *tbup = (uint32_t *)&tb;
uint32_t *tblp = tbup + 1;
@@ -133,6 +158,7 @@ mftb(void)
*tbup = mfspr(TBR_TBU);
*tblp = mfspr(TBR_TBL);
} while (*tbup != mfspr(TBR_TBU));
+ #endif
return (tb);
}
@@ -150,21 +176,21 @@ static __inline void
eieio(void)
{
- __asm __volatile ("eieio");
+ __asm __volatile ("eieio" : : : "memory");
}
static __inline void
isync(void)
{
- __asm __volatile ("isync");
+ __asm __volatile ("isync" : : : "memory");
}
static __inline void
powerpc_sync(void)
{
- __asm __volatile ("sync");
+ __asm __volatile ("sync" : : : "memory");
}
static __inline register_t
diff --git a/freebsd/sys/powerpc/include/machine/in_cksum.h b/freebsd/sys/powerpc/include/machine/in_cksum.h
index 09920062..4fe1b402 100644
--- a/freebsd/sys/powerpc/include/machine/in_cksum.h
+++ b/freebsd/sys/powerpc/include/machine/in_cksum.h
@@ -39,6 +39,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -65,9 +66,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/powerpc/include/machine/pci_cfgreg.h b/freebsd/sys/powerpc/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/powerpc/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/powerpc/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/powerpc/include/machine/psl.h b/freebsd/sys/powerpc/include/machine/psl.h
index ec3975a0..92bfa6ca 100644
--- a/freebsd/sys/powerpc/include/machine/psl.h
+++ b/freebsd/sys/powerpc/include/machine/psl.h
@@ -44,23 +44,23 @@
* FP, FE0, FE1 - reserved, always cleared, setting has no effect.
*
*/
-#define PSL_UCLE 0x04000000 /* User mode cache lock enable */
-#define PSL_SPE 0x02000000 /* SPE enable */
-#define PSL_WE 0x00040000 /* Wait state enable */
-#define PSL_CE 0x00020000 /* Critical interrupt enable */
-#define PSL_EE 0x00008000 /* External interrupt enable */
-#define PSL_PR 0x00004000 /* User mode */
-#define PSL_FP 0x00002000 /* Floating point available */
-#define PSL_ME 0x00001000 /* Machine check interrupt enable */
-#define PSL_FE0 0x00000800 /* Floating point exception mode 0 */
-#define PSL_UBLE 0x00000400 /* BTB lock enable */
-#define PSL_DE 0x00000200 /* Debug interrupt enable */
-#define PSL_FE1 0x00000100 /* Floating point exception mode 1 */
-#define PSL_IS 0x00000020 /* Instruction address space */
-#define PSL_DS 0x00000010 /* Data address space */
-#define PSL_PMM 0x00000004 /* Performance monitor mark */
+#define PSL_UCLE 0x04000000UL /* User mode cache lock enable */
+#define PSL_SPE 0x02000000UL /* SPE enable */
+#define PSL_WE 0x00040000UL /* Wait state enable */
+#define PSL_CE 0x00020000UL /* Critical interrupt enable */
+#define PSL_EE 0x00008000UL /* External interrupt enable */
+#define PSL_PR 0x00004000UL /* User mode */
+#define PSL_FP 0x00002000UL /* Floating point available */
+#define PSL_ME 0x00001000UL /* Machine check interrupt enable */
+#define PSL_FE0 0x00000800UL /* Floating point exception mode 0 */
+#define PSL_UBLE 0x00000400UL /* BTB lock enable */
+#define PSL_DE 0x00000200UL /* Debug interrupt enable */
+#define PSL_FE1 0x00000100UL /* Floating point exception mode 1 */
+#define PSL_IS 0x00000020UL /* Instruction address space */
+#define PSL_DS 0x00000010UL /* Data address space */
+#define PSL_PMM 0x00000004UL /* Performance monitor mark */
-#define PSL_FE_DFLT 0x00000004 /* default: no FP */
+#define PSL_FE_DFLT 0x00000000UL /* default == none */
/* Initial kernel MSR, use IS=1 ad DS=1. */
#define PSL_KERNSET_INIT (PSL_IS | PSL_DS)
@@ -77,22 +77,29 @@
*
* [*] Little-endian mode on the 601 is implemented in the HID0 register.
*/
-#define PSL_VEC 0x02000000 /* AltiVec vector unit available */
-#define PSL_POW 0x00040000 /* power management */
-#define PSL_ILE 0x00010000 /* interrupt endian mode (1 == le) */
-#define PSL_EE 0x00008000 /* external interrupt enable */
-#define PSL_PR 0x00004000 /* privilege mode (1 == user) */
-#define PSL_FP 0x00002000 /* floating point enable */
-#define PSL_ME 0x00001000 /* machine check enable */
-#define PSL_FE0 0x00000800 /* floating point interrupt mode 0 */
-#define PSL_SE 0x00000400 /* single-step trace enable */
-#define PSL_BE 0x00000200 /* branch trace enable */
-#define PSL_FE1 0x00000100 /* floating point interrupt mode 1 */
-#define PSL_IP 0x00000040 /* interrupt prefix */
-#define PSL_IR 0x00000020 /* instruction address relocation */
-#define PSL_DR 0x00000010 /* data address relocation */
-#define PSL_RI 0x00000002 /* recoverable interrupt */
-#define PSL_LE 0x00000001 /* endian mode (1 == le) */
+
+#ifdef __powerpc64__
+#define PSL_SF 0x8000000000000000UL /* 64-bit addressing */
+#define PSL_HV 0x1000000000000000UL /* hyper-privileged mode */
+#endif
+
+#define PSL_VEC 0x02000000UL /* AltiVec vector unit available */
+#define PSL_POW 0x00040000UL /* power management */
+#define PSL_ILE 0x00010000UL /* interrupt endian mode (1 == le) */
+#define PSL_EE 0x00008000UL /* external interrupt enable */
+#define PSL_PR 0x00004000UL /* privilege mode (1 == user) */
+#define PSL_FP 0x00002000UL /* floating point enable */
+#define PSL_ME 0x00001000UL /* machine check enable */
+#define PSL_FE0 0x00000800UL /* floating point interrupt mode 0 */
+#define PSL_SE 0x00000400UL /* single-step trace enable */
+#define PSL_BE 0x00000200UL /* branch trace enable */
+#define PSL_FE1 0x00000100UL /* floating point interrupt mode 1 */
+#define PSL_IP 0x00000040UL /* interrupt prefix */
+#define PSL_IR 0x00000020UL /* instruction address relocation */
+#define PSL_DR 0x00000010UL /* data address relocation */
+#define PSL_PMM 0x00000004UL /* performance monitor mark */
+#define PSL_RI 0x00000002UL /* recoverable interrupt */
+#define PSL_LE 0x00000001UL /* endian mode (1 == le) */
#define PSL_601_MASK ~(PSL_POW|PSL_ILE|PSL_BE|PSL_RI|PSL_LE)
@@ -111,7 +118,11 @@
#define PSL_MBO 0
#define PSL_MBZ 0
+#ifdef __powerpc64__
+#define PSL_KERNSET (PSL_SF | PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI)
+#else
#define PSL_KERNSET (PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI)
+#endif
#define PSL_USERSET (PSL_KERNSET | PSL_PR)
#define PSL_USERSTATIC (PSL_USERSET | PSL_IP | 0x87c0008c)
diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h
index 586a57be..e3569876 100644
--- a/freebsd/sys/powerpc/include/machine/spr.h
+++ b/freebsd/sys/powerpc/include/machine/spr.h
@@ -10,13 +10,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -44,6 +37,9 @@
__asm __volatile("mfspr %0,%1" : "=r"(val) : "K"(reg)); \
val; } )
+
+#ifndef __powerpc64__
+
/* The following routines allow manipulation of the full 64-bit width
* of SPRs on 64 bit CPUs in bridge mode */
@@ -81,6 +77,8 @@
: "=r"(scratch), "=r"(val) : "K"(reg), "r"(32), "r"(1)); \
val; } )
+#endif
+
#endif /* _LOCORE */
/*
@@ -113,6 +111,9 @@
#define SPR_SDR1 0x019 /* .68 Page table base address register */
#define SPR_SRR0 0x01a /* 468 Save/Restore Register 0 */
#define SPR_SRR1 0x01b /* 468 Save/Restore Register 1 */
+#define SRR1_ISI_PFAULT 0x40000000 /* ISI page not found */
+#define SRR1_ISI_NOEXECUTE 0x10000000 /* Memory marked no-execute */
+#define SRR1_ISI_PP 0x08000000 /* PP bits forbid access */
#define SPR_DECAR 0x036 /* ..8 Decrementer auto reload */
#define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */
#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
@@ -150,13 +151,26 @@
#define IBM401E2 0x0025
#define IBM401F2 0x0026
#define IBM401G2 0x0027
+#define IBMRS64II 0x0033
+#define IBMRS64III 0x0034
+#define IBMPOWER4 0x0035
+#define IBMRS64III_2 0x0036
+#define IBMRS64IV 0x0037
+#define IBMPOWER4PLUS 0x0038
#define IBM970 0x0039
+#define IBMPOWER5 0x003a
+#define IBMPOWER5PLUS 0x003b
#define IBM970FX 0x003c
-#define IBMPOWER3 0x0041
+#define IBMPOWER6 0x003e
+#define IBMPOWER7 0x003f
+#define IBMPOWER3 0x0040
+#define IBMPOWER3PLUS 0x0041
#define IBM970MP 0x0044
#define IBM970GX 0x0045
#define MPC860 0x0050
+#define IBMCELLBE 0x0070
#define MPC8240 0x0081
+#define PA6T 0x0090
#define IBM405GP 0x4011
#define IBM405L 0x4161
#define IBM750FX 0x7000
@@ -334,8 +348,8 @@
#define SPR_MMCR0_PMC1CE 0x00008000 /* PMC1 condition enable */
#define SPR_MMCR0_PMCNCE 0x00004000 /* PMCn condition enable */
#define SPR_MMCR0_TRIGGER 0x00002000 /* Trigger */
-#define SPR_MMCR0_PMC1SEL(x) ((x) << 6) /* PMC1 selector */
-#define SPR_MMCR0_PMC2SEL(x) ((x) << 0) /* PMC2 selector */
+#define SPR_MMCR0_PMC1SEL(x) (((x) & 0x3f) << 6) /* PMC1 selector */
+#define SPR_MMCR0_PMC2SEL(x) (((x) & 0x3f) << 0) /* PMC2 selector */
#define SPR_970MMCR0_PMC1SEL(x) ((x) << 8) /* PMC1 selector (970) */
#define SPR_970MMCR0_PMC2SEL(x) ((x) << 1) /* PMC2 selector (970) */
#define SPR_SGR 0x3b9 /* 4.. Storage Guarded Register */
@@ -345,10 +359,10 @@
#define SPR_SLER 0x3bb /* 4.. Storage Little Endian Register */
#define SPR_SIA 0x3bb /* .6. Sampled Instruction Address */
#define SPR_MMCR1 0x3bc /* .6. Monitor Mode Control Register 2 */
-#define SPR_MMCR1_PMC3SEL(x) ((x) << 27) /* PMC 3 selector */
-#define SPR_MMCR1_PMC4SEL(x) ((x) << 22) /* PMC 4 selector */
-#define SPR_MMCR1_PMC5SEL(x) ((x) << 17) /* PMC 5 selector */
-#define SPR_MMCR1_PMC6SEL(x) ((x) << 11) /* PMC 6 selector */
+#define SPR_MMCR1_PMC3SEL(x) (((x) & 0x1f) << 27) /* PMC 3 selector */
+#define SPR_MMCR1_PMC4SEL(x) (((x) & 0x1f) << 22) /* PMC 4 selector */
+#define SPR_MMCR1_PMC5SEL(x) (((x) & 0x1f) << 17) /* PMC 5 selector */
+#define SPR_MMCR1_PMC6SEL(x) (((x) & 0x3f) << 11) /* PMC 6 selector */
#define SPR_SU0R 0x3bc /* 4.. Storage User-defined 0 Register */
#define SPR_PMC3 0x3bd /* .6. Performance Counter Register 3 */
@@ -409,6 +423,10 @@
#define SPR_HID1 0x3f1 /* ..8 Hardware Implementation Register 1 */
#define SPR_HID4 0x3f4 /* ..8 Hardware Implementation Register 4 */
#define SPR_HID5 0x3f6 /* ..8 Hardware Implementation Register 5 */
+#define SPR_HID6 0x3f9 /* ..8 Hardware Implementation Register 6 */
+
+#define SPR_CELL_TSRL 0x380 /* ... Cell BE Thread Status Register */
+#define SPR_CELL_TSCR 0x399 /* ... Cell BE Thread Switch Register */
#if defined(AIM)
#define SPR_DBSR 0x3f0 /* 4.. Debug Status Register */
@@ -626,8 +644,8 @@
#define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */
#define SPR_SVR 0x3ff /* ..8 1023 System Version Register */
-#define SVR_MPC8533 0x803c
-#define SVR_MPC8533E 0x8034
+#define SVR_MPC8533 0x8034
+#define SVR_MPC8533E 0x803c
#define SVR_MPC8541 0x8072
#define SVR_MPC8541E 0x807a
#define SVR_MPC8548 0x8031
@@ -636,6 +654,18 @@
#define SVR_MPC8555E 0x8079
#define SVR_MPC8572 0x80e0
#define SVR_MPC8572E 0x80e8
+#define SVR_P1011 0x80e5
+#define SVR_P1011E 0x80ed
+#define SVR_P1020 0x80e4
+#define SVR_P1020E 0x80ec
+#define SVR_P2010 0x80e3
+#define SVR_P2010E 0x80eb
+#define SVR_P2020 0x80e2
+#define SVR_P2020E 0x80ea
+#define SVR_P4040 0x8200
+#define SVR_P4040E 0x8208
+#define SVR_P4080 0x8201
+#define SVR_P4080E 0x8209
#define SVR_VER(svr) (((svr) >> 16) & 0xffff)
#define SPR_PID0 0x030 /* ..8 Process ID Register 0 */
diff --git a/freebsd/sys/powerpc/pci/pci_bus.c b/freebsd/sys/powerpc/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/powerpc/pci/pci_bus.c
+++ b/freebsd/sys/powerpc/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/powerpc/powerpc/in_cksum.c b/freebsd/sys/powerpc/powerpc/in_cksum.c
index f17d1092..44c62e12 100644
--- a/freebsd/sys/powerpc/powerpc/in_cksum.c
+++ b/freebsd/sys/powerpc/powerpc/in_cksum.c
@@ -235,7 +235,7 @@ skip_start:
if (len < mlen)
mlen = len;
- if ((clen ^ (int) addr) & 1)
+ if ((clen ^ (long) addr) & 1)
sum += in_cksumdata(addr, mlen) << 8;
else
sum += in_cksumdata(addr, mlen);
diff --git a/freebsd/sys/powerpc/powerpc/legacy.c b/freebsd/sys/powerpc/powerpc/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/powerpc/powerpc/legacy.c
+++ b/freebsd/sys/powerpc/powerpc/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/security/audit/audit.h b/freebsd/sys/security/audit/audit.h
index f66f33a4..69f6c328 100644
--- a/freebsd/sys/security/audit/audit.h
+++ b/freebsd/sys/security/audit/audit.h
@@ -99,8 +99,8 @@ void audit_arg_sockaddr(struct thread *td, struct sockaddr *sa);
void audit_arg_auid(uid_t auid);
void audit_arg_auditinfo(struct auditinfo *au_info);
void audit_arg_auditinfo_addr(struct auditinfo_addr *au_info);
-void audit_arg_upath1(struct thread *td, char *upath);
-void audit_arg_upath2(struct thread *td, char *upath);
+void audit_arg_upath1(struct thread *td, int dirfd, char *upath);
+void audit_arg_upath2(struct thread *td, int dirfd, char *upath);
void audit_arg_vnode1(struct vnode *vp);
void audit_arg_vnode2(struct vnode *vp);
void audit_arg_text(char *text);
@@ -114,6 +114,7 @@ void audit_arg_auditon(union auditon_udata *udata);
void audit_arg_file(struct proc *p, struct file *fp);
void audit_arg_argv(char *argv, int argc, int length);
void audit_arg_envv(char *envv, int envc, int length);
+void audit_arg_rights(cap_rights_t rights);
void audit_sysclose(struct thread *td, int fd);
void audit_cred_copy(struct ucred *src, struct ucred *dest);
void audit_cred_destroy(struct ucred *cred);
@@ -235,6 +236,11 @@ void audit_thread_free(struct thread *td);
audit_arg_rgid((rgid)); \
} while (0)
+#define AUDIT_ARG_RIGHTS(rights) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_rights((rights)); \
+} while (0)
+
#define AUDIT_ARG_RUID(ruid) do { \
if (AUDITING_TD(curthread)) \
audit_arg_ruid((ruid)); \
@@ -270,14 +276,14 @@ void audit_thread_free(struct thread *td);
audit_arg_uid((uid)); \
} while (0)
-#define AUDIT_ARG_UPATH1(td, upath) do { \
+#define AUDIT_ARG_UPATH1(td, dirfd, upath) do { \
if (AUDITING_TD(curthread)) \
- audit_arg_upath1((td), (upath)); \
+ audit_arg_upath1((td), (dirfd), (upath)); \
} while (0)
-#define AUDIT_ARG_UPATH2(td, upath) do { \
+#define AUDIT_ARG_UPATH2(td, dirfd, upath) do { \
if (AUDITING_TD(curthread)) \
- audit_arg_upath2((td), (upath)); \
+ audit_arg_upath2((td), (dirfd), (upath)); \
} while (0)
#define AUDIT_ARG_VALUE(value) do { \
@@ -342,6 +348,7 @@ void audit_thread_free(struct thread *td);
#define AUDIT_ARG_PID(pid)
#define AUDIT_ARG_PROCESS(p)
#define AUDIT_ARG_RGID(rgid)
+#define AUDIT_ARG_RIGHTS(rights)
#define AUDIT_ARG_RUID(ruid)
#define AUDIT_ARG_SIGNUM(signum)
#define AUDIT_ARG_SGID(sgid)
@@ -349,8 +356,8 @@ void audit_thread_free(struct thread *td);
#define AUDIT_ARG_SUID(suid)
#define AUDIT_ARG_TEXT(text)
#define AUDIT_ARG_UID(uid)
-#define AUDIT_ARG_UPATH1(td, upath)
-#define AUDIT_ARG_UPATH2(td, upath)
+#define AUDIT_ARG_UPATH1(td, dirfd, upath)
+#define AUDIT_ARG_UPATH2(td, dirfd, upath)
#define AUDIT_ARG_VALUE(value)
#define AUDIT_ARG_VNODE1(vp)
#define AUDIT_ARG_VNODE2(vp)
diff --git a/freebsd/sys/security/mac/mac_framework.h b/freebsd/sys/security/mac/mac_framework.h
index 70fb7c99..e5e27063 100644
--- a/freebsd/sys/security/mac/mac_framework.h
+++ b/freebsd/sys/security/mac/mac_framework.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 1999-2002, 2007-2009 Robert N. M. Watson
+ * Copyright (c) 1999-2002, 2007-2011 Robert N. M. Watson
* Copyright (c) 2001-2005 Networks Associates Technology, Inc.
* Copyright (c) 2005-2006 SPARTA, Inc.
* All rights reserved.
@@ -225,6 +225,10 @@ int mac_posixsem_check_getvalue(struct ucred *active_cred,
int mac_posixsem_check_open(struct ucred *cred, struct ksem *ks);
int mac_posixsem_check_post(struct ucred *active_cred,
struct ucred *file_cred, struct ksem *ks);
+int mac_posixsem_check_setmode(struct ucred *cred, struct ksem *ks,
+ mode_t mode);
+int mac_posixsem_check_setowner(struct ucred *cred, struct ksem *ks,
+ uid_t uid, gid_t gid);
int mac_posixsem_check_stat(struct ucred *active_cred,
struct ucred *file_cred, struct ksem *ks);
int mac_posixsem_check_unlink(struct ucred *cred, struct ksem *ks);
@@ -234,9 +238,15 @@ void mac_posixsem_create(struct ucred *cred, struct ksem *ks);
void mac_posixsem_destroy(struct ksem *);
void mac_posixsem_init(struct ksem *);
+int mac_posixshm_check_create(struct ucred *cred, const char *path);
int mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
int prot, int flags);
-int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd);
+int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
+ accmode_t accmode);
+int mac_posixshm_check_setmode(struct ucred *cred, struct shmfd *shmfd,
+ mode_t mode);
+int mac_posixshm_check_setowner(struct ucred *cred, struct shmfd *shmfd,
+ uid_t uid, gid_t gid);
int mac_posixshm_check_stat(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd);
int mac_posixshm_check_truncate(struct ucred *active_cred,
diff --git a/freebsd/sys/sh/include/machine/in_cksum.h b/freebsd/sys/sh/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/sh/include/machine/in_cksum.h
+++ b/freebsd/sys/sh/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/sh/include/machine/pci_cfgreg.h b/freebsd/sys/sh/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/sh/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/sh/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/sh/pci/pci_bus.c b/freebsd/sys/sh/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/sh/pci/pci_bus.c
+++ b/freebsd/sys/sh/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/sh/sh/legacy.c b/freebsd/sys/sh/sh/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/sh/sh/legacy.c
+++ b/freebsd/sys/sh/sh/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/sparc/include/machine/in_cksum.h b/freebsd/sys/sparc/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/sparc/include/machine/in_cksum.h
+++ b/freebsd/sys/sparc/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/sparc/include/machine/pci_cfgreg.h b/freebsd/sys/sparc/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/sparc/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/sparc/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/sparc/pci/pci_bus.c b/freebsd/sys/sparc/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/sparc/pci/pci_bus.c
+++ b/freebsd/sys/sparc/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/sparc/sparc/legacy.c b/freebsd/sys/sparc/sparc/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/sparc/sparc/legacy.c
+++ b/freebsd/sys/sparc/sparc/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/sparc64/include/machine/in_cksum.h b/freebsd/sys/sparc64/include/machine/in_cksum.h
index ae06a4cb..d5d167f5 100644
--- a/freebsd/sys/sparc64/include/machine/in_cksum.h
+++ b/freebsd/sys/sparc64/include/machine/in_cksum.h
@@ -65,6 +65,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline void
in_cksum_update(struct ip *ip)
{
@@ -73,6 +74,7 @@ in_cksum_update(struct ip *ip)
__tmp = (int)ip->ip_sum + 1;
ip->ip_sum = __tmp + (__tmp >> 16);
}
+#endif
static __inline u_short
in_addword(u_short sum, u_short b)
@@ -106,6 +108,7 @@ in_pseudo(u_int sum, u_int b, u_int c)
return (sum);
}
+#if defined(IPVERSION) && (IPVERSION == 4)
static __inline u_int
in_cksum_hdr(struct ip *ip)
{
@@ -163,6 +166,7 @@ in_cksum_hdr(struct ip *ip)
#undef __LD_ADD
return (__ret);
}
+#endif
#ifdef _KERNEL
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/sparc64/include/machine/pci_cfgreg.h b/freebsd/sys/sparc64/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/sparc64/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/sparc64/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/sparc64/pci/pci_bus.c b/freebsd/sys/sparc64/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/sparc64/pci/pci_bus.c
+++ b/freebsd/sys/sparc64/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/sparc64/sparc64/legacy.c b/freebsd/sys/sparc64/sparc64/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/sparc64/sparc64/legacy.c
+++ b/freebsd/sys/sparc64/sparc64/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/sys/_callout.h b/freebsd/sys/sys/_callout.h
new file mode 100644
index 00000000..b8c3ce92
--- /dev/null
+++ b/freebsd/sys/sys/_callout.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)callout.h 8.2 (Berkeley) 1/21/94
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__CALLOUT_H
+#define _SYS__CALLOUT_H
+
+#include <sys/queue.h>
+
+struct lock_object;
+
+SLIST_HEAD(callout_list, callout);
+TAILQ_HEAD(callout_tailq, callout);
+
+struct callout {
+ union {
+ SLIST_ENTRY(callout) sle;
+ TAILQ_ENTRY(callout) tqe;
+ } c_links;
+ int c_time; /* ticks to the event */
+ void *c_arg; /* function argument */
+ void (*c_func)(void *); /* function to call */
+ struct lock_object *c_lock; /* lock to handle */
+ int c_flags; /* state of this entry */
+ volatile int c_cpu; /* CPU we're scheduled on */
+};
+
+#endif
diff --git a/freebsd/sys/sys/_cpuset.h b/freebsd/sys/sys/_cpuset.h
new file mode 100644
index 00000000..42a0a6a9
--- /dev/null
+++ b/freebsd/sys/sys/_cpuset.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__CPUSET_H_
+#define _SYS__CPUSET_H_
+
+#ifdef _KERNEL
+#define CPU_SETSIZE MAXCPU
+#endif
+
+#define CPU_MAXSIZE 128
+
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE CPU_MAXSIZE
+#endif
+
+#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */
+#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS)
+
+typedef struct _cpuset {
+ long __bits[howmany(CPU_SETSIZE, _NCPUBITS)];
+} cpuset_t;
+
+#endif /* !_SYS__CPUSET_H_ */
diff --git a/freebsd/sys/sys/_lockmgr.h b/freebsd/sys/sys/_lockmgr.h
index 0b99e1a4..0367ff1e 100644
--- a/freebsd/sys/sys/_lockmgr.h
+++ b/freebsd/sys/sys/_lockmgr.h
@@ -38,6 +38,7 @@
struct lock {
struct lock_object lock_object;
volatile uintptr_t lk_lock;
+ u_int lk_exslpfail;
int lk_timo;
int lk_pri;
#ifdef DEBUG_LOCKS
diff --git a/freebsd/sys/sys/_null.h b/freebsd/sys/sys/_null.h
index ed6804cc..92706c6a 100644
--- a/freebsd/sys/sys/_null.h
+++ b/freebsd/sys/sys/_null.h
@@ -31,7 +31,9 @@
#if !defined(__cplusplus)
#define NULL ((void *)0)
#else
-#if defined(__GNUG__) && defined(__GNUC__) && __GNUC__ >= 4
+#if __cplusplus >= 201103L
+#define NULL nullptr
+#elif defined(__GNUG__) && defined(__GNUC__) && __GNUC__ >= 4
#define NULL __null
#else
#if defined(__LP64__)
diff --git a/freebsd/sys/sys/_rmlock.h b/freebsd/sys/sys/_rmlock.h
index 3ea2cb27..283ea379 100644
--- a/freebsd/sys/sys/_rmlock.h
+++ b/freebsd/sys/sys/_rmlock.h
@@ -46,11 +46,15 @@ LIST_HEAD(rmpriolist,rm_priotracker);
#ifndef __rtems__
struct rmlock {
struct lock_object lock_object;
- volatile int rm_noreadtoken;
+ volatile cpuset_t rm_writecpus;
LIST_HEAD(,rm_priotracker) rm_activeReaders;
- struct mtx rm_lock;
-
+ union {
+ struct mtx _rm_lock_mtx;
+ struct sx _rm_lock_sx;
+ } _rm_lock;
};
+#define rm_lock_mtx _rm_lock._rm_lock_mtx
+#define rm_lock_sx _rm_lock._rm_lock_sx
#else /* __rtems__ */
#include <sys/rwlock.h>
#define rmlock rwlock
diff --git a/freebsd/sys/sys/_semaphore.h b/freebsd/sys/sys/_semaphore.h
index df3c5da1..560076c8 100644
--- a/freebsd/sys/sys/_semaphore.h
+++ b/freebsd/sys/sys/_semaphore.h
@@ -31,26 +31,11 @@
typedef intptr_t semid_t;
struct timespec;
-#ifndef _KERNEL
-
-#include <sys/cdefs.h>
+#ifndef __rtems__
+#define SEM_VALUE_MAX __INT_MAX
+#endif /* __rtems__ */
-/*
- * Semaphore definitions.
- */
-struct sem {
-#define SEM_MAGIC ((u_int32_t) 0x09fa4012)
- u_int32_t magic;
- pthread_mutex_t lock;
- pthread_cond_t gtzero;
- u_int32_t count;
- u_int32_t nwaiters;
-#define SEM_USER (NULL)
- semid_t semid; /* semaphore id if kernel (shared) semaphore */
- int syssem; /* 1 if kernel (shared) semaphore */
- LIST_ENTRY(sem) entry;
- struct sem **backpointer;
-};
+#ifndef _KERNEL
__BEGIN_DECLS
diff --git a/freebsd/sys/sys/_sockaddr_storage.h b/freebsd/sys/sys/_sockaddr_storage.h
new file mode 100644
index 00000000..5c0048b5
--- /dev/null
+++ b/freebsd/sys/sys/_sockaddr_storage.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)socket.h 8.4 (Berkeley) 2/21/94
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__SOCKADDR_STORAGE_H_
+#define _SYS__SOCKADDR_STORAGE_H_
+
+/*
+ * RFC 2553: protocol-independent placeholder for socket addresses
+ */
+#define _SS_MAXSIZE 128U
+#define _SS_ALIGNSIZE (sizeof(__int64_t))
+#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t))
+#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
+
+struct sockaddr_storage {
+ unsigned char ss_len; /* address length */
+ sa_family_t ss_family; /* address family */
+ char __ss_pad1[_SS_PAD1SIZE];
+ __int64_t __ss_align; /* force desired struct alignment */
+ char __ss_pad2[_SS_PAD2SIZE];
+};
+
+#endif /* !_SYS__SOCKADDR_STORAGE_H_ */
diff --git a/freebsd/sys/sys/_task.h b/freebsd/sys/sys/_task.h
index 2a51e1b0..11fd1bc0 100644
--- a/freebsd/sys/sys/_task.h
+++ b/freebsd/sys/sys/_task.h
@@ -36,15 +36,19 @@
* taskqueue_run(). The first argument is taken from the 'ta_context'
* field of struct task and the second argument is a count of how many
* times the task was enqueued before the call to taskqueue_run().
+ *
+ * List of locks
+ * (c) const after init
+ * (q) taskqueue lock
*/
typedef void task_fn_t(void *context, int pending);
struct task {
- STAILQ_ENTRY(task) ta_link; /* link for queue */
- u_short ta_pending; /* count times queued */
- u_short ta_priority; /* Priority */
- task_fn_t *ta_func; /* task handler */
- void *ta_context; /* argument for handler */
+ STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
+ u_short ta_pending; /* (q) count times queued */
+ u_short ta_priority; /* (c) Priority */
+ task_fn_t *ta_func; /* (c) task handler */
+ void *ta_context; /* (c) argument for handler */
};
#endif /* !_SYS__TASK_H_ */
diff --git a/freebsd/sys/sys/acl.h b/freebsd/sys/sys/acl.h
index ec2e025b..77ebf89d 100644
--- a/freebsd/sys/sys/acl.h
+++ b/freebsd/sys/sys/acl.h
@@ -217,12 +217,23 @@ typedef void *acl_t;
#define ACL_WRITE_OWNER 0x00004000
#define ACL_SYNCHRONIZE 0x00008000
-#define ACL_NFS4_PERM_BITS (ACL_READ_DATA | ACL_WRITE_DATA | \
+#define ACL_FULL_SET (ACL_READ_DATA | ACL_WRITE_DATA | \
ACL_APPEND_DATA | ACL_READ_NAMED_ATTRS | ACL_WRITE_NAMED_ATTRS | \
ACL_EXECUTE | ACL_DELETE_CHILD | ACL_READ_ATTRIBUTES | \
ACL_WRITE_ATTRIBUTES | ACL_DELETE | ACL_READ_ACL | ACL_WRITE_ACL | \
ACL_WRITE_OWNER | ACL_SYNCHRONIZE)
+#define ACL_MODIFY_SET (ACL_FULL_SET & \
+ ~(ACL_WRITE_ACL | ACL_WRITE_OWNER))
+
+#define ACL_READ_SET (ACL_READ_DATA | ACL_READ_NAMED_ATTRS | \
+ ACL_READ_ATTRIBUTES | ACL_READ_ACL)
+
+#define ACL_WRITE_SET (ACL_WRITE_DATA | ACL_APPEND_DATA | \
+ ACL_WRITE_NAMED_ATTRS | ACL_WRITE_ATTRIBUTES)
+
+#define ACL_NFS4_PERM_BITS ACL_FULL_SET
+
/*
* Possible entry_id values for acl_get_entry(3).
*/
@@ -285,8 +296,6 @@ mode_t acl_posix1e_newfilemode(mode_t cmode,
struct acl *acl_alloc(int flags);
void acl_free(struct acl *aclp);
-void acl_nfs4_trivial_from_mode(struct acl *aclp,
- mode_t mode);
void acl_nfs4_sync_acl_from_mode(struct acl *aclp,
mode_t mode, int file_owner_id);
void acl_nfs4_sync_mode_from_acl(mode_t *mode,
diff --git a/freebsd/sys/sys/ata.h b/freebsd/sys/sys/ata.h
index efa5b246..76d8f646 100644
--- a/freebsd/sys/sys/ata.h
+++ b/freebsd/sys/sys/ata.h
@@ -189,10 +189,10 @@ struct ata_params {
} __packed support, enabled;
/*088*/ u_int16_t udmamodes; /* UltraDMA modes */
-/*089*/ u_int16_t erase_time;
-/*090*/ u_int16_t enhanced_erase_time;
+/*089*/ u_int16_t erase_time; /* time req'd in 2min units */
+/*090*/ u_int16_t enhanced_erase_time; /* time req'd in 2min units */
/*091*/ u_int16_t apm_value;
-/*092*/ u_int16_t master_passwd_revision;
+/*092*/ u_int16_t master_passwd_revision; /* password revision code */
/*093*/ u_int16_t hwres;
#define ATA_CABLE_ID 0x2000
@@ -229,6 +229,14 @@ struct ata_params {
u_int16_t reserved121[6];
/*127*/ u_int16_t removable_status;
/*128*/ u_int16_t security_status;
+#define ATA_SECURITY_LEVEL 0x0100 /* 0: high, 1: maximum */
+#define ATA_SECURITY_ENH_SUPP 0x0020 /* enhanced erase supported */
+#define ATA_SECURITY_COUNT_EXP 0x0010 /* count expired */
+#define ATA_SECURITY_FROZEN 0x0008 /* security config is frozen */
+#define ATA_SECURITY_LOCKED 0x0004 /* drive is locked */
+#define ATA_SECURITY_ENABLED 0x0002 /* ATA Security is enabled */
+#define ATA_SECURITY_SUPPORTED 0x0001 /* ATA Security is supported */
+
u_int16_t reserved129[31];
/*160*/ u_int16_t cfa_powermode1;
u_int16_t reserved161;
@@ -261,6 +269,12 @@ struct ata_params {
/*255*/ u_int16_t integrity;
} __packed;
+/* ATA Dataset Management */
+#define ATA_DSM_BLK_SIZE 512
+#define ATA_DSM_BLK_RANGES 64
+#define ATA_DSM_RANGE_SIZE 8
+#define ATA_DSM_RANGE_MAX 65535
+
/*
* ATA Device Register
*
@@ -275,6 +289,23 @@ struct ata_params {
#define ATA_DEV_SLAVE 0x10
#define ATA_DEV_LBA 0x40
+/* ATA limits */
+#define ATA_MAX_28BIT_LBA 268435455UL
+
+/* ATA Status Register */
+#define ATA_STATUS_ERROR 0x01
+#define ATA_STATUS_DEVICE_FAULT 0x20
+
+/* ATA Error Register */
+#define ATA_ERROR_ABORT 0x04
+#define ATA_ERROR_ID_NOT_FOUND 0x10
+
+/* ATA HPA Features */
+#define ATA_HPA_FEAT_MAX_ADDR 0x00
+#define ATA_HPA_FEAT_SET_PWD 0x01
+#define ATA_HPA_FEAT_LOCK 0x02
+#define ATA_HPA_FEAT_UNLOCK 0x03
+#define ATA_HPA_FEAT_FREEZE 0x04
/* ATA transfer modes */
#define ATA_MODE_MASK 0x0f
@@ -332,6 +363,7 @@ struct ata_params {
#define ATA_READ_VERIFY48 0x42
#define ATA_READ_FPDMA_QUEUED 0x60 /* read DMA NCQ */
#define ATA_WRITE_FPDMA_QUEUED 0x61 /* write DMA NCQ */
+#define ATA_SEP_ATTN 0x67 /* SEP request */
#define ATA_SEEK 0x70 /* seek */
#define ATA_PACKET_CMD 0xa0 /* packet command */
#define ATA_ATAPI_IDENTIFY 0xa1 /* get ATAPI params*/
@@ -370,7 +402,12 @@ struct ata_params {
#define ATA_SF_DIS_RELIRQ 0xdd /* disable release interrupt */
#define ATA_SF_ENAB_SRVIRQ 0x5e /* enable service interrupt */
#define ATA_SF_DIS_SRVIRQ 0xde /* disable service interrupt */
-#define ATA_SECURITY_FREEE_LOCK 0xf5 /* freeze security config */
+#define ATA_SECURITY_SET_PASSWORD 0xf1 /* set drive password */
+#define ATA_SECURITY_UNLOCK 0xf2 /* unlock drive using passwd */
+#define ATA_SECURITY_ERASE_PREPARE 0xf3 /* prepare to erase drive */
+#define ATA_SECURITY_ERASE_UNIT 0xf4 /* erase all blocks on drive */
+#define ATA_SECURITY_FREEZE_LOCK 0xf5 /* freeze security config */
+#define ATA_SECURITY_DISABLE_PASSWORD 0xf6 /* disable drive password */
#define ATA_READ_NATIVE_MAX_ADDRESS 0xf8 /* read native max address */
#define ATA_SET_MAX_ADDRESS 0xf9 /* set max address */
@@ -516,6 +553,20 @@ struct ata_ioc_request {
int error;
};
+struct ata_security_password {
+ u_int16_t ctrl;
+#define ATA_SECURITY_PASSWORD_USER 0x0000
+#define ATA_SECURITY_PASSWORD_MASTER 0x0001
+#define ATA_SECURITY_ERASE_NORMAL 0x0000
+#define ATA_SECURITY_ERASE_ENHANCED 0x0002
+#define ATA_SECURITY_LEVEL_HIGH 0x0000
+#define ATA_SECURITY_LEVEL_MAXIMUM 0x0100
+
+ u_int8_t password[32];
+ u_int16_t revision;
+ u_int16_t reserved[238];
+};
+
/* pr device ATA ioctl calls */
#define IOCATAREQUEST _IOWR('a', 100, struct ata_ioc_request)
#define IOCATAGPARM _IOR('a', 101, struct ata_params)
diff --git a/freebsd/sys/sys/buf.h b/freebsd/sys/sys/buf.h
index e182c7cc..e87fd420 100644
--- a/freebsd/sys/sys/buf.h
+++ b/freebsd/sys/sys/buf.h
@@ -117,6 +117,7 @@ struct buf {
long b_bufsize; /* Allocated buffer size. */
long b_runningbufspace; /* when I/O is running, pipelining */
caddr_t b_kvabase; /* base kva for buffer */
+ caddr_t b_kvaalloc; /* allocated kva for B_KVAALLOC */
int b_kvasize; /* size of kva for buffer */
daddr_t b_lblkno; /* Logical block number. */
struct vnode *b_vp; /* Device vnode. */
@@ -202,10 +203,10 @@ struct buf {
#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_00000800 0x00000800 /* Available flag. */
-#define B_00001000 0x00001000 /* Available flag. */
+#define B_UNMAPPED 0x00000800 /* KVA is not mapped. */
+#define B_KVAALLOC 0x00001000 /* But allocated. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
-#define B_00004000 0x00004000 /* Available flag. */
+#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
@@ -215,7 +216,7 @@ struct buf {
#define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_00800000 0x00800000 /* Available flag. */
-#define B_01000000 0x01000000 /* Available flag. */
+#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
#define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */
#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
#define B_MANAGED 0x08000000 /* Managed by FS. */
@@ -224,8 +225,8 @@ struct buf {
#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
#define B_REMFREE 0x80000000 /* Delayed bremfree */
-#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34b27" \
- "\33paging\32b25\31b24\30b23\27relbuf\26dirty\25b20" \
+#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
+ "\33paging\32needsgiant\31nocopy\30b23\27relbuf\26dirty\25b20" \
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
"\15b12\14b11\13eintr\12done\11persist\10delwri\7validsuspwrt" \
"\6cache\5deferred\4direct\3async\2needcommit\1age"
@@ -239,6 +240,8 @@ struct buf {
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
#define BX_ALTDATA 0x00000040 /* Holds extended data */
+#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty"
+
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
/*
@@ -249,6 +252,8 @@ struct buf {
#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
#define BV_INFREECNT 0x80000000 /* buf is counted in numfreebufs */
+#define PRINT_BUF_VFLAGS "\20\40infreecnt\3bkgrdwait\2bkgrdinprog\1scanned"
+
#ifdef _KERNEL
/*
* Buffer locking
@@ -450,7 +455,9 @@ buf_countdeps(struct buf *bp, int i)
*/
#define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */
#define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */
-#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon */
+#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon. */
+#define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */
+#define GB_KVAALLOC 0x0010 /* But allocate KVA. */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
@@ -470,21 +477,29 @@ extern struct buf *swbuf; /* Swap I/O buffer headers. */
extern int nswbuf; /* Number of swap I/O buffer headers. */
extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
+extern caddr_t unmapped_buf;
void runningbufwakeup(struct buf *);
void waitrunningbufspace(void);
caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
void bufinit(void);
+void bdata2bio(struct buf *bp, struct bio *bip);
void bwillwrite(void);
int buf_dirty_count_severe(void);
void bremfree(struct buf *);
void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. */
int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
+int bread_gb(struct vnode *, daddr_t, int, struct ucred *,
+ int gbflags, struct buf **);
void breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
struct ucred *, struct buf **);
+int breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int,
+ struct ucred *, int, struct buf **);
void bdwrite(struct buf *);
void bawrite(struct buf *);
+void babarrierwrite(struct buf *);
+int bbarrierwrite(struct buf *);
void bdirty(struct buf *);
void bundirty(struct buf *);
void bufstrategy(struct bufobj *, struct buf *);
@@ -500,16 +515,22 @@ int bufwait(struct buf *);
int bufwrite(struct buf *);
void bufdone(struct buf *);
void bufdone_finish(struct buf *);
+void bd_speedup(void);
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, struct buf **);
int cluster_wbuild(struct vnode *, long, daddr_t, int);
void cluster_write(struct vnode *, struct buf *, u_quad_t, int);
+int cluster_read_gb(struct vnode *, u_quad_t, daddr_t, long,
+ struct ucred *, long, int, int, struct buf **);
+int cluster_wbuild_gb(struct vnode *, long, daddr_t, int, int);
+void cluster_write_gb(struct vnode *, struct buf *, u_quad_t, int, int);
+void vfs_bio_bzero_buf(struct buf *bp, int base, int size);
void vfs_bio_set_valid(struct buf *, int base, int size);
void vfs_bio_clrbuf(struct buf *);
void vfs_busy_pages(struct buf *, int clear_modify);
void vfs_unbusy_pages(struct buf *);
-int vmapbuf(struct buf *);
+int vmapbuf(struct buf *, int);
void vunmapbuf(struct buf *);
void relpbuf(struct buf *, int *);
void brelvp(struct buf *);
diff --git a/freebsd/sys/sys/buf_ring.h b/freebsd/sys/sys/buf_ring.h
index 59853e42..59e74cf7 100644
--- a/freebsd/sys/sys/buf_ring.h
+++ b/freebsd/sys/sys/buf_ring.h
@@ -48,7 +48,6 @@ struct buf_ring {
int br_prod_mask;
uint64_t br_drops;
uint64_t br_prod_bufs;
- uint64_t br_prod_bytes;
/*
* Pad out to next L2 cache line
*/
@@ -74,7 +73,7 @@ struct buf_ring {
*
*/
static __inline int
-buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
+buf_ring_enqueue(struct buf_ring *br, void *buf)
{
uint32_t prod_head, prod_next;
uint32_t cons_tail;
@@ -95,6 +94,7 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
prod_next = (prod_head + 1) & br->br_prod_mask;
if (prod_next == cons_tail) {
+ br->br_drops++;
critical_exit();
return (ENOBUFS);
}
@@ -117,19 +117,11 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
while (br->br_prod_tail != prod_head)
cpu_spinwait();
br->br_prod_bufs++;
- br->br_prod_bytes += nbytes;
br->br_prod_tail = prod_next;
critical_exit();
return (0);
}
-static __inline int
-buf_ring_enqueue(struct buf_ring *br, void *buf)
-{
-
- return (buf_ring_enqueue_bytes(br, buf, 0));
-}
-
/*
* multi-consumer safe dequeue
*
@@ -222,6 +214,54 @@ buf_ring_dequeue_sc(struct buf_ring *br)
}
/*
+ * single-consumer advance after a peek
+ * use where it is protected by a lock
+ * e.g. a network driver's tx queue lock
+ */
+static __inline void
+buf_ring_advance_sc(struct buf_ring *br)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t prod_tail;
+
+ cons_head = br->br_cons_head;
+ prod_tail = br->br_prod_tail;
+
+ cons_next = (cons_head + 1) & br->br_cons_mask;
+ if (cons_head == prod_tail)
+ return;
+ br->br_cons_head = cons_next;
+#ifdef DEBUG_BUFRING
+ br->br_ring[cons_head] = NULL;
+#endif
+ br->br_cons_tail = cons_next;
+}
+
+/*
+ * Used to return a buffer (most likely already there)
+ * to the top od the ring. The caller should *not*
+ * have used any dequeue to pull it out of the ring
+ * but instead should have used the peek() function.
+ * This is normally used where the transmit queue
+ * of a driver is full, and an mubf must be returned.
+ * Most likely whats in the ring-buffer is what
+ * is being put back (since it was not removed), but
+ * sometimes the lower transmit function may have
+ * done a pullup or other function that will have
+ * changed it. As an optimzation we always put it
+ * back (since jhb says the store is probably cheaper),
+ * if we have to do a multi-queue version we will need
+ * the compare and an atomic.
+ */
+static __inline void
+buf_ring_putback_sc(struct buf_ring *br, void *new)
+{
+ KASSERT(br->br_cons_head != br->br_prod_tail,
+ ("Buf-Ring has none in putback")) ;
+ br->br_ring[br->br_cons_head] = new;
+}
+
+/*
* return a pointer to the first entry in the ring
* without modifying it, or NULL if the ring is empty
* race-prone if not protected by a lock
diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h
index 7259981e..f0406732 100644
--- a/freebsd/sys/sys/bus.h
+++ b/freebsd/sys/sys/bus.h
@@ -192,7 +192,6 @@ enum intr_type {
INTR_TYPE_MISC = 16,
INTR_TYPE_CLK = 32,
INTR_TYPE_AV = 64,
- INTR_FAST = 128,
INTR_EXCL = 256, /* exclusive interrupt */
INTR_MPSAFE = 512, /* this interrupt is SMP safe */
INTR_ENTROPY = 1024, /* this interrupt provides entropy */
@@ -239,6 +238,7 @@ struct resource_list_entry {
STAILQ_ENTRY(resource_list_entry) link;
int type; /**< @brief type argument to alloc_resource */
int rid; /**< @brief resource identifier */
+ int flags; /**< @brief resource flags */
struct resource *res; /**< @brief the real resource when allocated */
u_long start; /**< @brief start of resource range */
u_long end; /**< @brief end of resource range */
@@ -246,6 +246,10 @@ struct resource_list_entry {
};
STAILQ_HEAD(resource_list, resource_list_entry);
+#define RLE_RESERVED 0x0001 /* Reserved by the parent bus. */
+#define RLE_ALLOCATED 0x0002 /* Reserved resource is allocated. */
+#define RLE_PREFETCH 0x0004 /* Resource is a prefetch range. */
+
void resource_list_init(struct resource_list *rl);
void resource_list_free(struct resource_list *rl);
struct resource_list_entry *
@@ -255,6 +259,9 @@ struct resource_list_entry *
int resource_list_add_next(struct resource_list *rl,
int type,
u_long start, u_long end, u_long count);
+int resource_list_busy(struct resource_list *rl,
+ int type, int rid);
+int resource_list_reserved(struct resource_list *rl, int type, int rid);
struct resource_list_entry*
resource_list_find(struct resource_list *rl,
int type, int rid);
@@ -269,6 +276,15 @@ struct resource *
int resource_list_release(struct resource_list *rl,
device_t bus, device_t child,
int type, int rid, struct resource *res);
+struct resource *
+ resource_list_reserve(struct resource_list *rl,
+ device_t bus, device_t child,
+ int type, int *rid,
+ u_long start, u_long end,
+ u_long count, u_int flags);
+int resource_list_unreserve(struct resource_list *rl,
+ device_t bus, device_t child,
+ int type, int rid);
void resource_list_purge(struct resource_list *rl);
int resource_list_print_type(struct resource_list *rl,
const char *name, int type,
@@ -358,8 +374,10 @@ struct resource_spec {
int flags;
};
-int bus_alloc_resources(device_t dev, struct resource_spec *rs, struct resource **res);
-void bus_release_resources(device_t dev, const struct resource_spec *rs, struct resource **res);
+int bus_alloc_resources(device_t dev, struct resource_spec *rs,
+ struct resource **res);
+void bus_release_resources(device_t dev, const struct resource_spec *rs,
+ struct resource **res);
int bus_adjust_resource(device_t child, int type, struct resource *r,
u_long start, u_long end);
@@ -446,6 +464,8 @@ int device_set_devclass(device_t dev, const char *classname);
int device_set_driver(device_t dev, driver_t *driver);
void device_set_flags(device_t dev, u_int32_t flags);
void device_set_softc(device_t dev, void *softc);
+void device_free_softc(void *softc);
+void device_claim_softc(device_t dev);
int device_set_unit(device_t dev, int unit); /* XXX DONT USE XXX */
int device_shutdown(device_t dev);
void device_unbusy(device_t dev);
@@ -454,7 +474,10 @@ void device_verbose(device_t dev);
/*
* Access functions for devclass.
*/
+int devclass_add_driver(devclass_t dc, driver_t *driver,
+ int pass, devclass_t *dcp);
devclass_t devclass_create(const char *classname);
+int devclass_delete_driver(devclass_t busclass, driver_t *driver);
devclass_t devclass_find(const char *classname);
const char *devclass_get_name(devclass_t dc);
device_t devclass_get_device(devclass_t dc, int unit);
@@ -515,7 +538,7 @@ void bus_data_generation_update(void);
* is for drivers that wish to have a generic form and a specialized form,
* like is done with the pci bus and the acpi pci bus. BUS_PROBE_HOOVER is
* for those busses that implement a generic device place-holder for devices on
- * the bus that have no more specific river for them (aka ugen).
+ * the bus that have no more specific driver for them (aka ugen).
* BUS_PROBE_NOWILDCARD or lower means that the device isn't really bidding
* for a device node, but accepts only devices that its parent has told it
* use this driver.
diff --git a/freebsd/sys/sys/bus_dma.h b/freebsd/sys/sys/bus_dma.h
index 0dad3267..6fbac13b 100644
--- a/freebsd/sys/sys/bus_dma.h
+++ b/freebsd/sys/sys/bus_dma.h
@@ -109,8 +109,14 @@
*/
#define BUS_DMA_KEEP_PG_OFFSET 0x400
+#define BUS_DMA_LOAD_MBUF 0x800
+
/* Forwards needed by prototypes below. */
+union ccb;
+struct bio;
struct mbuf;
+struct memdesc;
+struct pmap;
struct uio;
/*
@@ -191,6 +197,56 @@ typedef void bus_dmamap_callback_t(void *, bus_dma_segment_t *, int, int);
typedef void bus_dmamap_callback2_t(void *, bus_dma_segment_t *, int, bus_size_t, int);
/*
+ * Map the buffer buf into bus space using the dmamap map.
+ */
+int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+ bus_size_t buflen, bus_dmamap_callback_t *callback,
+ void *callback_arg, int flags);
+
+/*
+ * Like bus_dmamap_load but for mbufs. Note the use of the
+ * bus_dmamap_callback2_t interface.
+ */
+int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct mbuf *mbuf,
+ bus_dmamap_callback2_t *callback, void *callback_arg,
+ int flags);
+
+int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct mbuf *mbuf, bus_dma_segment_t *segs,
+ int *nsegs, int flags);
+
+/*
+ * Like bus_dmamap_load but for uios. Note the use of the
+ * bus_dmamap_callback2_t interface.
+ */
+int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct uio *ui,
+ bus_dmamap_callback2_t *callback, void *callback_arg,
+ int flags);
+
+/*
+ * Like bus_dmamap_load but for cam control blocks.
+ */
+int bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
+ bus_dmamap_callback_t *callback, void *callback_arg,
+ int flags);
+
+/*
+ * Like bus_dmamap_load but for bios.
+ */
+int bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
+ bus_dmamap_callback_t *callback, void *callback_arg,
+ int flags);
+
+/*
+ * Loads any memory descriptor.
+ */
+int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct memdesc *mem, bus_dmamap_callback_t *callback,
+ void *callback_arg, int flags);
+
+/*
* XXX sparc64 uses the same interface, but a much different implementation.
* <machine/bus_dma.h> for the sparc64 arch contains the equivalent
* declarations.
@@ -224,35 +280,6 @@ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
/*
- * Map the buffer buf into bus space using the dmamap map.
- */
-int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
- bus_size_t buflen, bus_dmamap_callback_t *callback,
- void *callback_arg, int flags);
-
-/*
- * Like bus_dmamap_load but for mbufs. Note the use of the
- * bus_dmamap_callback2_t interface.
- */
-int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
- struct mbuf *mbuf,
- bus_dmamap_callback2_t *callback, void *callback_arg,
- int flags);
-
-int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
- struct mbuf *mbuf, bus_dma_segment_t *segs,
- int *nsegs, int flags);
-
-/*
- * Like bus_dmamap_load but for uios. Note the use of the
- * bus_dmamap_callback2_t interface.
- */
-int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
- struct uio *ui,
- bus_dmamap_callback2_t *callback, void *callback_arg,
- int flags);
-
-/*
* Perform a synchronization operation on the given map.
*/
void _bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_dmasync_op_t);
@@ -272,6 +299,36 @@ void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map);
_bus_dmamap_unload(dmat, dmamap); \
} while (0)
+/*
+ * The following functions define the interface between the MD and MI
+ * busdma layers. These are not intended for consumption by driver
+ * software.
+ */
+void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct memdesc *mem,
+ bus_dmamap_callback_t *callback,
+ void *callback_arg);
+
+#define _bus_dmamap_waitok(dmat, map, mem, callback, callback_arg) \
+ do { \
+ if ((map) != NULL) \
+ __bus_dmamap_waitok(dmat, map, mem, callback, \
+ callback_arg); \
+ } while (0);
+
+int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map,
+ void *buf, bus_size_t buflen, struct pmap *pmap,
+ int flags, bus_dma_segment_t *segs, int *segp);
+
+int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_paddr_t paddr, bus_size_t buflen,
+ int flags, bus_dma_segment_t *segs, int *segp);
+
+bus_dma_segment_t *_bus_dmamap_complete(bus_dma_tag_t dmat,
+ bus_dmamap_t map,
+ bus_dma_segment_t *segs,
+ int nsegs, int error);
+
#endif /* __sparc64__ */
#endif /* _BUS_DMA_H_ */
diff --git a/freebsd/sys/sys/callout.h b/freebsd/sys/sys/callout.h
index 370cec7a..95b9a32b 100644
--- a/freebsd/sys/sys/callout.h
+++ b/freebsd/sys/sys/callout.h
@@ -38,25 +38,7 @@
#ifndef _SYS_CALLOUT_H_
#define _SYS_CALLOUT_H_
-#include <sys/queue.h>
-
-struct lock_object;
-
-SLIST_HEAD(callout_list, callout);
-TAILQ_HEAD(callout_tailq, callout);
-
-struct callout {
- union {
- SLIST_ENTRY(callout) sle;
- TAILQ_ENTRY(callout) tqe;
- } c_links;
- int c_time; /* ticks to the event */
- void *c_arg; /* function argument */
- void (*c_func)(void *); /* function to call */
- struct lock_object *c_lock; /* lock to handle */
- int c_flags; /* state of this entry */
- volatile int c_cpu; /* CPU we're scheduled on */
-};
+#include <sys/_callout.h>
#define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
@@ -97,7 +79,8 @@ int callout_schedule_on(struct callout *, int, int);
#define callout_stop(c) _callout_stop_safe(c, 0)
int _callout_stop_safe(struct callout *, int);
void callout_tick(void);
-
+int callout_tickstofirst(int limit);
+extern void (*callout_new_inserted)(int cpu, int ticks);
#endif
diff --git a/freebsd/sys/sys/capability.h b/freebsd/sys/sys/capability.h
new file mode 100644
index 00000000..e9106b6e
--- /dev/null
+++ b/freebsd/sys/sys/capability.h
@@ -0,0 +1,209 @@
+/*-
+ * Copyright (c) 2008-2010 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Definitions for FreeBSD capabilities facility.
+ */
+#ifndef _SYS_CAPABILITY_H_
+#define _SYS_CAPABILITY_H_
+
+#include <sys/cdefs.h>
+#include <rtems/bsd/sys/types.h>
+
+#include <sys/file.h>
+
+/*
+ * Possible rights on capabilities.
+ *
+ * Notes:
+ * Some system calls don't require a capability in order to perform an
+ * operation on an fd. These include: close, dup, dup2.
+ *
+ * sendfile is authorized using CAP_READ on the file and CAP_WRITE on the
+ * socket.
+ *
+ * mmap() and aio*() system calls will need special attention as they may
+ * involve reads or writes depending a great deal on context.
+ */
+
+/* General file I/O. */
+#define CAP_READ 0x0000000000000001ULL /* read/recv */
+#define CAP_WRITE 0x0000000000000002ULL /* write/send */
+#define CAP_MMAP 0x0000000000000004ULL /* mmap */
+#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */
+#define CAP_FEXECVE 0x0000000000000010ULL
+#define CAP_FSYNC 0x0000000000000020ULL
+#define CAP_FTRUNCATE 0x0000000000000040ULL
+#define CAP_SEEK 0x0000000000000080ULL
+
+/* VFS methods. */
+#define CAP_FCHFLAGS 0x0000000000000100ULL
+#define CAP_FCHDIR 0x0000000000000200ULL
+#define CAP_FCHMOD 0x0000000000000400ULL
+#define CAP_FCHOWN 0x0000000000000800ULL
+#define CAP_FCNTL 0x0000000000001000ULL
+#define CAP_FPATHCONF 0x0000000000002000ULL
+#define CAP_FLOCK 0x0000000000004000ULL
+#define CAP_FSCK 0x0000000000008000ULL
+#define CAP_FSTAT 0x0000000000010000ULL
+#define CAP_FSTATFS 0x0000000000020000ULL
+#define CAP_FUTIMES 0x0000000000040000ULL
+#define CAP_CREATE 0x0000000000080000ULL
+#define CAP_DELETE 0x0000000000100000ULL
+#define CAP_MKDIR 0x0000000000200000ULL
+#define CAP_RMDIR 0x0000000000400000ULL
+#define CAP_MKFIFO 0x0000000000800000ULL
+
+/* Lookups - used to constrain *at() calls. */
+#define CAP_LOOKUP 0x0000000001000000ULL
+
+/* Extended attributes. */
+#define CAP_EXTATTR_DELETE 0x0000000002000000ULL
+#define CAP_EXTATTR_GET 0x0000000004000000ULL
+#define CAP_EXTATTR_LIST 0x0000000008000000ULL
+#define CAP_EXTATTR_SET 0x0000000010000000ULL
+
+/* Access Control Lists. */
+#define CAP_ACL_CHECK 0x0000000020000000ULL
+#define CAP_ACL_DELETE 0x0000000040000000ULL
+#define CAP_ACL_GET 0x0000000080000000ULL
+#define CAP_ACL_SET 0x0000000100000000ULL
+
+/* Socket operations. */
+#define CAP_ACCEPT 0x0000000200000000ULL
+#define CAP_BIND 0x0000000400000000ULL
+#define CAP_CONNECT 0x0000000800000000ULL
+#define CAP_GETPEERNAME 0x0000001000000000ULL
+#define CAP_GETSOCKNAME 0x0000002000000000ULL
+#define CAP_GETSOCKOPT 0x0000004000000000ULL
+#define CAP_LISTEN 0x0000008000000000ULL
+#define CAP_PEELOFF 0x0000010000000000ULL
+#define CAP_SETSOCKOPT 0x0000020000000000ULL
+#define CAP_SHUTDOWN 0x0000040000000000ULL
+
+#define CAP_SOCK_ALL \
+ (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \
+ | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \
+ | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+
+/* Mandatory Access Control. */
+#define CAP_MAC_GET 0x0000080000000000ULL
+#define CAP_MAC_SET 0x0000100000000000ULL
+
+/* Methods on semaphores. */
+#define CAP_SEM_GETVALUE 0x0000200000000000ULL
+#define CAP_SEM_POST 0x0000400000000000ULL
+#define CAP_SEM_WAIT 0x0000800000000000ULL
+
+/* kqueue events. */
+#define CAP_POLL_EVENT 0x0001000000000000ULL
+#define CAP_POST_EVENT 0x0002000000000000ULL
+
+/* Strange and powerful rights that should not be given lightly. */
+#define CAP_IOCTL 0x0004000000000000ULL
+#define CAP_TTYHOOK 0x0008000000000000ULL
+
+/* Process management via process descriptors. */
+#define CAP_PDGETPID 0x0010000000000000ULL
+#define CAP_PDWAIT 0x0020000000000000ULL
+#define CAP_PDKILL 0x0040000000000000ULL
+
+/* The mask of all valid method rights. */
+#define CAP_MASK_VALID 0x007fffffffffffffULL
+
+#ifdef _KERNEL
+
+#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)
+
+/*
+ * Create a capability to wrap a file object.
+ */
+int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
+ int *capfd);
+
+/*
+ * Unwrap a capability if its rights mask is a superset of 'rights'.
+ *
+ * Unwrapping a non-capability is effectively a no-op; the value of fp_cap
+ * is simply copied into fpp.
+ */
+int cap_funwrap(struct file *fp_cap, cap_rights_t rights,
+ struct file **fpp);
+int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights,
+ u_char *maxprotp, struct file **fpp);
+
+/*
+ * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to
+ * extract the rights from a capability. However, this should not be used by
+ * kernel code generally, instead cap_funwrap() should be used in order to
+ * keep all access control in one place.
+ */
+cap_rights_t cap_rights(struct file *fp_cap);
+
+#else /* !_KERNEL */
+
+__BEGIN_DECLS
+
+/*
+ * cap_enter(): Cause the process to enter capability mode, which will
+ * prevent it from directly accessing global namespaces. System calls will
+ * be limited to process-local, process-inherited, or file descriptor
+ * operations. If already in capability mode, a no-op.
+ *
+ * Currently, process-inherited operations are not properly handled -- in
+ * particular, we're interested in things like waitpid(2), kill(2), etc,
+ * being properly constrained. One possible solution is to introduce process
+ * descriptors.
+ */
+int cap_enter(void);
+
+/*
+ * cap_getmode(): Are we in capability mode?
+ */
+int cap_getmode(u_int* modep);
+
+/*
+ * cap_new(): Create a new capability derived from an existing file
+ * descriptor with the specified rights. If the existing file descriptor is
+ * a capability, then the new rights must be a subset of the existing rights.
+ */
+int cap_new(int fd, cap_rights_t rights);
+
+/*
+ * cap_getrights(): Query the rights on a capability.
+ */
+int cap_getrights(int fd, cap_rights_t *rightsp);
+
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_SYS_CAPABILITY_H_ */
diff --git a/freebsd/sys/sys/conf.h b/freebsd/sys/sys/conf.h
index 694bbb36..a8dfc300 100644
--- a/freebsd/sys/sys/conf.h
+++ b/freebsd/sys/sys/conf.h
@@ -52,7 +52,7 @@ struct cdevsw;
struct file;
struct cdev {
- void *__si_reserved;
+ struct mount *si_mountpt;
u_int si_flags;
#define SI_ETERNAL 0x0001 /* never destroyed */
#define SI_ALIAS 0x0002 /* carrier of alias name */
@@ -142,9 +142,7 @@ typedef int d_read_t(struct cdev *dev, struct uio *uio, int ioflag);
typedef int d_write_t(struct cdev *dev, struct uio *uio, int ioflag);
typedef int d_poll_t(struct cdev *dev, int events, struct thread *td);
typedef int d_kqfilter_t(struct cdev *dev, struct knote *kn);
-typedef int d_mmap_t(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr,
- int nprot);
-typedef int d_mmap2_t(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr,
+typedef int d_mmap_t(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
int nprot, vm_memattr_t *memattr);
typedef int d_mmap_single_t(struct cdev *cdev, vm_ooffset_t *offset,
vm_size_t size, struct vm_object **object, int nprot);
@@ -179,7 +177,7 @@ typedef int dumper_t(
#define D_PSEUDO 0x00200000 /* make_dev() can return NULL */
#define D_NEEDGIANT 0x00400000 /* driver want Giant */
#define D_NEEDMINOR 0x00800000 /* driver uses clone_create() */
-#define D_MMAP2 0x01000000 /* driver uses d_mmap2() */
+#define D_UNMAPPED_IO 0x01000000 /* d_strategy can accept unmapped IO */
/*
* Version numbers.
@@ -187,7 +185,8 @@ typedef int dumper_t(
#define D_VERSION_00 0x20011966
#define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */
#define D_VERSION_02 0x28042009 /* Add d_mmap_single */
-#define D_VERSION D_VERSION_02
+#define D_VERSION_03 0x17122009 /* d_mmap takes memattr,vm_ooffset_t */
+#define D_VERSION D_VERSION_03
/*
* Flags used for internal housekeeping
@@ -208,31 +207,24 @@ struct cdevsw {
d_write_t *d_write;
d_ioctl_t *d_ioctl;
d_poll_t *d_poll;
- union {
- d_mmap_t *old;
- d_mmap2_t *new;
- } __d_mmap;
+ d_mmap_t *d_mmap;
d_strategy_t *d_strategy;
dumper_t *d_dump;
d_kqfilter_t *d_kqfilter;
d_purge_t *d_purge;
d_mmap_single_t *d_mmap_single;
- uid_t d_uid;
- gid_t d_gid;
- mode_t d_mode;
- const char *d_kind;
+
+ int32_t d_spare0[3];
+ void *d_spare1[3];
/* These fields should not be messed with by drivers */
- LIST_ENTRY(cdevsw) d_list;
LIST_HEAD(, cdev) d_devs;
- int d_spare3;
+ int d_spare2;
union {
struct cdevsw *gianttrick;
SLIST_ENTRY(cdevsw) postfree_list;
} __d_giant;
};
-#define d_mmap __d_mmap.old
-#define d_mmap2 __d_mmap.new
#define d_gianttrick __d_giant.gianttrick
#define d_postfree_list __d_giant.postfree_list
@@ -273,16 +265,18 @@ void dev_ref(struct cdev *dev);
void dev_refl(struct cdev *dev);
void dev_rel(struct cdev *dev);
void dev_strategy(struct cdev *dev, struct buf *bp);
+void dev_strategy_csw(struct cdev *dev, struct cdevsw *csw, struct buf *bp);
struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid,
int _perms, const char *_fmt, ...) __printflike(6, 7);
struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms,
const char *_fmt, ...) __printflike(7, 8);
-#define MAKEDEV_REF 0x01
-#define MAKEDEV_WHTOUT 0x02
-#define MAKEDEV_NOWAIT 0x04
-#define MAKEDEV_WAITOK 0x08
-#define MAKEDEV_ETERNAL 0x10
+#define MAKEDEV_REF 0x01
+#define MAKEDEV_WHTOUT 0x02
+#define MAKEDEV_NOWAIT 0x04
+#define MAKEDEV_WAITOK 0x08
+#define MAKEDEV_ETERNAL 0x10
+#define MAKEDEV_CHECKNAME 0x20
struct cdev *make_dev_credf(int _flags,
struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
@@ -292,6 +286,11 @@ int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw,
const char *_fmt, ...) __printflike(8, 9);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...)
__printflike(2, 3);
+int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,
+ const char *_fmt, ...) __printflike(4, 5);
+int make_dev_physpath_alias(int _flags, struct cdev **_cdev,
+ struct cdev *_pdev, struct cdev *_old_alias,
+ const char *_physpath);
void dev_lock(void);
void dev_unlock(void);
void setconf(void);
@@ -310,6 +309,9 @@ int devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t dtr);
void devfs_clear_cdevpriv(void);
void devfs_fpdrop(struct file *fp); /* XXX This is not public KPI */
+ino_t devfs_alloc_cdp_inode(void);
+void devfs_free_cdp_inode(ino_t ino);
+
#define UID_ROOT 0
#define UID_BIN 3
#define UID_UUCP 66
@@ -344,6 +346,7 @@ struct dumperinfo {
int set_dumper(struct dumperinfo *);
int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t);
void dumpsys(struct dumperinfo *);
+int doadump(boolean_t);
extern int dumping; /* system is dumping */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/cpuset.h b/freebsd/sys/sys/cpuset.h
new file mode 100644
index 00000000..9e6c1257
--- /dev/null
+++ b/freebsd/sys/sys/cpuset.h
@@ -0,0 +1,229 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_CPUSET_H_
+#define _SYS_CPUSET_H_
+
+#include <sys/_cpuset.h>
+
+#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS)
+
+/*
+ * Macros addressing word and bit within it, tuned to make compiler
+ * optimize cases when CPU_SETSIZE fits into single machine word.
+ */
+#define __cpuset_mask(n) \
+ ((long)1 << ((_NCPUWORDS == 1) ? (__size_t)(n) : ((n) % _NCPUBITS)))
+#define __cpuset_word(n) ((_NCPUWORDS == 1) ? 0 : ((n) / _NCPUBITS))
+
+#define CPU_CLR(n, p) ((p)->__bits[__cpuset_word(n)] &= ~__cpuset_mask(n))
+#define CPU_COPY(f, t) (void)(*(t) = *(f))
+#define CPU_ISSET(n, p) (((p)->__bits[__cpuset_word(n)] & __cpuset_mask(n)) != 0)
+#define CPU_SET(n, p) ((p)->__bits[__cpuset_word(n)] |= __cpuset_mask(n))
+#define CPU_ZERO(p) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ (p)->__bits[__i] = 0; \
+} while (0)
+
+#define CPU_FILL(p) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ (p)->__bits[__i] = -1; \
+} while (0)
+
+#define CPU_SETOF(n, p) do { \
+ CPU_ZERO(p); \
+ ((p)->__bits[__cpuset_word(n)] = __cpuset_mask(n)); \
+} while (0)
+
+/* Is p empty. */
+#define CPU_EMPTY(p) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if ((p)->__bits[__i]) \
+ break; \
+ __i == _NCPUWORDS; \
+})
+
+/* Is p full set. */
+#define CPU_ISFULLSET(p) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if ((p)->__bits[__i] != (long)-1) \
+ break; \
+ __i == _NCPUWORDS; \
+})
+
+/* Is c a subset of p. */
+#define CPU_SUBSET(p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if (((c)->__bits[__i] & \
+ (p)->__bits[__i]) != \
+ (c)->__bits[__i]) \
+ break; \
+ __i == _NCPUWORDS; \
+})
+
+/* Are there any common bits between b & c? */
+#define CPU_OVERLAP(p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if (((c)->__bits[__i] & \
+ (p)->__bits[__i]) != 0) \
+ break; \
+ __i != _NCPUWORDS; \
+})
+
+/* Compare two sets, returns 0 if equal 1 otherwise. */
+#define CPU_CMP(p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if (((c)->__bits[__i] != \
+ (p)->__bits[__i])) \
+ break; \
+ __i != _NCPUWORDS; \
+})
+
+#define CPU_OR(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ (d)->__bits[__i] |= (s)->__bits[__i]; \
+} while (0)
+
+#define CPU_AND(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ (d)->__bits[__i] &= (s)->__bits[__i]; \
+} while (0)
+
+#define CPU_NAND(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ (d)->__bits[__i] &= ~(s)->__bits[__i]; \
+} while (0)
+
+#define CPU_CLR_ATOMIC(n, p) \
+ atomic_clear_long(&(p)->__bits[__cpuset_word(n)], __cpuset_mask(n))
+
+#define CPU_SET_ATOMIC(n, p) \
+ atomic_set_long(&(p)->__bits[__cpuset_word(n)], __cpuset_mask(n))
+
+/* Convenience functions catering special cases. */
+#define CPU_OR_ATOMIC(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ atomic_set_long(&(d)->__bits[__i], \
+ (s)->__bits[__i]); \
+} while (0)
+
+#define CPU_COPY_STORE_REL(f, t) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ atomic_store_rel_long(&(t)->__bits[__i], \
+ (f)->__bits[__i]); \
+} while (0)
+
+/*
+ * Valid cpulevel_t values.
+ */
+#define CPU_LEVEL_ROOT 1 /* All system cpus. */
+#define CPU_LEVEL_CPUSET 2 /* Available cpus for which. */
+#define CPU_LEVEL_WHICH 3 /* Actual mask/id for which. */
+
+/*
+ * Valid cpuwhich_t values.
+ */
+#define CPU_WHICH_TID 1 /* Specifies a thread id. */
+#define CPU_WHICH_PID 2 /* Specifies a process id. */
+#define CPU_WHICH_CPUSET 3 /* Specifies a set id. */
+#define CPU_WHICH_IRQ 4 /* Specifies an irq #. */
+#define CPU_WHICH_JAIL 5 /* Specifies a jail id. */
+
+/*
+ * Reserved cpuset identifiers.
+ */
+#define CPUSET_INVALID -1
+#define CPUSET_DEFAULT 0
+
+#ifdef _KERNEL
+LIST_HEAD(setlist, cpuset);
+
+/*
+ * cpusets encapsulate cpu binding information for one or more threads.
+ *
+ * a - Accessed with atomics.
+ * s - Set at creation, never modified. Only a ref required to read.
+ * c - Locked internally by a cpuset lock.
+ *
+ * The bitmask is only modified while holding the cpuset lock. It may be
+ * read while only a reference is held but the consumer must be prepared
+ * to deal with inconsistent results.
+ */
+struct cpuset {
+ cpuset_t cs_mask; /* bitmask of valid cpus. */
+ volatile u_int cs_ref; /* (a) Reference count. */
+ int cs_flags; /* (s) Flags from below. */
+ cpusetid_t cs_id; /* (s) Id or INVALID. */
+ struct cpuset *cs_parent; /* (s) Pointer to our parent. */
+ LIST_ENTRY(cpuset) cs_link; /* (c) All identified sets. */
+ LIST_ENTRY(cpuset) cs_siblings; /* (c) Sibling set link. */
+ struct setlist cs_children; /* (c) List of children. */
+};
+
+#define CPU_SET_ROOT 0x0001 /* Set is a root set. */
+#define CPU_SET_RDONLY 0x0002 /* No modification allowed. */
+
+extern cpuset_t *cpuset_root;
+struct prison;
+struct proc;
+
+struct cpuset *cpuset_thread0(void);
+struct cpuset *cpuset_ref(struct cpuset *);
+void cpuset_rel(struct cpuset *);
+int cpuset_setthread(lwpid_t id, cpuset_t *);
+int cpuset_create_root(struct prison *, struct cpuset **);
+int cpuset_setproc_update_set(struct proc *, struct cpuset *);
+int cpusetobj_ffs(const cpuset_t *);
+char *cpusetobj_strprint(char *, const cpuset_t *);
+int cpusetobj_strscan(cpuset_t *, const char *);
+
+#else
+__BEGIN_DECLS
+int cpuset(cpusetid_t *);
+int cpuset_setid(cpuwhich_t, id_t, cpusetid_t);
+int cpuset_getid(cpulevel_t, cpuwhich_t, id_t, cpusetid_t *);
+int cpuset_getaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, cpuset_t *);
+int cpuset_setaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, const cpuset_t *);
+__END_DECLS
+#endif
+#endif /* !_SYS_CPUSET_H_ */
diff --git a/freebsd/sys/sys/endian.h b/freebsd/sys/sys/endian.h
index 921fac41..fc8017e6 100644
--- a/freebsd/sys/sys/endian.h
+++ b/freebsd/sys/sys/endian.h
@@ -33,6 +33,11 @@
#include <rtems/bsd/sys/_types.h>
#include <machine/rtems-bsd-endian.h>
+#ifndef _UINT8_T_DECLARED
+typedef __uint8_t uint8_t;
+#define _UINT8_T_DECLARED
+#endif
+
#ifndef _UINT16_T_DECLARED
typedef __uint16_t uint16_t;
#define _UINT16_T_DECLARED
@@ -94,7 +99,7 @@ typedef __uint64_t uint64_t;
static __inline uint16_t
be16dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
return ((p[0] << 8) | p[1]);
}
@@ -102,15 +107,15 @@ be16dec(const void *pp)
static __inline uint32_t
be32dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
- return ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
+ return (((unsigned)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
}
static __inline uint64_t
be64dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
return (((uint64_t)be32dec(p) << 32) | be32dec(p + 4));
}
@@ -118,7 +123,7 @@ be64dec(const void *pp)
static __inline uint16_t
le16dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
return ((p[1] << 8) | p[0]);
}
@@ -126,15 +131,15 @@ le16dec(const void *pp)
static __inline uint32_t
le32dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
- return ((p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]);
+ return (((unsigned)p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]);
}
static __inline uint64_t
le64dec(const void *pp)
{
- unsigned char const *p = (unsigned char const *)pp;
+ uint8_t const *p = (uint8_t const *)pp;
return (((uint64_t)le32dec(p + 4) << 32) | le32dec(p));
}
@@ -142,7 +147,7 @@ le64dec(const void *pp)
static __inline void
be16enc(void *pp, uint16_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
p[0] = (u >> 8) & 0xff;
p[1] = u & 0xff;
@@ -151,7 +156,7 @@ be16enc(void *pp, uint16_t u)
static __inline void
be32enc(void *pp, uint32_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
p[0] = (u >> 24) & 0xff;
p[1] = (u >> 16) & 0xff;
@@ -162,16 +167,16 @@ be32enc(void *pp, uint32_t u)
static __inline void
be64enc(void *pp, uint64_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
- be32enc(p, u >> 32);
- be32enc(p + 4, u & 0xffffffff);
+ be32enc(p, (uint32_t)(u >> 32));
+ be32enc(p + 4, (uint32_t)(u & 0xffffffffU));
}
static __inline void
le16enc(void *pp, uint16_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
p[0] = u & 0xff;
p[1] = (u >> 8) & 0xff;
@@ -180,7 +185,7 @@ le16enc(void *pp, uint16_t u)
static __inline void
le32enc(void *pp, uint32_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
p[0] = u & 0xff;
p[1] = (u >> 8) & 0xff;
@@ -191,10 +196,10 @@ le32enc(void *pp, uint32_t u)
static __inline void
le64enc(void *pp, uint64_t u)
{
- unsigned char *p = (unsigned char *)pp;
+ uint8_t *p = (uint8_t *)pp;
- le32enc(p, u & 0xffffffff);
- le32enc(p + 4, u >> 32);
+ le32enc(p, (uint32_t)(u & 0xffffffffU));
+ le32enc(p + 4, (uint32_t)(u >> 32));
}
#endif /* _SYS_ENDIAN_H_ */
diff --git a/freebsd/sys/sys/eventhandler.h b/freebsd/sys/sys/eventhandler.h
index 61d05bff..47bc8940 100644
--- a/freebsd/sys/sys/eventhandler.h
+++ b/freebsd/sys/sys/eventhandler.h
@@ -228,7 +228,20 @@ EVENTHANDLER_DECLARE(process_exit, exitlist_fn);
EVENTHANDLER_DECLARE(process_fork, forklist_fn);
EVENTHANDLER_DECLARE(process_exec, execlist_fn);
+/*
+ * application dump event
+ */
struct thread;
+typedef void (*app_coredump_start_fn)(void *, struct thread *, char *name);
+typedef void (*app_coredump_progress_fn)(void *, struct thread *td, int byte_count);
+typedef void (*app_coredump_finish_fn)(void *, struct thread *td);
+typedef void (*app_coredump_error_fn)(void *, struct thread *td, char *msg, ...);
+
+EVENTHANDLER_DECLARE(app_coredump_start, app_coredump_start_fn);
+EVENTHANDLER_DECLARE(app_coredump_progress, app_coredump_progress_fn);
+EVENTHANDLER_DECLARE(app_coredump_finish, app_coredump_finish_fn);
+EVENTHANDLER_DECLARE(app_coredump_error, app_coredump_error_fn);
+
typedef void (*thread_ctor_fn)(void *, struct thread *);
typedef void (*thread_dtor_fn)(void *, struct thread *);
typedef void (*thread_fini_fn)(void *, struct thread *);
diff --git a/freebsd/sys/sys/file.h b/freebsd/sys/sys/file.h
index f563f75c..8df93726 100644
--- a/freebsd/sys/sys/file.h
+++ b/freebsd/sys/sys/file.h
@@ -63,16 +63,34 @@ struct socket;
#define DTYPE_SHM 8 /* swap-backed shared memory */
#define DTYPE_SEM 9 /* posix semaphore */
#define DTYPE_PTS 10 /* pseudo teletype master device */
+#define DTYPE_DEV 11 /* Device specific fd type */
+#define DTYPE_CAPABILITY 12 /* capability */
+#define DTYPE_PROCDESC 13 /* process descriptor */
#ifdef _KERNEL
struct file;
struct ucred;
+#define FOF_OFFSET 0x01 /* Use the offset in uio argument */
+#define FOF_NOLOCK 0x02 /* Do not take FOFFSET_LOCK */
+#define FOF_NEXTOFF 0x04 /* Also update f_nextoff */
+#define FOF_NOUPDATE 0x10 /* Do not update f_offset */
+off_t foffset_lock(struct file *fp, int flags);
+void foffset_lock_uio(struct file *fp, struct uio *uio, int flags);
+void foffset_unlock(struct file *fp, off_t val, int flags);
+void foffset_unlock_uio(struct file *fp, struct uio *uio, int flags);
+
+static inline off_t
+foffset_get(struct file *fp)
+{
+
+ return (foffset_lock(fp, FOF_NOLOCK));
+}
+
typedef int fo_rdwr_t(struct file *fp, struct uio *uio,
struct ucred *active_cred, int flags,
struct thread *td);
-#define FOF_OFFSET 1 /* Use the offset in uio argument */
typedef int fo_truncate_t(struct file *fp, off_t length,
struct ucred *active_cred, struct thread *td);
typedef int fo_ioctl_t(struct file *fp, u_long com, void *data,
@@ -83,6 +101,10 @@ typedef int fo_kqfilter_t(struct file *fp, struct knote *kn);
typedef int fo_stat_t(struct file *fp, struct stat *sb,
struct ucred *active_cred, struct thread *td);
typedef int fo_close_t(struct file *fp, struct thread *td);
+typedef int fo_chmod_t(struct file *fp, mode_t mode,
+ struct ucred *active_cred, struct thread *td);
+typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid,
+ struct ucred *active_cred, struct thread *td);
typedef int fo_flags_t;
struct fileops {
@@ -94,6 +116,8 @@ struct fileops {
fo_kqfilter_t *fo_kqfilter;
fo_stat_t *fo_stat;
fo_close_t *fo_close;
+ fo_chmod_t *fo_chmod;
+ fo_chown_t *fo_chown;
fo_flags_t fo_flags; /* DFLAG_* below */
};
@@ -239,7 +263,8 @@ rtems_bsd_error_to_status_and_errno(int error)
#define f_advice f_vnun.fvn_advice
#define FOFFSET_LOCKED 0x1
-#define FOFFSET_LOCK_WAITING 0x2
+#define FOFFSET_LOCK_WAITING 0x2
+#define FDEVFS_VNODE 0x4
#endif /* _KERNEL || _WANT_FILE */
@@ -279,24 +304,30 @@ extern int maxfilesperproc; /* per process limit on number of open files */
extern volatile int openfiles; /* actual number of open files */
#ifndef __rtems__
-int fget(struct thread *td, int fd, struct file **fpp);
+int fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp);
#else /* __rtems__ */
struct file *rtems_bsd_get_file(int fd);
static inline int
-fget(struct thread *td, int fd, struct file **fpp)
+fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{
struct file *fp = rtems_bsd_get_file(fd);
(void) td;
+ (void) rights;
*fpp = fp;
return fp != NULL ? 0 : EBADF;
}
#endif /* __rtems__ */
-int fget_read(struct thread *td, int fd, struct file **fpp);
-int fget_write(struct thread *td, int fd, struct file **fpp);
+int fget_mmap(struct thread *td, int fd, cap_rights_t rights,
+ u_char *maxprotp, struct file **fpp);
+int fget_read(struct thread *td, int fd, cap_rights_t rights,
+ struct file **fpp);
+int fget_write(struct thread *td, int fd, cap_rights_t rights,
+ struct file **fpp);
+int fgetcap(struct thread *td, int fd, struct file **fpp);
int _fdrop(struct file *fp, struct thread *td);
#ifndef __rtems__
@@ -313,6 +344,9 @@ fo_poll_t soo_poll;
fo_kqfilter_t soo_kqfilter;
fo_stat_t soo_stat;
fo_close_t soo_close;
+
+fo_chmod_t invfo_chmod;
+fo_chown_t invfo_chown;
#else /* __rtems__ */
int rtems_bsd_soo_kqfilter(rtems_libio_t *iop, struct knote *kn);
#endif /* __rtems__ */
@@ -336,18 +370,32 @@ finit(struct file *fp, u_int fflag, short type, void *data,
rtems_filesystem_location_add_to_mt_entry(pathinfo);
}
#endif /* __rtems__ */
-int fgetvp(struct thread *td, int fd, struct vnode **vpp);
-int fgetvp_read(struct thread *td, int fd, struct vnode **vpp);
-int fgetvp_write(struct thread *td, int fd, struct vnode **vpp);
-
-int fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp);
+int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp);
+int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights,
+ struct vnode **vpp);
+int fgetvp_rights(struct thread *td, int fd, cap_rights_t need,
+ cap_rights_t *have, struct vnode **vpp);
+int fgetvp_read(struct thread *td, int fd, cap_rights_t rights,
+ struct vnode **vpp);
+int fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
+ struct vnode **vpp);
+
+int fgetsock(struct thread *td, int fd, cap_rights_t rights,
+ struct socket **spp, u_int *fflagp);
void fputsock(struct socket *sp);
+static __inline int
+_fnoop(void)
+{
+
+ return (0);
+}
+
#define fhold(fp) \
(refcount_acquire(&(fp)->f_count))
#ifndef __rtems__
#define fdrop(fp, td) \
- (refcount_release(&(fp)->f_count) ? _fdrop((fp), (td)) : 0)
+ (refcount_release(&(fp)->f_count) ? _fdrop((fp), (td)) : _fnoop())
#else /* __rtems__ */
#define fdrop(fp, td) do { } while (0)
#endif /* __rtems__ */
@@ -361,37 +409,28 @@ static __inline fo_poll_t fo_poll;
static __inline fo_kqfilter_t fo_kqfilter;
static __inline fo_stat_t fo_stat;
static __inline fo_close_t fo_close;
+static __inline fo_chmod_t fo_chmod;
+static __inline fo_chown_t fo_chown;
static __inline int
-fo_read(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- int flags;
- struct thread *td;
+fo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
return ((*fp->f_ops->fo_read)(fp, uio, active_cred, flags, td));
}
static __inline int
-fo_write(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- int flags;
- struct thread *td;
+fo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
return ((*fp->f_ops->fo_write)(fp, uio, active_cred, flags, td));
}
static __inline int
-fo_truncate(fp, length, active_cred, td)
- struct file *fp;
- off_t length;
- struct ucred *active_cred;
- struct thread *td;
+fo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td)
{
return ((*fp->f_ops->fo_truncate)(fp, length, active_cred, td));
@@ -399,12 +438,8 @@ fo_truncate(fp, length, active_cred, td)
#endif /* __rtems__ */
static __inline int
-fo_ioctl(fp, com, data, active_cred, td)
- struct file *fp;
- u_long com;
- void *data;
- struct ucred *active_cred;
- struct thread *td;
+fo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
+ struct thread *td)
{
#ifndef __rtems__
@@ -426,11 +461,8 @@ fo_ioctl(fp, com, data, active_cred, td)
}
static __inline int
-fo_poll(fp, events, active_cred, td)
- struct file *fp;
- int events;
- struct ucred *active_cred;
- struct thread *td;
+fo_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
{
#ifndef __rtems__
@@ -445,20 +477,15 @@ fo_poll(fp, events, active_cred, td)
#ifndef __rtems__
static __inline int
-fo_stat(fp, sb, active_cred, td)
- struct file *fp;
- struct stat *sb;
- struct ucred *active_cred;
- struct thread *td;
+fo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
+ struct thread *td)
{
return ((*fp->f_ops->fo_stat)(fp, sb, active_cred, td));
}
static __inline int
-fo_close(fp, td)
- struct file *fp;
- struct thread *td;
+fo_close(struct file *fp, struct thread *td)
{
return ((*fp->f_ops->fo_close)(fp, td));
@@ -466,9 +493,7 @@ fo_close(fp, td)
#endif /* __rtems__ */
static __inline int
-fo_kqfilter(fp, kn)
- struct file *fp;
- struct knote *kn;
+fo_kqfilter(struct file *fp, struct knote *kn)
{
#ifndef __rtems__
@@ -478,6 +503,24 @@ fo_kqfilter(fp, kn)
#endif /* __rtems__ */
}
+#ifndef __rtems__
+static __inline int
+fo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
+ struct thread *td)
+{
+
+ return ((*fp->f_ops->fo_chmod)(fp, mode, active_cred, td));
+}
+
+static __inline int
+fo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td)
+{
+
+ return ((*fp->f_ops->fo_chown)(fp, uid, gid, active_cred, td));
+}
+#endif /* __rtems__ */
+
#endif /* _KERNEL */
#endif /* !SYS_FILE_H */
diff --git a/freebsd/sys/sys/filedesc.h b/freebsd/sys/sys/filedesc.h
index 0b48d849..ad18114a 100644
--- a/freebsd/sys/sys/filedesc.h
+++ b/freebsd/sys/sys/filedesc.h
@@ -131,14 +131,17 @@ int closef(struct file *fp, struct thread *td);
int dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd,
int mode, int error);
#ifndef __rtems__
-int falloc(struct thread *td, struct file **resultfp, int *resultfd);
+int falloc(struct thread *td, struct file **resultfp, int *resultfd,
+ int flags);
#else /* __rtems__ */
static inline int
-falloc(struct thread *td, struct file **resultfp, int *resultfd)
+falloc(struct thread *td, struct file **resultfp, int *resultfd,
+ int flags)
{
rtems_libio_t *iop = rtems_libio_allocate();
(void) td;
+ (void) flags;
*resultfp = rtems_bsd_iop_to_fp(iop);
@@ -153,9 +156,10 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd)
}
}
#endif /* __rtems__ */
-int fallocf(struct thread *td, struct file **resultfp, int *resultfd,
- int flags);
+int falloc_noinstall(struct thread *td, struct file **resultfp);
+int finstall(struct thread *td, struct file *fp, int *resultfp, int flags);
int fdalloc(struct thread *td, int minfd, int *result);
+int fdallocn(struct thread *td, int minfd, int *fds, int n);
int fdavail(struct thread *td, int n);
int fdcheckstd(struct thread *td);
#ifndef __rtems__
@@ -181,7 +185,8 @@ struct filedesc *fdshare(struct filedesc *fdp);
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old,
struct filedesc *fdp, struct proc *leader);
-int getvnode(struct filedesc *fdp, int fd, struct file **fpp);
+int getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
+ struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
void setugidsafety(struct thread *td);
diff --git a/freebsd/sys/sys/fnv_hash.h b/freebsd/sys/sys/fnv_hash.h
index 2dbed339..c1452a67 100644
--- a/freebsd/sys/sys/fnv_hash.h
+++ b/freebsd/sys/sys/fnv_hash.h
@@ -7,6 +7,8 @@
*
* $FreeBSD$
*/
+#ifndef _SYS_FNV_HASH_H_
+#define _SYS_FNV_HASH_H_
typedef u_int32_t Fnv32_t;
typedef u_int64_t Fnv64_t;
@@ -66,3 +68,4 @@ fnv_64_str(const char *str, Fnv64_t hval)
}
return hval;
}
+#endif /* _SYS_FNV_HASH_H_ */
diff --git a/freebsd/sys/sys/hhook.h b/freebsd/sys/sys/hhook.h
index 51eda1fd..0d54eda4 100644
--- a/freebsd/sys/sys/hhook.h
+++ b/freebsd/sys/sys/hhook.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010,2013 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
@@ -91,12 +91,14 @@ struct hookinfo {
struct hhook_head {
STAILQ_HEAD(hhook_list, hhook) hhh_hooks;
struct rmlock hhh_lock;
+ uintptr_t hhh_vid;
int32_t hhh_id;
int32_t hhh_nhooks;
int32_t hhh_type;
uint32_t hhh_flags;
volatile uint32_t hhh_refcount;
LIST_ENTRY(hhook_head) hhh_next;
+ LIST_ENTRY(hhook_head) hhh_vnext;
};
/* Public KPI functions. */
diff --git a/freebsd/sys/sys/interrupt.h b/freebsd/sys/sys/interrupt.h
index c1df1c76..3dace82e 100644
--- a/freebsd/sys/sys/interrupt.h
+++ b/freebsd/sys/sys/interrupt.h
@@ -146,14 +146,13 @@ struct proc;
extern struct intr_event *tty_intr_event;
extern struct intr_event *clk_intr_event;
-extern void *softclock_ih;
extern void *vm_ih;
/* Counts and names for statistics (defined in MD code). */
-extern u_long eintrcnt[]; /* end of intrcnt[] */
-extern char eintrnames[]; /* end of intrnames[] */
extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
+extern size_t sintrcnt; /* size of intrcnt table */
+extern size_t sintrnames; /* size of intrnames table */
#ifdef DDB
void db_dump_intr_event(struct intr_event *ie, int handlers);
@@ -177,6 +176,7 @@ int intr_event_remove_handler(void *cookie);
int intr_getaffinity(int irq, void *mask);
void *intr_handler_source(void *cookie);
int intr_setaffinity(int irq, void *mask);
+void _intr_drain(int irq); /* Linux compat only. */
int swi_add(struct intr_event **eventp, const char *name,
driver_intr_t handler, void *arg, int pri, enum intr_type flags,
void **cookiep);
diff --git a/freebsd/sys/sys/jail.h b/freebsd/sys/sys/jail.h
index 4a0b82b3..063dd6a3 100644
--- a/freebsd/sys/sys/jail.h
+++ b/freebsd/sys/sys/jail.h
@@ -135,6 +135,9 @@ MALLOC_DECLARE(M_PRISON);
#define HOSTUUIDLEN 64
+struct racct;
+struct prison_racct;
+
/*
* This structure describes a prison. It is pointed to by all struct
* ucreds's of the inmates. pr_ref keeps track of them and is used to
@@ -166,13 +169,15 @@ struct prison {
int pr_ip6s; /* (p) number of v6 IPs */
struct in_addr *pr_ip4; /* (p) v4 IPs of jail */
struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */
- void *pr_sparep[4];
+ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
+ void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
int pr_securelevel; /* (p) securelevel */
int pr_enforce_statfs; /* (p) statfs permission */
- int pr_spare[5];
+ int pr_devfs_rsnum; /* (p) devfs ruleset */
+ int pr_spare[4];
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
char pr_path[MAXPATHLEN]; /* (c) chroot path */
@@ -180,6 +185,13 @@ struct prison {
char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */
char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */
};
+
+struct prison_racct {
+ LIST_ENTRY(prison_racct) prr_next;
+ char prr_name[MAXHOSTNAMELEN];
+ u_int prr_refcount;
+ struct racct *prr_racct;
+};
#endif /* _KERNEL || _WANT_PRISON */
#ifdef _KERNEL
@@ -211,7 +223,11 @@ struct prison {
#define PR_ALLOW_MOUNT 0x0010
#define PR_ALLOW_QUOTAS 0x0020
#define PR_ALLOW_SOCKET_AF 0x0040
-#define PR_ALLOW_ALL 0x007f
+#define PR_ALLOW_MOUNT_DEVFS 0x0080
+#define PR_ALLOW_MOUNT_NULLFS 0x0100
+#define PR_ALLOW_MOUNT_ZFS 0x0200
+#define PR_ALLOW_MOUNT_PROCFS 0x0400
+#define PR_ALLOW_ALL 0x07ff
/*
* OSD methods
@@ -326,6 +342,8 @@ SYSCTL_DECL(_security_jail_param);
sysctl_jail_param, fmt, descr)
#define SYSCTL_JAIL_PARAM_NODE(module, descr) \
SYSCTL_NODE(_security_jail_param, OID_AUTO, module, 0, 0, descr)
+#define SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr) \
+ SYSCTL_NODE(_security_jail_param_##parent, OID_AUTO, module, 0, 0, descr)
#define SYSCTL_JAIL_PARAM_SYS_NODE(module, access, descr) \
SYSCTL_JAIL_PARAM_NODE(module, descr); \
SYSCTL_JAIL_PARAM(_##module, , CTLTYPE_INT | (access), "E,jailsys", \
@@ -387,7 +405,12 @@ int prison_check_af(struct ucred *cred, int af);
int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
-int sysctl_jail_param(struct sysctl_oid *, void *, int , struct sysctl_req *);
+int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
+void prison_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
+struct prison_racct *prison_racct_find(const char *name);
+void prison_racct_hold(struct prison_racct *prr);
+void prison_racct_free(struct prison_racct *prr);
#endif /* _KERNEL */
#endif /* !_SYS_JAIL_H_ */
diff --git a/freebsd/sys/sys/kernel.h b/freebsd/sys/sys/kernel.h
index 83122c99..e276f8ee 100644
--- a/freebsd/sys/sys/kernel.h
+++ b/freebsd/sys/sys/kernel.h
@@ -91,12 +91,6 @@ extern volatile int ticks;
* The SI_SUB_SWAP values represent a value used by
* the BSD 4.4Lite but not by FreeBSD; it is maintained in dependent
* order to support porting.
- *
- * The SI_SUB_PROTO_BEGIN and SI_SUB_PROTO_END bracket a range of
- * initializations to take place at splimp(). This is a historical
- * wart that should be removed -- probably running everything at
- * splimp() until the first init that doesn't want it is the correct
- * fix. They are currently present to ensure historical behavior.
*/
enum sysinit_sub_id {
SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/
@@ -116,6 +110,7 @@ enum sysinit_sub_id {
SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
+ SI_SUB_RACCT = 0x2110000, /* resource accounting */
SI_SUB_RANDOM = 0x2120000, /* random number generator */
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
@@ -153,12 +148,12 @@ enum sysinit_sub_id {
SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */
SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/
SI_SUB_EXEC = 0x7400000, /* execve() handlers */
- SI_SUB_PROTO_BEGIN = 0x8000000, /* XXX: set splimp (kludge)*/
+ SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */
SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/
SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */
SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/
SI_SUB_PROTO_IFATTACHDOMAIN = 0x8800001, /* domain dependent data init*/
- SI_SUB_PROTO_END = 0x8ffffff, /* XXX: set splx (kludge)*/
+ SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */
SI_SUB_KPROF = 0x9000000, /* kernel profiling*/
SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout events*/
SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */
@@ -176,6 +171,7 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
SI_SUB_SMP = 0xf000000, /* start the APs*/
+ SI_SUB_RACCTD = 0xf100000, /* start raccd*/
SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/
};
diff --git a/freebsd/sys/sys/kthread.h b/freebsd/sys/sys/kthread.h
index 390463f3..4911eccc 100644
--- a/freebsd/sys/sys/kthread.h
+++ b/freebsd/sys/sys/kthread.h
@@ -31,7 +31,7 @@
#include <sys/cdefs.h>
-/*-
+/*
* A kernel process descriptor; used to start "internal" daemons.
*
* Note: global_procpp may be NULL for no global save area.
@@ -73,7 +73,7 @@ int kthread_resume(struct thread *);
void kthread_shutdown(void *, int);
void kthread_start(const void *);
int kthread_suspend(struct thread *, int);
-void kthread_suspend_check(struct thread *);
+void kthread_suspend_check(void);
#endif /* !_SYS_KTHREAD_H_ */
diff --git a/freebsd/sys/sys/ktr.h b/freebsd/sys/sys/ktr.h
index 5fbe2947..de8ebe54 100644
--- a/freebsd/sys/sys/ktr.h
+++ b/freebsd/sys/sys/ktr.h
@@ -77,19 +77,6 @@
#define KTR_BUF 0x40000000 /* Buffer cache */
#define KTR_ALL 0x7fffffff
-/*
- * Trace classes which can be assigned to particular use at compile time
- * These must remain in high 22 as some assembly code counts on it
- */
-#define KTR_CT1 0x01000000
-#define KTR_CT2 0x02000000
-#define KTR_CT3 0x04000000
-#define KTR_CT4 0x08000000
-#define KTR_CT5 0x10000000
-#define KTR_CT6 0x20000000
-#define KTR_CT7 0x40000000
-#define KTR_CT8 0x80000000
-
/* Trace classes to compile in */
#ifdef KTR
#ifndef KTR_COMPILE
@@ -110,6 +97,9 @@
#ifndef LOCORE
+#include <rtems/bsd/sys/param.h>
+#include <sys/_cpuset.h>
+
struct ktr_entry {
u_int64_t ktr_timestamp;
int ktr_cpu;
@@ -120,7 +110,7 @@ struct ktr_entry {
u_long ktr_parms[KTR_PARMS];
};
-extern int ktr_cpumask;
+extern cpuset_t ktr_cpumask;
extern int ktr_mask;
extern int ktr_entries;
extern int ktr_verbose;
diff --git a/freebsd/sys/sys/libkern.h b/freebsd/sys/sys/libkern.h
index 942c21f5..acdedda6 100644
--- a/freebsd/sys/sys/libkern.h
+++ b/freebsd/sys/sys/libkern.h
@@ -76,6 +76,11 @@ static __inline long labs(long a) { return (a < 0 ? -a : a); }
#endif /* __rtems__ */
static __inline quad_t qabs(quad_t a) { return (a < 0 ? -a : a); }
+#define ARC4_ENTR_NONE 0 /* Don't have entropy yet. */
+#define ARC4_ENTR_HAVE 1 /* Have entropy. */
+#define ARC4_ENTR_SEED 2 /* Reseeding. */
+extern int arc4rand_iniseed_state;
+
/* Prototypes for non-quad routines. */
struct malloc_type;
uint32_t arc4random(void);
@@ -96,10 +101,8 @@ int fls(int);
int flsl(long);
#endif
int fnmatch(const char *, const char *, int);
-#ifndef __rtems__
-void gets(char *, size_t, int);
-#endif /* __rtems__ */
int locc(int, char *, u_int);
+void *memchr(const void *s, int c, size_t n);
int memcmp(const void *b1, const void *b2, size_t len);
void qsort(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *));
diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h
index 896b0c8c..6948bb22 100644
--- a/freebsd/sys/sys/linker.h
+++ b/freebsd/sys/sys/linker.h
@@ -234,7 +234,11 @@ void *linker_hwpmc_list_objects(void);
/*
* Module lookup
*/
+extern vm_offset_t preload_addr_relocate;
extern caddr_t preload_metadata;
+
+extern void * preload_fetch_addr(caddr_t _mod);
+extern size_t preload_fetch_size(caddr_t _mod);
extern caddr_t preload_search_by_name(const char *_name);
extern caddr_t preload_search_by_type(const char *_type);
extern caddr_t preload_search_next_name(caddr_t _base);
diff --git a/freebsd/sys/sys/linker_set.h b/freebsd/sys/sys/linker_set.h
index fc04fe0d..b915302f 100644
--- a/freebsd/sys/sys/linker_set.h
+++ b/freebsd/sys/sys/linker_set.h
@@ -46,6 +46,8 @@
#ifdef __GNUCLIKE___SECTION
#ifndef __rtems__
#define __MAKE_SET(set, sym) \
+ __GLOBL(__CONCAT(__start_set_,set)); \
+ __GLOBL(__CONCAT(__stop_set_,set)); \
static void const * const __set_##set##_sym_##sym \
__section("set_" #set) __used = &sym
#else /* __rtems__ */
diff --git a/freebsd/sys/sys/lockmgr.h b/freebsd/sys/sys/lockmgr.h
index c85c39bd..4c3a272a 100644
--- a/freebsd/sys/sys/lockmgr.h
+++ b/freebsd/sys/sys/lockmgr.h
@@ -73,7 +73,10 @@ void _lockmgr_assert(struct lock *lk, int what, const char *file, int line);
#endif
void _lockmgr_disown(struct lock *lk, const char *file, int line);
+void lockallowrecurse(struct lock *lk);
+void lockallowshare(struct lock *lk);
void lockdestroy(struct lock *lk);
+void lockdisablerecurse(struct lock *lk);
void lockinit(struct lock *lk, int prio, const char *wmesg, int timo,
int flags);
#ifdef DDB
@@ -144,9 +147,6 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define LK_QUIET 0x000020
#define LK_ADAPTIVE 0x000040
-/* LK_EXSLPFAIL to follow, even if not used in lockinit() */
-#define LK_EXSLPFAIL 0x000080
-
/*
* Additional attributes to be used in lockmgr().
*/
diff --git a/freebsd/sys/sys/loginclass.h b/freebsd/sys/sys/loginclass.h
new file mode 100644
index 00000000..08f3409a
--- /dev/null
+++ b/freebsd/sys/sys/loginclass.h
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_LOGINCLASS_H_
+#define _SYS_LOGINCLASS_H_
+
+struct racct;
+
+/*
+ * Exactly one of these structures exists per login class.
+ */
+struct loginclass {
+ LIST_ENTRY(loginclass) lc_next;
+ char lc_name[MAXLOGNAME];
+ u_int lc_refcount;
+ struct racct *lc_racct;
+};
+
+void loginclass_hold(struct loginclass *lc);
+void loginclass_free(struct loginclass *lc);
+struct loginclass *loginclass_find(const char *name);
+void loginclass_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
+
+#endif /* !_SYS_LOGINCLASS_H_ */
diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h
index 5586b39b..f99dfc5e 100644
--- a/freebsd/sys/sys/malloc.h
+++ b/freebsd/sys/sys/malloc.h
@@ -50,6 +50,7 @@
#define M_ZERO 0x0100 /* bzero the allocation */
#define M_NOVM 0x0200 /* don't ask VM for pages */
#define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */
+#define M_NODUMP 0x0800 /* don't dump pages in this allocation */
#define M_MAGIC 877983977 /* time when first defined :-) */
@@ -90,6 +91,7 @@ struct malloc_type_stats {
struct malloc_type_internal {
uint32_t mti_probes[DTMALLOC_PROBE_MAX];
/* DTrace probe ID array. */
+ u_char mti_zone;
struct malloc_type_stats mti_stats[MAXCPU];
};
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index 7ce3bf44..391582c3 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -115,7 +115,7 @@ struct pkthdr {
/* variables for ip and tcp reassembly */
void *header; /* pointer to packet header */
int len; /* total packet length */
- uint32_t flowid; /* packet's 4-tuple system
+ uint32_t flowid; /* packet's 4-tuple system
* flow identifier
*/
/* variables for hardware checksum */
@@ -199,7 +199,9 @@ struct mbuf {
#define M_PROTO6 0x00080000 /* protocol-specific */
#define M_PROTO7 0x00100000 /* protocol-specific */
#define M_PROTO8 0x00200000 /* protocol-specific */
-#define M_FLOWID 0x00400000 /* flowid is valid */
+#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */
+#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */
+
/*
* For RELENG_{6,7} steal these flags for limited multiple routing table
* support. In RELENG_8 and beyond, use just one flag and a tag.
@@ -215,11 +217,45 @@ struct mbuf {
(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
/*
+ * Network interface cards are able to hash protocol fields (such as IPv4
+ * addresses and TCP port numbers) classify packets into flows. These flows
+ * can then be used to maintain ordering while delivering packets to the OS
+ * via parallel input queues, as well as to provide a stateless affinity
+ * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set
+ * m_flag fields to indicate how the hash should be interpreted by the
+ * network stack.
+ *
+ * Most NICs support RSS, which provides ordering and explicit affinity, and
+ * use the hash m_flag bits to indicate what header fields were covered by
+ * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
+ * that provide an opaque flow identifier, allowing for ordering and
+ * distribution without explicit affinity.
+ */
+#define M_HASHTYPE_SHIFT 24
+#define M_HASHTYPE_NONE 0x0
+#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */
+#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */
+#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */
+#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tiple + ext hdrs */
+#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */
+
+#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS)
+#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \
+ M_HASHTYPE_SHIFT)
+#define M_HASHTYPE_SET(m, v) do { \
+ (m)->m_flags &= ~M_HASHTYPEBITS; \
+ (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \
+} while (0)
+#define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v))
+
+/*
* Flags preserved when copying m_pkthdr.
*/
#define M_COPYFLAGS \
(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB)
+ M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS)
/*
* External buffer types: identify ext_buf type.
@@ -243,19 +279,28 @@ struct mbuf {
#define CSUM_IP 0x0001 /* will csum IP */
#define CSUM_TCP 0x0002 /* will csum TCP */
#define CSUM_UDP 0x0004 /* will csum UDP */
-#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */
+#define CSUM_IP_FRAGS 0x0008 /* removed, left for compat */
#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
#define CSUM_TSO 0x0020 /* will do TSO */
#define CSUM_SCTP 0x0040 /* will csum SCTP */
+#define CSUM_SCTP_IPV6 0x0080 /* will csum IPv6/SCTP */
#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */
#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */
+#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */
+#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */
+/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */
+
+/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragementation */
+
+#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
+#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
-#define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */
+#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */
/*
* mbuf types.
@@ -305,8 +350,8 @@ struct mbstat {
* Flags specifying how an allocation should be made.
*
* The flag to use is as follows:
- * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation.
- * - M_WAIT or M_WAITOK from wherever it is safe to block.
+ * - M_NOWAIT (M_DONTWAIT) from an interrupt handler to not block allocation.
+ * - M_WAITOK (M_WAIT) from wherever it is safe to block.
*
* M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and
* if we cannot allocate immediately we may return NULL, whereas
@@ -380,7 +425,7 @@ static __inline int
m_gettype(int size)
{
int type;
-
+
switch (size) {
case MSIZE:
type = EXT_MBUF;
@@ -410,7 +455,7 @@ static __inline uma_zone_t
m_getzone(int size)
{
uma_zone_t zone;
-
+
switch (size) {
case MSIZE:
zone = zone_mbuf;
@@ -549,7 +594,7 @@ m_free_fast(struct mbuf *m)
if (m->m_flags & M_PKTHDR)
KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags"));
#endif
-
+
uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
}
@@ -609,7 +654,7 @@ m_cljset(struct mbuf *m, void *cl, int type)
{
uma_zone_t zone;
int size;
-
+
switch (type) {
case EXT_CLUSTER:
size = MCLBYTES;
@@ -659,6 +704,16 @@ m_last(struct mbuf *m)
return (m);
}
+extern void (*m_addr_chg_pf_p)(struct mbuf *m);
+
+static __inline void
+m_addr_changed(struct mbuf *m)
+{
+
+ if (m_addr_chg_pf_p)
+ m_addr_chg_pf_p(m);
+}
+
/*
* mbuf, cluster, and external object allocation macros (for compatibility
* purposes).
@@ -903,6 +958,7 @@ struct mbuf *m_unshare(struct mbuf *, int how);
#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */
#define PACKET_TAG_CARP 28 /* CARP info */
#define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */
+#define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */
/* Specific cookies and tags. */
@@ -1018,7 +1074,7 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
#define M_SETFIB(_m, _fib) do { \
_m->m_flags &= ~M_FIB; \
_m->m_flags |= (((_fib) << M_FIBSHIFT) & M_FIB); \
-} while (0)
+} while (0)
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/module_khelp.h b/freebsd/sys/sys/module_khelp.h
index 9a7f507d..371e4c31 100644
--- a/freebsd/sys/sys/module_khelp.h
+++ b/freebsd/sys/sys/module_khelp.h
@@ -70,22 +70,6 @@ struct khelp_modevent_data {
uma_dtor umadtor;
};
-#define KHELP_DECLARE_MOD(hname, hdata, hhooks, version) \
- static struct khelp_modevent_data kmd_##hname = { \
- .name = #hname, \
- .helper = hdata \
- .hooks = hhooks, \
- .nhooks = sizeof(hhooks) / sizeof(hhooks[0]), \
- }; \
- static moduledata_t h_##hname = { \
- .name = #hname, \
- .evhand = khelp_modevent, \
- .priv = &kmd_##hname \
- }; \
- DECLARE_MODULE(hname, h_##hname, SI_SUB_PROTO_IFATTACHDOMAIN, \
- SI_ORDER_ANY); \
- MODULE_VERSION(hname, version)
-
#define KHELP_DECLARE_MOD_UMA(hname, hdata, hhooks, version, size, ctor, dtor) \
static struct khelp_modevent_data kmd_##hname = { \
.name = #hname, \
@@ -101,10 +85,12 @@ struct khelp_modevent_data {
.evhand = khelp_modevent, \
.priv = &kmd_##hname \
}; \
- DECLARE_MODULE(hname, h_##hname, SI_SUB_PROTO_IFATTACHDOMAIN, \
- SI_ORDER_ANY); \
+ DECLARE_MODULE(hname, h_##hname, SI_SUB_KLD, SI_ORDER_ANY); \
MODULE_VERSION(hname, version)
+#define KHELP_DECLARE_MOD(hname, hdata, hhooks, version) \
+ KHELP_DECLARE_MOD_UMA(hname, hdata, hhooks, version, 0, NULL, NULL)
+
int khelp_modevent(module_t mod, int type, void *data);
#endif /* _SYS_MODULE_KHELP_H_ */
diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h
index efa17492..0001016b 100644
--- a/freebsd/sys/sys/mount.h
+++ b/freebsd/sys/sys/mount.h
@@ -148,6 +148,7 @@ struct vfsopt {
* Lock reference:
* m - mountlist_mtx
* i - interlock
+ * v - vnode freelist mutex
*
* Unmarked fields are considered stable as long as a ref is held.
*
@@ -164,11 +165,12 @@ struct mount {
int mnt_ref; /* (i) Reference count */
struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */
int mnt_nvnodelistsize; /* (i) # of vnodes */
+ struct vnodelst mnt_activevnodelist; /* (v) list of active vnodes */
+ int mnt_activevnodelistsize;/* (v) # of active vnodes */
int mnt_writeopcount; /* (i) write syscalls pending */
int mnt_kern_flag; /* (i) kernel only flags */
- u_int mnt_flag; /* (i) flags shared with user */
- u_int mnt_xflag; /* (i) more flags shared with user */
- u_int mnt_noasync; /* (i) # noasync overrides */
+ uint64_t mnt_flag; /* (i) flags shared with user */
+ u_int mnt_pad_noasync;
struct vfsoptlist *mnt_opt; /* current mount options */
struct vfsoptlist *mnt_optnew; /* new options passed to fs */
int mnt_maxsymlinklen; /* max size of short symlink */
@@ -187,8 +189,48 @@ struct mount {
#define mnt_endzero mnt_gjprovider
char *mnt_gjprovider; /* gjournal provider name */
struct lock mnt_explock; /* vfs_export walkers lock */
+ TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */
+ TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
};
+/*
+ * Definitions for MNT_VNODE_FOREACH_ALL.
+ */
+struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp);
+struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp);
+void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
+
+#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \
+ for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \
+ (vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp)))
+
+#define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \
+ do { \
+ MNT_ILOCK(mp); \
+ __mnt_vnode_markerfree_all(&(mvp), (mp)); \
+ /* MNT_IUNLOCK(mp); -- done in above function */ \
+ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \
+ } while (0)
+
+/*
+ * Definitions for MNT_VNODE_FOREACH_ACTIVE.
+ */
+struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
+struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
+void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
+
+#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
+ for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
+ (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
+
+#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \
+ __mnt_vnode_markerfree_active(&(mvp), (mp))
+
+/*
+ * Definitions for MNT_VNODE_FOREACH.
+ *
+ * This interface has been deprecated in favor of MNT_VNODE_FOREACH_ALL.
+ */
struct vnode *__mnt_vnode_next(struct vnode **mvp, struct mount *mp);
struct vnode *__mnt_vnode_first(struct vnode **mvp, struct mount *mp);
void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
@@ -224,43 +266,44 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
/*
* User specifiable flags, stored in mnt_flag.
*/
-#define MNT_RDONLY 0x00000001 /* read only filesystem */
-#define MNT_SYNCHRONOUS 0x00000002 /* filesystem written synchronously */
-#define MNT_NOEXEC 0x00000004 /* can't exec from filesystem */
-#define MNT_NOSUID 0x00000008 /* don't honor setuid bits on fs */
-#define MNT_UNION 0x00000020 /* union with underlying filesystem */
-#define MNT_ASYNC 0x00000040 /* filesystem written asynchronously */
-#define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */
-#define MNT_SOFTDEP 0x00200000 /* soft updates being done */
-#define MNT_NOSYMFOLLOW 0x00400000 /* do not follow symlinks */
-#define MNT_GJOURNAL 0x02000000 /* GEOM journal support enabled */
-#define MNT_MULTILABEL 0x04000000 /* MAC support for individual objects */
-#define MNT_ACLS 0x08000000 /* ACL support enabled */
-#define MNT_NOATIME 0x10000000 /* disable update of file access time */
-#define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */
-#define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */
-#define MNT_NFS4ACLS 0x00000010
+#define MNT_RDONLY 0x0000000000000001ULL /* read only filesystem */
+#define MNT_SYNCHRONOUS 0x0000000000000002ULL /* fs written synchronously */
+#define MNT_NOEXEC 0x0000000000000004ULL /* can't exec from filesystem */
+#define MNT_NOSUID 0x0000000000000008ULL /* don't honor setuid fs bits */
+#define MNT_NFS4ACLS 0x0000000000000010ULL /* enable NFS version 4 ACLs */
+#define MNT_UNION 0x0000000000000020ULL /* union with underlying fs */
+#define MNT_ASYNC 0x0000000000000040ULL /* fs written asynchronously */
+#define MNT_SUIDDIR 0x0000000000100000ULL /* special SUID dir handling */
+#define MNT_SOFTDEP 0x0000000000200000ULL /* using soft updates */
+#define MNT_NOSYMFOLLOW 0x0000000000400000ULL /* do not follow symlinks */
+#define MNT_GJOURNAL 0x0000000002000000ULL /* GEOM journal support enabled */
+#define MNT_MULTILABEL 0x0000000004000000ULL /* MAC support for objects */
+#define MNT_ACLS 0x0000000008000000ULL /* ACL support enabled */
+#define MNT_NOATIME 0x0000000010000000ULL /* dont update file access time */
+#define MNT_NOCLUSTERR 0x0000000040000000ULL /* disable cluster read */
+#define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */
+#define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */
/*
* NFS export related mount flags.
*/
-#define MNT_EXRDONLY 0x00000080 /* exported read only */
-#define MNT_EXPORTED 0x00000100 /* filesystem is exported */
-#define MNT_DEFEXPORTED 0x00000200 /* exported to the world */
-#define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */
-#define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */
-#define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */
+#define MNT_EXRDONLY 0x0000000000000080ULL /* exported read only */
+#define MNT_EXPORTED 0x0000000000000100ULL /* filesystem is exported */
+#define MNT_DEFEXPORTED 0x0000000000000200ULL /* exported to the world */
+#define MNT_EXPORTANON 0x0000000000000400ULL /* anon uid mapping for all */
+#define MNT_EXKERB 0x0000000000000800ULL /* exported with Kerberos */
+#define MNT_EXPUBLIC 0x0000000020000000ULL /* public export (WebNFS) */
/*
* Flags set by internal operations,
* but visible to the user.
* XXX some of these are not quite right.. (I've never seen the root flag set)
*/
-#define MNT_LOCAL 0x00001000 /* filesystem is stored locally */
-#define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */
-#define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */
-#define MNT_USER 0x00008000 /* mounted by a user */
-#define MNT_IGNORE 0x00800000 /* do not show entry in df */
+#define MNT_LOCAL 0x0000000000001000ULL /* filesystem is stored locally */
+#define MNT_QUOTA 0x0000000000002000ULL /* quotas are enabled on fs */
+#define MNT_ROOTFS 0x0000000000004000ULL /* identifies the root fs */
+#define MNT_USER 0x0000000000008000ULL /* mounted by a user */
+#define MNT_IGNORE 0x0000000000800000ULL /* do not show entry in df */
/*
* Mask of flags that are visible to statfs().
@@ -268,14 +311,15 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
* but the 'mount' program may need changing to handle this.
*/
#define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \
- MNT_NOSUID | MNT_UNION | \
+ MNT_NOSUID | MNT_UNION | MNT_SUJ | \
MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \
MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \
MNT_LOCAL | MNT_USER | MNT_QUOTA | \
MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \
- MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | MNT_NFS4ACLS)
+ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \
+ MNT_NFS4ACLS)
/* Mask of flags that can be updated. */
#define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \
@@ -292,12 +336,12 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
* XXX: MNT_BYFSID collides with MNT_ACLS, but because MNT_ACLS is only used for
* mount(2) and MNT_BYFSID is only used for unmount(2) it's harmless.
*/
-#define MNT_UPDATE 0x00010000 /* not a real mount, just an update */
-#define MNT_DELEXPORT 0x00020000 /* delete export host lists */
-#define MNT_RELOAD 0x00040000 /* reload filesystem data */
-#define MNT_FORCE 0x00080000 /* force unmount or readonly change */
-#define MNT_SNAPSHOT 0x01000000 /* snapshot the filesystem */
-#define MNT_BYFSID 0x08000000 /* specify filesystem by ID. */
+#define MNT_UPDATE 0x0000000000010000ULL /* not real mount, just update */
+#define MNT_DELEXPORT 0x0000000000020000ULL /* delete export host lists */
+#define MNT_RELOAD 0x0000000000040000ULL /* reload filesystem data */
+#define MNT_FORCE 0x0000000000080000ULL /* force unmount or readonly */
+#define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */
+#define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */
#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
MNT_FORCE | MNT_SNAPSHOT | MNT_BYFSID)
/*
@@ -324,6 +368,15 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */
#define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */
#define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */
+#define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads
+ and writes. Filesystem shall properly
+ handle i/o state on EFAULT. */
+#define MNTK_VGONE_UPPER 0x00000200
+#define MNTK_VGONE_WAITER 0x00000400
+#define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800
+#define MNTK_MARKER 0x00001000
+#define MNTK_UNMAPPED_BUFS 0x00002000
+#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */
@@ -465,6 +518,7 @@ struct ovfsconf {
#define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */
#define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */
#define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */
+#define VFCF_SBDRY 0x01000000 /* defer stop requests */
typedef uint32_t fsctlop_t;
@@ -556,7 +610,7 @@ struct nameidata;
struct sysctl_req;
struct mntarg;
-typedef int vfs_cmount_t(struct mntarg *ma, void *data, int flags);
+typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags);
typedef int vfs_unmount_t(struct mount *mp, int mntflags);
typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp);
typedef int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg);
@@ -564,7 +618,8 @@ typedef int vfs_statfs_t(struct mount *mp, struct statfs *sbp);
typedef int vfs_sync_t(struct mount *mp, int waitfor);
typedef int vfs_vget_t(struct mount *mp, ino_t ino, int flags,
struct vnode **vpp);
-typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp, struct vnode **vpp);
+typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp,
+ int flags, struct vnode **vpp);
typedef int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam,
int *extflagsp, struct ucred **credanonp,
int *numsecflavors, int **secflavors);
@@ -577,6 +632,7 @@ typedef int vfs_mount_t(struct mount *mp);
typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
struct sysctl_req *req);
typedef void vfs_susp_clean_t(struct mount *mp);
+typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
struct vfsops {
vfs_mount_t *vfs_mount;
@@ -594,32 +650,12 @@ struct vfsops {
vfs_extattrctl_t *vfs_extattrctl;
vfs_sysctl_t *vfs_sysctl;
vfs_susp_clean_t *vfs_susp_clean;
+ vfs_notify_lowervp_t *vfs_reclaim_lowervp;
+ vfs_notify_lowervp_t *vfs_unlink_lowervp;
};
vfs_statfs_t __vfs_statfs;
-#define VFS_MOUNT(MP) (*(MP)->mnt_op->vfs_mount)(MP)
-#define VFS_UNMOUNT(MP, FORCE) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE)
-#define VFS_ROOT(MP, FLAGS, VPP) \
- (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP)
-#define VFS_QUOTACTL(MP, C, U, A) \
- (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A)
-#define VFS_STATFS(MP, SBP) __vfs_statfs((MP), (SBP))
-#define VFS_SYNC(MP, WAIT) (*(MP)->mnt_op->vfs_sync)(MP, WAIT)
-#define VFS_VGET(MP, INO, FLAGS, VPP) \
- (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP)
-#define VFS_FHTOVP(MP, FIDP, VPP) \
- (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP)
-#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) \
- (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC, SEC)
-#define VFS_EXTATTRCTL(MP, C, FN, NS, N) \
- (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N)
-#define VFS_SYSCTL(MP, OP, REQ) \
- (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ)
-#define VFS_SUSP_CLEAN(MP) \
- ({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \
- (*(MP)->mnt_op->vfs_susp_clean)(MP); })
-
#define VFS_NEEDSGIANT_(MP) \
((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0)
@@ -642,7 +678,11 @@ vfs_statfs_t __vfs_statfs;
_locked = 0; \
_locked; \
})
-#define VFS_UNLOCK_GIANT(locked) if ((locked)) mtx_unlock(&Giant);
+#define VFS_UNLOCK_GIANT(locked) do \
+{ \
+ if ((locked)) \
+ mtx_unlock(&Giant); \
+} while (0)
#define VFS_ASSERT_GIANT(MP) do \
{ \
struct mount *_mp; \
@@ -651,6 +691,130 @@ vfs_statfs_t __vfs_statfs;
mtx_assert(&Giant, MA_OWNED); \
} while (0)
+#define VFS_PROLOGUE(MP) do { \
+ int _enable_stops; \
+ \
+ _enable_stops = ((MP) != NULL && \
+ ((MP)->mnt_vfc->vfc_flags & VFCF_SBDRY) && sigdeferstop())
+
+#define VFS_EPILOGUE(MP) \
+ if (_enable_stops) \
+ sigallowstop(); \
+} while (0)
+
+#define VFS_MOUNT(MP) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_mount)(MP); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_UNMOUNT(MP, FORCE) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_ROOT(MP, FLAGS, VPP) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_QUOTACTL(MP, C, U, A) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_STATFS(MP, SBP) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = __vfs_statfs((MP), (SBP)); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_SYNC(MP, WAIT) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_VGET(MP, INO, FLAGS, VPP) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\
+ SEC); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_SYSCTL(MP, OP, REQ) ({ \
+ int _rc; \
+ \
+ VFS_PROLOGUE(MP); \
+ _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \
+ VFS_EPILOGUE(MP); \
+ _rc; })
+
+#define VFS_SUSP_CLEAN(MP) do { \
+ if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \
+ VFS_PROLOGUE(MP); \
+ (*(MP)->mnt_op->vfs_susp_clean)(MP); \
+ VFS_EPILOGUE(MP); \
+ } \
+} while (0)
+
+#define VFS_RECLAIM_LOWERVP(MP, VP) do { \
+ if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \
+ VFS_PROLOGUE(MP); \
+ (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \
+ VFS_EPILOGUE(MP); \
+ } \
+} while (0)
+
+#define VFS_UNLINK_LOWERVP(MP, VP) do { \
+ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \
+ VFS_PROLOGUE(MP); \
+ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \
+ VFS_EPILOGUE(MP); \
+ } \
+} while (0)
+
#define VFS_KNOTE_LOCKED(vp, hint) do \
{ \
if (((vp)->v_vflag & VV_NOKNOTE) == 0) \
@@ -663,6 +827,9 @@ vfs_statfs_t __vfs_statfs;
VN_KNOTE((vp), (hint), 0); \
} while (0)
+#define VFS_NOTIFY_UPPER_RECLAIM 1
+#define VFS_NOTIFY_UPPER_UNLINK 2
+
#include <sys/module.h>
/*
@@ -694,7 +861,7 @@ extern char *mountrootfsname;
int dounmount(struct mount *, int, struct thread *);
-int kernel_mount(struct mntarg *ma, int flags);
+int kernel_mount(struct mntarg *ma, uint64_t flags);
int kernel_vmount(int flags, ...);
struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len);
struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name);
@@ -708,7 +875,8 @@ void vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
void vfs_freeopts(struct vfsoptlist *opts);
void vfs_deleteopt(struct vfsoptlist *opts, const char *name);
int vfs_buildopts(struct uio *auio, struct vfsoptlist **options);
-int vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val);
+int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
+ uint64_t val);
int vfs_getopt(struct vfsoptlist *, const char *, void **, int *);
int vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
char *vfs_getopts(struct vfsoptlist *, const char *, int *error);
@@ -728,8 +896,10 @@ void vfs_msync(struct mount *, int);
int vfs_busy(struct mount *, int);
int vfs_export /* process mount export info */
(struct mount *, struct export_args *);
-int vfs_allocate_syncvnode(struct mount *);
-int vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions);
+void vfs_allocate_syncvnode(struct mount *);
+void vfs_deallocate_syncvnode(struct mount *);
+int vfs_donmount(struct thread *td, uint64_t fsflags,
+ struct uio *fsoptions);
void vfs_getnewfsid(struct mount *);
struct cdev *vfs_getrootfsid(struct mount *);
struct mount *vfs_getvfs(fsid_t *); /* return vfs given fsid */
@@ -738,6 +908,7 @@ int vfs_modevent(module_t, int, void *);
void vfs_mount_error(struct mount *, const char *, ...);
void vfs_mountroot(void); /* mount our root filesystem */
void vfs_mountedfrom(struct mount *, const char *from);
+void vfs_notify_upper(struct vnode *, int);
void vfs_oexport_conv(const struct oexport_args *oexp,
struct export_args *exp);
void vfs_ref(struct mount *);
diff --git a/freebsd/sys/sys/mutex.h b/freebsd/sys/sys/mutex.h
index 447ca17f..0e356e15 100644
--- a/freebsd/sys/sys/mutex.h
+++ b/freebsd/sys/sys/mutex.h
@@ -32,7 +32,6 @@
#ifndef _SYS_MUTEX_H_
#define _SYS_MUTEX_H_
-#ifndef LOCORE
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -43,12 +42,6 @@
#include <sys/lockstat.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
-#endif /* _KERNEL_ */
-#endif /* !LOCORE */
-
-#include <machine/mutex.h>
-
-#ifdef _KERNEL
#ifdef __rtems__
#define MUTEX_NOINLINE 1
@@ -84,18 +77,6 @@
*/
#define MTX_DESTROYED (MTX_CONTESTED | MTX_UNOWNED)
-#endif /* _KERNEL */
-
-#ifndef LOCORE
-
-/*
- * XXX: Friendly reminder to fix things in MP code that is presently being
- * XXX: worked on.
- */
-#define mp_fixme(string)
-
-#ifdef _KERNEL
-
/*
* Prototypes
*
@@ -103,6 +84,10 @@
* of the kernel via macros, thus allowing us to use the cpp LOCK_FILE
* and LOCK_LINE. These functions should not be called directly by any
* code using the API. Their macros cover their functionality.
+ * Functions with a `_' suffix are the entrypoint for the common
+ * KPI covering both compat shims and fast path case. These can be
+ * used by consumers willing to pass options, file and line
+ * informations, in an option-independent way.
*
* [See below for descriptions]
*
@@ -136,7 +121,12 @@ void _mtx_assert(struct mtx *m, int what, const char *file, int line);
#endif
void _thread_lock_flags(struct thread *, int, const char *, int);
+#define mtx_trylock_flags_(m, opts, file, line) \
+ _mtx_trylock((m), (opts), (file), (line))
+
#ifndef __rtems__
+#define thread_lock_flags_(tdp, opts, file, line) \
+ _thread_lock_flags((tdp), (opts), (file), (line))
#define thread_lock(tdp) \
_thread_lock_flags((tdp), 0, __FILE__, __LINE__)
#define thread_lock_flags(tdp, opt) \
@@ -151,68 +141,59 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
#define mtx_recurse lock_object.lo_data
-/*
- * We define our machine-independent (unoptimized) mutex micro-operations
- * here, if they are not already defined in the machine-dependent mutex.h
- */
+/* Very simple operations on mtx_lock. */
/* Try to obtain mtx_lock once. */
-#ifndef _obtain_lock
-#define _obtain_lock(mp, tid) \
+#define _mtx_obtain_lock(mp, tid) \
atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
-#endif
/* Try to release mtx_lock if it is unrecursed and uncontested. */
-#ifndef _release_lock
-#define _release_lock(mp, tid) \
+#define _mtx_release_lock(mp, tid) \
atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED)
-#endif
/* Release mtx_lock quickly, assuming we own it. */
-#ifndef _release_lock_quick
-#define _release_lock_quick(mp) \
+#define _mtx_release_lock_quick(mp) \
atomic_store_rel_ptr(&(mp)->mtx_lock, MTX_UNOWNED)
-#endif
/*
- * Obtain a sleep lock inline, or call the "hard" function if we can't get it
- * easy.
+ * Full lock operations that are suitable to be inlined in non-debug
+ * kernels. If the lock cannot be acquired or released trivially then
+ * the work is deferred to another function.
*/
-#ifndef _get_sleep_lock
-#define _get_sleep_lock(mp, tid, opts, file, line) do { \
+
+/* Lock a normal mutex. */
+#define __mtx_lock(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
- if (!_obtain_lock((mp), _tid)) \
+ \
+ if (!_mtx_obtain_lock((mp), _tid)) \
_mtx_lock_sleep((mp), _tid, (opts), (file), (line)); \
else \
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, \
mp, 0, 0, (file), (line)); \
} while (0)
-#endif
/*
- * Obtain a spin lock inline, or call the "hard" function if we can't get it
- * easy. For spinlocks, we handle recursion inline (it turns out that function
- * calls can be significantly expensive on some architectures).
- * Since spin locks are not _too_ common, inlining this code is not too big
- * a deal.
+ * Lock a spin mutex. For spinlocks, we handle recursion inline (it
+ * turns out that function calls can be significantly expensive on
+ * some architectures). Since spin locks are not _too_ common,
+ * inlining this code is not too big a deal.
*/
-#ifndef _get_spin_lock
#ifdef SMP
-#define _get_spin_lock(mp, tid, opts, file, line) do { \
+#define __mtx_lock_spin(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
+ \
spinlock_enter(); \
- if (!_obtain_lock((mp), _tid)) { \
+ if (!_mtx_obtain_lock((mp), _tid)) { \
if ((mp)->mtx_lock == _tid) \
(mp)->mtx_recurse++; \
- else { \
+ else \
_mtx_lock_spin((mp), _tid, (opts), (file), (line)); \
- } \
} else \
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, \
mp, 0, 0, (file), (line)); \
} while (0)
#else /* SMP */
-#define _get_spin_lock(mp, tid, opts, file, line) do { \
+#define __mtx_lock_spin(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
\
spinlock_enter(); \
@@ -220,49 +201,42 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
(mp)->mtx_recurse++; \
else { \
KASSERT((mp)->mtx_lock == MTX_UNOWNED, ("corrupt spinlock")); \
- (mp)->mtx_lock = _tid; \
+ (mp)->mtx_lock = _tid; \
} \
} while (0)
#endif /* SMP */
-#endif
-/*
- * Release a sleep lock inline, or call the "hard" function if we can't do it
- * easy.
- */
-#ifndef _rel_sleep_lock
-#define _rel_sleep_lock(mp, tid, opts, file, line) do { \
+/* Unlock a normal mutex. */
+#define __mtx_unlock(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
\
- if (!_release_lock((mp), _tid)) \
+ if (!_mtx_release_lock((mp), _tid)) \
_mtx_unlock_sleep((mp), (opts), (file), (line)); \
} while (0)
-#endif
/*
- * For spinlocks, we can handle everything inline, as it's pretty simple and
- * a function call would be too expensive (at least on some architectures).
- * Since spin locks are not _too_ common, inlining this code is not too big
- * a deal.
+ * Unlock a spin mutex. For spinlocks, we can handle everything
+ * inline, as it's pretty simple and a function call would be too
+ * expensive (at least on some architectures). Since spin locks are
+ * not _too_ common, inlining this code is not too big a deal.
*
* Since we always perform a spinlock_enter() when attempting to acquire a
* spin lock, we need to always perform a matching spinlock_exit() when
* releasing a spin lock. This includes the recursion cases.
*/
-#ifndef _rel_spin_lock
#ifdef SMP
-#define _rel_spin_lock(mp) do { \
+#define __mtx_unlock_spin(mp) do { \
if (mtx_recursed((mp))) \
(mp)->mtx_recurse--; \
else { \
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_SPIN_UNLOCK_RELEASE, \
mp); \
- _release_lock_quick((mp)); \
+ _mtx_release_lock_quick((mp)); \
} \
spinlock_exit(); \
} while (0)
#else /* SMP */
-#define _rel_spin_lock(mp) do { \
+#define __mtx_unlock_spin(mp) do { \
if (mtx_recursed((mp))) \
(mp)->mtx_recurse--; \
else { \
@@ -273,7 +247,6 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
spinlock_exit(); \
} while (0)
#endif /* SMP */
-#endif
/*
* Exported lock manipulation interface.
@@ -340,27 +313,48 @@ extern struct mtx_pool *mtxpool_sleep;
#error LOCK_DEBUG not defined, include <sys/lock.h> before <sys/mutex.h>
#endif
#if LOCK_DEBUG > 0 || defined(MUTEX_NOINLINE)
-#define mtx_lock_flags(m, opts) \
- _mtx_lock_flags((m), (opts), LOCK_FILE, LOCK_LINE)
-#define mtx_unlock_flags(m, opts) \
- _mtx_unlock_flags((m), (opts), LOCK_FILE, LOCK_LINE)
-#define mtx_lock_spin_flags(m, opts) \
- _mtx_lock_spin_flags((m), (opts), LOCK_FILE, LOCK_LINE)
-#define mtx_unlock_spin_flags(m, opts) \
- _mtx_unlock_spin_flags((m), (opts), LOCK_FILE, LOCK_LINE)
+#define mtx_lock_flags_(m, opts, file, line) \
+ _mtx_lock_flags((m), (opts), (file), (line))
+#define mtx_unlock_flags_(m, opts, file, line) \
+ _mtx_unlock_flags((m), (opts), (file), (line))
+#define mtx_lock_spin_flags_(m, opts, file, line) \
+ _mtx_lock_spin_flags((m), (opts), (file), (line))
+#define mtx_unlock_spin_flags_(m, opts, file, line) \
+ _mtx_unlock_spin_flags((m), (opts), (file), (line))
#else /* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */
+#define mtx_lock_flags_(m, opts, file, line) \
+ __mtx_lock((m), curthread, (opts), (file), (line))
+#define mtx_unlock_flags_(m, opts, file, line) \
+ __mtx_unlock((m), curthread, (opts), (file), (line))
+#define mtx_lock_spin_flags_(m, opts, file, line) \
+ __mtx_lock_spin((m), curthread, (opts), (file), (line))
+#define mtx_unlock_spin_flags_(m, opts, file, line) \
+ __mtx_unlock_spin((m))
+#endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */
+
+#ifdef INVARIANTS
+#define mtx_assert_(m, what, file, line) \
+ _mtx_assert((m), (what), (file), (line))
+
+#define GIANT_REQUIRED mtx_assert_(&Giant, MA_OWNED, __FILE__, __LINE__)
+
+#else /* INVARIANTS */
+#define mtx_assert_(m, what, file, line) (void)0
+#define GIANT_REQUIRED
+#endif /* INVARIANTS */
+
#define mtx_lock_flags(m, opts) \
- _get_sleep_lock((m), curthread, (opts), LOCK_FILE, LOCK_LINE)
+ mtx_lock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_unlock_flags(m, opts) \
- _rel_sleep_lock((m), curthread, (opts), LOCK_FILE, LOCK_LINE)
+ mtx_unlock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_lock_spin_flags(m, opts) \
- _get_spin_lock((m), curthread, (opts), LOCK_FILE, LOCK_LINE)
+ mtx_lock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_unlock_spin_flags(m, opts) \
- _rel_spin_lock((m))
-#endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */
-
+ mtx_unlock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_trylock_flags(m, opts) \
- _mtx_trylock((m), (opts), LOCK_FILE, LOCK_LINE)
+ mtx_trylock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
+#define mtx_assert(m, what) \
+ mtx_assert_((m), (what), __FILE__, __LINE__)
#define mtx_sleep(chan, mtx, pri, wmesg, timo) \
_sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo))
@@ -454,22 +448,10 @@ struct mtx_args {
#define MA_NOTRECURSED LA_NOTRECURSED
#endif
-#ifdef INVARIANTS
-#define mtx_assert(m, what) \
- _mtx_assert((m), (what), __FILE__, __LINE__)
-
-#define GIANT_REQUIRED mtx_assert(&Giant, MA_OWNED)
-
-#else /* INVARIANTS */
-#define mtx_assert(m, what) (void)0
-#define GIANT_REQUIRED
-#endif /* INVARIANTS */
-
/*
* Common lock type names.
*/
#define MTX_NETWORK_LOCK "network driver"
#endif /* _KERNEL */
-#endif /* !LOCORE */
#endif /* _SYS_MUTEX_H_ */
diff --git a/freebsd/sys/sys/nlist_aout.h b/freebsd/sys/sys/nlist_aout.h
index a4e11c39..fc7a3c78 100644
--- a/freebsd/sys/sys/nlist_aout.h
+++ b/freebsd/sys/sys/nlist_aout.h
@@ -51,11 +51,11 @@
struct nlist {
#ifdef _AOUT_INCLUDE_
union {
- char *n_name; /* symbol name (in memory) */
+ const char *n_name; /* symbol name (in memory) */
long n_strx; /* file string table offset (on disk) */
} n_un;
#else
- char *n_name; /* symbol name (in memory) */
+ const char *n_name; /* symbol name (in memory) */
int : 8 * (sizeof(long) > sizeof(char *) ?
sizeof(long) - sizeof(char *) : sizeof(char *) - sizeof(long));
#endif
diff --git a/freebsd/sys/sys/pcpu.h b/freebsd/sys/sys/pcpu.h
index 599af11b..ec3f9f94 100644
--- a/freebsd/sys/sys/pcpu.h
+++ b/freebsd/sys/sys/pcpu.h
@@ -37,16 +37,24 @@
#error "no assembler-serviceable parts inside"
#endif
+#include <sys/_cpuset.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>
#include <rtems/bsd/sys/resource.h>
#include <machine/pcpu.h>
+#define DPCPU_SETNAME "set_pcpu"
+#define DPCPU_SYMPREFIX "pcpu_entry_"
+
+#ifdef _KERNEL
+
/*
* Define a set for pcpu data.
*/
extern uintptr_t *__start_set_pcpu;
+__GLOBL(__start_set_pcpu);
extern uintptr_t *__stop_set_pcpu;
+__GLOBL(__stop_set_pcpu);
/*
* Array of dynamic pcpu base offsets. Indexed by id.
@@ -68,7 +76,7 @@ extern uintptr_t dpcpu_off[];
*/
#define DPCPU_NAME(n) pcpu_entry_##n
#define DPCPU_DECLARE(t, n) extern t DPCPU_NAME(n)
-#define DPCPU_DEFINE(t, n) t DPCPU_NAME(n) __section("set_pcpu") __used
+#define DPCPU_DEFINE(t, n) t DPCPU_NAME(n) __section(DPCPU_SETNAME) __used
/*
* Accessors with a given base.
@@ -131,6 +139,8 @@ extern uintptr_t dpcpu_off[];
} \
} while(0)
+#endif /* _KERNEL */
+
/*
* XXXUPS remove as soon as we have per cpu variable
* linker sets and can define rm_queue in _rm_lock.h
@@ -140,8 +150,6 @@ struct rm_queue {
struct rm_queue* volatile rmq_prev;
};
-#define PCPU_NAME_LEN (sizeof("CPU ") + sizeof(__XSTRING(MAXCPU) + 1))
-
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. The members are accessed via the PCPU_GET/SET/PTR
@@ -157,19 +165,16 @@ struct pcpu {
uint64_t pc_switchtime; /* cpu_ticks() at last csw */
int pc_switchticks; /* `ticks' at last csw */
u_int pc_cpuid; /* This cpu number */
- cpumask_t pc_cpumask; /* This cpu mask */
- cpumask_t pc_other_cpus; /* Mask of all other cpus */
- SLIST_ENTRY(pcpu) pc_allcpu;
+ STAILQ_ENTRY(pcpu) pc_allcpu;
struct lock_list_entry *pc_spinlocks;
-#ifdef KTR
- char pc_name[PCPU_NAME_LEN]; /* String name for KTR */
-#endif
#ifndef __rtems__
struct vmmeter pc_cnt; /* VM stats counters */
#endif
long pc_cp_time[CPUSTATES]; /* statclock ticks */
struct device *pc_device;
void *pc_netisr; /* netisr SWI cookie */
+ int pc_dnweight; /* vm_page_dontneed() */
+ int pc_domain; /* Memory domain. */
/*
* Stuff for read mostly lock
@@ -192,14 +197,14 @@ struct pcpu {
* if only to make kernel debugging easier.
*/
PCPU_MD_FIELDS;
-} __aligned(128);
+} __aligned(CACHE_LINE_SIZE);
#ifdef _KERNEL
-SLIST_HEAD(cpuhead, pcpu);
+STAILQ_HEAD(cpuhead, pcpu);
extern struct cpuhead cpuhead;
-extern struct pcpu *cpuid_to_pcpu[MAXCPU];
+extern struct pcpu *cpuid_to_pcpu[];
#define curcpu PCPU_GET(cpuid)
#define curproc (curthread->td_proc)
diff --git a/freebsd/sys/sys/priority.h b/freebsd/sys/sys/priority.h
index a3493715..6548a35c 100644
--- a/freebsd/sys/sys/priority.h
+++ b/freebsd/sys/sys/priority.h
@@ -67,10 +67,10 @@
* Priorities range from 0 to 255, but differences of less then 4 (RQ_PPQ)
* are insignificant. Ranges are as follows:
*
- * Interrupt threads: 0 - 63
- * Top half kernel threads: 64 - 127
- * Realtime user threads: 128 - 159
- * Time sharing user threads: 160 - 223
+ * Interrupt threads: 0 - 47
+ * Realtime user threads: 48 - 79
+ * Top half kernel threads: 80 - 119
+ * Time sharing user threads: 120 - 223
* Idle user threads: 224 - 255
*
* XXX If/When the specific interrupt thread and top half thread ranges
@@ -81,19 +81,22 @@
#define PRI_MAX (255) /* Lowest priority. */
#define PRI_MIN_ITHD (PRI_MIN)
-#define PRI_MAX_ITHD (PRI_MIN_KERN - 1)
+#define PRI_MAX_ITHD (PRI_MIN_REALTIME - 1)
#define PI_REALTIME (PRI_MIN_ITHD + 0)
#define PI_AV (PRI_MIN_ITHD + 4)
-#define PI_NET (PRI_MIN_ITHD + 16)
-#define PI_DISK (PRI_MIN_ITHD + 20)
-#define PI_TTY (PRI_MIN_ITHD + 24)
-#define PI_DULL (PRI_MIN_ITHD + 32)
-#define PI_SOFT (PRI_MIN_ITHD + 36)
+#define PI_NET (PRI_MIN_ITHD + 8)
+#define PI_DISK (PRI_MIN_ITHD + 12)
+#define PI_TTY (PRI_MIN_ITHD + 16)
+#define PI_DULL (PRI_MIN_ITHD + 20)
+#define PI_SOFT (PRI_MIN_ITHD + 24)
#define PI_SWI(x) (PI_SOFT + (x) * RQ_PPQ)
-#define PRI_MIN_KERN (64)
-#define PRI_MAX_KERN (PRI_MIN_REALTIME - 1)
+#define PRI_MIN_REALTIME (48)
+#define PRI_MAX_REALTIME (PRI_MIN_KERN - 1)
+
+#define PRI_MIN_KERN (80)
+#define PRI_MAX_KERN (PRI_MIN_TIMESHARE - 1)
#define PSWP (PRI_MIN_KERN + 0)
#define PVM (PRI_MIN_KERN + 4)
@@ -103,13 +106,10 @@
#define PZERO (PRI_MIN_KERN + 20)
#define PSOCK (PRI_MIN_KERN + 24)
#define PWAIT (PRI_MIN_KERN + 28)
-#define PLOCK (PRI_MIN_KERN + 36)
-#define PPAUSE (PRI_MIN_KERN + 40)
-
-#define PRI_MIN_REALTIME (128)
-#define PRI_MAX_REALTIME (PRI_MIN_TIMESHARE - 1)
+#define PLOCK (PRI_MIN_KERN + 32)
+#define PPAUSE (PRI_MIN_KERN + 36)
-#define PRI_MIN_TIMESHARE (160)
+#define PRI_MIN_TIMESHARE (120)
#define PRI_MAX_TIMESHARE (PRI_MIN_IDLE - 1)
#define PUSER (PRI_MIN_TIMESHARE)
diff --git a/freebsd/sys/sys/priv.h b/freebsd/sys/sys/priv.h
index d82e9816..1797714a 100644
--- a/freebsd/sys/sys/priv.h
+++ b/freebsd/sys/sys/priv.h
@@ -156,6 +156,7 @@
#define PRIV_PROC_LIMIT 160 /* Exceed user process limit. */
#define PRIV_PROC_SETLOGIN 161 /* Can call setlogin. */
#define PRIV_PROC_SETRLIMIT 162 /* Can raise resources limits. */
+#define PRIV_PROC_SETLOGINCLASS 163 /* Can call setloginclass(2). */
/* System V IPC privileges.
*/
@@ -484,9 +485,18 @@
#define PRIV_AFS_DAEMON 661 /* Can become the AFS daemon. */
/*
+ * Resource Limits privileges.
+ */
+#define PRIV_RCTL_GET_RACCT 670
+#define PRIV_RCTL_GET_RULES 671
+#define PRIV_RCTL_GET_LIMITS 672
+#define PRIV_RCTL_ADD_RULE 673
+#define PRIV_RCTL_REMOVE_RULE 674
+
+/*
* Track end of privilege list.
*/
-#define _PRIV_HIGHEST 662
+#define _PRIV_HIGHEST 675
/*
* Validate that a named privilege is known by the privilege system. Invalid
diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h
index d75dd7de..5605ef13 100644
--- a/freebsd/sys/sys/proc.h
+++ b/freebsd/sys/sys/proc.h
@@ -77,6 +77,7 @@ struct session {
u_int s_count; /* Ref cnt; pgrps in session - atomic. */
struct proc *s_leader; /* (m + e) Session leader. */
struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */
+ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */
struct tty *s_ttyp; /* (e) Controlling tty. */
pid_t s_sid; /* (c) Session ID. */
/* (m) Setlogin() name: */
@@ -156,20 +157,23 @@ struct pargs {
* either lock is sufficient for read access, but both locks must be held
* for write access.
*/
+struct cpuset;
+struct kaioinfo;
struct kaudit_record;
-struct td_sched;
+struct kdtrace_proc;
+struct kdtrace_thread;
+struct mqueue_notifier;
struct nlminfo;
-struct kaioinfo;
struct p_sched;
struct proc;
+struct procdesc;
+struct racct;
+struct sbuf;
struct sleepqueue;
+struct td_sched;
struct thread;
struct trapframe;
struct turnstile;
-struct mqueue_notifier;
-struct kdtrace_proc;
-struct kdtrace_thread;
-struct cpuset;
/*
* XXX: Does this belong in resource.h or resourcevar.h instead?
@@ -183,13 +187,13 @@ struct cpuset;
* Locking for td_rux: (t) for all fields.
*/
struct rusage_ext {
- u_int64_t rux_runtime; /* (cj) Real time. */
- u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
- u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
- u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
- u_int64_t rux_uu; /* (c) Previous user time in usec. */
- u_int64_t rux_su; /* (c) Previous sys time in usec. */
- u_int64_t rux_tu; /* (c) Previous total time in usec. */
+ uint64_t rux_runtime; /* (cj) Real time. */
+ uint64_t rux_uticks; /* (cj) Statclock hits in user mode. */
+ uint64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
+ uint64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
+ uint64_t rux_uu; /* (c) Previous user time in usec. */
+ uint64_t rux_su; /* (c) Previous sys time in usec. */
+ uint64_t rux_tu; /* (c) Previous total time in usec. */
};
/*
@@ -211,6 +215,7 @@ struct thread {
TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */
TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */
TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */
+ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */
struct cpuset *td_cpuset; /* (t) CPU affinity mask. */
#endif /* __rtems__ */
struct seltd *td_sel; /* Select queue/channel. */
@@ -221,6 +226,7 @@ struct thread {
lwpid_t td_tid; /* (b) Thread ID. */
sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */
#define td_siglist td_sigqueue.sq_signals
+ u_char td_lend_user_pri; /* (t) Lend user pri. */
/* Cleared during fork1() */
#define td_startzero td_flags
@@ -251,8 +257,10 @@ struct thread {
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
+ int td_swvoltick; /* (t) Time at last SW_VOL switch. */
u_int td_cow; /* (*) Number of copy-on-write faults */
struct rusage td_ru; /* (t) rusage information. */
+ struct rusage_ext td_rux; /* (t) Internal rusage information. */
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */
u_int td_pticks; /* (t) Statclock hits for profiling */
@@ -273,6 +281,8 @@ struct thread {
struct ksiginfo td_dbgksi; /* (c) ksi reflected to debugger. */
int td_ng_outbound; /* (k) Thread entered ng from above. */
struct osd td_osd; /* (k) Object specific data. */
+ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
+ pid_t td_dbg_forked; /* (c) Child pid for debugger. */
#define td_endzero td_rqindex
/* Copied during fork1() or thread_sched_upcall(). */
@@ -305,22 +315,20 @@ struct thread {
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
int td_kstack_pages; /* (a) Size of the kstack. */
- void *td_unused1;
- vm_offset_t td_unused2;
- int td_unused3;
volatile u_int td_critnest; /* (k*) Critical section nest level. */
struct mdthread td_md; /* (k) Any machine-dependent fields. */
struct td_sched *td_sched; /* (*) Scheduler-specific data. */
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
- int td_syscalls; /* per-thread syscall count (used by NFS :)) */
struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */
int td_errno; /* Error returned by last syscall. */
struct vnet *td_vnet; /* (k) Effective vnet. */
const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */
- struct rusage_ext td_rux; /* (t) Internal rusage information. */
- struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
- pid_t td_dbg_forked; /* (c) Child pid for debugger. */
+ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */
+ struct proc *td_rfppwait_p; /* (k) The vforked child */
+ struct vm_page **td_ma; /* (k) uio pages held */
+ int td_ma_cnt; /* (k) size of *td_ma */
+ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */
u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
#endif /* __rtems__ */
};
@@ -362,7 +370,7 @@ do { \
#define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */
#define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */
#define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */
-#define TDF_UBORROWING 0x00000200 /* Thread is borrowing user pri. */
+#define TDF_UNUSED09 0x00000200 /* --available-- */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
@@ -372,7 +380,7 @@ do { \
#define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */
#define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */
#define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */
-#define TDF_UNUSED19 0x00080000 /* Thread is sleeping on a umtx. */
+#define TDF_UNUSED19 0x00080000 /* --available-- */
#define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */
#define TDF_UNUSED21 0x00200000 /* --available-- */
#define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */
@@ -396,6 +404,7 @@ do { \
process */
#define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child
only) */
+#define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */
/*
* "Private" flags kept in td_pflags:
@@ -426,8 +435,10 @@ do { \
#define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */
#define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
#define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */
+#define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */
#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
#define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */
+#define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */
/*
* Reasons that the current thread can not be run yet.
@@ -506,7 +517,7 @@ struct proc {
PRS_NEW = 0, /* In creation */
PRS_NORMAL, /* threads can be run. */
PRS_ZOMBIE
- } p_state; /* (j/c) S* process status. */
+ } p_state; /* (j/c) Process status. */
pid_t p_pid; /* (b) Process identifier. */
LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */
LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */
@@ -521,6 +532,7 @@ struct proc {
/* The following fields are all zeroed upon creation in fork. */
#define p_startzero p_oppid
pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */
+ int p_pad_dbg_child;
struct vmspace *p_vmspace; /* (b) Address space. */
u_int p_swtick; /* (c) Tick when swapped in or out. */
struct itimerval p_realtimer; /* (c) Alarm timer. */
@@ -550,6 +562,7 @@ struct proc {
int p_boundary_count;/* (j) Num threads at user boundary */
int p_pendingcnt; /* how many signals are pending */
struct itimers *p_itimers; /* (c) POSIX interval timers. */
+ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */
/* End area that is zeroed on creation. */
#define p_endzero p_magic
@@ -585,6 +598,17 @@ struct proc {
struct cv p_pwait; /* (*) wait cv for exit/exec. */
struct cv p_dbgwait; /* (*) wait cv for debugger attach
after fork. */
+ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
+ struct racct *p_racct; /* (b) Resource accounting. */
+ /*
+ * An orphan is the child that has been re-parented to the
+ * debugger as a result of attaching to it. Need to keep
+ * track of them for parent to be able to collect the exit
+ * status of what used to be children.
+ */
+ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */
+ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */
+ u_char p_throttled; /* (c) Flag for racct pcpu throttling */
#endif /* __rtems__ */
};
@@ -623,11 +647,13 @@ struct proc {
#define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
#define P_HWPMC 0x800000 /* Process is using HWPMCs */
#define P_JAILED 0x1000000 /* Process is in jail. */
+#define P_ORPHAN 0x2000000 /* Orphaned. */
#define P_INEXEC 0x4000000 /* Process is in execve(). */
#define P_STATCHILD 0x8000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory. */
#define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */
#define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */
+#define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */
#define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
@@ -690,11 +716,12 @@ MALLOC_DECLARE(M_SUBPROC);
#define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads)
/*
- * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
- * as it is used to represent "no process group".
+ * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit
+ * in a pid_t, as it is used to represent "no process group".
*/
#define PID_MAX 99999
#define NO_PID 100000
+extern pid_t pid_max;
#define SESS_LEADER(p) ((p)->p_session->s_leader == (p))
@@ -795,6 +822,10 @@ MALLOC_DECLARE(M_SUBPROC);
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern u_long pidhash;
+#define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash])
+extern LIST_HEAD(tidhashhead, thread) *tidhashtbl;
+extern u_long tidhash;
+extern struct rwlock tidhash_lock;
#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash])
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
@@ -822,6 +853,7 @@ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
extern struct uma_zone *proc_zone;
struct proc *pfind(pid_t); /* Find process by id. */
+struct proc *pfind_locked(pid_t pid);
struct pgrp *pgfind(pid_t); /* Find process group by id. */
struct proc *zpfind(pid_t); /* Find zombie process by id. */
@@ -848,7 +880,7 @@ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
int enterthispgrp(struct proc *p, struct pgrp *pgrp);
void faultin(struct proc *p);
void fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
-int fork1(struct thread *, int, int, struct proc **);
+int fork1(struct thread *, int, int, struct proc **, int *, int);
void fork_exit(void (*)(void *, struct trapframe *), void *,
struct trapframe *);
void fork_return(struct thread *, struct trapframe *);
@@ -867,9 +899,13 @@ int p_canwait(struct thread *td, struct proc *p);
struct pargs *pargs_alloc(int len);
void pargs_drop(struct pargs *pa);
void pargs_hold(struct pargs *pa);
+int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
+int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
+int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
void procinit(void);
void proc_linkup0(struct proc *p, struct thread *td);
void proc_linkup(struct proc *p, struct thread *td);
+void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
@@ -884,7 +920,10 @@ int should_yield(void);
int sigonstack(size_t sp);
void sleepinit(void);
void stopevent(struct proc *, u_int, u_int);
+struct thread *tdfind(lwpid_t, pid_t);
void threadinit(void);
+void tidhash_add(struct thread *);
+void tidhash_remove(struct thread *);
void cpu_idle(int);
int cpu_idle_wakeup(int);
extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */
diff --git a/freebsd/sys/sys/racct.h b/freebsd/sys/sys/racct.h
new file mode 100644
index 00000000..91df15db
--- /dev/null
+++ b/freebsd/sys/sys/racct.h
@@ -0,0 +1,165 @@
+/*-
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Edward Tomasz Napierala under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Resource accounting.
+ */
+
+#ifndef _RACCT_H_
+#define _RACCT_H_
+
+#include <sys/cdefs.h>
+#include <sys/queue.h>
+#include <rtems/bsd/sys/types.h>
+
+struct proc;
+struct rctl_rule_link;
+struct ucred;
+
+/*
+ * Resources.
+ */
+#define RACCT_UNDEFINED -1
+#define RACCT_CPU 0
+#define RACCT_DATA 1
+#define RACCT_STACK 2
+#define RACCT_CORE 3
+#define RACCT_RSS 4
+#define RACCT_MEMLOCK 5
+#define RACCT_NPROC 6
+#define RACCT_NOFILE 7
+#define RACCT_VMEM 8
+#define RACCT_NPTS 9
+#define RACCT_SWAP 10
+#define RACCT_NTHR 11
+#define RACCT_MSGQQUEUED 12
+#define RACCT_MSGQSIZE 13
+#define RACCT_NMSGQ 14
+#define RACCT_NSEM 15
+#define RACCT_NSEMOP 16
+#define RACCT_NSHM 17
+#define RACCT_SHMSIZE 18
+#define RACCT_WALLCLOCK 19
+#define RACCT_PCTCPU 20
+#define RACCT_MAX RACCT_PCTCPU
+
+/*
+ * Resource properties.
+ */
+#define RACCT_IN_MILLIONS 0x01
+#define RACCT_RECLAIMABLE 0x02
+#define RACCT_INHERITABLE 0x04
+#define RACCT_DENIABLE 0x08
+#define RACCT_SLOPPY 0x10
+#define RACCT_DECAYING 0x20
+
+extern int racct_types[];
+
+/*
+ * Amount stored in c_resources[] is 10**6 times bigger than what's
+ * visible to the userland. It gets fixed up when retrieving resource
+ * usage or adding rules.
+ */
+#define RACCT_IS_IN_MILLIONS(X) (racct_types[X] & RACCT_IN_MILLIONS)
+
+/*
+ * Resource usage can drop, as opposed to only grow. When the process
+ * terminates, its resource usage is freed from the respective
+ * per-credential racct containers.
+ */
+#define RACCT_IS_RECLAIMABLE(X) (racct_types[X] & RACCT_RECLAIMABLE)
+
+/*
+ * Children inherit resource usage.
+ */
+#define RACCT_IS_INHERITABLE(X) (racct_types[X] & RACCT_INHERITABLE)
+
+/*
+ * racct_{add,set}(9) can actually return an error and not update resource
+ * usage counters. Note that even when resource is not deniable, allocating
+ * resource might cause signals to be sent by RCTL code.
+ */
+#define RACCT_IS_DENIABLE(X) (racct_types[X] & RACCT_DENIABLE)
+
+/*
+ * Per-process resource usage information makes no sense, but per-credential
+ * one does. This kind of resources are usually allocated for process, but
+ * freed using credentials.
+ */
+#define RACCT_IS_SLOPPY(X) (racct_types[X] & RACCT_SLOPPY)
+
+/*
+ * When a process terminates, its resource usage is not automatically
+ * subtracted from per-credential racct containers. Instead, the resource
+ * usage of per-credential racct containers decays in time.
+ * Resource usage can also drop for such resource.
+ * So far, the only such resource is RACCT_PCTCPU.
+ */
+#define RACCT_IS_DECAYING(X) (racct_types[X] & RACCT_DECAYING)
+
+/*
+ * Resource usage can drop, as opposed to only grow.
+ */
+#define RACCT_CAN_DROP(X) (RACCT_IS_RECLAIMABLE(X) | RACCT_IS_DECAYING(X))
+
+/*
+ * The 'racct' structure defines resource consumption for a particular
+ * subject, such as process or jail.
+ *
+ * This structure must be filled with zeroes initially.
+ */
+struct racct {
+ int64_t r_resources[RACCT_MAX + 1];
+ LIST_HEAD(, rctl_rule_link) r_rule_links;
+};
+
+int racct_add(struct proc *p, int resource, uint64_t amount);
+void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
+void racct_add_force(struct proc *p, int resource, uint64_t amount);
+int racct_set(struct proc *p, int resource, uint64_t amount);
+void racct_set_force(struct proc *p, int resource, uint64_t amount);
+void racct_sub(struct proc *p, int resource, uint64_t amount);
+void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount);
+uint64_t racct_get_limit(struct proc *p, int resource);
+uint64_t racct_get_available(struct proc *p, int resource);
+
+void racct_create(struct racct **racctp);
+void racct_destroy(struct racct **racctp);
+
+int racct_proc_fork(struct proc *parent, struct proc *child);
+void racct_proc_fork_done(struct proc *child);
+void racct_proc_exit(struct proc *p);
+
+void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
+ struct ucred *newcred);
+void racct_move(struct racct *dest, struct racct *src);
+
+#endif /* !_RACCT_H_ */
diff --git a/freebsd/sys/sys/resourcevar.h b/freebsd/sys/sys/resourcevar.h
index 07700598..3dead510 100644
--- a/freebsd/sys/sys/resourcevar.h
+++ b/freebsd/sys/sys/resourcevar.h
@@ -79,8 +79,12 @@ struct plimit {
int pl_refcnt; /* number of references */
};
+struct racct;
+
/*-
- * Per uid resource consumption
+ * Per uid resource consumption. This structure is used to track
+ * the total resource consumption (process count, socket buffer size,
+ * etc) for the uid and impose limits.
*
* Locking guide:
* (a) Constant from inception
@@ -97,6 +101,7 @@ struct uidinfo {
long ui_ptscnt; /* (b) number of pseudo-terminals */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
+ struct racct *ui_racct; /* (a) resource accounting */
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
@@ -126,6 +131,8 @@ rtems_bsd_chgsbsize(u_int *hiwat, u_int to)
#endif /* __rtems__ */
int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
int fuswintr(void *base);
+int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
+ struct rlimit *limp);
struct plimit
*lim_alloc(void);
void lim_copy(struct plimit *dst, struct plimit *src);
@@ -142,6 +149,7 @@ void rucollect(struct rusage *ru, struct rusage *ru2);
void rufetch(struct proc *p, struct rusage *ru);
void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
+void rufetchtd(struct thread *td, struct rusage *ru);
void ruxagg(struct proc *p, struct thread *td);
int suswintr(void *base, int word);
struct uidinfo
@@ -149,6 +157,8 @@ struct uidinfo
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
+void ui_racct_foreach(void (*callback)(struct racct *racct,
+ void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */
diff --git a/freebsd/sys/sys/rmlock.h b/freebsd/sys/sys/rmlock.h
index effbcffb..487cb392 100644
--- a/freebsd/sys/sys/rmlock.h
+++ b/freebsd/sys/sys/rmlock.h
@@ -33,6 +33,7 @@
#define _SYS_RMLOCK_H_
#include <sys/mutex.h>
+#include <sys/sx.h>
#include <sys/_lock.h>
#include <sys/_rmlock.h>
@@ -43,6 +44,7 @@
*/
#define RM_NOWITNESS 0x00000001
#define RM_RECURSE 0x00000002
+#define RM_SLEEPABLE 0x00000004
#ifndef __rtems__
void rm_init(struct rmlock *rm, const char *name);
@@ -54,14 +56,15 @@ void rm_sysinit_flags(void *arg);
void _rm_wlock_debug(struct rmlock *rm, const char *file, int line);
void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line);
-void _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
- const char *file, int line);
+int _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
+ int trylock, const char *file, int line);
void _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
const char *file, int line);
void _rm_wlock(struct rmlock *rm);
void _rm_wunlock(struct rmlock *rm);
-void _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker);
+int _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker,
+ int trylock);
void _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker);
/*
@@ -75,14 +78,17 @@ void _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker);
#define rm_wlock(rm) _rm_wlock_debug((rm), LOCK_FILE, LOCK_LINE)
#define rm_wunlock(rm) _rm_wunlock_debug((rm), LOCK_FILE, LOCK_LINE)
#define rm_rlock(rm,tracker) \
- _rm_rlock_debug((rm),(tracker), LOCK_FILE, LOCK_LINE )
+ ((void)_rm_rlock_debug((rm),(tracker), 0, LOCK_FILE, LOCK_LINE ))
+#define rm_try_rlock(rm,tracker) \
+ _rm_rlock_debug((rm),(tracker), 1, LOCK_FILE, LOCK_LINE )
#define rm_runlock(rm,tracker) \
_rm_runlock_debug((rm), (tracker), LOCK_FILE, LOCK_LINE )
#else
-#define rm_wlock(rm) _rm_wlock((rm))
-#define rm_wunlock(rm) _rm_wunlock((rm))
-#define rm_rlock(rm,tracker) _rm_rlock((rm),(tracker))
-#define rm_runlock(rm,tracker) _rm_runlock((rm), (tracker))
+#define rm_wlock(rm) _rm_wlock((rm))
+#define rm_wunlock(rm) _rm_wunlock((rm))
+#define rm_rlock(rm,tracker) ((void)_rm_rlock((rm),(tracker), 0))
+#define rm_try_rlock(rm,tracker) _rm_rlock((rm),(tracker), 1)
+#define rm_runlock(rm,tracker) _rm_runlock((rm), (tracker))
#endif
#else /* __rtems__ */
diff --git a/freebsd/sys/sys/sbuf.h b/freebsd/sys/sys/sbuf.h
index 6f923378..5fa9e00f 100644
--- a/freebsd/sys/sys/sbuf.h
+++ b/freebsd/sys/sys/sbuf.h
@@ -34,7 +34,6 @@
#include <rtems/bsd/sys/_types.h>
struct sbuf;
-struct sbuf_drain_data;
typedef int (sbuf_drain_func)(void *, const char *, int);
/*
@@ -42,17 +41,20 @@ typedef int (sbuf_drain_func)(void *, const char *, int);
*/
struct sbuf {
char *s_buf; /* storage buffer */
- struct sbuf_drain *s_drain; /* drain function and data */
- int s_size; /* size of storage buffer */
- int s_len; /* current length of string */
+ sbuf_drain_func *s_drain_func; /* drain function */
+ void *s_drain_arg; /* user-supplied drain argument */
+ int s_error; /* current error code */
+ ssize_t s_size; /* size of storage buffer */
+ ssize_t s_len; /* current length of string */
#define SBUF_FIXEDLEN 0x00000000 /* fixed length buffer (default) */
#define SBUF_AUTOEXTEND 0x00000001 /* automatically extend buffer */
#define SBUF_USRFLAGMSK 0x0000ffff /* mask of flags the user may specify */
#define SBUF_DYNAMIC 0x00010000 /* s_buf must be freed */
#define SBUF_FINISHED 0x00020000 /* set by sbuf_finish() */
-#define SBUF_OVERFLOWED 0x00040000 /* sbuf overflowed */
#define SBUF_DYNSTRUCT 0x00080000 /* sbuf must be freed */
+#define SBUF_INSECTION 0x00100000 /* set by sbuf_start_section() */
int s_flags; /* flags */
+ ssize_t s_sect_len; /* current length of section */
};
__BEGIN_DECLS
@@ -63,7 +65,7 @@ struct sbuf *sbuf_new(struct sbuf *, char *, int, int);
#define sbuf_new_auto() \
sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND)
void sbuf_clear(struct sbuf *);
-int sbuf_setpos(struct sbuf *, int);
+int sbuf_setpos(struct sbuf *, ssize_t);
int sbuf_bcat(struct sbuf *, const void *, size_t);
int sbuf_bcpy(struct sbuf *, const void *, size_t);
int sbuf_cat(struct sbuf *, const char *);
@@ -75,12 +77,14 @@ int sbuf_vprintf(struct sbuf *, const char *, __va_list)
int sbuf_putc(struct sbuf *, int);
void sbuf_set_drain(struct sbuf *, sbuf_drain_func *, void *);
int sbuf_trim(struct sbuf *);
-int sbuf_overflowed(struct sbuf *);
+int sbuf_error(const struct sbuf *);
int sbuf_finish(struct sbuf *);
char *sbuf_data(struct sbuf *);
-int sbuf_len(struct sbuf *);
-int sbuf_done(struct sbuf *);
+ssize_t sbuf_len(struct sbuf *);
+int sbuf_done(const struct sbuf *);
void sbuf_delete(struct sbuf *);
+void sbuf_start_section(struct sbuf *, ssize_t *);
+ssize_t sbuf_end_section(struct sbuf *, ssize_t, size_t, int);
#ifdef _KERNEL
struct uio;
diff --git a/freebsd/sys/sys/sdt.h b/freebsd/sys/sys/sdt.h
index 7d1286b8..f2f9b317 100644
--- a/freebsd/sys/sys/sdt.h
+++ b/freebsd/sys/sys/sdt.h
@@ -91,6 +91,10 @@
#define SDT_PROBE_DEFINE3(prov, mod, func, name, sname, arg0, arg1, arg2)
#define SDT_PROBE_DEFINE4(prov, mod, func, name, sname, arg0, arg1, arg2, arg3)
#define SDT_PROBE_DEFINE5(prov, mod, func, name, sname, arg0, arg1, arg2, arg3, arg4)
+#define SDT_PROBE_DEFINE6(prov, mod, func, name, snamp, arg0, arg1, arg2, \
+ arg3, arg4, arg5)
+#define SDT_PROBE_DEFINE7(prov, mod, func, name, snamp, arg0, arg1, arg2, \
+ arg3, arg4, arg5, arg6)
#define SDT_PROBE0(prov, mod, func, name)
#define SDT_PROBE1(prov, mod, func, name, arg0)
@@ -98,6 +102,9 @@
#define SDT_PROBE3(prov, mod, func, name, arg0, arg1, arg2)
#define SDT_PROBE4(prov, mod, func, name, arg0, arg1, arg2, arg3)
#define SDT_PROBE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)
+#define SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5)
+#define SDT_PROBE7(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6)
#else
@@ -232,6 +239,27 @@ struct sdt_provider {
SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3); \
SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4)
+#define SDT_PROBE_DEFINE6(prov, mod, func, name, sname, arg0, arg1, arg2, arg3,\
+ arg4, arg5) \
+ SDT_PROBE_DEFINE(prov, mod, func, name, sname); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5);
+
+#define SDT_PROBE_DEFINE7(prov, mod, func, name, sname, arg0, arg1, arg2, arg3,\
+ arg4, arg5, arg6) \
+ SDT_PROBE_DEFINE(prov, mod, func, name, sname); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 0, arg0); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 1, arg1); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 2, arg2); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 5, arg5); \
+ SDT_PROBE_ARGTYPE(prov, mod, func, name, 6, arg6);
+
#define SDT_PROBE0(prov, mod, func, name) \
SDT_PROBE(prov, mod, func, name, 0, 0, 0, 0, 0)
#define SDT_PROBE1(prov, mod, func, name, arg0) \
@@ -244,6 +272,27 @@ struct sdt_provider {
SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, 0)
#define SDT_PROBE5(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) \
SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)
+#define SDT_PROBE6(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5) \
+ do { \
+ if (sdt_##prov##_##mod##_##func##_##name->id) \
+ (*(void (*)(uint32_t, uintptr_t, uintptr_t, uintptr_t, \
+ uintptr_t, uintptr_t, uintptr_t))sdt_probe_func)( \
+ sdt_##prov##_##mod##_##func##_##name->id, \
+ (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \
+ (uintptr_t)arg3, (uintptr_t)arg4, (uintptr_t)arg5);\
+ } while (0)
+#define SDT_PROBE7(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6) \
+ do { \
+ if (sdt_##prov##_##mod##_##func##_##name->id) \
+ (*(void (*)(uint32_t, uintptr_t, uintptr_t, uintptr_t, \
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t)) \
+ sdt_probe_func)( \
+ sdt_##prov##_##mod##_##func##_##name->id, \
+ (uintptr_t)arg0, (uintptr_t)arg1, (uintptr_t)arg2, \
+ (uintptr_t)arg3, (uintptr_t)arg4, (uintptr_t)arg5, \
+ (uintptr_t)arg6); \
+ } while (0)
typedef int (*sdt_argtype_listall_func_t)(struct sdt_argtype *, void *);
typedef int (*sdt_probe_listall_func_t)(struct sdt_probe *, void *);
diff --git a/freebsd/sys/sys/smp.h b/freebsd/sys/sys/smp.h
index 9fc7cba1..2db4ea8d 100644
--- a/freebsd/sys/sys/smp.h
+++ b/freebsd/sys/sys/smp.h
@@ -16,7 +16,7 @@
#ifndef LOCORE
-#ifdef SMP
+#include <sys/cpuset.h>
/*
* Topology of a NUMA or HTT system.
@@ -34,13 +34,15 @@
struct cpu_group {
struct cpu_group *cg_parent; /* Our parent group. */
struct cpu_group *cg_child; /* Optional children groups. */
- cpumask_t cg_mask; /* Mask of cpus in this group. */
- int8_t cg_count; /* Count of cpus in this group. */
- int8_t cg_children; /* Number of children groups. */
+ cpuset_t cg_mask; /* Mask of cpus in this group. */
+ int32_t cg_count; /* Count of cpus in this group. */
+ int16_t cg_children; /* Number of children groups. */
int8_t cg_level; /* Shared cache level. */
int8_t cg_flags; /* Traversal modifiers. */
};
+typedef struct cpu_group *cpu_group_t;
+
/*
* Defines common resources for CPUs in the group. The highest level
* resource should be used when multiple are shared.
@@ -60,6 +62,7 @@ struct cpu_group {
/*
* Convenience routines for building topologies.
*/
+#ifdef SMP
struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
@@ -70,11 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern void (*cpustop_restartfunc)(void);
extern int smp_active;
extern int smp_cpus;
-extern volatile cpumask_t started_cpus;
-extern volatile cpumask_t stopped_cpus;
-extern cpumask_t idle_cpus_mask;
-extern cpumask_t hlt_cpus_mask;
-extern cpumask_t logical_cpus_mask;
+extern volatile cpuset_t started_cpus;
+extern volatile cpuset_t stopped_cpus;
+extern cpuset_t hlt_cpus_mask;
+extern cpuset_t logical_cpus_mask;
#endif /* SMP */
#ifndef __rtems__
@@ -83,7 +85,7 @@ extern int mp_maxcpus;
extern int mp_ncpus;
extern volatile int smp_started;
-extern cpumask_t all_cpus;
+extern cpuset_t all_cpus;
#else /* __rtems__ */
#define mp_maxid 1U
#define mp_maxcpus 1
@@ -96,7 +98,11 @@ extern cpumask_t all_cpus;
* time, thus permitting us to configure sparse maps of cpuid-dependent
* (per-CPU) structures.
*/
-#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0)
+#ifndef __rtems__
+#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus))
+#else /* __rtems__ */
+#define CPU_ABSENT(x_cpu) 0
+#endif /* __rtems__ */
/*
* Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an
@@ -165,11 +171,11 @@ void cpu_mp_setmaxid(void);
void cpu_mp_start(void);
void forward_signal(struct thread *);
-int restart_cpus(cpumask_t);
-int stop_cpus(cpumask_t);
-int stop_cpus_hard(cpumask_t);
-#if defined(__amd64__)
-int suspend_cpus(cpumask_t);
+int restart_cpus(cpuset_t);
+int stop_cpus(cpuset_t);
+int stop_cpus_hard(cpuset_t);
+#if defined(__amd64__) || defined(__i386__)
+int suspend_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;
@@ -180,7 +186,7 @@ void smp_rendezvous(void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
-void smp_rendezvous_cpus(cpumask_t,
+void smp_rendezvous_cpus(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h
index b8e66999..bfccd74f 100644
--- a/freebsd/sys/sys/sockbuf.h
+++ b/freebsd/sys/sys/sockbuf.h
@@ -37,7 +37,7 @@
#include <sys/_mutex.h>
#include <sys/_sx.h>
-#define SB_MAX (256*1024) /* default for max chars in sockbuf */
+#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
/*
* Constants for sb_flags field of struct sockbuf.
diff --git a/freebsd/sys/sys/socket.h b/freebsd/sys/sys/socket.h
index 2f016b27..fee7f1fd 100644
--- a/freebsd/sys/sys/socket.h
+++ b/freebsd/sys/sys/socket.h
@@ -36,9 +36,7 @@
#include <sys/cdefs.h>
#include <rtems/bsd/sys/_types.h>
#include <sys/_iovec.h>
-#define _NO_NAMESPACE_POLLUTION
-#include <machine/rtems-bsd-param.h>
-#undef _NO_NAMESPACE_POLLUTION
+#include <machine/_align.h>
/*
* Definitions related to sockets: types, address families, options.
@@ -139,6 +137,7 @@ typedef __uid_t uid_t;
#define SO_LISTENQLEN 0x1012 /* socket's complete queue length */
#define SO_LISTENINCQLEN 0x1013 /* socket's incomplete queue length */
#define SO_SETFIB 0x1014 /* use this FIB to route */
+#define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */
#define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */
#define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */
#endif
@@ -301,26 +300,7 @@ struct sockproto {
};
#endif
-#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED
-/*
- * RFC 2553: protocol-independent placeholder for socket addresses
- */
-#define _SS_MAXSIZE 128U
-#define _SS_ALIGNSIZE (sizeof(__int64_t))
-#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \
- sizeof(sa_family_t))
-#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \
- sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
-
-struct sockaddr_storage {
- unsigned char ss_len; /* address length */
- sa_family_t ss_family; /* address family */
- char __ss_pad1[_SS_PAD1SIZE];
- __int64_t __ss_align; /* force desired struct alignment */
- char __ss_pad2[_SS_PAD2SIZE];
-};
-#define _STRUCT_SOCKADDR_STORAGE_DECLARED
-#endif
+#include <sys/_sockaddr_storage.h>
#if __BSD_VISIBLE
/*
@@ -549,7 +529,7 @@ struct sockcred {
_ALIGN(sizeof(struct cmsghdr)) > \
(char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
(struct cmsghdr *)0 : \
- (struct cmsghdr *)((char *)(cmsg) + \
+ (struct cmsghdr *)(void *)((char *)(cmsg) + \
_ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len)))
/*
diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h
index 88990831..94c3b24e 100644
--- a/freebsd/sys/sys/socketvar.h
+++ b/freebsd/sys/sys/socketvar.h
@@ -117,7 +117,14 @@ struct socket {
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
+ /*
+ * so_fibnum, so_user_cookie and friends can be used to attach
+ * some user-specified metadata to a socket, which then can be
+ * used by the kernel for various actions.
+ * so_user_cookie is used by ipfw/dummynet.
+ */
int so_fibnum; /* routing domain for this socket */
+ uint32_t so_user_cookie;
};
/*
@@ -234,17 +241,6 @@ struct xsocket {
} \
} while (0)
-#define sotryfree(so) do { \
- ACCEPT_LOCK_ASSERT(); \
- SOCK_LOCK_ASSERT(so); \
- if ((so)->so_count == 0) \
- sofree(so); \
- else { \
- SOCK_UNLOCK(so); \
- ACCEPT_UNLOCK(); \
- } \
-} while(0)
-
/*
* In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
* avoid a non-atomic test-and-wakeup. However, sowakeup is
diff --git a/freebsd/sys/sys/stddef.h b/freebsd/sys/sys/stddef.h
index df51c369..9b6ac537 100644
--- a/freebsd/sys/sys/stddef.h
+++ b/freebsd/sys/sys/stddef.h
@@ -34,7 +34,10 @@
#include <sys/_null.h>
#include <machine/_types.h>
+#ifndef _PTRDIFF_T_DECLARED
typedef __ptrdiff_t ptrdiff_t;
+#define _PTRDIFF_T_DECLARED
+#endif
#define offsetof(type, field) __offsetof(type, field)
#endif /* __rtems__ */
diff --git a/freebsd/sys/sys/stdint.h b/freebsd/sys/sys/stdint.h
index 74e4a0ce..ca3b1ee0 100644
--- a/freebsd/sys/sys/stdint.h
+++ b/freebsd/sys/sys/stdint.h
@@ -34,46 +34,7 @@
#ifndef __rtems__
#include <machine/_stdint.h>
-
-#ifndef _INT8_T_DECLARED
-typedef __int8_t int8_t;
-#define _INT8_T_DECLARED
-#endif
-
-#ifndef _INT16_T_DECLARED
-typedef __int16_t int16_t;
-#define _INT16_T_DECLARED
-#endif
-
-#ifndef _INT32_T_DECLARED
-typedef __int32_t int32_t;
-#define _INT32_T_DECLARED
-#endif
-
-#ifndef _INT64_T_DECLARED
-typedef __int64_t int64_t;
-#define _INT64_T_DECLARED
-#endif
-
-#ifndef _UINT8_T_DECLARED
-typedef __uint8_t uint8_t;
-#define _UINT8_T_DECLARED
-#endif
-
-#ifndef _UINT16_T_DECLARED
-typedef __uint16_t uint16_t;
-#define _UINT16_T_DECLARED
-#endif
-
-#ifndef _UINT32_T_DECLARED
-typedef __uint32_t uint32_t;
-#define _UINT32_T_DECLARED
-#endif
-
-#ifndef _UINT64_T_DECLARED
-typedef __uint64_t uint64_t;
-#define _UINT64_T_DECLARED
-#endif
+#include <sys/_stdint.h>
typedef __int_least8_t int_least8_t;
typedef __int_least16_t int_least16_t;
@@ -95,13 +56,20 @@ typedef __uint_fast16_t uint_fast16_t;
typedef __uint_fast32_t uint_fast32_t;
typedef __uint_fast64_t uint_fast64_t;
+#ifndef _INTMAX_T_DECLARED
typedef __intmax_t intmax_t;
+#define _INTMAX_T_DECLARED
+#endif
+#ifndef _UINTMAX_T_DECLARED
typedef __uintmax_t uintmax_t;
+#define _UINTMAX_T_DECLARED
+#endif
-#ifndef _INTPTR_T_DECLARED
-typedef __intptr_t intptr_t;
-typedef __uintptr_t uintptr_t;
-#define _INTPTR_T_DECLARED
+/* GNU and Darwin define this and people seem to think it's portable */
+#if defined(UINTPTR_MAX) && defined(UINT64_MAX) && (UINTPTR_MAX == UINT64_MAX)
+#define __WORDSIZE 64
+#else
+#define __WORDSIZE 32
#endif
#endif /* __rtems__ */
diff --git a/freebsd/sys/sys/sx.h b/freebsd/sys/sys/sx.h
index 39906fe7..53beea07 100644
--- a/freebsd/sys/sys/sx.h
+++ b/freebsd/sys/sys/sx.h
@@ -121,21 +121,34 @@ void _sx_assert(struct sx *sx, int what, const char *file, int line);
int sx_chain(struct thread *td, struct thread **ownerp);
#endif
+#define sx_downgrade_(sx, file, line) \
+ _sx_downgrade((sx), (file), (line))
+#define sx_try_slock_(sx, file, line) \
+ _sx_try_slock((sx), (file), (line))
+#define sx_try_xlock_(sx, file, line) \
+ _sx_try_xlock((sx), (file), (line))
+#define sx_try_upgrade_(sx, file, line) \
+ _sx_try_upgrade((sx), (file), (line))
+
struct sx_args {
struct sx *sa_sx;
const char *sa_desc;
+ int sa_flags;
};
-#define SX_SYSINIT(name, sxa, desc) \
+#define SX_SYSINIT_FLAGS(name, sxa, desc, flags) \
static struct sx_args name##_args = { \
(sxa), \
- (desc) \
+ (desc), \
+ (flags) \
}; \
SYSINIT(name##_sx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
sx_sysinit, &name##_args); \
SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
sx_destroy, (sxa))
+#define SX_SYSINIT(name, sxa, desc) SX_SYSINIT_FLAGS(name, sxa, desc, 0)
+
#ifndef __rtems__
/*
* Full lock operations that are suitable to be inlined in non-debug kernels.
@@ -212,30 +225,50 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
#error "LOCK_DEBUG not defined, include <sys/lock.h> before <sys/sx.h>"
#endif
#if (LOCK_DEBUG > 0) || defined(SX_NOINLINE)
-#define sx_xlock(sx) (void)_sx_xlock((sx), 0, LOCK_FILE, LOCK_LINE)
-#define sx_xlock_sig(sx) \
- _sx_xlock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
-#define sx_xunlock(sx) _sx_xunlock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_slock(sx) (void)_sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
-#define sx_slock_sig(sx) \
- _sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
-#define sx_sunlock(sx) _sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#define sx_xlock_(sx, file, line) \
+ (void)_sx_xlock((sx), 0, (file), (line))
+#define sx_xlock_sig_(sx, file, line) \
+ _sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line))
+#define sx_xunlock_(sx, file, line) \
+ _sx_xunlock((sx), (file), (line))
+#define sx_slock_(sx, file, line) \
+ (void)_sx_slock((sx), 0, (file), (line))
+#define sx_slock_sig_(sx, file, line) \
+ _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
+#define sx_sunlock_(sx, file, line) \
+ _sx_sunlock((sx), (file), (line))
#else
-#define sx_xlock(sx) \
- (void)__sx_xlock((sx), curthread, 0, LOCK_FILE, LOCK_LINE)
-#define sx_xlock_sig(sx) \
- __sx_xlock((sx), curthread, SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
-#define sx_xunlock(sx) \
- __sx_xunlock((sx), curthread, LOCK_FILE, LOCK_LINE)
-#define sx_slock(sx) (void)__sx_slock((sx), 0, LOCK_FILE, LOCK_LINE)
-#define sx_slock_sig(sx) \
- __sx_slock((sx), SX_INTERRUPTIBLE, LOCK_FILE, LOCK_LINE)
-#define sx_sunlock(sx) __sx_sunlock((sx), LOCK_FILE, LOCK_LINE)
+#define sx_xlock_(sx, file, line) \
+ (void)__sx_xlock((sx), curthread, 0, (file), (line))
+#define sx_xlock_sig_(sx, file, line) \
+ __sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line))
+#define sx_xunlock_(sx, file, line) \
+ __sx_xunlock((sx), curthread, (file), (line))
+#define sx_slock_(sx, file, line) \
+ (void)__sx_slock((sx), 0, (file), (line))
+#define sx_slock_sig_(sx, file, line) \
+ __sx_slock((sx), SX_INTERRUPTIBLE, (file), (line))
+#define sx_sunlock_(sx, file, line) \
+ __sx_sunlock((sx), (file), (line))
#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */
-#define sx_try_slock(sx) _sx_try_slock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_try_xlock(sx) _sx_try_xlock((sx), LOCK_FILE, LOCK_LINE)
-#define sx_try_upgrade(sx) _sx_try_upgrade((sx), LOCK_FILE, LOCK_LINE)
-#define sx_downgrade(sx) _sx_downgrade((sx), LOCK_FILE, LOCK_LINE)
+#define sx_try_slock(sx) sx_try_slock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_try_xlock(sx) sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_try_upgrade(sx) sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_downgrade(sx) sx_downgrade_((sx), LOCK_FILE, LOCK_LINE)
+#ifdef INVARIANTS
+#define sx_assert_(sx, what, file, line) \
+ _sx_assert((sx), (what), (file), (line))
+#else
+#define sx_assert_(sx, what, file, line) (void)0
+#endif
+
+#define sx_xlock(sx) sx_xlock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_xlock_sig(sx) sx_xlock_sig_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_xunlock(sx) sx_xunlock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_slock(sx) sx_slock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_slock_sig(sx) sx_slock_sig_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_sunlock(sx) sx_sunlock_((sx), LOCK_FILE, LOCK_LINE)
+#define sx_assert(sx, what) sx_assert_((sx), (what), __FILE__, __LINE__)
/*
* Return a pointer to the owning thread if the lock is exclusively
@@ -253,13 +286,15 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
int sx_xlocked(struct sx *sx);
#endif /* __rtems__ */
-#define sx_unlock(sx) do { \
+#define sx_unlock_(sx, file, line) do { \
if (sx_xlocked(sx)) \
- sx_xunlock(sx); \
+ sx_xunlock_(sx, file, line); \
else \
- sx_sunlock(sx); \
+ sx_sunlock_(sx, file, line); \
} while (0)
+#define sx_unlock(sx) sx_unlock_((sx), LOCK_FILE, LOCK_LINE)
+
#define sx_sleep(chan, sx, pri, wmesg, timo) \
_sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
@@ -295,12 +330,6 @@ int sx_xlocked(struct sx *sx);
#define SX_NOTRECURSED LA_NOTRECURSED
#endif
-#ifdef INVARIANTS
-#define sx_assert(sx, what) _sx_assert((sx), (what), LOCK_FILE, LOCK_LINE)
-#else
-#define sx_assert(sx, what) (void)0
-#endif
-
#endif /* _KERNEL */
#endif /* !_SYS_SX_H_ */
diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h
index f5341d87..cf7b90ce 100644
--- a/freebsd/sys/sys/sysctl.h
+++ b/freebsd/sys/sys/sysctl.h
@@ -66,17 +66,17 @@ struct ctlname {
#define CTLTYPE_NODE 1 /* name is a node */
#define CTLTYPE_INT 2 /* name describes an integer */
#define CTLTYPE_STRING 3 /* name describes a string */
-#define CTLTYPE_QUAD 4 /* name describes a 64-bit number */
+#define CTLTYPE_S64 4 /* name describes a signed 64-bit number */
#define CTLTYPE_OPAQUE 5 /* name describes a structure */
#define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */
#define CTLTYPE_UINT 6 /* name describes an unsigned integer */
#define CTLTYPE_LONG 7 /* name describes a long */
#define CTLTYPE_ULONG 8 /* name describes an unsigned long */
+#define CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */
#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */
#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */
#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR)
-#define CTLFLAG_NOLOCK 0x20000000 /* XXX Don't Lock */
#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */
#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */
#define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */
@@ -84,10 +84,14 @@ struct ctlname {
#define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */
#define CTLMASK_SECURE 0x00F00000 /* Secure level */
#define CTLFLAG_TUN 0x00080000 /* Tunable variable */
+#define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN)
+#define CTLFLAG_RWTUN (CTLFLAG_RW|CTLFLAG_TUN)
#define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */
#define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */
-#define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN)
#define CTLFLAG_DYING 0x00010000 /* oid is being removed */
+#define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */
+#define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */
+#define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR)
/*
* Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1.
@@ -117,16 +121,15 @@ struct ctlname {
#ifdef _KERNEL
#include <sys/linker_set.h>
-#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, int arg2, \
- struct sysctl_req *req
+#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \
+ intptr_t arg2, struct sysctl_req *req
/* definitions for sysctl_req 'lock' member */
-#define REQ_UNLOCKED 0 /* not locked and not wired */
-#define REQ_LOCKED 1 /* locked and not wired */
-#define REQ_WIRED 2 /* locked and wired */
+#define REQ_UNWIRED 1
+#define REQ_WIRED 2
/* definitions for sysctl_req 'flags' member */
-#if defined(__amd64__) || defined(__ia64__)
+#if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__)
#define SCTL_MASK32 1 /* 32 bit emulation */
#endif
@@ -136,7 +139,7 @@ struct ctlname {
*/
struct sysctl_req {
struct thread *td; /* used for access checking */
- int lock; /* locking/wiring state */
+ int lock; /* wiring state */
void *oldptr;
size_t oldlen;
size_t oldidx;
@@ -165,12 +168,12 @@ struct sysctl_oid {
int oid_number;
u_int oid_kind;
void *oid_arg1;
- int oid_arg2;
+ intptr_t oid_arg2;
const char *oid_name;
int (*oid_handler)(SYSCTL_HANDLER_ARGS);
const char *oid_fmt;
- int16_t oid_refcnt;
- uint16_t oid_running;
+ int oid_refcnt;
+ u_int oid_running;
const char *oid_descr;
};
@@ -180,8 +183,7 @@ struct sysctl_oid {
int sysctl_handle_int(SYSCTL_HANDLER_ARGS);
int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS);
int sysctl_handle_long(SYSCTL_HANDLER_ARGS);
-int sysctl_handle_quad(SYSCTL_HANDLER_ARGS);
-int sysctl_handle_intptr(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_64(SYSCTL_HANDLER_ARGS);
int sysctl_handle_string(SYSCTL_HANDLER_ARGS);
int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
@@ -235,6 +237,53 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
_bsd_sysctl_##parent##_##name##_children
#endif /* __rtems__ */
+/*
+ * These macros provide type safety for sysctls. SYSCTL_ALLOWED_TYPES()
+ * defines a transparent union of the allowed types. SYSCTL_ASSERT_TYPE()
+ * and SYSCTL_ADD_ASSERT_TYPE() use the transparent union to assert that
+ * the pointer matches the allowed types.
+ *
+ * The allow_0 member allows a literal 0 to be passed for ptr.
+ */
+#define SYSCTL_ALLOWED_TYPES(type, decls) \
+ union sysctl_##type { \
+ long allow_0; \
+ decls \
+ } __attribute__((__transparent_union__)); \
+ \
+ static inline void * \
+ __sysctl_assert_##type(union sysctl_##type ptr) \
+ { \
+ return (ptr.a); \
+ } \
+ struct __hack
+
+SYSCTL_ALLOWED_TYPES(INT, int *a; );
+SYSCTL_ALLOWED_TYPES(UINT, unsigned int *a; );
+SYSCTL_ALLOWED_TYPES(LONG, long *a; );
+SYSCTL_ALLOWED_TYPES(ULONG, unsigned long *a; );
+SYSCTL_ALLOWED_TYPES(INT64, int64_t *a; long long *b; );
+SYSCTL_ALLOWED_TYPES(UINT64, uint64_t *a; unsigned long long *b; );
+
+#ifdef notyet
+#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) \
+ __sysctl_assert_ ## type (ptr)
+#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name) \
+ _SYSCTL_ASSERT_TYPE(type, ptr, __LINE__, parent##_##name)
+#else
+#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) ptr
+#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name)
+#endif
+#define _SYSCTL_ASSERT_TYPE(t, p, l, id) \
+ __SYSCTL_ASSERT_TYPE(t, p, l, id)
+#define __SYSCTL_ASSERT_TYPE(type, ptr, line, id) \
+ static inline void \
+ sysctl_assert_##line##_##id(void) \
+ { \
+ (void)__sysctl_assert_##type(ptr); \
+ } \
+ struct __hack
+
#ifndef NO_SYSCTL_DESCR
#define __DESCR(d) d
#else
@@ -279,65 +328,81 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
arg, len, sysctl_handle_string, "A", __DESCR(descr))
/* Oid for an int. If ptr is NULL, val is returned. */
-#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "I", descr)
-
-#define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "I", __DESCR(descr))
+#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(INT, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_int, "I", descr)
+
+#define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(INT, ptr), val, \
+ sysctl_handle_int, "I", __DESCR(descr))
/* Oid for an unsigned int. If ptr is NULL, val is returned. */
-#define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "IU", descr)
-
-#define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "IU", __DESCR(descr))
-
-#define SYSCTL_XINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "IX", descr)
-
-#define SYSCTL_ADD_XINT(ctx, parent, nbr, name, access, ptr, val, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_int, "IX", __DESCR(descr))
+#define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(UINT, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_int, "IU", descr)
+
+#define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(UINT, ptr), val, \
+ sysctl_handle_int, "IU", __DESCR(descr))
/* Oid for a long. The pointer must be non NULL. */
-#define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_long, "L", descr)
-
-#define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access), \
- ptr, 0, sysctl_handle_long, "L", __DESCR(descr))
+#define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(LONG, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_long, "L", descr)
+
+#define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(LONG, ptr), 0, \
+ sysctl_handle_long, "L", __DESCR(descr))
/* Oid for an unsigned long. The pointer must be non NULL. */
-#define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_long, "LU", __DESCR(descr))
-
-#define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
- ptr, 0, sysctl_handle_long, "LU", __DESCR(descr))
-
-#define SYSCTL_XLONG(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_long, "LX", __DESCR(descr))
-
-#define SYSCTL_ADD_XLONG(ctx, parent, nbr, name, access, ptr, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \
- ptr, 0, sysctl_handle_long, "LX", __DESCR(descr))
+#define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(ULONG, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_long, "LU", descr)
+
+#define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(ULONG, ptr), 0, \
+ sysctl_handle_long, "LU", __DESCR(descr))
/* Oid for a quad. The pointer must be non NULL. */
-#define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access), \
- ptr, val, sysctl_handle_quad, "Q", __DESCR(descr))
-
-#define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \
- sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access), \
- ptr, 0, sysctl_handle_quad, "Q", __DESCR(descr))
+#define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(INT64, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_64, "Q", descr)
+
+#define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(INT64, ptr), 0, \
+ sysctl_handle_64, "Q", __DESCR(descr))
+
+#define SYSCTL_UQUAD(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_ASSERT_TYPE(UINT64, ptr, parent, name); \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_64, "QU", descr)
+
+#define SYSCTL_ADD_UQUAD(ctx, parent, nbr, name, access, ptr, descr) \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ SYSCTL_ADD_ASSERT_TYPE(UINT64, ptr), 0, \
+ sysctl_handle_64, "QU", __DESCR(descr))
/* Oid for an opaque object. Specified by a pointer and a length. */
#define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \
@@ -360,6 +425,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
/* Oid for a procedure. Specified by a pointer and an arg. */
#define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
+ CTASSERT(((access) & CTLTYPE) != 0); \
SYSCTL_OID(parent, nbr, name, (access), \
ptr, arg, handler, fmt, descr)
@@ -372,7 +438,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
* kernel features.
*/
#define FEATURE(name, desc) \
- SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc)
+ SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_CAPRD, \
+ 0, 1, desc)
#endif /* _KERNEL */
@@ -520,6 +587,11 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KERN_PROC_VMMAP 32 /* VM map entries for process */
#define KERN_PROC_FILEDESC 33 /* File descriptors for process */
#define KERN_PROC_GROUPS 34 /* process groups */
+#define KERN_PROC_ENV 35 /* get environment */
+#define KERN_PROC_AUXV 36 /* get ELF auxiliary vector */
+#define KERN_PROC_RLIMIT 37 /* process resource limits */
+#define KERN_PROC_PS_STRINGS 38 /* get ps_strings location */
+#define KERN_PROC_UMASK 39 /* process umask */
#define KERN_PROC_OSREL 40 /* osreldate for process binary */
/*
@@ -715,9 +787,11 @@ extern char kern_ident[];
/* Dynamic oid handling */
struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist,
struct sysctl_oid_list *parent, int nbr, const char *name,
- int kind, void *arg1, int arg2,
+ int kind, void *arg1, intptr_t arg2,
int (*handler) (SYSCTL_HANDLER_ARGS),
const char *fmt, const char *descr);
+int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del,
+ int recurse);
void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name);
int sysctl_move_oid(struct sysctl_oid *oidp,
struct sysctl_oid_list *parent);
@@ -759,8 +833,8 @@ struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int,
#include <sys/cdefs.h>
__BEGIN_DECLS
-int sysctl(int *, u_int, void *, size_t *, void *, size_t);
-int sysctlbyname(const char *, void *, size_t *, void *, size_t);
+int sysctl(const int *, u_int, void *, size_t *, const void *, size_t);
+int sysctlbyname(const char *, void *, size_t *, const void *, size_t);
int sysctlnametomib(const char *, int *, size_t *);
__END_DECLS
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h
index d56edfd4..ff77ddc6 100644
--- a/freebsd/sys/sys/sysproto.h
+++ b/freebsd/sys/sys/sysproto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/8/sys/kern/syscalls.master 229725 2012-01-06 19:32:39Z jhb
+ * created from FreeBSD: stable/9/sys/kern/syscalls.master 251051 2013-05-28 05:51:00Z kib
*/
#ifndef _SYS_SYSPROTO_H_
@@ -60,7 +60,7 @@ struct open_args {
struct close_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
};
-struct wait_args {
+struct wait4_args {
char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
char status_l_[PADL_(int *)]; int * status; char status_r_[PADR_(int *)];
char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
@@ -579,9 +579,11 @@ struct freebsd6_pwrite_args {
char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
};
+#endif /* __rtems__ */
struct setfib_args {
char fibnum_l_[PADL_(int)]; int fibnum; char fibnum_r_[PADR_(int)];
};
+#ifndef __rtems__
struct ntp_adjtime_args {
char tp_l_[PADL_(struct timex *)]; struct timex * tp; char tp_r_[PADR_(struct timex *)];
};
@@ -1700,6 +1702,32 @@ struct lpathconf_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
};
+struct cap_new_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char rights_l_[PADL_(u_int64_t)]; u_int64_t rights; char rights_r_[PADR_(u_int64_t)];
+};
+struct cap_getrights_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char rightsp_l_[PADL_(u_int64_t *)]; u_int64_t * rightsp; char rightsp_r_[PADR_(u_int64_t *)];
+};
+struct cap_enter_args {
+ register_t dummy;
+};
+struct cap_getmode_args {
+ char modep_l_[PADL_(u_int *)]; u_int * modep; char modep_r_[PADR_(u_int *)];
+};
+struct pdfork_args {
+ char fdp_l_[PADL_(int *)]; int * fdp; char fdp_r_[PADR_(int *)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
+struct pdkill_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
+};
+struct pdgetpid_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char pidp_l_[PADL_(pid_t *)]; pid_t * pidp; char pidp_r_[PADR_(pid_t *)];
+};
struct pselect_args {
char nd_l_[PADL_(int)]; int nd; char nd_r_[PADR_(int)];
char in_l_[PADL_(fd_set *)]; fd_set * in; char in_r_[PADR_(fd_set *)];
@@ -1708,6 +1736,43 @@ struct pselect_args {
char ts_l_[PADL_(const struct timespec *)]; const struct timespec * ts; char ts_r_[PADR_(const struct timespec *)];
char sm_l_[PADL_(const sigset_t *)]; const sigset_t * sm; char sm_r_[PADR_(const sigset_t *)];
};
+struct getloginclass_args {
+ char namebuf_l_[PADL_(char *)]; char * namebuf; char namebuf_r_[PADR_(char *)];
+ char namelen_l_[PADL_(size_t)]; size_t namelen; char namelen_r_[PADR_(size_t)];
+};
+struct setloginclass_args {
+ char namebuf_l_[PADL_(const char *)]; const char * namebuf; char namebuf_r_[PADR_(const char *)];
+};
+struct rctl_get_racct_args {
+ char inbufp_l_[PADL_(const void *)]; const void * inbufp; char inbufp_r_[PADR_(const void *)];
+ char inbuflen_l_[PADL_(size_t)]; size_t inbuflen; char inbuflen_r_[PADR_(size_t)];
+ char outbufp_l_[PADL_(void *)]; void * outbufp; char outbufp_r_[PADR_(void *)];
+ char outbuflen_l_[PADL_(size_t)]; size_t outbuflen; char outbuflen_r_[PADR_(size_t)];
+};
+struct rctl_get_rules_args {
+ char inbufp_l_[PADL_(const void *)]; const void * inbufp; char inbufp_r_[PADR_(const void *)];
+ char inbuflen_l_[PADL_(size_t)]; size_t inbuflen; char inbuflen_r_[PADR_(size_t)];
+ char outbufp_l_[PADL_(void *)]; void * outbufp; char outbufp_r_[PADR_(void *)];
+ char outbuflen_l_[PADL_(size_t)]; size_t outbuflen; char outbuflen_r_[PADR_(size_t)];
+};
+struct rctl_get_limits_args {
+ char inbufp_l_[PADL_(const void *)]; const void * inbufp; char inbufp_r_[PADR_(const void *)];
+ char inbuflen_l_[PADL_(size_t)]; size_t inbuflen; char inbuflen_r_[PADR_(size_t)];
+ char outbufp_l_[PADL_(void *)]; void * outbufp; char outbufp_r_[PADR_(void *)];
+ char outbuflen_l_[PADL_(size_t)]; size_t outbuflen; char outbuflen_r_[PADR_(size_t)];
+};
+struct rctl_add_rule_args {
+ char inbufp_l_[PADL_(const void *)]; const void * inbufp; char inbufp_r_[PADR_(const void *)];
+ char inbuflen_l_[PADL_(size_t)]; size_t inbuflen; char inbuflen_r_[PADR_(size_t)];
+ char outbufp_l_[PADL_(void *)]; void * outbufp; char outbufp_r_[PADR_(void *)];
+ char outbuflen_l_[PADL_(size_t)]; size_t outbuflen; char outbuflen_r_[PADR_(size_t)];
+};
+struct rctl_remove_rule_args {
+ char inbufp_l_[PADL_(const void *)]; const void * inbufp; char inbufp_r_[PADR_(const void *)];
+ char inbuflen_l_[PADL_(size_t)]; size_t inbuflen; char inbuflen_r_[PADR_(size_t)];
+ char outbufp_l_[PADL_(void *)]; void * outbufp; char outbufp_r_[PADR_(void *)];
+ char outbuflen_l_[PADL_(size_t)]; size_t outbuflen; char outbuflen_r_[PADR_(size_t)];
+};
struct posix_fallocate_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
@@ -1719,369 +1784,392 @@ struct posix_fadvise_args {
char len_l_[PADL_(off_t)]; off_t len; char len_r_[PADR_(off_t)];
char advice_l_[PADL_(int)]; int advice; char advice_r_[PADR_(int)];
};
+struct wait6_args {
+ char idtype_l_[PADL_(int)]; int idtype; char idtype_r_[PADR_(int)];
+ char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
+ char status_l_[PADL_(int *)]; int * status; char status_r_[PADR_(int *)];
+ char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+ char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)];
+ char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)];
+};
int nosys(struct thread *, struct nosys_args *);
-void sys_exit(struct thread *, struct sys_exit_args *);
-int fork(struct thread *, struct fork_args *);
-int read(struct thread *, struct read_args *);
-int write(struct thread *, struct write_args *);
-int open(struct thread *, struct open_args *);
-int close(struct thread *, struct close_args *);
-int wait4(struct thread *, struct wait_args *);
-int link(struct thread *, struct link_args *);
-int unlink(struct thread *, struct unlink_args *);
-int chdir(struct thread *, struct chdir_args *);
-int fchdir(struct thread *, struct fchdir_args *);
-int mknod(struct thread *, struct mknod_args *);
-int chmod(struct thread *, struct chmod_args *);
-int chown(struct thread *, struct chown_args *);
-int obreak(struct thread *, struct obreak_args *);
-int getpid(struct thread *, struct getpid_args *);
-int mount(struct thread *, struct mount_args *);
-int unmount(struct thread *, struct unmount_args *);
-int setuid(struct thread *, struct setuid_args *);
-int getuid(struct thread *, struct getuid_args *);
-int geteuid(struct thread *, struct geteuid_args *);
-int ptrace(struct thread *, struct ptrace_args *);
-int recvmsg(struct thread *, struct recvmsg_args *);
-int sendmsg(struct thread *, struct sendmsg_args *);
-int recvfrom(struct thread *, struct recvfrom_args *);
-int accept(struct thread *, struct accept_args *);
-int getpeername(struct thread *, struct getpeername_args *);
-int getsockname(struct thread *, struct getsockname_args *);
-int access(struct thread *, struct access_args *);
-int chflags(struct thread *, struct chflags_args *);
-int fchflags(struct thread *, struct fchflags_args *);
-int sync(struct thread *, struct sync_args *);
-int kill(struct thread *, struct kill_args *);
-int getppid(struct thread *, struct getppid_args *);
-int dup(struct thread *, struct dup_args *);
-int pipe(struct thread *, struct pipe_args *);
-int getegid(struct thread *, struct getegid_args *);
-int profil(struct thread *, struct profil_args *);
-int ktrace(struct thread *, struct ktrace_args *);
-int getgid(struct thread *, struct getgid_args *);
-int getlogin(struct thread *, struct getlogin_args *);
-int setlogin(struct thread *, struct setlogin_args *);
-int acct(struct thread *, struct acct_args *);
-int sigaltstack(struct thread *, struct sigaltstack_args *);
-int ioctl(struct thread *, struct ioctl_args *);
-int reboot(struct thread *, struct reboot_args *);
-int revoke(struct thread *, struct revoke_args *);
-int symlink(struct thread *, struct symlink_args *);
-int readlink(struct thread *, struct readlink_args *);
-int execve(struct thread *, struct execve_args *);
-int umask(struct thread *, struct umask_args *);
-int chroot(struct thread *, struct chroot_args *);
-int msync(struct thread *, struct msync_args *);
-int vfork(struct thread *, struct vfork_args *);
-int sbrk(struct thread *, struct sbrk_args *);
-int sstk(struct thread *, struct sstk_args *);
-int ovadvise(struct thread *, struct ovadvise_args *);
-int munmap(struct thread *, struct munmap_args *);
-int mprotect(struct thread *, struct mprotect_args *);
-int madvise(struct thread *, struct madvise_args *);
-int mincore(struct thread *, struct mincore_args *);
-int getgroups(struct thread *, struct getgroups_args *);
-int setgroups(struct thread *, struct setgroups_args *);
-int getpgrp(struct thread *, struct getpgrp_args *);
-int setpgid(struct thread *, struct setpgid_args *);
-int setitimer(struct thread *, struct setitimer_args *);
-int swapon(struct thread *, struct swapon_args *);
-int getitimer(struct thread *, struct getitimer_args *);
-int getdtablesize(struct thread *, struct getdtablesize_args *);
-int dup2(struct thread *, struct dup2_args *);
-int fcntl(struct thread *, struct fcntl_args *);
-int select(struct thread *, struct select_args *);
-int fsync(struct thread *, struct fsync_args *);
-int setpriority(struct thread *, struct setpriority_args *);
-int socket(struct thread *, struct socket_args *);
-int connect(struct thread *, struct connect_args *);
-int getpriority(struct thread *, struct getpriority_args *);
-int bind(struct thread *, struct bind_args *);
-int setsockopt(struct thread *, struct setsockopt_args *);
-int listen(struct thread *, struct listen_args *);
-int gettimeofday(struct thread *, struct gettimeofday_args *);
-int getrusage(struct thread *, struct getrusage_args *);
-int getsockopt(struct thread *, struct getsockopt_args *);
-int readv(struct thread *, struct readv_args *);
-int writev(struct thread *, struct writev_args *);
-int settimeofday(struct thread *, struct settimeofday_args *);
-int fchown(struct thread *, struct fchown_args *);
-int fchmod(struct thread *, struct fchmod_args *);
-int setreuid(struct thread *, struct setreuid_args *);
-int setregid(struct thread *, struct setregid_args *);
-int rename(struct thread *, struct rename_args *);
-int flock(struct thread *, struct flock_args *);
-int mkfifo(struct thread *, struct mkfifo_args *);
-int sendto(struct thread *, struct sendto_args *);
-int shutdown(struct thread *, struct shutdown_args *);
-int socketpair(struct thread *, struct socketpair_args *);
-int mkdir(struct thread *, struct mkdir_args *);
-int rmdir(struct thread *, struct rmdir_args *);
-int utimes(struct thread *, struct utimes_args *);
-int adjtime(struct thread *, struct adjtime_args *);
-int setsid(struct thread *, struct setsid_args *);
-int quotactl(struct thread *, struct quotactl_args *);
-int nlm_syscall(struct thread *, struct nlm_syscall_args *);
-int nfssvc(struct thread *, struct nfssvc_args *);
-int lgetfh(struct thread *, struct lgetfh_args *);
-int getfh(struct thread *, struct getfh_args *);
+void sys_sys_exit(struct thread *, struct sys_exit_args *);
+int sys_fork(struct thread *, struct fork_args *);
+int sys_read(struct thread *, struct read_args *);
+int sys_write(struct thread *, struct write_args *);
+int sys_open(struct thread *, struct open_args *);
+int sys_close(struct thread *, struct close_args *);
+int sys_wait4(struct thread *, struct wait4_args *);
+int sys_link(struct thread *, struct link_args *);
+int sys_unlink(struct thread *, struct unlink_args *);
+int sys_chdir(struct thread *, struct chdir_args *);
+int sys_fchdir(struct thread *, struct fchdir_args *);
+int sys_mknod(struct thread *, struct mknod_args *);
+int sys_chmod(struct thread *, struct chmod_args *);
+int sys_chown(struct thread *, struct chown_args *);
+int sys_obreak(struct thread *, struct obreak_args *);
+int sys_getpid(struct thread *, struct getpid_args *);
+int sys_mount(struct thread *, struct mount_args *);
+int sys_unmount(struct thread *, struct unmount_args *);
+int sys_setuid(struct thread *, struct setuid_args *);
+int sys_getuid(struct thread *, struct getuid_args *);
+int sys_geteuid(struct thread *, struct geteuid_args *);
+int sys_ptrace(struct thread *, struct ptrace_args *);
+int sys_recvmsg(struct thread *, struct recvmsg_args *);
+int sys_sendmsg(struct thread *, struct sendmsg_args *);
+int sys_recvfrom(struct thread *, struct recvfrom_args *);
+int sys_accept(struct thread *, struct accept_args *);
+int sys_getpeername(struct thread *, struct getpeername_args *);
+int sys_getsockname(struct thread *, struct getsockname_args *);
+int sys_access(struct thread *, struct access_args *);
+int sys_chflags(struct thread *, struct chflags_args *);
+int sys_fchflags(struct thread *, struct fchflags_args *);
+int sys_sync(struct thread *, struct sync_args *);
+int sys_kill(struct thread *, struct kill_args *);
+int sys_getppid(struct thread *, struct getppid_args *);
+int sys_dup(struct thread *, struct dup_args *);
+int sys_pipe(struct thread *, struct pipe_args *);
+int sys_getegid(struct thread *, struct getegid_args *);
+int sys_profil(struct thread *, struct profil_args *);
+int sys_ktrace(struct thread *, struct ktrace_args *);
+int sys_getgid(struct thread *, struct getgid_args *);
+int sys_getlogin(struct thread *, struct getlogin_args *);
+int sys_setlogin(struct thread *, struct setlogin_args *);
+int sys_acct(struct thread *, struct acct_args *);
+int sys_sigaltstack(struct thread *, struct sigaltstack_args *);
+int sys_ioctl(struct thread *, struct ioctl_args *);
+int sys_reboot(struct thread *, struct reboot_args *);
+int sys_revoke(struct thread *, struct revoke_args *);
+int sys_symlink(struct thread *, struct symlink_args *);
+int sys_readlink(struct thread *, struct readlink_args *);
+int sys_execve(struct thread *, struct execve_args *);
+int sys_umask(struct thread *, struct umask_args *);
+int sys_chroot(struct thread *, struct chroot_args *);
+int sys_msync(struct thread *, struct msync_args *);
+int sys_vfork(struct thread *, struct vfork_args *);
+int sys_sbrk(struct thread *, struct sbrk_args *);
+int sys_sstk(struct thread *, struct sstk_args *);
+int sys_ovadvise(struct thread *, struct ovadvise_args *);
+int sys_munmap(struct thread *, struct munmap_args *);
+int sys_mprotect(struct thread *, struct mprotect_args *);
+int sys_madvise(struct thread *, struct madvise_args *);
+int sys_mincore(struct thread *, struct mincore_args *);
+int sys_getgroups(struct thread *, struct getgroups_args *);
+int sys_setgroups(struct thread *, struct setgroups_args *);
+int sys_getpgrp(struct thread *, struct getpgrp_args *);
+int sys_setpgid(struct thread *, struct setpgid_args *);
+int sys_setitimer(struct thread *, struct setitimer_args *);
+int sys_swapon(struct thread *, struct swapon_args *);
+int sys_getitimer(struct thread *, struct getitimer_args *);
+int sys_getdtablesize(struct thread *, struct getdtablesize_args *);
+int sys_dup2(struct thread *, struct dup2_args *);
+int sys_fcntl(struct thread *, struct fcntl_args *);
+int sys_select(struct thread *, struct select_args *);
+int sys_fsync(struct thread *, struct fsync_args *);
+int sys_setpriority(struct thread *, struct setpriority_args *);
+int sys_socket(struct thread *, struct socket_args *);
+int sys_connect(struct thread *, struct connect_args *);
+int sys_getpriority(struct thread *, struct getpriority_args *);
+int sys_bind(struct thread *, struct bind_args *);
+int sys_setsockopt(struct thread *, struct setsockopt_args *);
+int sys_listen(struct thread *, struct listen_args *);
+int sys_gettimeofday(struct thread *, struct gettimeofday_args *);
+int sys_getrusage(struct thread *, struct getrusage_args *);
+int sys_getsockopt(struct thread *, struct getsockopt_args *);
+int sys_readv(struct thread *, struct readv_args *);
+int sys_writev(struct thread *, struct writev_args *);
+int sys_settimeofday(struct thread *, struct settimeofday_args *);
+int sys_fchown(struct thread *, struct fchown_args *);
+int sys_fchmod(struct thread *, struct fchmod_args *);
+int sys_setreuid(struct thread *, struct setreuid_args *);
+int sys_setregid(struct thread *, struct setregid_args *);
+int sys_rename(struct thread *, struct rename_args *);
+int sys_flock(struct thread *, struct flock_args *);
+int sys_mkfifo(struct thread *, struct mkfifo_args *);
+int sys_sendto(struct thread *, struct sendto_args *);
+int sys_shutdown(struct thread *, struct shutdown_args *);
+int sys_socketpair(struct thread *, struct socketpair_args *);
+int sys_mkdir(struct thread *, struct mkdir_args *);
+int sys_rmdir(struct thread *, struct rmdir_args *);
+int sys_utimes(struct thread *, struct utimes_args *);
+int sys_adjtime(struct thread *, struct adjtime_args *);
+int sys_setsid(struct thread *, struct setsid_args *);
+int sys_quotactl(struct thread *, struct quotactl_args *);
+int sys_nlm_syscall(struct thread *, struct nlm_syscall_args *);
+int sys_nfssvc(struct thread *, struct nfssvc_args *);
+int sys_lgetfh(struct thread *, struct lgetfh_args *);
+int sys_getfh(struct thread *, struct getfh_args *);
int sysarch(struct thread *, struct sysarch_args *);
-int rtprio(struct thread *, struct rtprio_args *);
-int semsys(struct thread *, struct semsys_args *);
-int msgsys(struct thread *, struct msgsys_args *);
-int shmsys(struct thread *, struct shmsys_args *);
+int sys_rtprio(struct thread *, struct rtprio_args *);
+int sys_semsys(struct thread *, struct semsys_args *);
+int sys_msgsys(struct thread *, struct msgsys_args *);
+int sys_shmsys(struct thread *, struct shmsys_args *);
int freebsd6_pread(struct thread *, struct freebsd6_pread_args *);
int freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *);
-int setfib(struct thread *, struct setfib_args *);
-int ntp_adjtime(struct thread *, struct ntp_adjtime_args *);
-int setgid(struct thread *, struct setgid_args *);
-int setegid(struct thread *, struct setegid_args *);
-int seteuid(struct thread *, struct seteuid_args *);
-int stat(struct thread *, struct stat_args *);
-int fstat(struct thread *, struct fstat_args *);
-int lstat(struct thread *, struct lstat_args *);
-int pathconf(struct thread *, struct pathconf_args *);
-int fpathconf(struct thread *, struct fpathconf_args *);
-int getrlimit(struct thread *, struct __getrlimit_args *);
-int setrlimit(struct thread *, struct __setrlimit_args *);
-int getdirentries(struct thread *, struct getdirentries_args *);
+int sys_setfib(struct thread *, struct setfib_args *);
+int sys_ntp_adjtime(struct thread *, struct ntp_adjtime_args *);
+int sys_setgid(struct thread *, struct setgid_args *);
+int sys_setegid(struct thread *, struct setegid_args *);
+int sys_seteuid(struct thread *, struct seteuid_args *);
+int sys_stat(struct thread *, struct stat_args *);
+int sys_fstat(struct thread *, struct fstat_args *);
+int sys_lstat(struct thread *, struct lstat_args *);
+int sys_pathconf(struct thread *, struct pathconf_args *);
+int sys_fpathconf(struct thread *, struct fpathconf_args *);
+int sys_getrlimit(struct thread *, struct __getrlimit_args *);
+int sys_setrlimit(struct thread *, struct __setrlimit_args *);
+int sys_getdirentries(struct thread *, struct getdirentries_args *);
int freebsd6_mmap(struct thread *, struct freebsd6_mmap_args *);
int freebsd6_lseek(struct thread *, struct freebsd6_lseek_args *);
int freebsd6_truncate(struct thread *, struct freebsd6_truncate_args *);
int freebsd6_ftruncate(struct thread *, struct freebsd6_ftruncate_args *);
-int __sysctl(struct thread *, struct sysctl_args *);
-int mlock(struct thread *, struct mlock_args *);
-int munlock(struct thread *, struct munlock_args *);
-int undelete(struct thread *, struct undelete_args *);
-int futimes(struct thread *, struct futimes_args *);
-int getpgid(struct thread *, struct getpgid_args *);
-int poll(struct thread *, struct poll_args *);
-int semget(struct thread *, struct semget_args *);
-int semop(struct thread *, struct semop_args *);
-int msgget(struct thread *, struct msgget_args *);
-int msgsnd(struct thread *, struct msgsnd_args *);
-int msgrcv(struct thread *, struct msgrcv_args *);
-int shmat(struct thread *, struct shmat_args *);
-int shmdt(struct thread *, struct shmdt_args *);
-int shmget(struct thread *, struct shmget_args *);
-int clock_gettime(struct thread *, struct clock_gettime_args *);
-int clock_settime(struct thread *, struct clock_settime_args *);
-int clock_getres(struct thread *, struct clock_getres_args *);
-int ktimer_create(struct thread *, struct ktimer_create_args *);
-int ktimer_delete(struct thread *, struct ktimer_delete_args *);
-int ktimer_settime(struct thread *, struct ktimer_settime_args *);
-int ktimer_gettime(struct thread *, struct ktimer_gettime_args *);
-int ktimer_getoverrun(struct thread *, struct ktimer_getoverrun_args *);
-int nanosleep(struct thread *, struct nanosleep_args *);
-int ntp_gettime(struct thread *, struct ntp_gettime_args *);
-int minherit(struct thread *, struct minherit_args *);
-int rfork(struct thread *, struct rfork_args *);
-int openbsd_poll(struct thread *, struct openbsd_poll_args *);
-int issetugid(struct thread *, struct issetugid_args *);
-int lchown(struct thread *, struct lchown_args *);
-int aio_read(struct thread *, struct aio_read_args *);
-int aio_write(struct thread *, struct aio_write_args *);
-int lio_listio(struct thread *, struct lio_listio_args *);
-int getdents(struct thread *, struct getdents_args *);
-int lchmod(struct thread *, struct lchmod_args *);
-int lutimes(struct thread *, struct lutimes_args *);
-int nstat(struct thread *, struct nstat_args *);
-int nfstat(struct thread *, struct nfstat_args *);
-int nlstat(struct thread *, struct nlstat_args *);
-int preadv(struct thread *, struct preadv_args *);
-int pwritev(struct thread *, struct pwritev_args *);
-int fhopen(struct thread *, struct fhopen_args *);
-int fhstat(struct thread *, struct fhstat_args *);
-int modnext(struct thread *, struct modnext_args *);
-int modstat(struct thread *, struct modstat_args *);
-int modfnext(struct thread *, struct modfnext_args *);
-int modfind(struct thread *, struct modfind_args *);
-int kldload(struct thread *, struct kldload_args *);
-int kldunload(struct thread *, struct kldunload_args *);
-int kldfind(struct thread *, struct kldfind_args *);
-int kldnext(struct thread *, struct kldnext_args *);
-int kldstat(struct thread *, struct kldstat_args *);
-int kldfirstmod(struct thread *, struct kldfirstmod_args *);
-int getsid(struct thread *, struct getsid_args *);
-int setresuid(struct thread *, struct setresuid_args *);
-int setresgid(struct thread *, struct setresgid_args *);
-int aio_return(struct thread *, struct aio_return_args *);
-int aio_suspend(struct thread *, struct aio_suspend_args *);
-int aio_cancel(struct thread *, struct aio_cancel_args *);
-int aio_error(struct thread *, struct aio_error_args *);
-int oaio_read(struct thread *, struct oaio_read_args *);
-int oaio_write(struct thread *, struct oaio_write_args *);
-int olio_listio(struct thread *, struct olio_listio_args *);
-int yield(struct thread *, struct yield_args *);
-int mlockall(struct thread *, struct mlockall_args *);
-int munlockall(struct thread *, struct munlockall_args *);
-int __getcwd(struct thread *, struct __getcwd_args *);
-int sched_setparam(struct thread *, struct sched_setparam_args *);
-int sched_getparam(struct thread *, struct sched_getparam_args *);
-int sched_setscheduler(struct thread *, struct sched_setscheduler_args *);
-int sched_getscheduler(struct thread *, struct sched_getscheduler_args *);
-int sched_yield(struct thread *, struct sched_yield_args *);
-int sched_get_priority_max(struct thread *, struct sched_get_priority_max_args *);
-int sched_get_priority_min(struct thread *, struct sched_get_priority_min_args *);
-int sched_rr_get_interval(struct thread *, struct sched_rr_get_interval_args *);
-int utrace(struct thread *, struct utrace_args *);
-int kldsym(struct thread *, struct kldsym_args *);
-int jail(struct thread *, struct jail_args *);
-int nnpfs_syscall(struct thread *, struct nnpfs_syscall_args *);
-int sigprocmask(struct thread *, struct sigprocmask_args *);
-int sigsuspend(struct thread *, struct sigsuspend_args *);
-int sigpending(struct thread *, struct sigpending_args *);
-int sigtimedwait(struct thread *, struct sigtimedwait_args *);
-int sigwaitinfo(struct thread *, struct sigwaitinfo_args *);
-int __acl_get_file(struct thread *, struct __acl_get_file_args *);
-int __acl_set_file(struct thread *, struct __acl_set_file_args *);
-int __acl_get_fd(struct thread *, struct __acl_get_fd_args *);
-int __acl_set_fd(struct thread *, struct __acl_set_fd_args *);
-int __acl_delete_file(struct thread *, struct __acl_delete_file_args *);
-int __acl_delete_fd(struct thread *, struct __acl_delete_fd_args *);
-int __acl_aclcheck_file(struct thread *, struct __acl_aclcheck_file_args *);
-int __acl_aclcheck_fd(struct thread *, struct __acl_aclcheck_fd_args *);
-int extattrctl(struct thread *, struct extattrctl_args *);
-int extattr_set_file(struct thread *, struct extattr_set_file_args *);
-int extattr_get_file(struct thread *, struct extattr_get_file_args *);
-int extattr_delete_file(struct thread *, struct extattr_delete_file_args *);
-int aio_waitcomplete(struct thread *, struct aio_waitcomplete_args *);
-int getresuid(struct thread *, struct getresuid_args *);
-int getresgid(struct thread *, struct getresgid_args *);
-int kqueue(struct thread *, struct kqueue_args *);
-int kevent(struct thread *, struct kevent_args *);
-int extattr_set_fd(struct thread *, struct extattr_set_fd_args *);
-int extattr_get_fd(struct thread *, struct extattr_get_fd_args *);
-int extattr_delete_fd(struct thread *, struct extattr_delete_fd_args *);
-int __setugid(struct thread *, struct __setugid_args *);
-int eaccess(struct thread *, struct eaccess_args *);
-int afs3_syscall(struct thread *, struct afs3_syscall_args *);
-int nmount(struct thread *, struct nmount_args *);
-int __mac_get_proc(struct thread *, struct __mac_get_proc_args *);
-int __mac_set_proc(struct thread *, struct __mac_set_proc_args *);
-int __mac_get_fd(struct thread *, struct __mac_get_fd_args *);
-int __mac_get_file(struct thread *, struct __mac_get_file_args *);
-int __mac_set_fd(struct thread *, struct __mac_set_fd_args *);
-int __mac_set_file(struct thread *, struct __mac_set_file_args *);
-int kenv(struct thread *, struct kenv_args *);
-int lchflags(struct thread *, struct lchflags_args *);
-int uuidgen(struct thread *, struct uuidgen_args *);
-int sendfile(struct thread *, struct sendfile_args *);
-int mac_syscall(struct thread *, struct mac_syscall_args *);
-int getfsstat(struct thread *, struct getfsstat_args *);
-int statfs(struct thread *, struct statfs_args *);
-int fstatfs(struct thread *, struct fstatfs_args *);
-int fhstatfs(struct thread *, struct fhstatfs_args *);
-int ksem_close(struct thread *, struct ksem_close_args *);
-int ksem_post(struct thread *, struct ksem_post_args *);
-int ksem_wait(struct thread *, struct ksem_wait_args *);
-int ksem_trywait(struct thread *, struct ksem_trywait_args *);
-int ksem_init(struct thread *, struct ksem_init_args *);
-int ksem_open(struct thread *, struct ksem_open_args *);
-int ksem_unlink(struct thread *, struct ksem_unlink_args *);
-int ksem_getvalue(struct thread *, struct ksem_getvalue_args *);
-int ksem_destroy(struct thread *, struct ksem_destroy_args *);
-int __mac_get_pid(struct thread *, struct __mac_get_pid_args *);
-int __mac_get_link(struct thread *, struct __mac_get_link_args *);
-int __mac_set_link(struct thread *, struct __mac_set_link_args *);
-int extattr_set_link(struct thread *, struct extattr_set_link_args *);
-int extattr_get_link(struct thread *, struct extattr_get_link_args *);
-int extattr_delete_link(struct thread *, struct extattr_delete_link_args *);
-int __mac_execve(struct thread *, struct __mac_execve_args *);
-int sigaction(struct thread *, struct sigaction_args *);
-int sigreturn(struct thread *, struct sigreturn_args *);
-int getcontext(struct thread *, struct getcontext_args *);
-int setcontext(struct thread *, struct setcontext_args *);
-int swapcontext(struct thread *, struct swapcontext_args *);
-int swapoff(struct thread *, struct swapoff_args *);
-int __acl_get_link(struct thread *, struct __acl_get_link_args *);
-int __acl_set_link(struct thread *, struct __acl_set_link_args *);
-int __acl_delete_link(struct thread *, struct __acl_delete_link_args *);
-int __acl_aclcheck_link(struct thread *, struct __acl_aclcheck_link_args *);
-int sigwait(struct thread *, struct sigwait_args *);
-int thr_create(struct thread *, struct thr_create_args *);
-int thr_exit(struct thread *, struct thr_exit_args *);
-int thr_self(struct thread *, struct thr_self_args *);
-int thr_kill(struct thread *, struct thr_kill_args *);
-int _umtx_lock(struct thread *, struct _umtx_lock_args *);
-int _umtx_unlock(struct thread *, struct _umtx_unlock_args *);
-int jail_attach(struct thread *, struct jail_attach_args *);
-int extattr_list_fd(struct thread *, struct extattr_list_fd_args *);
-int extattr_list_file(struct thread *, struct extattr_list_file_args *);
-int extattr_list_link(struct thread *, struct extattr_list_link_args *);
-int ksem_timedwait(struct thread *, struct ksem_timedwait_args *);
-int thr_suspend(struct thread *, struct thr_suspend_args *);
-int thr_wake(struct thread *, struct thr_wake_args *);
-int kldunloadf(struct thread *, struct kldunloadf_args *);
-int audit(struct thread *, struct audit_args *);
-int auditon(struct thread *, struct auditon_args *);
-int getauid(struct thread *, struct getauid_args *);
-int setauid(struct thread *, struct setauid_args *);
-int getaudit(struct thread *, struct getaudit_args *);
-int setaudit(struct thread *, struct setaudit_args *);
-int getaudit_addr(struct thread *, struct getaudit_addr_args *);
-int setaudit_addr(struct thread *, struct setaudit_addr_args *);
-int auditctl(struct thread *, struct auditctl_args *);
-int _umtx_op(struct thread *, struct _umtx_op_args *);
-int thr_new(struct thread *, struct thr_new_args *);
-int sigqueue(struct thread *, struct sigqueue_args *);
-int kmq_open(struct thread *, struct kmq_open_args *);
-int kmq_setattr(struct thread *, struct kmq_setattr_args *);
-int kmq_timedreceive(struct thread *, struct kmq_timedreceive_args *);
-int kmq_timedsend(struct thread *, struct kmq_timedsend_args *);
-int kmq_notify(struct thread *, struct kmq_notify_args *);
-int kmq_unlink(struct thread *, struct kmq_unlink_args *);
-int abort2(struct thread *, struct abort2_args *);
-int thr_set_name(struct thread *, struct thr_set_name_args *);
-int aio_fsync(struct thread *, struct aio_fsync_args *);
-int rtprio_thread(struct thread *, struct rtprio_thread_args *);
-int sctp_peeloff(struct thread *, struct sctp_peeloff_args *);
-int sctp_generic_sendmsg(struct thread *, struct sctp_generic_sendmsg_args *);
-int sctp_generic_sendmsg_iov(struct thread *, struct sctp_generic_sendmsg_iov_args *);
-int sctp_generic_recvmsg(struct thread *, struct sctp_generic_recvmsg_args *);
-int pread(struct thread *, struct pread_args *);
-int pwrite(struct thread *, struct pwrite_args *);
-int mmap(struct thread *, struct mmap_args *);
-int lseek(struct thread *, struct lseek_args *);
-int truncate(struct thread *, struct truncate_args *);
-int ftruncate(struct thread *, struct ftruncate_args *);
-int thr_kill2(struct thread *, struct thr_kill2_args *);
-int shm_open(struct thread *, struct shm_open_args *);
-int shm_unlink(struct thread *, struct shm_unlink_args *);
-int cpuset(struct thread *, struct cpuset_args *);
-int cpuset_setid(struct thread *, struct cpuset_setid_args *);
-int cpuset_getid(struct thread *, struct cpuset_getid_args *);
-int cpuset_getaffinity(struct thread *, struct cpuset_getaffinity_args *);
-int cpuset_setaffinity(struct thread *, struct cpuset_setaffinity_args *);
-int faccessat(struct thread *, struct faccessat_args *);
-int fchmodat(struct thread *, struct fchmodat_args *);
-int fchownat(struct thread *, struct fchownat_args *);
-int fexecve(struct thread *, struct fexecve_args *);
-int fstatat(struct thread *, struct fstatat_args *);
-int futimesat(struct thread *, struct futimesat_args *);
-int linkat(struct thread *, struct linkat_args *);
-int mkdirat(struct thread *, struct mkdirat_args *);
-int mkfifoat(struct thread *, struct mkfifoat_args *);
-int mknodat(struct thread *, struct mknodat_args *);
-int openat(struct thread *, struct openat_args *);
-int readlinkat(struct thread *, struct readlinkat_args *);
-int renameat(struct thread *, struct renameat_args *);
-int symlinkat(struct thread *, struct symlinkat_args *);
-int unlinkat(struct thread *, struct unlinkat_args *);
-int posix_openpt(struct thread *, struct posix_openpt_args *);
-int gssd_syscall(struct thread *, struct gssd_syscall_args *);
-int jail_get(struct thread *, struct jail_get_args *);
-int jail_set(struct thread *, struct jail_set_args *);
-int jail_remove(struct thread *, struct jail_remove_args *);
-int closefrom(struct thread *, struct closefrom_args *);
-int __semctl(struct thread *, struct __semctl_args *);
-int msgctl(struct thread *, struct msgctl_args *);
-int shmctl(struct thread *, struct shmctl_args *);
-int lpathconf(struct thread *, struct lpathconf_args *);
-int pselect(struct thread *, struct pselect_args *);
-int posix_fallocate(struct thread *, struct posix_fallocate_args *);
-int posix_fadvise(struct thread *, struct posix_fadvise_args *);
+int sys___sysctl(struct thread *, struct sysctl_args *);
+int sys_mlock(struct thread *, struct mlock_args *);
+int sys_munlock(struct thread *, struct munlock_args *);
+int sys_undelete(struct thread *, struct undelete_args *);
+int sys_futimes(struct thread *, struct futimes_args *);
+int sys_getpgid(struct thread *, struct getpgid_args *);
+int sys_poll(struct thread *, struct poll_args *);
+int sys_semget(struct thread *, struct semget_args *);
+int sys_semop(struct thread *, struct semop_args *);
+int sys_msgget(struct thread *, struct msgget_args *);
+int sys_msgsnd(struct thread *, struct msgsnd_args *);
+int sys_msgrcv(struct thread *, struct msgrcv_args *);
+int sys_shmat(struct thread *, struct shmat_args *);
+int sys_shmdt(struct thread *, struct shmdt_args *);
+int sys_shmget(struct thread *, struct shmget_args *);
+int sys_clock_gettime(struct thread *, struct clock_gettime_args *);
+int sys_clock_settime(struct thread *, struct clock_settime_args *);
+int sys_clock_getres(struct thread *, struct clock_getres_args *);
+int sys_ktimer_create(struct thread *, struct ktimer_create_args *);
+int sys_ktimer_delete(struct thread *, struct ktimer_delete_args *);
+int sys_ktimer_settime(struct thread *, struct ktimer_settime_args *);
+int sys_ktimer_gettime(struct thread *, struct ktimer_gettime_args *);
+int sys_ktimer_getoverrun(struct thread *, struct ktimer_getoverrun_args *);
+int sys_nanosleep(struct thread *, struct nanosleep_args *);
+int sys_ntp_gettime(struct thread *, struct ntp_gettime_args *);
+int sys_minherit(struct thread *, struct minherit_args *);
+int sys_rfork(struct thread *, struct rfork_args *);
+int sys_openbsd_poll(struct thread *, struct openbsd_poll_args *);
+int sys_issetugid(struct thread *, struct issetugid_args *);
+int sys_lchown(struct thread *, struct lchown_args *);
+int sys_aio_read(struct thread *, struct aio_read_args *);
+int sys_aio_write(struct thread *, struct aio_write_args *);
+int sys_lio_listio(struct thread *, struct lio_listio_args *);
+int sys_getdents(struct thread *, struct getdents_args *);
+int sys_lchmod(struct thread *, struct lchmod_args *);
+int sys_lutimes(struct thread *, struct lutimes_args *);
+int sys_nstat(struct thread *, struct nstat_args *);
+int sys_nfstat(struct thread *, struct nfstat_args *);
+int sys_nlstat(struct thread *, struct nlstat_args *);
+int sys_preadv(struct thread *, struct preadv_args *);
+int sys_pwritev(struct thread *, struct pwritev_args *);
+int sys_fhopen(struct thread *, struct fhopen_args *);
+int sys_fhstat(struct thread *, struct fhstat_args *);
+int sys_modnext(struct thread *, struct modnext_args *);
+int sys_modstat(struct thread *, struct modstat_args *);
+int sys_modfnext(struct thread *, struct modfnext_args *);
+int sys_modfind(struct thread *, struct modfind_args *);
+int sys_kldload(struct thread *, struct kldload_args *);
+int sys_kldunload(struct thread *, struct kldunload_args *);
+int sys_kldfind(struct thread *, struct kldfind_args *);
+int sys_kldnext(struct thread *, struct kldnext_args *);
+int sys_kldstat(struct thread *, struct kldstat_args *);
+int sys_kldfirstmod(struct thread *, struct kldfirstmod_args *);
+int sys_getsid(struct thread *, struct getsid_args *);
+int sys_setresuid(struct thread *, struct setresuid_args *);
+int sys_setresgid(struct thread *, struct setresgid_args *);
+int sys_aio_return(struct thread *, struct aio_return_args *);
+int sys_aio_suspend(struct thread *, struct aio_suspend_args *);
+int sys_aio_cancel(struct thread *, struct aio_cancel_args *);
+int sys_aio_error(struct thread *, struct aio_error_args *);
+int sys_oaio_read(struct thread *, struct oaio_read_args *);
+int sys_oaio_write(struct thread *, struct oaio_write_args *);
+int sys_olio_listio(struct thread *, struct olio_listio_args *);
+int sys_yield(struct thread *, struct yield_args *);
+int sys_mlockall(struct thread *, struct mlockall_args *);
+int sys_munlockall(struct thread *, struct munlockall_args *);
+int sys___getcwd(struct thread *, struct __getcwd_args *);
+int sys_sched_setparam(struct thread *, struct sched_setparam_args *);
+int sys_sched_getparam(struct thread *, struct sched_getparam_args *);
+int sys_sched_setscheduler(struct thread *, struct sched_setscheduler_args *);
+int sys_sched_getscheduler(struct thread *, struct sched_getscheduler_args *);
+int sys_sched_yield(struct thread *, struct sched_yield_args *);
+int sys_sched_get_priority_max(struct thread *, struct sched_get_priority_max_args *);
+int sys_sched_get_priority_min(struct thread *, struct sched_get_priority_min_args *);
+int sys_sched_rr_get_interval(struct thread *, struct sched_rr_get_interval_args *);
+int sys_utrace(struct thread *, struct utrace_args *);
+int sys_kldsym(struct thread *, struct kldsym_args *);
+int sys_jail(struct thread *, struct jail_args *);
+int sys_nnpfs_syscall(struct thread *, struct nnpfs_syscall_args *);
+int sys_sigprocmask(struct thread *, struct sigprocmask_args *);
+int sys_sigsuspend(struct thread *, struct sigsuspend_args *);
+int sys_sigpending(struct thread *, struct sigpending_args *);
+int sys_sigtimedwait(struct thread *, struct sigtimedwait_args *);
+int sys_sigwaitinfo(struct thread *, struct sigwaitinfo_args *);
+int sys___acl_get_file(struct thread *, struct __acl_get_file_args *);
+int sys___acl_set_file(struct thread *, struct __acl_set_file_args *);
+int sys___acl_get_fd(struct thread *, struct __acl_get_fd_args *);
+int sys___acl_set_fd(struct thread *, struct __acl_set_fd_args *);
+int sys___acl_delete_file(struct thread *, struct __acl_delete_file_args *);
+int sys___acl_delete_fd(struct thread *, struct __acl_delete_fd_args *);
+int sys___acl_aclcheck_file(struct thread *, struct __acl_aclcheck_file_args *);
+int sys___acl_aclcheck_fd(struct thread *, struct __acl_aclcheck_fd_args *);
+int sys_extattrctl(struct thread *, struct extattrctl_args *);
+int sys_extattr_set_file(struct thread *, struct extattr_set_file_args *);
+int sys_extattr_get_file(struct thread *, struct extattr_get_file_args *);
+int sys_extattr_delete_file(struct thread *, struct extattr_delete_file_args *);
+int sys_aio_waitcomplete(struct thread *, struct aio_waitcomplete_args *);
+int sys_getresuid(struct thread *, struct getresuid_args *);
+int sys_getresgid(struct thread *, struct getresgid_args *);
+int sys_kqueue(struct thread *, struct kqueue_args *);
+int sys_kevent(struct thread *, struct kevent_args *);
+int sys_extattr_set_fd(struct thread *, struct extattr_set_fd_args *);
+int sys_extattr_get_fd(struct thread *, struct extattr_get_fd_args *);
+int sys_extattr_delete_fd(struct thread *, struct extattr_delete_fd_args *);
+int sys___setugid(struct thread *, struct __setugid_args *);
+int sys_eaccess(struct thread *, struct eaccess_args *);
+int sys_afs3_syscall(struct thread *, struct afs3_syscall_args *);
+int sys_nmount(struct thread *, struct nmount_args *);
+int sys___mac_get_proc(struct thread *, struct __mac_get_proc_args *);
+int sys___mac_set_proc(struct thread *, struct __mac_set_proc_args *);
+int sys___mac_get_fd(struct thread *, struct __mac_get_fd_args *);
+int sys___mac_get_file(struct thread *, struct __mac_get_file_args *);
+int sys___mac_set_fd(struct thread *, struct __mac_set_fd_args *);
+int sys___mac_set_file(struct thread *, struct __mac_set_file_args *);
+int sys_kenv(struct thread *, struct kenv_args *);
+int sys_lchflags(struct thread *, struct lchflags_args *);
+int sys_uuidgen(struct thread *, struct uuidgen_args *);
+int sys_sendfile(struct thread *, struct sendfile_args *);
+int sys_mac_syscall(struct thread *, struct mac_syscall_args *);
+int sys_getfsstat(struct thread *, struct getfsstat_args *);
+int sys_statfs(struct thread *, struct statfs_args *);
+int sys_fstatfs(struct thread *, struct fstatfs_args *);
+int sys_fhstatfs(struct thread *, struct fhstatfs_args *);
+int sys_ksem_close(struct thread *, struct ksem_close_args *);
+int sys_ksem_post(struct thread *, struct ksem_post_args *);
+int sys_ksem_wait(struct thread *, struct ksem_wait_args *);
+int sys_ksem_trywait(struct thread *, struct ksem_trywait_args *);
+int sys_ksem_init(struct thread *, struct ksem_init_args *);
+int sys_ksem_open(struct thread *, struct ksem_open_args *);
+int sys_ksem_unlink(struct thread *, struct ksem_unlink_args *);
+int sys_ksem_getvalue(struct thread *, struct ksem_getvalue_args *);
+int sys_ksem_destroy(struct thread *, struct ksem_destroy_args *);
+int sys___mac_get_pid(struct thread *, struct __mac_get_pid_args *);
+int sys___mac_get_link(struct thread *, struct __mac_get_link_args *);
+int sys___mac_set_link(struct thread *, struct __mac_set_link_args *);
+int sys_extattr_set_link(struct thread *, struct extattr_set_link_args *);
+int sys_extattr_get_link(struct thread *, struct extattr_get_link_args *);
+int sys_extattr_delete_link(struct thread *, struct extattr_delete_link_args *);
+int sys___mac_execve(struct thread *, struct __mac_execve_args *);
+int sys_sigaction(struct thread *, struct sigaction_args *);
+int sys_sigreturn(struct thread *, struct sigreturn_args *);
+int sys_getcontext(struct thread *, struct getcontext_args *);
+int sys_setcontext(struct thread *, struct setcontext_args *);
+int sys_swapcontext(struct thread *, struct swapcontext_args *);
+int sys_swapoff(struct thread *, struct swapoff_args *);
+int sys___acl_get_link(struct thread *, struct __acl_get_link_args *);
+int sys___acl_set_link(struct thread *, struct __acl_set_link_args *);
+int sys___acl_delete_link(struct thread *, struct __acl_delete_link_args *);
+int sys___acl_aclcheck_link(struct thread *, struct __acl_aclcheck_link_args *);
+int sys_sigwait(struct thread *, struct sigwait_args *);
+int sys_thr_create(struct thread *, struct thr_create_args *);
+int sys_thr_exit(struct thread *, struct thr_exit_args *);
+int sys_thr_self(struct thread *, struct thr_self_args *);
+int sys_thr_kill(struct thread *, struct thr_kill_args *);
+int sys__umtx_lock(struct thread *, struct _umtx_lock_args *);
+int sys__umtx_unlock(struct thread *, struct _umtx_unlock_args *);
+int sys_jail_attach(struct thread *, struct jail_attach_args *);
+int sys_extattr_list_fd(struct thread *, struct extattr_list_fd_args *);
+int sys_extattr_list_file(struct thread *, struct extattr_list_file_args *);
+int sys_extattr_list_link(struct thread *, struct extattr_list_link_args *);
+int sys_ksem_timedwait(struct thread *, struct ksem_timedwait_args *);
+int sys_thr_suspend(struct thread *, struct thr_suspend_args *);
+int sys_thr_wake(struct thread *, struct thr_wake_args *);
+int sys_kldunloadf(struct thread *, struct kldunloadf_args *);
+int sys_audit(struct thread *, struct audit_args *);
+int sys_auditon(struct thread *, struct auditon_args *);
+int sys_getauid(struct thread *, struct getauid_args *);
+int sys_setauid(struct thread *, struct setauid_args *);
+int sys_getaudit(struct thread *, struct getaudit_args *);
+int sys_setaudit(struct thread *, struct setaudit_args *);
+int sys_getaudit_addr(struct thread *, struct getaudit_addr_args *);
+int sys_setaudit_addr(struct thread *, struct setaudit_addr_args *);
+int sys_auditctl(struct thread *, struct auditctl_args *);
+int sys__umtx_op(struct thread *, struct _umtx_op_args *);
+int sys_thr_new(struct thread *, struct thr_new_args *);
+int sys_sigqueue(struct thread *, struct sigqueue_args *);
+int sys_kmq_open(struct thread *, struct kmq_open_args *);
+int sys_kmq_setattr(struct thread *, struct kmq_setattr_args *);
+int sys_kmq_timedreceive(struct thread *, struct kmq_timedreceive_args *);
+int sys_kmq_timedsend(struct thread *, struct kmq_timedsend_args *);
+int sys_kmq_notify(struct thread *, struct kmq_notify_args *);
+int sys_kmq_unlink(struct thread *, struct kmq_unlink_args *);
+int sys_abort2(struct thread *, struct abort2_args *);
+int sys_thr_set_name(struct thread *, struct thr_set_name_args *);
+int sys_aio_fsync(struct thread *, struct aio_fsync_args *);
+int sys_rtprio_thread(struct thread *, struct rtprio_thread_args *);
+int sys_sctp_peeloff(struct thread *, struct sctp_peeloff_args *);
+int sys_sctp_generic_sendmsg(struct thread *, struct sctp_generic_sendmsg_args *);
+int sys_sctp_generic_sendmsg_iov(struct thread *, struct sctp_generic_sendmsg_iov_args *);
+int sys_sctp_generic_recvmsg(struct thread *, struct sctp_generic_recvmsg_args *);
+int sys_pread(struct thread *, struct pread_args *);
+int sys_pwrite(struct thread *, struct pwrite_args *);
+int sys_mmap(struct thread *, struct mmap_args *);
+int sys_lseek(struct thread *, struct lseek_args *);
+int sys_truncate(struct thread *, struct truncate_args *);
+int sys_ftruncate(struct thread *, struct ftruncate_args *);
+int sys_thr_kill2(struct thread *, struct thr_kill2_args *);
+int sys_shm_open(struct thread *, struct shm_open_args *);
+int sys_shm_unlink(struct thread *, struct shm_unlink_args *);
+int sys_cpuset(struct thread *, struct cpuset_args *);
+int sys_cpuset_setid(struct thread *, struct cpuset_setid_args *);
+int sys_cpuset_getid(struct thread *, struct cpuset_getid_args *);
+int sys_cpuset_getaffinity(struct thread *, struct cpuset_getaffinity_args *);
+int sys_cpuset_setaffinity(struct thread *, struct cpuset_setaffinity_args *);
+int sys_faccessat(struct thread *, struct faccessat_args *);
+int sys_fchmodat(struct thread *, struct fchmodat_args *);
+int sys_fchownat(struct thread *, struct fchownat_args *);
+int sys_fexecve(struct thread *, struct fexecve_args *);
+int sys_fstatat(struct thread *, struct fstatat_args *);
+int sys_futimesat(struct thread *, struct futimesat_args *);
+int sys_linkat(struct thread *, struct linkat_args *);
+int sys_mkdirat(struct thread *, struct mkdirat_args *);
+int sys_mkfifoat(struct thread *, struct mkfifoat_args *);
+int sys_mknodat(struct thread *, struct mknodat_args *);
+int sys_openat(struct thread *, struct openat_args *);
+int sys_readlinkat(struct thread *, struct readlinkat_args *);
+int sys_renameat(struct thread *, struct renameat_args *);
+int sys_symlinkat(struct thread *, struct symlinkat_args *);
+int sys_unlinkat(struct thread *, struct unlinkat_args *);
+int sys_posix_openpt(struct thread *, struct posix_openpt_args *);
+int sys_gssd_syscall(struct thread *, struct gssd_syscall_args *);
+int sys_jail_get(struct thread *, struct jail_get_args *);
+int sys_jail_set(struct thread *, struct jail_set_args *);
+int sys_jail_remove(struct thread *, struct jail_remove_args *);
+int sys_closefrom(struct thread *, struct closefrom_args *);
+int sys___semctl(struct thread *, struct __semctl_args *);
+int sys_msgctl(struct thread *, struct msgctl_args *);
+int sys_shmctl(struct thread *, struct shmctl_args *);
+int sys_lpathconf(struct thread *, struct lpathconf_args *);
+int sys_cap_new(struct thread *, struct cap_new_args *);
+int sys_cap_getrights(struct thread *, struct cap_getrights_args *);
+int sys_cap_enter(struct thread *, struct cap_enter_args *);
+int sys_cap_getmode(struct thread *, struct cap_getmode_args *);
+int sys_pdfork(struct thread *, struct pdfork_args *);
+int sys_pdkill(struct thread *, struct pdkill_args *);
+int sys_pdgetpid(struct thread *, struct pdgetpid_args *);
+int sys_pselect(struct thread *, struct pselect_args *);
+int sys_getloginclass(struct thread *, struct getloginclass_args *);
+int sys_setloginclass(struct thread *, struct setloginclass_args *);
+int sys_rctl_get_racct(struct thread *, struct rctl_get_racct_args *);
+int sys_rctl_get_rules(struct thread *, struct rctl_get_rules_args *);
+int sys_rctl_get_limits(struct thread *, struct rctl_get_limits_args *);
+int sys_rctl_add_rule(struct thread *, struct rctl_add_rule_args *);
+int sys_rctl_remove_rule(struct thread *, struct rctl_remove_rule_args *);
+int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *);
+int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *);
+int sys_wait6(struct thread *, struct wait6_args *);
#ifdef COMPAT_43
@@ -2756,9 +2844,24 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_msgctl AUE_MSGCTL
#define SYS_AUE_shmctl AUE_SHMCTL
#define SYS_AUE_lpathconf AUE_LPATHCONF
+#define SYS_AUE_cap_new AUE_CAP_NEW
+#define SYS_AUE_cap_getrights AUE_CAP_GETRIGHTS
+#define SYS_AUE_cap_enter AUE_CAP_ENTER
+#define SYS_AUE_cap_getmode AUE_CAP_GETMODE
+#define SYS_AUE_pdfork AUE_PDFORK
+#define SYS_AUE_pdkill AUE_PDKILL
+#define SYS_AUE_pdgetpid AUE_PDGETPID
#define SYS_AUE_pselect AUE_SELECT
+#define SYS_AUE_getloginclass AUE_NULL
+#define SYS_AUE_setloginclass AUE_NULL
+#define SYS_AUE_rctl_get_racct AUE_NULL
+#define SYS_AUE_rctl_get_rules AUE_NULL
+#define SYS_AUE_rctl_get_limits AUE_NULL
+#define SYS_AUE_rctl_add_rule AUE_NULL
+#define SYS_AUE_rctl_remove_rule AUE_NULL
#define SYS_AUE_posix_fallocate AUE_NULL
#define SYS_AUE_posix_fadvise AUE_NULL
+#define SYS_AUE_wait6 AUE_WAIT6
#endif /* __rtems__ */
#undef PAD_
diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h
index b689998c..676bd9c7 100644
--- a/freebsd/sys/sys/systm.h
+++ b/freebsd/sys/sys/systm.h
@@ -51,15 +51,13 @@ extern int cold; /* nonzero if we are doing a cold boot */
/* In RTEMS there is no cold boot */
#define cold 0
#endif /* __rtems__ */
-extern int rebooting; /* boot() has been called. */
+extern int rebooting; /* kern_reboot() has been called. */
extern const char *panicstr; /* panic message */
extern char version[]; /* system version */
extern char compiler_version[]; /* compiler version */
extern char copyright[]; /* system copyright */
extern int kstack_pages; /* number of kernel stack pages */
-extern int nswap; /* size of swap space */
-
extern u_long pagesizes[]; /* supported page sizes */
extern long physmem; /* physical memory */
extern long realmem; /* 'real' memory */
@@ -154,6 +152,12 @@ extern char static_hints[]; /* by config for now */
extern char **kenvp;
+extern const void *zero_region; /* address space maps to a zeroed page */
+
+extern int unmapped_buf_allowed;
+extern int iosize_max_clamp;
+#define IOSIZE_MAX (iosize_max_clamp ? INT_MAX : SSIZE_MAX)
+
/*
* General function declarations.
*/
@@ -169,9 +173,10 @@ struct tty;
struct ucred;
struct uio;
struct _jmp_buf;
+struct trapframe;
#ifndef __rtems__
-int setjmp(struct _jmp_buf *);
+int setjmp(struct _jmp_buf *) __returns_twice;
void longjmp(struct _jmp_buf *, int) __dead2;
#endif /* __rtems__ */
int dumpstatus(vm_offset_t addr, off_t count);
@@ -188,11 +193,7 @@ void *hashinit_flags(int count, struct malloc_type *type,
void *phashinit(int count, struct malloc_type *type, u_long *nentries);
void g_waitidle(void);
-#ifdef RESTARTABLE_PANICS
-void panic(const char *, ...) __printflike(1, 2);
-#else
void panic(const char *, ...) __dead2 __printflike(1, 2);
-#endif
void cpu_boot(int);
void cpu_flush_dcache(void *, size_t);
@@ -201,7 +202,7 @@ void critical_enter(void);
void critical_exit(void);
void init_param1(void);
void init_param2(long physpages);
-void init_param3(long kmempages);
+void init_static_kenv(char *, size_t);
void tablefull(const char *);
int kvprintf(char const *, void (*)(int, void*), void *, int,
__va_list) __printflike(1, 0);
@@ -327,15 +328,25 @@ void realitexpire(void *);
int sysbeep(int hertz, int period);
void hardclock(int usermode, uintfptr_t pc);
+void hardclock_cnt(int cnt, int usermode);
void hardclock_cpu(int usermode);
+void hardclock_sync(int cpu);
void softclock(void *);
void statclock(int usermode);
+void statclock_cnt(int cnt, int usermode);
void profclock(int usermode, uintfptr_t pc);
+void profclock_cnt(int cnt, int usermode, uintfptr_t pc);
+
+int hardclockintr(void);
void startprofclock(struct proc *);
void stopprofclock(struct proc *);
void cpu_startprofclock(void);
void cpu_stopprofclock(void);
+void cpu_idleclock(void);
+void cpu_activeclock(void);
+extern int cpu_can_deep_sleep;
+extern int cpu_disable_deep_sleep;
#ifndef __rtems__
int cr_cansee(struct ucred *u1, struct ucred *u2);
@@ -377,9 +388,12 @@ void adjust_timeout_calltodo(struct timeval *time_change);
/* Initialize the world */
void consinit(void);
void cpu_initclocks(void);
+void cpu_initclocks_bsp(void);
+void cpu_initclocks_ap(void);
void usrinfoinit(void);
/* Finalize the world */
+void kern_reboot(int) __dead2;
void shutdown_nice(int);
/* Timeouts */
diff --git a/freebsd/sys/sys/taskqueue.h b/freebsd/sys/sys/taskqueue.h
index 6ac22e06..b4c7d208 100644
--- a/freebsd/sys/sys/taskqueue.h
+++ b/freebsd/sys/sys/taskqueue.h
@@ -35,10 +35,18 @@
#include <sys/queue.h>
#include <sys/_task.h>
+#include <sys/_callout.h>
struct taskqueue;
struct thread;
+struct timeout_task {
+ struct taskqueue *q;
+ struct task t;
+ struct callout c;
+ int f;
+};
+
/*
* A notification callback function which is called from
* taskqueue_enqueue(). The context argument is given in the call to
@@ -54,7 +62,15 @@ struct taskqueue *taskqueue_create(const char *name, int mflags,
int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
const char *name, ...) __printflike(4, 5);
int taskqueue_enqueue(struct taskqueue *queue, struct task *task);
+int taskqueue_enqueue_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task, int ticks);
+int taskqueue_cancel(struct taskqueue *queue, struct task *task,
+ u_int *pendp);
+int taskqueue_cancel_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task, u_int *pendp);
void taskqueue_drain(struct taskqueue *queue, struct task *task);
+void taskqueue_drain_timeout(struct taskqueue *queue,
+ struct timeout_task *timeout_task);
void taskqueue_free(struct taskqueue *queue);
void taskqueue_run(struct taskqueue *queue);
void taskqueue_block(struct taskqueue *queue);
@@ -83,6 +99,12 @@ void taskqueue_thread_enqueue(void *context);
(task)->ta_context = (context); \
} while (0)
+void _timeout_task_init(struct taskqueue *queue,
+ struct timeout_task *timeout_task, int priority, task_fn_t func,
+ void *context);
+#define TIMEOUT_TASK_INIT(queue, timeout_task, priority, func, context) \
+ _timeout_task_init(queue, timeout_task, priority, func, context);
+
/*
* Declare a reference to a taskqueue.
*/
diff --git a/freebsd/sys/sys/timetc.h b/freebsd/sys/sys/timetc.h
index d5a818b4..4f75c3de 100644
--- a/freebsd/sys/sys/timetc.h
+++ b/freebsd/sys/sys/timetc.h
@@ -47,7 +47,7 @@ struct timecounter {
*/
u_int tc_counter_mask;
/* This mask should mask off any unimplemented bits. */
- u_int64_t tc_frequency;
+ uint64_t tc_frequency;
/* Frequency of the counter in Hz. */
char *tc_name;
/* Name of the timecounter. */
@@ -57,6 +57,8 @@ struct timecounter {
* another timecounter higher means better. Negative
* means "only use at explicit request".
*/
+ u_int tc_flags;
+#define TC_FLAGS_C3STOP 1 /* Timer dies in C3. */
void *tc_priv;
/* Pointer to the timecounter's private parts. */
@@ -65,11 +67,16 @@ struct timecounter {
};
extern struct timecounter *timecounter;
+extern int tc_min_ticktock_freq; /*
+ * Minimal tc_ticktock() call frequency,
+ * required to handle counter wraps.
+ */
u_int64_t tc_getfrequency(void);
void tc_init(struct timecounter *tc);
void tc_setclock(struct timespec *ts);
-void tc_ticktock(void);
+void tc_ticktock(int cnt);
+void cpu_tick_calibration(void);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_kern_timecounter);
diff --git a/freebsd/sys/sys/tty.h b/freebsd/sys/sys/tty.h
index 34c6eff1..70617205 100644
--- a/freebsd/sys/sys/tty.h
+++ b/freebsd/sys/sys/tty.h
@@ -38,7 +38,7 @@
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/selinfo.h>
-#include <sys/termios.h>
+#include <sys/_termios.h>
#include <sys/ttycom.h>
#include <sys/ttyqueue.h>
@@ -152,6 +152,11 @@ struct xtty {
#ifdef _KERNEL
+/* Used to distinguish between normal, callout, lock and init devices. */
+#define TTYUNIT_INIT 0x1
+#define TTYUNIT_LOCK 0x2
+#define TTYUNIT_CALLOUT 0x4
+
/* Allocation and deallocation. */
struct tty *tty_alloc(struct ttydevsw *tsw, void *softc);
struct tty *tty_alloc_mutex(struct ttydevsw *tsw, void *softc, struct mtx *mtx);
@@ -175,6 +180,7 @@ void tty_signal_sessleader(struct tty *tp, int signal);
void tty_signal_pgrp(struct tty *tp, int signal);
/* Waking up readers/writers. */
int tty_wait(struct tty *tp, struct cv *cv);
+int tty_wait_background(struct tty *tp, struct thread *td, int sig);
int tty_timedwait(struct tty *tp, struct cv *cv, int timo);
void tty_wakeup(struct tty *tp, int flags);
@@ -203,6 +209,7 @@ void tty_info(struct tty *tp);
void ttyconsdev_select(const char *name);
/* Pseudo-terminal hooks. */
+int pts_alloc(int fflags, struct thread *td, struct file *fp);
int pts_alloc_external(int fd, struct thread *td, struct file *fp,
struct cdev *dev, const char *name);
diff --git a/freebsd/sys/sys/ttycom.h b/freebsd/sys/sys/ttycom.h
index 60b6145e..68a411ea 100644
--- a/freebsd/sys/sys/ttycom.h
+++ b/freebsd/sys/sys/ttycom.h
@@ -57,10 +57,9 @@ struct winsize {
};
/* 0-2 compat */
- /* 3-4 obsolete */
- /* 5-7 obsolete or unused */
+ /* 3-7 unused */
/* 8-10 compat */
- /* 11-12 obsolete or unused */
+ /* 11-12 unused */
#define TIOCEXCL _IO('t', 13) /* set exclusive use of tty */
#define TIOCNXCL _IO('t', 14) /* reset exclusive use of tty */
#define TIOCGPTN _IOR('t', 15, int) /* Get pts number. */
@@ -70,34 +69,34 @@ struct winsize {
#define TIOCSETA _IOW('t', 20, struct termios) /* set termios struct */
#define TIOCSETAW _IOW('t', 21, struct termios) /* drain output, set */
#define TIOCSETAF _IOW('t', 22, struct termios) /* drn out, fls in, set */
- /* 23-25 obsolete or unused */
+ /* 23-25 unused */
#define TIOCGETD _IOR('t', 26, int) /* get line discipline */
#define TIOCSETD _IOW('t', 27, int) /* set line discipline */
#define TIOCPTMASTER _IO('t', 28) /* pts master validation */
- /* 29-69 free */
- /* 80-84 slip */
+ /* 29-85 unused */
#define TIOCGDRAINWAIT _IOR('t', 86, int) /* get ttywait timeout */
#define TIOCSDRAINWAIT _IOW('t', 87, int) /* set ttywait timeout */
- /* 88 slip, ppp; conflicts */
+ /* 88 unused */
+ /* 89-91 conflicts: tun and tap */
#define TIOCTIMESTAMP _IOR('t', 89, struct timeval) /* enable/get timestamp
* of last input event */
- /* 70-90 ppp; many conflicts */
#define TIOCMGDTRWAIT _IOR('t', 90, int) /* modem: get wait on close */
#define TIOCMSDTRWAIT _IOW('t', 91, int) /* modem: set wait on close */
- /* 90-92 tap; some conflicts */
+ /* 92-93 tun and tap */
+ /* 94-97 conflicts: tun and tap */
#define TIOCDRAIN _IO('t', 94) /* wait till output drained */
#define TIOCSIG _IOWINT('t', 95) /* pty: generate signal */
#define TIOCEXT _IOW('t', 96, int) /* pty: external processing */
- /* 90-97 tun; some conflicts */
#define TIOCSCTTY _IO('t', 97) /* become controlling tty */
#define TIOCCONS _IOW('t', 98, int) /* become virtual console */
#define TIOCGSID _IOR('t', 99, int) /* get session id */
- /* 100 see consio.h */
+ /* 100 unused */
#define TIOCSTAT _IO('t', 101) /* simulate ^T status message */
#define TIOCUCNTL _IOW('t', 102, int) /* pty: set/clr usr cntl mode */
#define UIOCCMD(n) _IO('u', n) /* usr cntl op "n" */
#define TIOCSWINSZ _IOW('t', 103, struct winsize) /* set window size */
#define TIOCGWINSZ _IOR('t', 104, struct winsize) /* get window size */
+ /* 105 unused */
#define TIOCMGET _IOR('t', 106, int) /* get all modem bits */
#define TIOCM_LE 0001 /* line enable */
#define TIOCM_DTR 0002 /* data terminal ready */
diff --git a/freebsd/sys/sys/ttydevsw.h b/freebsd/sys/sys/ttydevsw.h
index e2278c3a..748ae0be 100644
--- a/freebsd/sys/sys/ttydevsw.h
+++ b/freebsd/sys/sys/ttydevsw.h
@@ -46,10 +46,12 @@ typedef void tsw_outwakeup_t(struct tty *tp);
typedef void tsw_inwakeup_t(struct tty *tp);
typedef int tsw_ioctl_t(struct tty *tp, u_long cmd, caddr_t data,
struct thread *td);
+typedef int tsw_cioctl_t(struct tty *tp, int unit, u_long cmd, caddr_t data,
+ struct thread *td);
typedef int tsw_param_t(struct tty *tp, struct termios *t);
typedef int tsw_modem_t(struct tty *tp, int sigon, int sigoff);
-typedef int tsw_mmap_t(struct tty *tp, vm_offset_t offset,
- vm_paddr_t * paddr, int nprot);
+typedef int tsw_mmap_t(struct tty *tp, vm_ooffset_t offset,
+ vm_paddr_t * paddr, int nprot, vm_memattr_t *memattr);
typedef void tsw_pktnotify_t(struct tty *tp, char event);
typedef void tsw_free_t(void *softc);
@@ -63,6 +65,7 @@ struct ttydevsw {
tsw_inwakeup_t *tsw_inwakeup; /* Input can be stored again. */
tsw_ioctl_t *tsw_ioctl; /* ioctl() hooks. */
+ tsw_cioctl_t *tsw_cioctl; /* ioctl() on control devices. */
tsw_param_t *tsw_param; /* TIOCSETA device parameter setting. */
tsw_modem_t *tsw_modem; /* Modem sigon/sigoff. */
@@ -70,6 +73,8 @@ struct ttydevsw {
tsw_pktnotify_t *tsw_pktnotify; /* TIOCPKT events. */
tsw_free_t *tsw_free; /* Destructor. */
+
+ void *tsw_spare[4]; /* For future use. */
};
static __inline int
@@ -126,6 +131,15 @@ ttydevsw_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td)
}
static __inline int
+ttydevsw_cioctl(struct tty *tp, int unit, u_long cmd, caddr_t data, struct thread *td)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_cioctl(tp, unit, cmd, data, td);
+}
+
+static __inline int
ttydevsw_param(struct tty *tp, struct termios *t)
{
MPASS(!tty_gone(tp));
@@ -142,11 +156,12 @@ ttydevsw_modem(struct tty *tp, int sigon, int sigoff)
}
static __inline int
-ttydevsw_mmap(struct tty *tp, vm_offset_t offset, vm_paddr_t *paddr, int nprot)
+ttydevsw_mmap(struct tty *tp, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int nprot, vm_memattr_t *memattr)
{
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot);
+ return tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot, memattr);
}
static __inline void
diff --git a/freebsd/sys/sys/ttydisc.h b/freebsd/sys/sys/ttydisc.h
index 2ea54666..74a1a0ed 100644
--- a/freebsd/sys/sys/ttydisc.h
+++ b/freebsd/sys/sys/ttydisc.h
@@ -52,6 +52,7 @@ void ttydisc_optimize(struct tty *tp);
void ttydisc_modem(struct tty *tp, int open);
#define ttydisc_can_bypass(tp) ((tp)->t_flags & TF_BYPASS)
int ttydisc_rint(struct tty *tp, char c, int flags);
+size_t ttydisc_rint_simple(struct tty *tp, const void *buf, size_t len);
size_t ttydisc_rint_bypass(struct tty *tp, const void *buf, size_t len);
void ttydisc_rint_done(struct tty *tp);
size_t ttydisc_rint_poll(struct tty *tp);
diff --git a/freebsd/sys/sys/ucred.h b/freebsd/sys/sys/ucred.h
index e1de30d3..82e4d9a4 100644
--- a/freebsd/sys/sys/ucred.h
+++ b/freebsd/sys/sys/ucred.h
@@ -35,6 +35,8 @@
#include <bsm/audit.h>
+struct loginclass;
+
/*
* Credentials.
*
@@ -55,7 +57,7 @@ struct ucred {
struct uidinfo *cr_uidinfo; /* per euid resource consumption */
struct uidinfo *cr_ruidinfo; /* per ruid resource consumption */
struct prison *cr_prison; /* jail(2) */
- void *cr_pspare; /* general use */
+ struct loginclass *cr_loginclass; /* login class */
u_int cr_flags; /* credential flags */
void *cr_pspare2[2]; /* general use 2 */
#define cr_endcopy cr_label
@@ -74,6 +76,11 @@ struct ucred;
#define XU_NGROUPS 16
/*
+ * Flags for cr_flags.
+ */
+#define CRED_FLAG_CAPMODE 0x00000001 /* In capability mode. */
+
+/*
* This is the external representation of struct ucred.
*/
struct xucred {
@@ -116,6 +123,7 @@ int groupmember(gid_t gid, struct ucred *cred);
#define crfree(cr) do { } while (0)
#define crhold(cr) NULL
#define cru2x(cr, xcr) do { } while (0)
+#define groupmember(gid, cred) 1
#endif /* __rtems__ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/user.h b/freebsd/sys/sys/user.h
index 01a8e73a..46401674 100644
--- a/freebsd/sys/sys/user.h
+++ b/freebsd/sys/sys/user.h
@@ -95,15 +95,23 @@
#define WMESGLEN 8 /* size of returned wchan message */
#define LOCKNAMELEN 8 /* size of returned lock name */
-#define OCOMMLEN 16 /* size of returned thread name */
+#define TDNAMLEN 16 /* size of returned thread name */
#define COMMLEN 19 /* size of returned ki_comm name */
#define KI_EMULNAMELEN 16 /* size of returned ki_emul */
#define KI_NGROUPS 16 /* number of groups in ki_groups */
#define LOGNAMELEN 17 /* size of returned ki_login */
+#define LOGINCLASSLEN 17 /* size of returned ki_loginclass */
+#ifndef BURN_BRIDGES
+#define OCOMMLEN TDNAMLEN
+#define ki_ocomm ki_tdname
+#endif
+
+/* Flags for the process credential. */
+#define KI_CRF_CAPABILITY_MODE 0x00000001
/*
- * Steal a bit from ki_cr_flags (cr_flags is never used) to indicate
- * that the cred had more than KI_NGROUPS groups.
+ * Steal a bit from ki_cr_flags to indicate that the cred had more than
+ * KI_NGROUPS groups.
*/
#define KI_CRF_GRP_OVERFLOW 0x80000000
@@ -164,18 +172,19 @@ struct kinfo_proc {
char ki_rqindex; /* Run queue index */
u_char ki_oncpu; /* Which cpu we are on */
u_char ki_lastcpu; /* Last cpu we were on */
- char ki_ocomm[OCOMMLEN+1]; /* thread name */
+ char ki_tdname[TDNAMLEN+1]; /* thread name */
char ki_wmesg[WMESGLEN+1]; /* wchan message */
char ki_login[LOGNAMELEN+1]; /* setlogin name */
char ki_lockname[LOCKNAMELEN+1]; /* lock name */
char ki_comm[COMMLEN+1]; /* command name */
char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */
+ char ki_loginclass[LOGINCLASSLEN+1]; /* login class */
/*
* When adding new variables, take space for char-strings from the
* front of ki_sparestrings, and ints from the end of ki_spareints.
* That way the spare room from both arrays will remain contiguous.
*/
- char ki_sparestrings[68]; /* spare string space */
+ char ki_sparestrings[50]; /* spare string space */
int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */
u_int ki_cr_flags; /* Credential flags */
int ki_jid; /* Process jail ID */
@@ -229,6 +238,8 @@ struct user {
* The KERN_PROC_FILE sysctl allows a process to dump the file descriptor
* array of another process.
*/
+#define KF_ATTR_VALID 0x0001
+
#define KF_TYPE_NONE 0
#define KF_TYPE_VNODE 1
#define KF_TYPE_SOCKET 2
@@ -240,6 +251,8 @@ struct user {
#define KF_TYPE_SHM 8
#define KF_TYPE_SEM 9
#define KF_TYPE_PTS 10
+/* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */
+#define KF_TYPE_PROCDESC 12
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -256,6 +269,9 @@ struct user {
#define KF_FD_TYPE_CWD -1 /* Current working directory */
#define KF_FD_TYPE_ROOT -2 /* Root directory */
#define KF_FD_TYPE_JAIL -3 /* Jail directory */
+#define KF_FD_TYPE_TRACE -4 /* ptrace vnode */
+#define KF_FD_TYPE_TEXT -5 /* Text vnode */
+#define KF_FD_TYPE_CTTY -6 /* Controlling terminal */
#define KF_FLAG_READ 0x00000001
#define KF_FLAG_WRITE 0x00000002
@@ -265,6 +281,14 @@ struct user {
#define KF_FLAG_NONBLOCK 0x00000020
#define KF_FLAG_DIRECT 0x00000040
#define KF_FLAG_HASLOCK 0x00000080
+#define KF_FLAG_SHLOCK 0x00000100
+#define KF_FLAG_EXLOCK 0x00000200
+#define KF_FLAG_NOFOLLOW 0x00000400
+#define KF_FLAG_CREAT 0x00000800
+#define KF_FLAG_TRUNC 0x00001000
+#define KF_FLAG_EXCL 0x00002000
+#define KF_FLAG_EXEC 0x00004000
+#define KF_FLAG_CAPABILITY 0x00008000
/*
* Old format. Has variable hidden padding due to alignment.
@@ -299,22 +323,76 @@ struct kinfo_ofile {
#endif
struct kinfo_file {
- int kf_structsize; /* Variable size of record. */
- int kf_type; /* Descriptor type. */
- int kf_fd; /* Array index. */
- int kf_ref_count; /* Reference count. */
- int kf_flags; /* Flags. */
- int _kf_pad0; /* Round to 64 bit alignment */
- int64_t kf_offset; /* Seek location. */
- int kf_vnode_type; /* Vnode type. */
- int kf_sock_domain; /* Socket domain. */
- int kf_sock_type; /* Socket type. */
- int kf_sock_protocol; /* Socket protocol. */
+ int kf_structsize; /* Variable size of record. */
+ int kf_type; /* Descriptor type. */
+ int kf_fd; /* Array index. */
+ int kf_ref_count; /* Reference count. */
+ int kf_flags; /* Flags. */
+ int kf_pad0; /* Round to 64 bit alignment. */
+ int64_t kf_offset; /* Seek location. */
+ int kf_vnode_type; /* Vnode type. */
+ int kf_sock_domain; /* Socket domain. */
+ int kf_sock_type; /* Socket type. */
+ int kf_sock_protocol; /* Socket protocol. */
struct sockaddr_storage kf_sa_local; /* Socket address. */
struct sockaddr_storage kf_sa_peer; /* Peer address. */
- int _kf_ispare[16]; /* Space for more stuff. */
+ union {
+ struct {
+ /* Address of so_pcb. */
+ uint64_t kf_sock_pcb;
+ /* Address of inp_ppcb. */
+ uint64_t kf_sock_inpcb;
+ /* Address of unp_conn. */
+ uint64_t kf_sock_unpconn;
+ /* Send buffer state. */
+ uint16_t kf_sock_snd_sb_state;
+ /* Receive buffer state. */
+ uint16_t kf_sock_rcv_sb_state;
+ /* Round to 64 bit alignment. */
+ uint32_t kf_sock_pad0;
+ } kf_sock;
+ struct {
+ /* Global file id. */
+ uint64_t kf_file_fileid;
+ /* File size. */
+ uint64_t kf_file_size;
+ /* Vnode filesystem id. */
+ uint32_t kf_file_fsid;
+ /* File device. */
+ uint32_t kf_file_rdev;
+ /* File mode. */
+ uint16_t kf_file_mode;
+ /* Round to 64 bit alignment. */
+ uint16_t kf_file_pad0;
+ uint32_t kf_file_pad1;
+ } kf_file;
+ struct {
+ uint32_t kf_sem_value;
+ uint16_t kf_sem_mode;
+ } kf_sem;
+ struct {
+ uint64_t kf_pipe_addr;
+ uint64_t kf_pipe_peer;
+ uint32_t kf_pipe_buffer_cnt;
+ /* Round to 64 bit alignment. */
+ uint32_t kf_pipe_pad0[3];
+ } kf_pipe;
+ struct {
+ uint32_t kf_pts_dev;
+ /* Round to 64 bit alignment. */
+ uint32_t kf_pts_pad0[7];
+ } kf_pts;
+ struct {
+ pid_t kf_pid;
+ } kf_proc;
+ } kf_un;
+ uint16_t kf_status; /* Status flags. */
+ uint16_t kf_pad1; /* Round to 32 bit alignment. */
+ int _kf_ispare0; /* Space for more stuff. */
+ cap_rights_t kf_cap_rights; /* Capability rights. */
+ int _kf_ispare[4]; /* Space for more stuff. */
/* Truncated before copyout in sysctl */
- char kf_path[PATH_MAX]; /* Path to file, if any. */
+ char kf_path[PATH_MAX]; /* Path to file, if any. */
};
/*
@@ -339,6 +417,8 @@ struct kinfo_file {
#define KVME_FLAG_NEEDS_COPY 0x00000002
#define KVME_FLAG_NOCOREDUMP 0x00000004
#define KVME_FLAG_SUPER 0x00000008
+#define KVME_FLAG_GROWS_UP 0x00000010
+#define KVME_FLAG_GROWS_DOWN 0x00000020
#if defined(__amd64__)
#define KINFO_OVMENTRY_SIZE 1168
@@ -376,16 +456,20 @@ struct kinfo_vmentry {
uint64_t kve_start; /* Starting address. */
uint64_t kve_end; /* Finishing address. */
uint64_t kve_offset; /* Mapping offset in object */
- uint64_t kve_fileid; /* inode number if vnode */
- uint32_t kve_fsid; /* dev_t of vnode location */
+ uint64_t kve_vn_fileid; /* inode number if vnode */
+ uint32_t kve_vn_fsid; /* dev_t of vnode location */
int kve_flags; /* Flags on map entry. */
int kve_resident; /* Number of resident pages. */
int kve_private_resident; /* Number of private pages. */
int kve_protection; /* Protection bitmask. */
int kve_ref_count; /* VM obj ref count. */
int kve_shadow_count; /* VM obj shadow count. */
- int _kve_pad0; /* 64bit align next field */
- int _kve_ispare[16]; /* Space for more stuff. */
+ int kve_vn_type; /* Vnode type. */
+ uint64_t kve_vn_size; /* File size. */
+ uint32_t kve_vn_rdev; /* Device id if device. */
+ uint16_t kve_vn_mode; /* File mode. */
+ uint16_t kve_status; /* Status flags. */
+ int _kve_ispare[12]; /* Space for more stuff. */
/* Truncated before copyout in sysctl */
char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
};
@@ -412,4 +496,27 @@ struct kinfo_kstack {
int _kkst_ispare[16]; /* Space for more stuff. */
};
+#ifdef _KERNEL
+/* Flags for kern_proc_out function. */
+#define KERN_PROC_NOTHREADS 0x1
+#define KERN_PROC_MASK32 0x2
+
+struct sbuf;
+
+/*
+ * The kern_proc out functions are helper functions to dump process
+ * miscellaneous kinfo structures to sbuf. The main consumers are KERN_PROC
+ * sysctls but they may also be used by other kernel subsystems.
+ *
+ * The functions manipulate the process locking state and expect the process
+ * to be locked on enter. On return the process is unlocked.
+ */
+
+int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
+int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
+int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);
+
+int vntype_to_kinfo(int vtype);
+#endif /* _KERNEL */
+
#endif
diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h
index 6c866c7f..c66016c6 100644
--- a/freebsd/sys/sys/vmmeter.h
+++ b/freebsd/sys/sys/vmmeter.h
@@ -34,6 +34,12 @@
#define _SYS_VMMETER_H_
/*
+ * This value is used by ps(1) to change sleep state flag from 'S' to
+ * 'I' and by the sched process to set the alarm clock.
+ */
+#define MAXSLP 20
+
+/*
* System wide statistics counters.
* Locking:
* a - locked by atomic operations
@@ -72,9 +78,9 @@ struct vmmeter {
u_int v_pdwakeups; /* (f) times daemon has awaken from sleep */
u_int v_pdpages; /* (q) pages analyzed by daemon */
- u_int v_tcached; /* (q) total pages cached */
- u_int v_dfree; /* (q) pages freed by daemon */
- u_int v_pfree; /* (q) pages freed by exiting processes */
+ u_int v_tcached; /* (p) total pages cached */
+ u_int v_dfree; /* (p) pages freed by daemon */
+ u_int v_pfree; /* (p) pages freed by exiting processes */
u_int v_tfree; /* (p) total pages freed */
/*
* Distribution of page usages.
diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h
new file mode 100644
index 00000000..5b709f81
--- /dev/null
+++ b/freebsd/sys/sys/vnode.h
@@ -0,0 +1,843 @@
+/*-
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode.h 8.7 (Berkeley) 2/4/94
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_VNODE_H_
+#define _SYS_VNODE_H_
+
+#include <sys/bufobj.h>
+#include <sys/queue.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/lockmgr.h>
+#include <sys/mutex.h>
+#include <sys/rangelock.h>
+#include <sys/selinfo.h>
+#include <sys/uio.h>
+#include <sys/acl.h>
+#include <sys/ktr.h>
+
+#ifndef __rtems__
+/*
+ * The vnode is the focus of all file activity in UNIX. There is a
+ * unique vnode allocated for each active file, each current directory,
+ * each mounted-on file, text file, and the root.
+ */
+
+/*
+ * Vnode types. VNON means no type.
+ */
+enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
+ VMARKER };
+
+/*
+ * Each underlying filesystem allocates its own private area and hangs
+ * it from v_data. If non-null, this area is freed in getnewvnode().
+ */
+
+struct namecache;
+
+struct vpollinfo {
+ struct mtx vpi_lock; /* lock to protect below */
+ struct selinfo vpi_selinfo; /* identity of poller(s) */
+ short vpi_events; /* what they are looking for */
+ short vpi_revents; /* what has happened */
+};
+
+/*
+ * Reading or writing any of these items requires holding the appropriate lock.
+ *
+ * Lock reference:
+ * c - namecache mutex
+ * f - freelist mutex
+ * i - interlock
+ * m - mount point interlock
+ * p - pollinfo lock
+ * u - Only a reference to the vnode is needed to read.
+ * v - vnode lock
+ *
+ * Vnodes may be found on many lists. The general way to deal with operating
+ * on a vnode that is on a list is:
+ * 1) Lock the list and find the vnode.
+ * 2) Lock interlock so that the vnode does not go away.
+ * 3) Unlock the list to avoid lock order reversals.
+ * 4) vget with LK_INTERLOCK and check for ENOENT, or
+ * 5) Check for DOOMED if the vnode lock is not required.
+ * 6) Perform your operation, then vput().
+ */
+
+#if defined(_KERNEL) || defined(_KVM_VNODE)
+
+struct vnode {
+ /*
+ * Fields which define the identity of the vnode. These fields are
+ * owned by the filesystem (XXX: and vgone() ?)
+ */
+ enum vtype v_type; /* u vnode type */
+ const char *v_tag; /* u type of underlying data */
+ struct vop_vector *v_op; /* u vnode operations vector */
+ void *v_data; /* u private data for fs */
+
+ /*
+ * Filesystem instance stuff
+ */
+ struct mount *v_mount; /* u ptr to vfs we are in */
+ TAILQ_ENTRY(vnode) v_nmntvnodes; /* m vnodes for mount point */
+
+ /*
+ * Type specific fields, only one applies to any given vnode.
+ * See #defines below for renaming to v_* namespace.
+ */
+ union {
+ struct mount *vu_mount; /* v ptr to mountpoint (VDIR) */
+ struct socket *vu_socket; /* v unix domain net (VSOCK) */
+ struct cdev *vu_cdev; /* v device (VCHR, VBLK) */
+ struct fifoinfo *vu_fifoinfo; /* v fifo (VFIFO) */
+ } v_un;
+
+ /*
+ * vfs_hash: (mount + inode) -> vnode hash.
+ */
+ LIST_ENTRY(vnode) v_hashlist;
+ u_int v_hash;
+
+ /*
+ * VFS_namecache stuff
+ */
+ LIST_HEAD(, namecache) v_cache_src; /* c Cache entries from us */
+ TAILQ_HEAD(, namecache) v_cache_dst; /* c Cache entries to us */
+ struct namecache *v_cache_dd; /* c Cache entry for .. vnode */
+
+ /*
+ * clustering stuff
+ */
+ daddr_t v_cstart; /* v start block of cluster */
+ daddr_t v_lasta; /* v last allocation */
+ daddr_t v_lastw; /* v last write */
+ int v_clen; /* v length of cur. cluster */
+
+ /*
+ * Locking
+ */
+ struct lock v_lock; /* u (if fs don't have one) */
+ struct mtx v_interlock; /* lock for "i" things */
+ struct lock *v_vnlock; /* u pointer to vnode lock */
+ int v_holdcnt; /* i prevents recycling. */
+ int v_usecount; /* i ref count of users */
+ u_long v_iflag; /* i vnode flags (see below) */
+ u_long v_vflag; /* v vnode flags */
+ int v_writecount; /* v ref count of writers */
+
+ /*
+ * The machinery of being a vnode
+ */
+ TAILQ_ENTRY(vnode) v_actfreelist; /* f vnode active/free lists */
+ struct bufobj v_bufobj; /* * Buffer cache object */
+
+ /*
+ * Hooks for various subsystems and features.
+ */
+ struct vpollinfo *v_pollinfo; /* i Poll events, p for *v_pi */
+ struct label *v_label; /* MAC label for vnode */
+ struct lockf *v_lockf; /* Byte-level advisory lock list */
+ struct rangelock v_rl; /* Byte-range lock */
+};
+
+#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
+
+#define v_mountedhere v_un.vu_mount
+#define v_socket v_un.vu_socket
+#define v_rdev v_un.vu_cdev
+#define v_fifoinfo v_un.vu_fifoinfo
+
+/* XXX: These are temporary to avoid a source sweep at this time */
+#define v_object v_bufobj.bo_object
+
+/*
+ * Userland version of struct vnode, for sysctl.
+ */
+struct xvnode {
+ size_t xv_size; /* sizeof(struct xvnode) */
+ void *xv_vnode; /* address of real vnode */
+ u_long xv_flag; /* vnode vflags */
+ int xv_usecount; /* reference count of users */
+ int xv_writecount; /* reference count of writers */
+ int xv_holdcnt; /* page & buffer references */
+ u_long xv_id; /* capability identifier */
+ void *xv_mount; /* address of parent mount */
+ long xv_numoutput; /* num of writes in progress */
+ enum vtype xv_type; /* vnode type */
+ union {
+ void *xvu_socket; /* socket, if VSOCK */
+ void *xvu_fifo; /* fifo, if VFIFO */
+ dev_t xvu_rdev; /* maj/min, if VBLK/VCHR */
+ struct {
+ dev_t xvu_dev; /* device, if VDIR/VREG/VLNK */
+ ino_t xvu_ino; /* id, if VDIR/VREG/VLNK */
+ } xv_uns;
+ } xv_un;
+};
+#define xv_socket xv_un.xvu_socket
+#define xv_fifo xv_un.xvu_fifo
+#define xv_rdev xv_un.xvu_rdev
+#define xv_dev xv_un.xv_uns.xvu_dev
+#define xv_ino xv_un.xv_uns.xvu_ino
+
+/* We don't need to lock the knlist */
+#define VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL || \
+ KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note))
+
+#define VN_KNOTE(vp, b, a) \
+ do { \
+ if (!VN_KNLIST_EMPTY(vp)) \
+ KNOTE(&vp->v_pollinfo->vpi_selinfo.si_note, (b), \
+ (a) | KNF_NOKQLOCK); \
+ } while (0)
+#define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, KNF_LISTLOCKED)
+#define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0)
+
+/*
+ * Vnode flags.
+ * VI flags are protected by interlock and live in v_iflag
+ * VV flags are protected by the vnode lock and live in v_vflag
+ *
+ * VI_DOOMED is doubly protected by the interlock and vnode lock. Both
+ * are required for writing but the status may be checked with either.
+ */
+#define VI_MOUNT 0x0020 /* Mount in progress */
+#define VI_AGE 0x0040 /* Insert vnode at head of free list */
+#define VI_DOOMED 0x0080 /* This vnode is being recycled */
+#define VI_FREE 0x0100 /* This vnode is on the freelist */
+#define VI_ACTIVE 0x0200 /* This vnode is on the active list */
+#define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */
+#define VI_OWEINACT 0x1000 /* Need to call inactive */
+
+#define VV_ROOT 0x0001 /* root of its filesystem */
+#define VV_ISTTY 0x0002 /* vnode represents a tty */
+#define VV_NOSYNC 0x0004 /* unlinked, stop syncing */
+#define VV_ETERNALDEV 0x0008 /* device that is never destroyed */
+#define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */
+#define VV_TEXT 0x0020 /* vnode is a pure text prototype */
+#define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */
+#define VV_SYSTEM 0x0080 /* vnode being used by kernel */
+#define VV_PROCDEP 0x0100 /* vnode is process dependent */
+#define VV_NOKNOTE 0x0200 /* don't activate knotes on this vnode */
+#define VV_DELETED 0x0400 /* should be removed */
+#define VV_MD 0x0800 /* vnode backs the md device */
+#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
+
+/*
+ * Vnode attributes. A field value of VNOVAL represents a field whose value
+ * is unavailable (getattr) or which is not to be changed (setattr).
+ */
+struct vattr {
+ enum vtype va_type; /* vnode type (for create) */
+ u_short va_mode; /* files access mode and type */
+ short va_nlink; /* number of references to file */
+ uid_t va_uid; /* owner user id */
+ gid_t va_gid; /* owner group id */
+ dev_t va_fsid; /* filesystem id */
+ long va_fileid; /* file id */
+ u_quad_t va_size; /* file size in bytes */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct timespec va_atime; /* time of last access */
+ struct timespec va_mtime; /* time of last modification */
+ struct timespec va_ctime; /* time file changed */
+ struct timespec va_birthtime; /* time file created */
+ u_long va_gen; /* generation number of file */
+ u_long va_flags; /* flags defined for file */
+ dev_t va_rdev; /* device the special file represents */
+ u_quad_t va_bytes; /* bytes of disk space held by file */
+ u_quad_t va_filerev; /* file modification number */
+ u_int va_vaflags; /* operations flags, see below */
+ long va_spare; /* remain quad aligned */
+};
+
+/*
+ * Flags for va_vaflags.
+ */
+#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */
+#define VA_EXCLUSIVE 0x02 /* exclusive create request */
+
+/*
+ * Flags for ioflag. (high 16 bits used to ask for read-ahead and
+ * help with write clustering)
+ * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h
+ */
+#define IO_UNIT 0x0001 /* do I/O as atomic unit */
+#define IO_APPEND 0x0002 /* append write to end */
+#define IO_NDELAY 0x0004 /* FNDELAY flag set in file table */
+#define IO_NODELOCKED 0x0008 /* underlying node already locked */
+#define IO_ASYNC 0x0010 /* bawrite rather then bdwrite */
+#define IO_VMIO 0x0020 /* data already in VMIO space */
+#define IO_INVAL 0x0040 /* invalidate after I/O */
+#define IO_SYNC 0x0080 /* do I/O synchronously */
+#define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
+#define IO_EXT 0x0400 /* operate on external attributes */
+#define IO_NORMAL 0x0800 /* operate on regular data */
+#define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */
+#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */
+
+#define IO_SEQMAX 0x7F /* seq heuristic max value */
+#define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */
+
+/*
+ * Flags for accmode_t.
+ */
+#define VEXEC 000000000100 /* execute/search permission */
+#define VWRITE 000000000200 /* write permission */
+#define VREAD 000000000400 /* read permission */
+#define VADMIN 000000010000 /* being the file owner */
+#define VAPPEND 000000040000 /* permission to write/append */
+/*
+ * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only
+ * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL,
+ * and 0 otherwise. This never happens with ordinary unix access rights
+ * or POSIX.1e ACLs. Obviously, VEXPLICIT_DENY must be OR-ed with
+ * some other V* constant.
+ */
+#define VEXPLICIT_DENY 000000100000
+#define VREAD_NAMED_ATTRS 000000200000 /* not used */
+#define VWRITE_NAMED_ATTRS 000000400000 /* not used */
+#define VDELETE_CHILD 000001000000
+#define VREAD_ATTRIBUTES 000002000000 /* permission to stat(2) */
+#define VWRITE_ATTRIBUTES 000004000000 /* change {m,c,a}time */
+#define VDELETE 000010000000
+#define VREAD_ACL 000020000000 /* read ACL and file mode */
+#define VWRITE_ACL 000040000000 /* change ACL and/or file mode */
+#define VWRITE_OWNER 000100000000 /* change file owner */
+#define VSYNCHRONIZE 000200000000 /* not used */
+
+/*
+ * Permissions that were traditionally granted only to the file owner.
+ */
+#define VADMIN_PERMS (VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \
+ VWRITE_OWNER)
+
+/*
+ * Permissions that were traditionally granted to everyone.
+ */
+#define VSTAT_PERMS (VREAD_ATTRIBUTES | VREAD_ACL)
+
+/*
+ * Permissions that allow to change the state of the file in any way.
+ */
+#define VMODIFY_PERMS (VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \
+ VDELETE)
+
+/*
+ * Token indicating no attribute value yet assigned.
+ */
+#define VNOVAL (-1)
+
+/*
+ * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
+ */
+#define VLKTIMEOUT (hz / 20 + 1)
+
+#ifdef _KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_VNODE);
+#endif
+
+/*
+ * Convert between vnode types and inode formats (since POSIX.1
+ * defines mode word of stat structure in terms of inode formats).
+ */
+extern enum vtype iftovt_tab[];
+extern int vttoif_tab[];
+#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
+#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
+#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
+
+/*
+ * Flags to various vnode functions.
+ */
+#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
+#define FORCECLOSE 0x0002 /* vflush: force file closure */
+#define WRITECLOSE 0x0004 /* vflush: only close writable files */
+#define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */
+#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
+#define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */
+#define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */
+#define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */
+#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
+#define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */
+#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
+#define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */
+
+#define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */
+#define VR_NO_SUSPCLR 0x0002 /* vfs_write_resume: do not clear suspension */
+
+#define VREF(vp) vref(vp)
+
+#ifdef DIAGNOSTIC
+#define VATTR_NULL(vap) vattr_null(vap)
+#else
+#define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
+#endif /* DIAGNOSTIC */
+
+#define NULLVP ((struct vnode *)NULL)
+
+/*
+ * Global vnode data.
+ */
+extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
+extern int async_io_version; /* 0 or POSIX version of AIO i'face */
+extern int desiredvnodes; /* number of vnodes desired */
+extern struct uma_zone *namei_zone;
+extern struct vattr va_null; /* predefined null vattr structure */
+
+#define VI_LOCK(vp) mtx_lock(&(vp)->v_interlock)
+#define VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags))
+#define VI_TRYLOCK(vp) mtx_trylock(&(vp)->v_interlock)
+#define VI_UNLOCK(vp) mtx_unlock(&(vp)->v_interlock)
+#define VI_MTX(vp) (&(vp)->v_interlock)
+
+#define VN_LOCK_AREC(vp) lockallowrecurse((vp)->v_vnlock)
+#define VN_LOCK_ASHARE(vp) lockallowshare((vp)->v_vnlock)
+
+#endif /* _KERNEL */
+
+/*
+ * Mods for extensibility.
+ */
+
+/*
+ * Flags for vdesc_flags:
+ */
+#define VDESC_MAX_VPS 16
+/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
+#define VDESC_VP0_WILLRELE 0x0001
+#define VDESC_VP1_WILLRELE 0x0002
+#define VDESC_VP2_WILLRELE 0x0004
+#define VDESC_VP3_WILLRELE 0x0008
+#define VDESC_NOMAP_VPP 0x0100
+#define VDESC_VPP_WILLRELE 0x0200
+
+/*
+ * A generic structure.
+ * This can be used by bypass routines to identify generic arguments.
+ */
+struct vop_generic_args {
+ struct vnodeop_desc *a_desc;
+ /* other random data follows, presumably */
+};
+
+typedef int vop_bypass_t(struct vop_generic_args *);
+
+/*
+ * VDESC_NO_OFFSET is used to identify the end of the offset list
+ * and in places where no such field exists.
+ */
+#define VDESC_NO_OFFSET -1
+
+/*
+ * This structure describes the vnode operation taking place.
+ */
+struct vnodeop_desc {
+ char *vdesc_name; /* a readable name for debugging */
+ int vdesc_flags; /* VDESC_* flags */
+ vop_bypass_t *vdesc_call; /* Function to call */
+
+ /*
+ * These ops are used by bypass routines to map and locate arguments.
+ * Creds and procs are not needed in bypass routines, but sometimes
+ * they are useful to (for example) transport layers.
+ * Nameidata is useful because it has a cred in it.
+ */
+ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */
+ int vdesc_vpp_offset; /* return vpp location */
+ int vdesc_cred_offset; /* cred location, if any */
+ int vdesc_thread_offset; /* thread location, if any */
+ int vdesc_componentname_offset; /* if any */
+};
+
+#ifdef _KERNEL
+/*
+ * A list of all the operation descs.
+ */
+extern struct vnodeop_desc *vnodeop_descs[];
+
+#define VOPARG_OFFSETOF(s_type, field) __offsetof(s_type, field)
+#define VOPARG_OFFSETTO(s_type, s_offset, struct_p) \
+ ((s_type)(((char*)(struct_p)) + (s_offset)))
+
+
+#ifdef DEBUG_VFS_LOCKS
+/*
+ * Support code to aid in debugging VFS locking problems. Not totally
+ * reliable since if the thread sleeps between changing the lock
+ * state and checking it with the assert, some other thread could
+ * change the state. They are good enough for debugging a single
+ * filesystem using a single-threaded test.
+ */
+void assert_vi_locked(struct vnode *vp, const char *str);
+void assert_vi_unlocked(struct vnode *vp, const char *str);
+void assert_vop_elocked(struct vnode *vp, const char *str);
+#if 0
+void assert_vop_elocked_other(struct vnode *vp, const char *str);
+#endif
+void assert_vop_locked(struct vnode *vp, const char *str);
+#if 0
+void assert_vop_slocked(struct vnode *vp, const char *str);
+#endif
+void assert_vop_unlocked(struct vnode *vp, const char *str);
+
+#define ASSERT_VI_LOCKED(vp, str) assert_vi_locked((vp), (str))
+#define ASSERT_VI_UNLOCKED(vp, str) assert_vi_unlocked((vp), (str))
+#define ASSERT_VOP_ELOCKED(vp, str) assert_vop_elocked((vp), (str))
+#if 0
+#define ASSERT_VOP_ELOCKED_OTHER(vp, str) assert_vop_elocked_other((vp), (str))
+#endif
+#define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str))
+#if 0
+#define ASSERT_VOP_SLOCKED(vp, str) assert_vop_slocked((vp), (str))
+#endif
+#define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str))
+
+#else /* !DEBUG_VFS_LOCKS */
+
+#define ASSERT_VI_LOCKED(vp, str) ((void)0)
+#define ASSERT_VI_UNLOCKED(vp, str) ((void)0)
+#define ASSERT_VOP_ELOCKED(vp, str) ((void)0)
+#if 0
+#define ASSERT_VOP_ELOCKED_OTHER(vp, str)
+#endif
+#define ASSERT_VOP_LOCKED(vp, str) ((void)0)
+#if 0
+#define ASSERT_VOP_SLOCKED(vp, str)
+#endif
+#define ASSERT_VOP_UNLOCKED(vp, str) ((void)0)
+#endif /* DEBUG_VFS_LOCKS */
+
+
+/*
+ * This call works for vnodes in the kernel.
+ */
+#define VCALL(c) ((c)->a_desc->vdesc_call(c))
+
+#define DOINGASYNC(vp) \
+ (((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 && \
+ ((curthread->td_pflags & TDP_SYNCIO) == 0))
+
+/*
+ * VMIO support inline
+ */
+
+extern int vmiodirenable;
+
+static __inline int
+vn_canvmio(struct vnode *vp)
+{
+ if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR)))
+ return(TRUE);
+ return(FALSE);
+}
+
+/*
+ * Finally, include the default set of vnode operations.
+ */
+#include <rtems/bsd/local/vnode_if.h>
+
+/* vn_open_flags */
+#define VN_OPEN_NOAUDIT 0x00000001
+
+/*
+ * Public vnode manipulation functions.
+ */
+struct componentname;
+struct file;
+struct mount;
+struct nameidata;
+struct ostat;
+struct thread;
+struct proc;
+struct stat;
+struct nstat;
+struct ucred;
+struct uio;
+struct vattr;
+struct vnode;
+
+/* cache_* may belong in namei.h. */
+#define cache_enter(dvp, vp, cnp) \
+ cache_enter_time(dvp, vp, cnp, NULL, NULL)
+void cache_enter_time(struct vnode *dvp, struct vnode *vp,
+ struct componentname *cnp, struct timespec *tsp,
+ struct timespec *dtsp);
+#define cache_lookup(dvp, vpp, cnp) \
+ cache_lookup_times(dvp, vpp, cnp, NULL, NULL)
+int cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
+ struct componentname *cnp, struct timespec *tsp, int *ticksp);
+void cache_purge(struct vnode *vp);
+void cache_purge_negative(struct vnode *vp);
+void cache_purgevfs(struct mount *mp);
+int change_dir(struct vnode *vp, struct thread *td);
+int change_root(struct vnode *vp, struct thread *td);
+void cvtstat(struct stat *st, struct ostat *ost);
+void cvtnstat(struct stat *sb, struct nstat *nsb);
+int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
+ struct vnode **vpp);
+void getnewvnode_reserve(u_int count);
+void getnewvnode_drop_reserve(void);
+int insmntque1(struct vnode *vp, struct mount *mp,
+ void (*dtr)(struct vnode *, void *), void *dtr_arg);
+int insmntque(struct vnode *vp, struct mount *mp);
+u_quad_t init_va_filerev(void);
+int speedup_syncer(void);
+int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf,
+ u_int *buflen);
+#define textvp_fullpath(p, rb, rfb) \
+ vn_fullpath(FIRST_THREAD_IN_PROC(p), (p)->p_textvp, rb, rfb)
+int vn_fullpath(struct thread *td, struct vnode *vn,
+ char **retbuf, char **freebuf);
+int vn_fullpath_global(struct thread *td, struct vnode *vn,
+ char **retbuf, char **freebuf);
+struct vnode *
+ vn_dir_dd_ino(struct vnode *vp);
+int vn_commname(struct vnode *vn, char *buf, u_int buflen);
+int vn_path_to_global_path(struct thread *td, struct vnode *vp,
+ char *path, u_int pathlen);
+int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
+ gid_t file_gid, accmode_t accmode, struct ucred *cred,
+ int *privused);
+int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
+ struct acl *aclp, accmode_t accmode, struct ucred *cred,
+ int *privused);
+int vaccess_acl_posix1e(enum vtype type, uid_t file_uid,
+ gid_t file_gid, struct acl *acl, accmode_t accmode,
+ struct ucred *cred, int *privused);
+void vattr_null(struct vattr *vap);
+int vcount(struct vnode *vp);
+void vdrop(struct vnode *);
+void vdropl(struct vnode *);
+int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
+int vget(struct vnode *vp, int lockflag, struct thread *td);
+void vgone(struct vnode *vp);
+void vhold(struct vnode *);
+void vholdl(struct vnode *);
+void vinactive(struct vnode *, struct thread *);
+int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
+int vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
+ off_t length, int blksize);
+void vunref(struct vnode *);
+void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
+#define vprint(label, vp) vn_printf((vp), "%s\n", (label))
+int vrecycle(struct vnode *vp, struct thread *td);
+int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
+ struct ucred *cred);
+int vn_close(struct vnode *vp,
+ int flags, struct ucred *file_cred, struct thread *td);
+void vn_finished_write(struct mount *mp);
+void vn_finished_secondary_write(struct mount *mp);
+int vn_isdisk(struct vnode *vp, int *errp);
+int _vn_lock(struct vnode *vp, int flags, char *file, int line);
+#define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
+int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp);
+int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode,
+ u_int vn_open_flags, struct ucred *cred, struct file *fp);
+void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
+int vn_pollrecord(struct vnode *vp, struct thread *p, int events);
+int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base,
+ int len, off_t offset, enum uio_seg segflg, int ioflg,
+ struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid,
+ struct thread *td);
+int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
+ size_t len, off_t offset, enum uio_seg segflg, int ioflg,
+ struct ucred *active_cred, struct ucred *file_cred, size_t *aresid,
+ struct thread *td);
+int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
+ const struct thread *td);
+int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
+ struct ucred *file_cred, struct thread *td);
+int vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
+int vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
+ int flags);
+int vn_writechk(struct vnode *vp);
+int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
+ const char *attrname, int *buflen, char *buf, struct thread *td);
+int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
+ const char *attrname, int buflen, char *buf, struct thread *td);
+int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
+ const char *attrname, struct thread *td);
+int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
+ struct vnode **rvp);
+
+int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
+int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
+ struct uio *uio);
+
+#define vn_rangelock_unlock(vp, cookie) \
+ rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
+#define vn_rangelock_unlock_range(vp, cookie, start, end) \
+ rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end), \
+ VI_MTX(vp))
+#define vn_rangelock_rlock(vp, start, end) \
+ rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_wlock(vp, start, end) \
+ rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+
+int vfs_cache_lookup(struct vop_lookup_args *ap);
+void vfs_timestamp(struct timespec *);
+void vfs_write_resume(struct mount *mp);
+void vfs_write_resume_flags(struct mount *mp, int flags);
+int vfs_write_suspend(struct mount *mp);
+int vop_stdbmap(struct vop_bmap_args *);
+int vop_stdfsync(struct vop_fsync_args *);
+int vop_stdgetwritemount(struct vop_getwritemount_args *);
+int vop_stdgetpages(struct vop_getpages_args *);
+int vop_stdinactive(struct vop_inactive_args *);
+int vop_stdislocked(struct vop_islocked_args *);
+int vop_stdkqfilter(struct vop_kqfilter_args *);
+int vop_stdlock(struct vop_lock1_args *);
+int vop_stdputpages(struct vop_putpages_args *);
+int vop_stdunlock(struct vop_unlock_args *);
+int vop_nopoll(struct vop_poll_args *);
+int vop_stdaccess(struct vop_access_args *ap);
+int vop_stdaccessx(struct vop_accessx_args *ap);
+int vop_stdadvise(struct vop_advise_args *ap);
+int vop_stdadvlock(struct vop_advlock_args *ap);
+int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
+int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
+int vop_stdallocate(struct vop_allocate_args *ap);
+int vop_stdpathconf(struct vop_pathconf_args *);
+int vop_stdpoll(struct vop_poll_args *);
+int vop_stdvptocnp(struct vop_vptocnp_args *ap);
+int vop_stdvptofh(struct vop_vptofh_args *ap);
+int vop_stdunp_bind(struct vop_unp_bind_args *ap);
+int vop_stdunp_connect(struct vop_unp_connect_args *ap);
+int vop_stdunp_detach(struct vop_unp_detach_args *ap);
+int vop_eopnotsupp(struct vop_generic_args *ap);
+int vop_ebadf(struct vop_generic_args *ap);
+int vop_einval(struct vop_generic_args *ap);
+int vop_enoent(struct vop_generic_args *ap);
+int vop_enotty(struct vop_generic_args *ap);
+int vop_null(struct vop_generic_args *ap);
+int vop_panic(struct vop_generic_args *ap);
+
+/* These are called from within the actual VOPS. */
+void vop_create_post(void *a, int rc);
+void vop_deleteextattr_post(void *a, int rc);
+void vop_link_post(void *a, int rc);
+void vop_lock_pre(void *a);
+void vop_lock_post(void *a, int rc);
+void vop_lookup_post(void *a, int rc);
+void vop_lookup_pre(void *a);
+void vop_mkdir_post(void *a, int rc);
+void vop_mknod_post(void *a, int rc);
+void vop_remove_post(void *a, int rc);
+void vop_rename_post(void *a, int rc);
+void vop_rename_pre(void *a);
+void vop_rmdir_post(void *a, int rc);
+void vop_setattr_post(void *a, int rc);
+void vop_setextattr_post(void *a, int rc);
+void vop_strategy_pre(void *a);
+void vop_symlink_post(void *a, int rc);
+void vop_unlock_post(void *a, int rc);
+void vop_unlock_pre(void *a);
+
+void vop_rename_fail(struct vop_rename_args *ap);
+
+#define VOP_WRITE_PRE(ap) \
+ struct vattr va; \
+ int error, osize, ooffset, noffset; \
+ \
+ osize = ooffset = noffset = 0; \
+ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \
+ error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred); \
+ if (error) \
+ return (error); \
+ ooffset = (ap)->a_uio->uio_offset; \
+ osize = va.va_size; \
+ }
+
+#define VOP_WRITE_POST(ap, ret) \
+ noffset = (ap)->a_uio->uio_offset; \
+ if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \
+ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \
+ | (noffset > osize ? NOTE_EXTEND : 0)); \
+ }
+
+#define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
+
+
+void vput(struct vnode *vp);
+void vrele(struct vnode *vp);
+void vref(struct vnode *vp);
+int vrefcnt(struct vnode *vp);
+void v_addpollinfo(struct vnode *vp);
+
+int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td);
+void vnode_destroy_vobject(struct vnode *vp);
+
+extern struct vop_vector fifo_specops;
+extern struct vop_vector dead_vnodeops;
+extern struct vop_vector default_vnodeops;
+
+#define VOP_PANIC ((void*)(uintptr_t)vop_panic)
+#define VOP_NULL ((void*)(uintptr_t)vop_null)
+#define VOP_EBADF ((void*)(uintptr_t)vop_ebadf)
+#define VOP_ENOTTY ((void*)(uintptr_t)vop_enotty)
+#define VOP_EINVAL ((void*)(uintptr_t)vop_einval)
+#define VOP_ENOENT ((void*)(uintptr_t)vop_enoent)
+#define VOP_EOPNOTSUPP ((void*)(uintptr_t)vop_eopnotsupp)
+
+/* vfs_hash.c */
+typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg);
+
+int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
+u_int vfs_hash_index(struct vnode *vp);
+int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
+void vfs_hash_rehash(struct vnode *vp, u_int hash);
+void vfs_hash_remove(struct vnode *vp);
+
+int vfs_kqfilter(struct vop_kqfilter_args *);
+void vfs_mark_atime(struct vnode *vp, struct ucred *cred);
+struct dirent;
+int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
+
+int vfs_unixify_accmode(accmode_t *accmode);
+
+void vfs_unp_reclaim(struct vnode *vp);
+
+int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode);
+int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
+ gid_t gid);
+int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
+ struct thread *td);
+int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td);
+
+#endif /* _KERNEL */
+#endif /* __rtems__ */
+
+#endif /* !_SYS_VNODE_H_ */
diff --git a/freebsd/sys/v850/include/machine/in_cksum.h b/freebsd/sys/v850/include/machine/in_cksum.h
index 37d88e2e..633efa1f 100644
--- a/freebsd/sys/v850/include/machine/in_cksum.h
+++ b/freebsd/sys/v850/include/machine/in_cksum.h
@@ -40,6 +40,7 @@
#define in_cksum(m, len) in_cksum_skip(m, len, 0)
+#if defined(IPVERSION) && (IPVERSION == 4)
/*
* It it useful to have an Internet checksum routine which is inlineable
* and optimized specifically for the task of computing IP header checksums
@@ -66,9 +67,12 @@ in_cksum_update(struct ip *ip)
} while(0)
#endif
+#endif
#ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
u_int in_cksum_hdr(const struct ip *ip);
+#endif
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/freebsd/sys/v850/include/machine/pci_cfgreg.h b/freebsd/sys/v850/include/machine/pci_cfgreg.h
index bc72418d..ea5e3198 100644
--- a/freebsd/sys/v850/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/v850/include/machine/pci_cfgreg.h
@@ -27,6 +27,9 @@
*
*/
+#ifndef __X86_PCI_CFGREG_H__
+#define __X86_PCI_CFGREG_H__
+
#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc
@@ -43,10 +46,15 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
+u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes);
+#ifdef __HAVE_PIR
void pci_pir_open(void);
int pci_pir_probe(int bus, int require_parse);
int pci_pir_route_interrupt(int bus, int device, int func, int pin);
+#endif
+
+#endif /* !__X86_PCI_CFGREG_H__ */
diff --git a/freebsd/sys/v850/pci/pci_bus.c b/freebsd/sys/v850/pci/pci_bus.c
index ad0342ec..cfab0049 100644
--- a/freebsd/sys/v850/pci/pci_bus.c
+++ b/freebsd/sys/v850/pci/pci_bus.c
@@ -53,13 +53,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
-#ifndef __rtems__
-static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev,
- int pin);
-#else /* __rtems__ */
-int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin);
-#endif /* __rtems__ */
-
int
legacy_pcib_maxslots(device_t dev)
{
@@ -68,7 +61,7 @@ legacy_pcib_maxslots(device_t dev)
/* read configuration space register */
-u_int32_t
+uint32_t
legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
u_int reg, int bytes)
{
@@ -79,11 +72,26 @@ legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
void
legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
- u_int reg, u_int32_t data, int bytes)
+ u_int reg, uint32_t data, int bytes)
{
pci_cfgregwrite(bus, slot, func, reg, data, bytes);
}
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
/* Pass MSI requests up to the nexus. */
static int
@@ -135,6 +143,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
uint32_t id, uint8_t class, uint8_t subclass,
uint8_t *busnum)
{
+#ifdef __i386__
const char *s = NULL;
static uint8_t pxb[4]; /* hack for 450nx */
@@ -352,6 +361,14 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func,
}
return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
}
/*
@@ -362,7 +379,7 @@ static void
legacy_pcib_identify(driver_t *driver, device_t parent)
{
int bus, slot, func;
- u_int8_t hdrtype;
+ uint8_t hdrtype;
int found = 0;
int pcifunchigh;
int found824xx = 0;
@@ -405,8 +422,8 @@ legacy_pcib_identify(driver_t *driver, device_t parent)
/*
* Read the IDs and class from the device.
*/
- u_int32_t id;
- u_int8_t class, subclass, busnum;
+ uint32_t id;
+ uint8_t class, subclass, busnum;
const char *s;
device_t *devs;
int ndevs, i;
@@ -493,21 +510,23 @@ legacy_pcib_probe(device_t dev)
static int
legacy_pcib_attach(device_t dev)
{
+#ifdef __HAVE_PIR
device_t pir;
+#endif
int bus;
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
/*
* Look for a PCI BIOS interrupt routing table as that will be
* our method of routing interrupts if we have one.
*/
- bus = pcib_get_bus(dev);
-#ifndef __rtems__
if (pci_pir_probe(bus, 0)) {
pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
if (pir != NULL)
device_probe_and_attach(pir);
}
-#endif /* __rtems__ */
+#endif
device_add_child(dev, "pci", bus);
return bus_generic_attach(dev);
}
@@ -543,35 +562,45 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
return ENOENT;
}
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
SYSCTL_DECL(_hw_pci);
-static unsigned long legacy_host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start);
-SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN,
- &legacy_host_mem_start, 0x80000000,
- "Limit the host bridge memory to being above this address. Must be\n\
-set at boot via a tunable.");
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
- /*
- * If no memory preference is given, use upper 32MB slot most
- * bioses use for their memory window. Typically other bridges
- * before us get in the way to assert their preferences on memory.
- * Hardcoding like this sucks, so a more MD/MI way needs to be
- * found to do it. This is typically only used on older laptops
- * that don't have pci busses behind pci bridge, so assuming > 32MB
- * is liekly OK.
- *
- * However, this can cause problems for other chipsets, so we make
- * this tunable by hw.pci.host_mem_start.
- */
- if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL)
- start = legacy_host_mem_start;
- if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL)
- start = 0x1000;
+
+ start = hostb_alloc_start(type, start, end, count);
return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
count, flags));
}
@@ -600,7 +629,7 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
DEVMETHOD(pcib_release_msi, pcib_release_msi),
DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
@@ -616,7 +645,6 @@ DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
-#ifndef __rtems__
/*
* Install placeholder to claim the resources owned by the
* PCI bus interface. This could be used to extract the
@@ -665,7 +693,7 @@ static devclass_t pcibus_pnp_devclass;
DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
-
+#ifdef __HAVE_PIR
/*
* Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
* that appear in the PCIBIOS Interrupt Routing Table to use the routing
@@ -676,39 +704,17 @@ static int pcibios_pcib_probe(device_t bus);
static device_method_t pcibios_pcib_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcibios_pcib_probe),
- DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_read_ivar, pcib_read_ivar),
- DEVMETHOD(bus_write_ivar, pcib_write_ivar),
- DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
- DEVMETHOD(bus_release_resource, bus_generic_release_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
- DEVMETHOD(pcib_read_config, pcib_read_config),
- DEVMETHOD(pcib_write_config, pcib_write_config),
- DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
- DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
- DEVMETHOD(pcib_release_msi, pcib_release_msi),
- DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
- DEVMETHOD(pcib_release_msix, pcib_release_msix),
- DEVMETHOD(pcib_map_msi, pcib_map_msi),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
- DEVMETHOD_END
+ {0, 0}
};
static devclass_t pcib_devclass;
-DEFINE_CLASS_0(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
- sizeof(struct pcib_softc));
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
static int
@@ -727,11 +733,4 @@ pcibios_pcib_probe(device_t dev)
device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
return (-2000);
}
-
-static int
-pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
- pci_get_function(dev), pin));
-}
-#endif /* __rtems__ */
+#endif
diff --git a/freebsd/sys/v850/v850/legacy.c b/freebsd/sys/v850/v850/legacy.c
index c81ccc5e..3a2fab02 100644
--- a/freebsd/sys/v850/v850/legacy.c
+++ b/freebsd/sys/v850/v850/legacy.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <i386/bios/mca_machdep.h>
#endif
+#include <machine/clock.h>
#include <machine/legacyvar.h>
#include <machine/resource.h>
@@ -351,9 +352,22 @@ cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
struct cpu_device *cpdev;
- if (index != CPU_IVAR_PCPU)
+ switch (index) {
+ case CPU_IVAR_PCPU:
+ cpdev = device_get_ivars(child);
+ *result = (uintptr_t)cpdev->cd_pcpu;
+ break;
+#ifndef __rtems__
+ case CPU_IVAR_NOMINAL_MHZ:
+ if (tsc_is_invariant) {
+ *result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
+ 1000000);
+ break;
+ }
+ /* FALLTHROUGH */
+#endif /* __rtems__ */
+ default:
return (ENOENT);
- cpdev = device_get_ivars(child);
- *result = (uintptr_t)cpdev->cd_pcpu;
+ }
return (0);
}
diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h
index 138d087f..53344c02 100644
--- a/freebsd/sys/vm/uma.h
+++ b/freebsd/sys/vm/uma.h
@@ -248,6 +248,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* backend pages and can fail early.
*/
#define UMA_ZONE_VTOSLAB 0x2000 /* Zone uses vtoslab for lookup. */
+#define UMA_ZONE_NODUMP 0x4000 /*
+ * Zone's pages will not be included in
+ * mini-dumps.
+ */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -452,11 +456,12 @@ int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size);
*
* Arguments:
* zone The zone to limit
+ * nitems The requested upper limit on the number of items allowed
*
* Returns:
- * Nothing
+ * int The effective value of nitems after rounding up based on page size
*/
-void uma_zone_set_max(uma_zone_t zone, int nitems);
+int uma_zone_set_max(uma_zone_t zone, int nitems);
/*
* Obtains the effective limit on the number of items in a zone
@@ -623,7 +628,8 @@ struct uma_type_header {
u_int64_t uth_allocs; /* Zone: number of allocations. */
u_int64_t uth_frees; /* Zone: number of frees. */
u_int64_t uth_fails; /* Zone: number of alloc failures. */
- u_int64_t _uth_reserved1[3]; /* Reserved. */
+ u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. */
+ u_int64_t _uth_reserved1[2]; /* Reserved. */
};
struct uma_percpu_stat {
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index c61faff4..69db9df8 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -88,8 +88,6 @@ __FBSDID("$FreeBSD$");
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
-#include <machine/vmparam.h>
-
#include <ddb/ddb.h>
#ifdef __rtems__
@@ -117,7 +115,7 @@ static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
static uma_zone_t hashzone;
/* The boot-time adjusted value for cache line alignment. */
-static int uma_align_cache = 64 - 1;
+int uma_align_cache = 64 - 1;
static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
@@ -144,6 +142,8 @@ static struct mtx uma_boot_pages_mtx;
/* Is the VM done starting up? */
static int booted = 0;
+#define UMA_STARTUP 1
+#define UMA_STARTUP2 2
#endif /* __rtems__ */
/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
@@ -862,6 +862,9 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
else
wait &= ~M_ZERO;
+ if (keg->uk_flags & UMA_ZONE_NODUMP)
+ wait |= M_NODUMP;
+
/* zone is passed for legacy reasons. */
mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
if (mem == NULL) {
@@ -990,7 +993,7 @@ startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
return (tmps->us_data);
}
mtx_unlock(&uma_boot_pages_mtx);
- if (booted == 0)
+ if (booted < UMA_STARTUP2)
panic("UMA: Increase vm.boot_pages");
/*
* Now that we've booted reset these users to their real allocator.
@@ -1072,10 +1075,8 @@ obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
while (pages != startpages) {
pages--;
p = TAILQ_LAST(&object->memq, pglist);
- vm_page_lock_queues();
vm_page_unwire(p, 0);
vm_page_free(p);
- vm_page_unlock_queues();
}
retkva = 0;
goto done;
@@ -1360,11 +1361,20 @@ keg_ctor(void *mem, int size, void *udata, int flags)
#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = uma_small_alloc;
keg->uk_freef = uma_small_free;
-#endif
+
+#ifndef __rtems__
+ if (booted < UMA_STARTUP)
+ keg->uk_allocf = startup_alloc;
+#endif /* __rtems__ */
+#else
#ifndef __rtems__
- if (booted == 0)
+ if (booted < UMA_STARTUP2)
keg->uk_allocf = startup_alloc;
- } else if (booted == 0 && (keg->uk_flags & UMA_ZFLAG_INTERNAL))
+#endif /* __rtems__ */
+#endif
+#ifndef __rtems__
+ } else if (booted < UMA_STARTUP2 &&
+ (keg->uk_flags & UMA_ZFLAG_INTERNAL))
keg->uk_allocf = startup_alloc;
#else /* __rtems__ */
}
@@ -1463,6 +1473,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
zone->uz_allocs = 0;
zone->uz_frees = 0;
zone->uz_fails = 0;
+ zone->uz_sleeps = 0;
zone->uz_fills = zone->uz_count = 0;
zone->uz_flags = 0;
keg = arg->keg;
@@ -1820,9 +1831,9 @@ uma_startup(void *bootmem, int boot_pages)
bucket_init();
-#if defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_MD_SMALL_ALLOC_NEEDS_VM)
- booted = 1;
-#endif
+#ifndef __rtems__
+ booted = UMA_STARTUP;
+#endif /* __rtems__ */
#ifdef UMA_DEBUG
printf("UMA startup complete.\n");
@@ -1846,7 +1857,7 @@ SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_FIRST,
void
uma_startup2(void)
{
- booted = 1;
+ booted = UMA_STARTUP2;
bucket_enable();
#ifdef UMA_DEBUG
printf("UMA startup2 complete.\n");
@@ -2245,6 +2256,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
zone->uz_flags |= UMA_ZFLAG_FULL;
if (flags & M_NOWAIT)
break;
+ zone->uz_sleeps++;
msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
continue;
}
@@ -2388,6 +2400,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
*/
if (full && !empty) {
zone->uz_flags |= UMA_ZFLAG_FULL;
+ zone->uz_sleeps++;
msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
zone->uz_flags &= ~UMA_ZFLAG_FULL;
continue;
@@ -2894,7 +2907,7 @@ zone_free_item(uma_zone_t zone, void *item, void *udata,
}
/* See uma.h */
-void
+int
uma_zone_set_max(uma_zone_t zone, int nitems)
{
uma_keg_t keg;
@@ -2904,8 +2917,10 @@ uma_zone_set_max(uma_zone_t zone, int nitems)
keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
if (keg->uk_maxpages * keg->uk_ipers < nitems)
keg->uk_maxpages += keg->uk_ppera;
-
+ nitems = keg->uk_maxpages * keg->uk_ipers;
ZONE_UNLOCK(zone);
+
+ return (nitems);
}
/* See uma.h */
@@ -3040,13 +3055,11 @@ uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
if (kva == 0)
return (0);
- if (obj == NULL) {
- obj = vm_object_allocate(OBJT_DEFAULT,
- pages);
- } else {
+ if (obj == NULL)
+ obj = vm_object_allocate(OBJT_PHYS, pages);
+ else {
VM_OBJECT_LOCK_INIT(obj, "uma object");
- _vm_object_allocate(OBJT_DEFAULT,
- pages, obj);
+ _vm_object_allocate(OBJT_PHYS, pages, obj);
}
ZONE_LOCK(zone);
keg->uk_kva = kva;
@@ -3257,13 +3270,13 @@ uma_print_zone(uma_zone_t zone)
*/
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
- u_int64_t *freesp)
+ u_int64_t *freesp, u_int64_t *sleepsp)
{
uma_cache_t cache;
- u_int64_t allocs, frees;
+ u_int64_t allocs, frees, sleeps;
int cachefree, cpu;
- allocs = frees = 0;
+ allocs = frees = sleeps = 0;
cachefree = 0;
CPU_FOREACH(cpu) {
cache = &z->uz_cpu[cpu];
@@ -3276,12 +3289,15 @@ uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
}
allocs += z->uz_allocs;
frees += z->uz_frees;
+ sleeps += z->uz_sleeps;
if (cachefreep != NULL)
*cachefreep = cachefree;
if (allocsp != NULL)
*allocsp = allocs;
if (freesp != NULL)
*freesp = frees;
+ if (sleepsp != NULL)
+ *sleepsp = sleeps;
}
#endif /* DDB */
@@ -3315,36 +3331,19 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
uma_keg_t kz;
uma_zone_t z;
uma_keg_t k;
- char *buffer;
- int buflen, count, error, i;
-
- mtx_lock(&uma_mtx);
-restart:
- mtx_assert(&uma_mtx, MA_OWNED);
- count = 0;
- LIST_FOREACH(kz, &uma_kegs, uk_link) {
- LIST_FOREACH(z, &kz->uk_zones, uz_link)
- count++;
- }
- mtx_unlock(&uma_mtx);
+ int count, error, i;
- buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
- (mp_maxid + 1)) + 1;
- buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ count = 0;
mtx_lock(&uma_mtx);
- i = 0;
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
- i++;
- }
- if (i > count) {
- free(buffer, M_TEMP);
- goto restart;
+ count++;
}
- count = i;
-
- sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
/*
* Insert stream header.
@@ -3353,11 +3352,7 @@ restart:
ush.ush_version = UMA_STREAM_VERSION;
ush.ush_maxcpus = (mp_maxid + 1);
ush.ush_count = count;
- if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
- mtx_unlock(&uma_mtx);
- error = ENOMEM;
- goto out;
- }
+ (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
@@ -3389,12 +3384,8 @@ restart:
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
uth.uth_fails = z->uz_fails;
- if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
- ZONE_UNLOCK(z);
- mtx_unlock(&uma_mtx);
- error = ENOMEM;
- goto out;
- }
+ uth.uth_sleeps = z->uz_sleeps;
+ (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
/*
* While it is not normally safe to access the cache
* bucket pointers while not on the CPU that owns the
@@ -3419,53 +3410,47 @@ restart:
ups.ups_allocs = cache->uc_allocs;
ups.ups_frees = cache->uc_frees;
skip:
- if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
- ZONE_UNLOCK(z);
- mtx_unlock(&uma_mtx);
- error = ENOMEM;
- goto out;
- }
+ (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
}
ZONE_UNLOCK(z);
}
}
mtx_unlock(&uma_mtx);
- sbuf_finish(&sbuf);
- error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
-out:
- free(buffer, M_TEMP);
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
return (error);
}
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
- u_int64_t allocs, frees;
+ u_int64_t allocs, frees, sleeps;
uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
int cachefree;
- db_printf("%18s %8s %8s %8s %12s\n", "Zone", "Size", "Used", "Free",
- "Requests");
+ db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
+ "Requests", "Sleeps");
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
allocs = z->uz_allocs;
frees = z->uz_frees;
+ sleeps = z->uz_sleeps;
cachefree = 0;
} else
uma_zone_sumstat(z, &cachefree, &allocs,
- &frees);
+ &frees, &sleeps);
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
cachefree += bucket->ub_cnt;
- db_printf("%18s %8ju %8jd %8d %12ju\n", z->uz_name,
+ db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
(uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
- (uintmax_t)allocs);
+ (uintmax_t)allocs, sleeps);
if (db_pager_quit)
return;
}
diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h
index 22e2af78..86c50509 100644
--- a/freebsd/sys/vm/uma_int.h
+++ b/freebsd/sys/vm/uma_int.h
@@ -45,7 +45,7 @@
*
* The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
* be allocated off the page from a special slab zone. The free list within a
- * slab is managed with a linked list of indexes, which are 8 bit values. If
+ * slab is managed with a linked list of indices, which are 8 bit values. If
* UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit
* values. Currently on alpha you can get 250 or so 32 byte items and on x86
* you can get 250 or so 16byte items. For item sizes that would yield more
@@ -56,9 +56,9 @@
* wasted between items due to alignment problems. This may yield a much better
* memory footprint for certain sizes of objects. Another alternative is to
* increase the UMA_SLAB_SIZE, or allow for dynamic slab sizes. I prefer
- * dynamic slab sizes because we could stick with 8 bit indexes and only use
+ * dynamic slab sizes because we could stick with 8 bit indices and only use
* large slab sizes for zones with a lot of waste per slab. This may create
- * ineffeciencies in the vm subsystem due to fragmentation in the address space.
+ * inefficiencies in the vm subsystem due to fragmentation in the address space.
*
* The only really gross cases, with regards to memory waste, are for those
* items that are just over half the page size. You can get nearly 50% waste,
@@ -118,7 +118,7 @@
#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */
#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */
-#define UMA_BOOT_PAGES 48 /* Pages allocated for startup */
+#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */
/* Max waste before going to off page slab management */
#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10)
@@ -160,6 +160,15 @@ struct uma_hash {
};
/*
+ * align field or structure to cache line
+ */
+#if defined(__amd64__)
+#define UMA_ALIGN __aligned(CACHE_LINE_SIZE)
+#else
+#define UMA_ALIGN
+#endif
+
+/*
* Structures for per cpu queues.
*/
@@ -177,7 +186,7 @@ struct uma_cache {
uma_bucket_t uc_allocbucket; /* Bucket to allocate from */
u_int64_t uc_allocs; /* Count of allocations */
u_int64_t uc_frees; /* Count of frees */
-};
+} UMA_ALIGN;
typedef struct uma_cache * uma_cache_t;
@@ -312,11 +321,13 @@ struct uma_zone {
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Discards memory */
- u_int64_t uz_allocs; /* Total number of allocations */
- u_int64_t uz_frees; /* Total number of frees */
- u_int64_t uz_fails; /* Total number of alloc failures */
u_int32_t uz_flags; /* Flags inherited from kegs */
u_int32_t uz_size; /* Size inherited from kegs */
+
+ u_int64_t uz_allocs UMA_ALIGN; /* Total number of allocations */
+ u_int64_t uz_frees; /* Total number of frees */
+ u_int64_t uz_fails; /* Total number of alloc failures */
+ u_int64_t uz_sleeps; /* Total number of alloc sleeps */
uint16_t uz_fills; /* Outstanding bucket fills */
uint16_t uz_count; /* Highest value ub_ptr can have */
@@ -324,7 +335,7 @@ struct uma_zone {
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
*/
- struct uma_cache uz_cpu[1]; /* Per cpu caches */
+ struct uma_cache uz_cpu[1]; /* Per cpu caches */
};
/*
@@ -341,6 +352,8 @@ struct uma_zone {
#define UMA_ZFLAG_INHERIT (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | \
UMA_ZFLAG_BUCKET)
+#undef UMA_ALIGN
+
#ifdef _KERNEL
/* Internal prototypes */
static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data);
diff --git a/freebsd/sys/vm/vm.h b/freebsd/sys/vm/vm.h
index 941300a0..106c510c 100644
--- a/freebsd/sys/vm/vm.h
+++ b/freebsd/sys/vm/vm.h
@@ -76,14 +76,14 @@ typedef u_char vm_prot_t; /* protection codes */
#define VM_PROT_READ ((vm_prot_t) 0x01)
#define VM_PROT_WRITE ((vm_prot_t) 0x02)
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04)
-#define VM_PROT_OVERRIDE_WRITE ((vm_prot_t) 0x08) /* copy-on-write */
+#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */
#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE)
#define VM_PROT_DEFAULT VM_PROT_ALL
enum obj_type { OBJT_DEFAULT, OBJT_SWAP, OBJT_VNODE, OBJT_DEVICE, OBJT_PHYS,
- OBJT_DEAD, OBJT_SG };
+ OBJT_DEAD, OBJT_SG, OBJT_MGTDEVICE };
typedef u_char objtype_t;
union vm_map_object;
@@ -136,17 +136,21 @@ struct kva_md_info {
vm_offset_t clean_eva;
vm_offset_t pager_sva;
vm_offset_t pager_eva;
+ vm_offset_t bio_transient_sva;
+ vm_offset_t bio_transient_eva;
};
extern struct kva_md_info kmi;
extern void vm_ksubmap_init(struct kva_md_info *);
-struct uidinfo;
+extern int old_mlock;
+
+struct ucred;
int swap_reserve(vm_ooffset_t incr);
-int swap_reserve_by_uid(vm_ooffset_t incr, struct uidinfo *uip);
+int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_reserve_force(vm_ooffset_t incr);
void swap_release(vm_ooffset_t decr);
-void swap_release_by_uid(vm_ooffset_t decr, struct uidinfo *uip);
+void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred);
#endif /* VM_H */
diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h
index 7782f2a0..8b6c7ac5 100644
--- a/freebsd/sys/vm/vm_extern.h
+++ b/freebsd/sys/vm/vm_extern.h
@@ -63,8 +63,14 @@ void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
vm_ooffset_t *);
int vm_fault_disable_pagefaults(void);
void vm_fault_enable_pagefaults(int save);
+#ifndef __rtems__
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags, vm_page_t *m_hold);
+int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+ vm_prot_t prot, vm_page_t *ma, int max_count);
+#endif /* __rtems__ */
void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
-int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
+int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
void vm_waitproc(struct proc *);
int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t);
@@ -100,14 +106,9 @@ vsunlock(void *addr, size_t len)
(void) len;
}
#endif /* __rtems__ */
-void vm_object_print(/* db_expr_t */ long, boolean_t, /* db_expr_t */ long,
- char *);
-int vm_fault_quick(caddr_t v, int prot);
struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
int vm_thread_new(struct thread *td, int pages);
-void vm_thread_swapin(struct thread *td);
-void vm_thread_swapout(struct thread *td);
#endif /* _KERNEL */
#endif /* !_VM_EXTERN_H_ */
diff --git a/freebsd/sys/x86/pci/pci_bus.c b/freebsd/sys/x86/pci/pci_bus.c
new file mode 100644
index 00000000..cfab0049
--- /dev/null
+++ b/freebsd/sys/x86/pci/pci_bus.c
@@ -0,0 +1,736 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_cpu.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcib_private.h>
+#include <isa/isavar.h>
+#ifdef CPU_ELAN
+#include <machine/md_var.h>
+#endif
+#include <machine/legacyvar.h>
+#include <machine/pci_cfgreg.h>
+#include <machine/resource.h>
+
+#include <rtems/bsd/local/pcib_if.h>
+
+int
+legacy_pcib_maxslots(device_t dev)
+{
+ return 31;
+}
+
+/* read configuration space register */
+
+uint32_t
+legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
+ u_int reg, int bytes)
+{
+ return(pci_cfgregread(bus, slot, func, reg, bytes));
+}
+
+/* write configuration space register */
+
+void
+legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
+ u_int reg, uint32_t data, int bytes)
+{
+ pci_cfgregwrite(bus, slot, func, reg, data, bytes);
+}
+
+/* route interrupt */
+
+static int
+legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+
+#ifdef __HAVE_PIR
+ return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev),
+ pci_get_function(dev), pin));
+#else
+ /* No routing possible */
+ return (PCI_INVALID_IRQ);
+#endif
+}
+
+/* Pass MSI requests up to the nexus. */
+
+static int
+legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
+ int *irqs)
+{
+ device_t bus;
+
+ bus = device_get_parent(pcib);
+ return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
+ irqs));
+}
+
+static int
+legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
+{
+ device_t bus;
+
+ bus = device_get_parent(pcib);
+ return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
+}
+
+int
+legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
+ uint32_t *data)
+{
+ device_t bus, hostb;
+ int error, func, slot;
+
+ bus = device_get_parent(pcib);
+ error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data);
+ if (error)
+ return (error);
+
+ slot = legacy_get_pcislot(pcib);
+ func = legacy_get_pcifunc(pcib);
+ if (slot == -1 || func == -1)
+ return (0);
+ hostb = pci_find_bsf(0, slot, func);
+ KASSERT(hostb != NULL, ("%s: missing hostb for 0:%d:%d", __func__,
+ slot, func));
+ pci_ht_map_msi(hostb, *addr);
+ return (0);
+
+}
+
+static const char *
+legacy_pcib_is_host_bridge(int bus, int slot, int func,
+ uint32_t id, uint8_t class, uint8_t subclass,
+ uint8_t *busnum)
+{
+#ifdef __i386__
+ const char *s = NULL;
+ static uint8_t pxb[4]; /* hack for 450nx */
+
+ *busnum = 0;
+
+ switch (id) {
+ case 0x12258086:
+ s = "Intel 824?? host to PCI bridge";
+ /* XXX This is a guess */
+ /* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */
+ *busnum = bus;
+ break;
+ case 0x71208086:
+ s = "Intel 82810 (i810 GMCH) Host To Hub bridge";
+ break;
+ case 0x71228086:
+ s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge";
+ break;
+ case 0x71248086:
+ s = "Intel 82810E (i810E GMCH) Host To Hub bridge";
+ break;
+ case 0x11308086:
+ s = "Intel 82815 (i815 GMCH) Host To Hub bridge";
+ break;
+ case 0x71808086:
+ s = "Intel 82443LX (440 LX) host to PCI bridge";
+ break;
+ case 0x71908086:
+ s = "Intel 82443BX (440 BX) host to PCI bridge";
+ break;
+ case 0x71928086:
+ s = "Intel 82443BX host to PCI bridge (AGP disabled)";
+ break;
+ case 0x71948086:
+ s = "Intel 82443MX host to PCI bridge";
+ break;
+ case 0x71a08086:
+ s = "Intel 82443GX host to PCI bridge";
+ break;
+ case 0x71a18086:
+ s = "Intel 82443GX host to AGP bridge";
+ break;
+ case 0x71a28086:
+ s = "Intel 82443GX host to PCI bridge (AGP disabled)";
+ break;
+ case 0x84c48086:
+ s = "Intel 82454KX/GX (Orion) host to PCI bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1);
+ break;
+ case 0x84ca8086:
+ /*
+ * For the 450nx chipset, there is a whole bundle of
+ * things pretending to be host bridges. The MIOC will
+ * be seen first and isn't really a pci bridge (the
+ * actual busses are attached to the PXB's). We need to
+ * read the registers of the MIOC to figure out the
+ * bus numbers for the PXB channels.
+ *
+ * Since the MIOC doesn't have a pci bus attached, we
+ * pretend it wasn't there.
+ */
+ pxb[0] = legacy_pcib_read_config(0, bus, slot, func,
+ 0xd0, 1); /* BUSNO[0] */
+ pxb[1] = legacy_pcib_read_config(0, bus, slot, func,
+ 0xd1, 1) + 1; /* SUBA[0]+1 */
+ pxb[2] = legacy_pcib_read_config(0, bus, slot, func,
+ 0xd3, 1); /* BUSNO[1] */
+ pxb[3] = legacy_pcib_read_config(0, bus, slot, func,
+ 0xd4, 1) + 1; /* SUBA[1]+1 */
+ return NULL;
+ case 0x84cb8086:
+ switch (slot) {
+ case 0x12:
+ s = "Intel 82454NX PXB#0, Bus#A";
+ *busnum = pxb[0];
+ break;
+ case 0x13:
+ s = "Intel 82454NX PXB#0, Bus#B";
+ *busnum = pxb[1];
+ break;
+ case 0x14:
+ s = "Intel 82454NX PXB#1, Bus#A";
+ *busnum = pxb[2];
+ break;
+ case 0x15:
+ s = "Intel 82454NX PXB#1, Bus#B";
+ *busnum = pxb[3];
+ break;
+ }
+ break;
+ case 0x1A308086:
+ s = "Intel 82845 Host to PCI bridge";
+ break;
+
+ /* AMD -- vendor 0x1022 */
+ case 0x30001022:
+ s = "AMD Elan SC520 host to PCI bridge";
+#ifdef CPU_ELAN
+ init_AMD_Elan_sc520();
+#else
+ printf(
+"*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n");
+#endif
+ break;
+ case 0x70061022:
+ s = "AMD-751 host to PCI bridge";
+ break;
+ case 0x700e1022:
+ s = "AMD-761 host to PCI bridge";
+ break;
+
+ /* SiS -- vendor 0x1039 */
+ case 0x04961039:
+ s = "SiS 85c496";
+ break;
+ case 0x04061039:
+ s = "SiS 85c501";
+ break;
+ case 0x06011039:
+ s = "SiS 85c601";
+ break;
+ case 0x55911039:
+ s = "SiS 5591 host to PCI bridge";
+ break;
+ case 0x00011039:
+ s = "SiS 5591 host to AGP bridge";
+ break;
+
+ /* VLSI -- vendor 0x1004 */
+ case 0x00051004:
+ s = "VLSI 82C592 Host to PCI bridge";
+ break;
+
+ /* XXX Here is MVP3, I got the datasheet but NO M/B to test it */
+ /* totally. Please let me know if anything wrong. -F */
+ /* XXX need info on the MVP3 -- any takers? */
+ case 0x05981106:
+ s = "VIA 82C598MVP (Apollo MVP3) host bridge";
+ break;
+
+ /* AcerLabs -- vendor 0x10b9 */
+ /* Funny : The datasheet told me vendor id is "10b8",sub-vendor */
+ /* id is '10b9" but the register always shows "10b9". -Foxfair */
+ case 0x154110b9:
+ s = "AcerLabs M1541 (Aladdin-V) PCI host bridge";
+ break;
+
+ /* OPTi -- vendor 0x1045 */
+ case 0xc7011045:
+ s = "OPTi 82C700 host to PCI bridge";
+ break;
+ case 0xc8221045:
+ s = "OPTi 82C822 host to PCI Bridge";
+ break;
+
+ /* ServerWorks -- vendor 0x1166 */
+ case 0x00051166:
+ s = "ServerWorks NB6536 2.0HE host to PCI bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ case 0x00061166:
+ /* FALLTHROUGH */
+ case 0x00081166:
+ /* FALLTHROUGH */
+ case 0x02011166:
+ /* FALLTHROUGH */
+ case 0x010f1014: /* IBM re-badged ServerWorks chipset */
+ s = "ServerWorks host to PCI bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ case 0x00091166:
+ s = "ServerWorks NB6635 3.0LE host to PCI bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ case 0x00101166:
+ s = "ServerWorks CIOB30 host to PCI bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ case 0x00111166:
+ /* FALLTHROUGH */
+ case 0x03021014: /* IBM re-badged ServerWorks chipset */
+ s = "ServerWorks CMIC-HE host to PCI-X bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ /* XXX unknown chipset, but working */
+ case 0x00171166:
+ /* FALLTHROUGH */
+ case 0x01011166:
+ case 0x01101166:
+ case 0x02251166:
+ s = "ServerWorks host to PCI bridge(unknown chipset)";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1);
+ break;
+
+ /* Compaq/HP -- vendor 0x0e11 */
+ case 0x60100e11:
+ s = "Compaq/HP Model 6010 HotPlug PCI Bridge";
+ *busnum = legacy_pcib_read_config(0, bus, slot, func, 0xc8, 1);
+ break;
+
+ /* Integrated Micro Solutions -- vendor 0x10e0 */
+ case 0x884910e0:
+ s = "Integrated Micro Solutions VL Bridge";
+ break;
+
+ default:
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ break;
+ }
+
+ return s;
+#else
+ const char *s = NULL;
+
+ *busnum = 0;
+ if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST)
+ s = "Host to PCI bridge";
+ return s;
+#endif
+}
+
+/*
+ * Scan the first pci bus for host-pci bridges and add pcib instances
+ * to the nexus for each bridge.
+ */
+static void
+legacy_pcib_identify(driver_t *driver, device_t parent)
+{
+ int bus, slot, func;
+ uint8_t hdrtype;
+ int found = 0;
+ int pcifunchigh;
+ int found824xx = 0;
+ int found_orion = 0;
+ device_t child;
+ devclass_t pci_devclass;
+
+ if (pci_cfgregopen() == 0)
+ return;
+ /*
+ * Check to see if we haven't already had a PCI bus added
+ * via some other means. If we have, bail since otherwise
+ * we're going to end up duplicating it.
+ */
+ if ((pci_devclass = devclass_find("pci")) &&
+ devclass_get_device(pci_devclass, 0))
+ return;
+
+
+ bus = 0;
+ retry:
+ for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
+ func = 0;
+ hdrtype = legacy_pcib_read_config(0, bus, slot, func,
+ PCIR_HDRTYPE, 1);
+ /*
+ * When enumerating bus devices, the standard says that
+ * one should check the header type and ignore the slots whose
+ * header types that the software doesn't know about. We use
+ * this to filter out devices.
+ */
+ if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
+ continue;
+ if ((hdrtype & PCIM_MFDEV) &&
+ (!found_orion || hdrtype != 0xff))
+ pcifunchigh = PCI_FUNCMAX;
+ else
+ pcifunchigh = 0;
+ for (func = 0; func <= pcifunchigh; func++) {
+ /*
+ * Read the IDs and class from the device.
+ */
+ uint32_t id;
+ uint8_t class, subclass, busnum;
+ const char *s;
+ device_t *devs;
+ int ndevs, i;
+
+ id = legacy_pcib_read_config(0, bus, slot, func,
+ PCIR_DEVVENDOR, 4);
+ if (id == -1)
+ continue;
+ class = legacy_pcib_read_config(0, bus, slot, func,
+ PCIR_CLASS, 1);
+ subclass = legacy_pcib_read_config(0, bus, slot, func,
+ PCIR_SUBCLASS, 1);
+
+ s = legacy_pcib_is_host_bridge(bus, slot, func,
+ id, class, subclass,
+ &busnum);
+ if (s == NULL)
+ continue;
+
+ /*
+ * Check to see if the physical bus has already
+ * been seen. Eg: hybrid 32 and 64 bit host
+ * bridges to the same logical bus.
+ */
+ if (device_get_children(parent, &devs, &ndevs) == 0) {
+ for (i = 0; s != NULL && i < ndevs; i++) {
+ if (strcmp(device_get_name(devs[i]),
+ "pcib") != 0)
+ continue;
+ if (legacy_get_pcibus(devs[i]) == busnum)
+ s = NULL;
+ }
+ free(devs, M_TEMP);
+ }
+
+ if (s == NULL)
+ continue;
+ /*
+ * Add at priority 100 to make sure we
+ * go after any motherboard resources
+ */
+ child = BUS_ADD_CHILD(parent, 100,
+ "pcib", busnum);
+ device_set_desc(child, s);
+ legacy_set_pcibus(child, busnum);
+ legacy_set_pcislot(child, slot);
+ legacy_set_pcifunc(child, func);
+
+ found = 1;
+ if (id == 0x12258086)
+ found824xx = 1;
+ if (id == 0x84c48086)
+ found_orion = 1;
+ }
+ }
+ if (found824xx && bus == 0) {
+ bus++;
+ goto retry;
+ }
+
+ /*
+ * Make sure we add at least one bridge since some old
+ * hardware doesn't actually have a host-pci bridge device.
+ * Note that pci_cfgregopen() thinks we have PCI devices..
+ */
+ if (!found) {
+ if (bootverbose)
+ printf(
+ "legacy_pcib_identify: no bridge found, adding pcib0 anyway\n");
+ child = BUS_ADD_CHILD(parent, 100, "pcib", 0);
+ legacy_set_pcibus(child, 0);
+ }
+}
+
+static int
+legacy_pcib_probe(device_t dev)
+{
+
+ if (pci_cfgregopen() == 0)
+ return ENXIO;
+ return -100;
+}
+
+static int
+legacy_pcib_attach(device_t dev)
+{
+#ifdef __HAVE_PIR
+ device_t pir;
+#endif
+ int bus;
+
+ bus = pcib_get_bus(dev);
+#ifdef __HAVE_PIR
+ /*
+ * Look for a PCI BIOS interrupt routing table as that will be
+ * our method of routing interrupts if we have one.
+ */
+ if (pci_pir_probe(bus, 0)) {
+ pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0);
+ if (pir != NULL)
+ device_probe_and_attach(pir);
+ }
+#endif
+ device_add_child(dev, "pci", bus);
+ return bus_generic_attach(dev);
+}
+
+int
+legacy_pcib_read_ivar(device_t dev, device_t child, int which,
+ uintptr_t *result)
+{
+
+ switch (which) {
+ case PCIB_IVAR_DOMAIN:
+ *result = 0;
+ return 0;
+ case PCIB_IVAR_BUS:
+ *result = legacy_get_pcibus(dev);
+ return 0;
+ }
+ return ENOENT;
+}
+
+int
+legacy_pcib_write_ivar(device_t dev, device_t child, int which,
+ uintptr_t value)
+{
+
+ switch (which) {
+ case PCIB_IVAR_DOMAIN:
+ return EINVAL;
+ case PCIB_IVAR_BUS:
+ legacy_set_pcibus(dev, value);
+ return 0;
+ }
+ return ENOENT;
+}
+
+/*
+ * Helper routine for x86 Host-PCI bridge driver resource allocation.
+ * This is used to adjust the start address of wildcard allocation
+ * requests to avoid low addresses that are known to be problematic.
+ *
+ * If no memory preference is given, use upper 32MB slot most BIOSes
+ * use for their memory window. This is typically only used on older
+ * laptops that don't have PCI busses behind a PCI bridge, so assuming
+ * > 32MB is likely OK.
+ *
+ * However, this can cause problems for other chipsets, so we make
+ * this tunable by hw.pci.host_mem_start.
+ */
+SYSCTL_DECL(_hw_pci);
+
+static unsigned long host_mem_start = 0x80000000;
+TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
+SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
+ 0, "Limit the host bridge memory to being above this address.");
+
+u_long
+hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+{
+
+ if (start + count - 1 != end) {
+ if (type == SYS_RES_MEMORY && start < host_mem_start)
+ start = host_mem_start;
+ if (type == SYS_RES_IOPORT && start < 0x1000)
+ start = 0x1000;
+ }
+ return (start);
+}
+
+struct resource *
+legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
+ u_long start, u_long end, u_long count, u_int flags)
+{
+
+ start = hostb_alloc_start(type, start, end, count);
+ return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
+ count, flags));
+}
+
+static device_method_t legacy_pcib_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_identify, legacy_pcib_identify),
+ DEVMETHOD(device_probe, legacy_pcib_probe),
+ DEVMETHOD(device_attach, legacy_pcib_attach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ /* Bus interface */
+ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar),
+ DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar),
+ DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource),
+ DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource),
+ DEVMETHOD(bus_release_resource, bus_generic_release_resource),
+ DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
+ DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
+ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
+ DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
+
+ /* pcib interface */
+ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots),
+ DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
+ DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
+ DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi),
+
+ DEVMETHOD_END
+};
+
+static devclass_t hostb_devclass;
+
+DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1);
+DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0);
+
+
+/*
+ * Install placeholder to claim the resources owned by the
+ * PCI bus interface. This could be used to extract the
+ * config space registers in the extreme case where the PnP
+ * ID is available and the PCI BIOS isn't, but for now we just
+ * eat the PnP ID and do nothing else.
+ *
+ * XXX we should silence this probe, as it will generally confuse
+ * people.
+ */
+static struct isa_pnp_id pcibus_pnp_ids[] = {
+ { 0x030ad041 /* PNP0A03 */, "PCI Bus" },
+ { 0x080ad041 /* PNP0A08 */, "PCIe Bus" },
+ { 0 }
+};
+
+static int
+pcibus_pnp_probe(device_t dev)
+{
+ int result;
+
+ if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, pcibus_pnp_ids)) <= 0)
+ device_quiet(dev);
+ return(result);
+}
+
+static int
+pcibus_pnp_attach(device_t dev)
+{
+ return(0);
+}
+
+static device_method_t pcibus_pnp_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, pcibus_pnp_probe),
+ DEVMETHOD(device_attach, pcibus_pnp_attach),
+ DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+ { 0, 0 }
+};
+
+static devclass_t pcibus_pnp_devclass;
+
+DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1);
+DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0);
+
+#ifdef __HAVE_PIR
+/*
+ * Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges
+ * that appear in the PCIBIOS Interrupt Routing Table to use the routing
+ * table for interrupt routing when possible.
+ */
+static int pcibios_pcib_probe(device_t bus);
+
+static device_method_t pcibios_pcib_pci_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, pcibios_pcib_probe),
+
+ /* pcib interface */
+ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
+
+ {0, 0}
+};
+
+static devclass_t pcib_devclass;
+
+DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods,
+ sizeof(struct pcib_softc), pcib_driver);
+DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0);
+
+static int
+pcibios_pcib_probe(device_t dev)
+{
+ int bus;
+
+ if ((pci_get_class(dev) != PCIC_BRIDGE) ||
+ (pci_get_subclass(dev) != PCIS_BRIDGE_PCI))
+ return (ENXIO);
+ bus = pci_read_config(dev, PCIR_SECBUS_1, 1);
+ if (bus == 0)
+ return (ENXIO);
+ if (!pci_pir_probe(bus, 1))
+ return (ENXIO);
+ device_set_desc(dev, "PCIBIOS PCI-PCI bridge");
+ return (-2000);
+}
+#endif
diff --git a/freebsd/usr.bin/netstat/atalk.c b/freebsd/usr.bin/netstat/atalk.c
index 811cb1ab..be73ca0c 100644
--- a/freebsd/usr.bin/netstat/atalk.c
+++ b/freebsd/usr.bin/netstat/atalk.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
diff --git a/freebsd/usr.bin/netstat/if.c b/freebsd/usr.bin/netstat/if.c
index b39670e1..b026cb47 100644
--- a/freebsd/usr.bin/netstat/if.c
+++ b/freebsd/usr.bin/netstat/if.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -211,7 +207,6 @@ intpr(int interval1, u_long ifnetaddr, void (*pfunc)(char *))
u_long ierrors;
u_long idrops;
u_long collisions;
- short timer;
int drops;
struct sockaddr *sa = NULL;
char name[IFNAMSIZ];
@@ -245,8 +240,6 @@ intpr(int interval1, u_long ifnetaddr, void (*pfunc)(char *))
if (bflag)
printf(" %10.10s","Obytes");
printf(" %5s", "Coll");
- if (tflag)
- printf(" %s", "Time");
if (dflag)
printf(" %s", "Drop");
putchar('\n');
@@ -298,7 +291,6 @@ intpr(int interval1, u_long ifnetaddr, void (*pfunc)(char *))
ierrors = ifnet.if_ierrors;
idrops = ifnet.if_iqdrops;
collisions = ifnet.if_collisions;
- timer = ifnet.if_timer;
drops = ifnet.if_snd.ifq_drops;
if (ifaddraddr == 0) {
@@ -450,8 +442,6 @@ intpr(int interval1, u_long ifnetaddr, void (*pfunc)(char *))
show_stat("lu", 10, obytes, link_layer|network_layer);
show_stat("NRSlu", 5, collisions, link_layer);
- if (tflag)
- show_stat("LSd", 4, timer, link_layer);
if (dflag)
show_stat("LSd", 4, drops, link_layer);
putchar('\n');
@@ -583,7 +573,7 @@ sidewaysintpr(int interval1, u_long off)
interesting = ip;
interesting_off = off;
}
- snprintf(ip->ift_name, sizeof(ip->ift_name), "(%s)", name);;
+ snprintf(ip->ift_name, sizeof(ip->ift_name), "(%s)", name);
if ((ipn = malloc(sizeof(struct iftot))) == NULL) {
printf("malloc failed\n");
exit(1);
diff --git a/freebsd/usr.bin/netstat/inet.c b/freebsd/usr.bin/netstat/inet.c
index eb734043..3dc57b08 100644
--- a/freebsd/usr.bin/netstat/inet.c
+++ b/freebsd/usr.bin/netstat/inet.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -91,11 +87,11 @@ __FBSDID("$FreeBSD$");
char *inetname(struct in_addr *);
void inetprint(struct in_addr *, int, const char *, int);
#ifdef INET6
-static int udp_done, tcp_done;
+static int udp_done, tcp_done, sdp_done;
#endif /* INET6 */
static int
-pcblist_sysctl(int proto, char **bufp, int istcp)
+pcblist_sysctl(int proto, const char *name, char **bufp, int istcp)
{
const char *mibvar;
char *buf;
@@ -115,7 +111,8 @@ pcblist_sysctl(int proto, char **bufp, int istcp)
mibvar = "net.inet.raw.pcblist";
break;
}
-
+ if (strncmp(name, "sdp", 3) == 0)
+ mibvar = "net.inet.sdp.pcblist";
len = 0;
if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
@@ -314,15 +311,23 @@ protopr(u_long off, const char *name, int af1, int proto)
struct inpcb *inp;
struct xinpgen *xig, *oxig;
struct xsocket *so;
+ struct xtcp_timer *timer;
istcp = 0;
switch (proto) {
case IPPROTO_TCP:
#ifdef INET6
- if (tcp_done != 0)
- return;
- else
- tcp_done = 1;
+ if (strncmp(name, "sdp", 3) != 0) {
+ if (tcp_done != 0)
+ return;
+ else
+ tcp_done = 1;
+ } else {
+ if (sdp_done != 0)
+ return;
+ else
+ sdp_done = 1;
+ }
#endif
istcp = 1;
break;
@@ -336,7 +341,7 @@ protopr(u_long off, const char *name, int af1, int proto)
break;
}
if (live) {
- if (!pcblist_sysctl(proto, &buf, istcp))
+ if (!pcblist_sysctl(proto, name, &buf, istcp))
return;
} else {
if (!pcblist_kvm(off, &buf, istcp))
@@ -348,12 +353,14 @@ protopr(u_long off, const char *name, int af1, int proto)
xig->xig_len > sizeof(struct xinpgen);
xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
if (istcp) {
+ timer = &((struct xtcpcb *)xig)->xt_timer;
tp = &((struct xtcpcb *)xig)->xt_tp;
inp = &((struct xtcpcb *)xig)->xt_inp;
so = &((struct xtcpcb *)xig)->xt_socket;
} else {
inp = &((struct xinpcb *)xig)->xi_inp;
so = &((struct xinpcb *)xig)->xi_socket;
+ timer = NULL;
}
/* Ignore sockets for protocols other than the desired one. */
@@ -432,6 +439,9 @@ protopr(u_long off, const char *name, int af1, int proto)
"S-CLUS", "R-HIWA", "S-HIWA",
"R-LOWA", "S-LOWA", "R-BCNT",
"S-BCNT", "R-BMAX", "S-BMAX");
+ printf(" %7.7s %7.7s %7.7s %7.7s %7.7s %7.7s",
+ "rexmt", "persist", "keep",
+ "2msl", "delack", "rcvtime");
}
putchar('\n');
protopr_initialized = 1;
@@ -452,7 +462,10 @@ protopr(u_long off, const char *name, int af1, int proto)
#endif
vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
"4 " : " ";
- printf("%-3.3s%-2.2s ", name, vchar);
+ if (istcp && (tp->t_flags & TF_TOE) != 0)
+ printf("%-3.3s%-2.2s ", "toe", vchar);
+ else
+ printf("%-3.3s%-2.2s ", name, vchar);
if (Lflag) {
char buf1[15];
@@ -461,10 +474,8 @@ protopr(u_long off, const char *name, int af1, int proto)
printf("%-14.14s ", buf1);
} else if (Tflag) {
if (istcp)
- printf("%6ju %6ju %6u ",
- (uintmax_t)tp->t_sndrexmitpack,
- (uintmax_t)tp->t_rcvoopack,
- tp->t_sndzerowin);
+ printf("%6u %6u %6u ", tp->t_sndrexmitpack,
+ tp->t_rcvoopack, tp->t_sndzerowin);
} else {
printf("%6u %6u ", so->so_rcv.sb_cc, so->so_snd.sb_cc);
}
@@ -530,6 +541,14 @@ protopr(u_long off, const char *name, int af1, int proto)
so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
+ if (timer != NULL)
+ printf(" %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d",
+ timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10,
+ timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10,
+ timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10,
+ timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10,
+ timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10,
+ timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10);
}
if (istcp && !Lflag && !xflag && !Tflag) {
if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES)
diff --git a/freebsd/usr.bin/netstat/inet6.c b/freebsd/usr.bin/netstat/inet6.c
index 401112d2..2d3d2db8 100644
--- a/freebsd/usr.bin/netstat/inet6.c
+++ b/freebsd/usr.bin/netstat/inet6.c
@@ -13,10 +13,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -366,15 +362,17 @@ static char *srcrule_str[] = {
void
ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct ip6stat ip6stat;
+ struct ip6stat ip6stat, zerostat;
int first, i;
size_t len;
len = sizeof ip6stat;
if (live) {
memset(&ip6stat, 0, len);
- if (sysctlbyname("net.inet6.ip6.stats", &ip6stat, &len, NULL,
- 0) < 0) {
+ if (zflag)
+ memset(&zerostat, 0, len);
+ if (sysctlbyname("net.inet6.ip6.stats", &ip6stat, &len,
+ zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
if (errno != ENOENT)
warn("sysctl: net.inet6.ip6.stats");
return;
@@ -412,7 +410,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
p(ip6s_cantfrag, "\t%ju datagram%s that can't be fragmented\n");
p(ip6s_badscope, "\t%ju packet%s that violated scope rules\n");
p(ip6s_notmember, "\t%ju multicast packet%s which we don't join\n");
- for (first = 1, i = 0; i < 256; i++)
+ for (first = 1, i = 0; i < IP6S_HDRCNT; i++)
if (ip6stat.ip6s_nxthist[i] != 0) {
if (first) {
printf("\tInput histogram:\n");
@@ -423,7 +421,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
}
printf("\tMbuf statistics:\n");
printf("\t\t%ju one mbuf\n", (uintmax_t)ip6stat.ip6s_m1);
- for (first = 1, i = 0; i < 32; i++) {
+ for (first = 1, i = 0; i < IP6S_M2MMAX; i++) {
char ifbuf[IFNAMSIZ];
if (ip6stat.ip6s_m2m[i] != 0) {
if (first) {
@@ -440,7 +438,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
printf("\t\t%ju two or more ext mbuf\n",
(uintmax_t)ip6stat.ip6s_mext2m);
p(ip6s_exthdrtoolong,
- "\t%ju packet%s whose headers are not continuous\n");
+ "\t%ju packet%s whose headers are not contiguous\n");
p(ip6s_nogif, "\t%ju tunneling packet%s that can't find gif\n");
p(ip6s_toomanyhdr,
"\t%ju packet%s discarded because of too many headers\n");
@@ -449,7 +447,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
#define PRINT_SCOPESTAT(s,i) do {\
switch(i) { /* XXX hardcoding in each case */\
case 1:\
- p(s, "\t\t%ju node-local%s\n");\
+ p(s, "\t\t%ju interface-local%s\n");\
break;\
case 2:\
p(s,"\t\t%ju link-local%s\n");\
@@ -468,7 +466,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
p(ip6s_sources_none,
"\t%ju failure%s of source address selection\n");
- for (first = 1, i = 0; i < 16; i++) {
+ for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_sameif[i]) {
if (first) {
printf("\tsource addresses on an outgoing I/F\n");
@@ -477,7 +475,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
PRINT_SCOPESTAT(ip6s_sources_sameif[i], i);
}
}
- for (first = 1, i = 0; i < 16; i++) {
+ for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_otherif[i]) {
if (first) {
printf("\tsource addresses on a non-outgoing I/F\n");
@@ -486,7 +484,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
PRINT_SCOPESTAT(ip6s_sources_otherif[i], i);
}
}
- for (first = 1, i = 0; i < 16; i++) {
+ for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_samescope[i]) {
if (first) {
printf("\tsource addresses of same scope\n");
@@ -495,7 +493,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
PRINT_SCOPESTAT(ip6s_sources_samescope[i], i);
}
}
- for (first = 1, i = 0; i < 16; i++) {
+ for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_otherscope[i]) {
if (first) {
printf("\tsource addresses of a different scope\n");
@@ -504,7 +502,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
PRINT_SCOPESTAT(ip6s_sources_otherscope[i], i);
}
}
- for (first = 1, i = 0; i < 16; i++) {
+ for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_deprecated[i]) {
if (first) {
printf("\tdeprecated source addresses\n");
@@ -515,7 +513,7 @@ ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
}
printf("\tSource addresses selection rule applied:\n");
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < IP6S_RULESMAX; i++) {
if (ip6stat.ip6s_sources_rule[i])
printf("\t\t%ju %s\n",
(uintmax_t)ip6stat.ip6s_sources_rule[i],
@@ -846,15 +844,17 @@ static const char *icmp6names[] = {
void
icmp6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct icmp6stat icmp6stat;
+ struct icmp6stat icmp6stat, zerostat;
int i, first;
size_t len;
len = sizeof icmp6stat;
if (live) {
memset(&icmp6stat, 0, len);
+ if (zflag)
+ memset(&zerostat, 0, len);
if (sysctlbyname("net.inet6.icmp6.stats", &icmp6stat, &len,
- NULL, 0) < 0) {
+ zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
if (errno != ENOENT)
warn("sysctl: net.inet6.icmp6.stats");
return;
@@ -1039,14 +1039,16 @@ pim6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
rip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct rip6stat rip6stat;
+ struct rip6stat rip6stat, zerostat;
u_quad_t delivered;
size_t len;
len = sizeof(rip6stat);
if (live) {
+ if (zflag)
+ memset(&zerostat, 0, len);
if (sysctlbyname("net.inet6.ip6.rip6stats", &rip6stat, &len,
- NULL, 0) < 0) {
+ zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
if (errno != ENOENT)
warn("sysctl: net.inet6.ip6.rip6stats");
return;
diff --git a/freebsd/usr.bin/netstat/ipsec.c b/freebsd/usr.bin/netstat/ipsec.c
index ef59da9f..53dfdbe1 100644
--- a/freebsd/usr.bin/netstat/ipsec.c
+++ b/freebsd/usr.bin/netstat/ipsec.c
@@ -67,10 +67,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
diff --git a/freebsd/usr.bin/netstat/main.c b/freebsd/usr.bin/netstat/main.c
index 4c76b906..557d1ca0 100644
--- a/freebsd/usr.bin/netstat/main.c
+++ b/freebsd/usr.bin/netstat/main.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -194,6 +190,8 @@ static struct nlist nl[] = {
{ .n_name = "_mfctablesize" },
#define N_ARPSTAT 55
{ .n_name = "_arpstat" },
+#define N_UNP_SPHEAD 56
+ { .n_name = "unp_sphead" },
{ .n_name = NULL },
};
@@ -218,6 +216,10 @@ struct protox {
{ -1, N_SCTPSTAT, 1, sctp_protopr,
sctp_stats, NULL, "sctp", 1, IPPROTO_SCTP },
#endif
+#ifdef SDP
+ { -1, -1, 1, protopr,
+ NULL, NULL, "sdp", 1, IPPROTO_TCP },
+#endif
{ N_DIVCBINFO, -1, 1, protopr,
NULL, NULL, "divert", 1, IPPROTO_DIVERT },
{ N_RIPCBINFO, N_IPSTAT, 1, protopr,
@@ -258,6 +260,10 @@ static const struct protox ip6protox[] = {
ip6_stats, ip6_ifstats, "ip6", 1, IPPROTO_RAW },
{ N_RIPCBINFO, N_ICMP6STAT, 1, protopr,
icmp6_stats, icmp6_ifstats, "icmp6", 1, IPPROTO_ICMPV6 },
+#ifdef SDP
+ { -1, -1, 1, protopr,
+ NULL, NULL, "sdp", 1, IPPROTO_TCP },
+#endif
#ifdef IPSEC
{ -1, N_IPSEC6STAT, 1, NULL,
ipsec_stats, NULL, "ipsec6", 0, 0 },
@@ -355,7 +361,6 @@ int Qflag; /* show netisr information */
#endif /* __rtems__ */
int rflag; /* show routing tables (or routing stats) */
int sflag; /* show protocol statistics */
-int tflag; /* show i/f watchdog timers */
int Wflag; /* wide display */
int Tflag; /* TCP Information */
int xflag; /* extra information, includes all socket buffer info */
@@ -397,7 +402,6 @@ int rtems_bsd_command_netstat(int argc, char *argv[])
pflag = 0;
rflag = 0;
sflag = 0;
- tflag = 0;
Wflag = 0;
xflag = 0;
zflag = 0;
@@ -437,7 +441,7 @@ main(int argc, char *argv[])
af = AF_UNSPEC;
- while ((ch = getopt(argc, argv, "AaBbdf:ghI:iLlM:mN:np:Qq:rSTstuWw:xz"))
+ while ((ch = getopt(argc, argv, "AaBbdf:ghI:iLlM:mN:np:Qq:rSTsuWw:xz"))
!= -1)
switch(ch) {
case 'A':
@@ -543,9 +547,6 @@ main(int argc, char *argv[])
case 'S':
numeric_addr = 1;
break;
- case 't':
- tflag = 1;
- break;
case 'u':
af = AF_UNIX;
break;
@@ -710,7 +711,8 @@ main(int argc, char *argv[])
#ifndef __rtems__
if ((af == AF_UNIX || af == AF_UNSPEC) && !sflag)
unixpr(nl[N_UNP_COUNT].n_value, nl[N_UNP_GENCNT].n_value,
- nl[N_UNP_DHEAD].n_value, nl[N_UNP_SHEAD].n_value);
+ nl[N_UNP_DHEAD].n_value, nl[N_UNP_SHEAD].n_value,
+ nl[N_UNP_SPHEAD].n_value);
#endif
exit(0);
}
@@ -888,7 +890,7 @@ usage(void)
(void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
"usage: netstat [-AaLnSTWx] [-f protocol_family | -p protocol]\n"
" [-M core] [-N system]",
-" netstat -i | -I interface [-abdhntW] [-f address_family]\n"
+" netstat -i | -I interface [-abdhnW] [-f address_family]\n"
" [-M core] [-N system]",
" netstat -w wait [-I interface] [-d] [-M core] [-N system] [-q howmany]",
" netstat -s [-s] [-z] [-f protocol_family | -p protocol]\n"
diff --git a/freebsd/usr.bin/netstat/mbuf.c b/freebsd/usr.bin/netstat/mbuf.c
index 08633639..5bfbd7bb 100644
--- a/freebsd/usr.bin/netstat/mbuf.c
+++ b/freebsd/usr.bin/netstat/mbuf.c
@@ -69,16 +69,18 @@ mbpr(void *kvmd, u_long mbaddr)
struct memory_type_list *mtlp;
struct memory_type *mtp;
uintmax_t mbuf_count, mbuf_bytes, mbuf_free, mbuf_failures, mbuf_size;
+ uintmax_t mbuf_sleeps;
uintmax_t cluster_count, cluster_bytes, cluster_limit, cluster_free;
- uintmax_t cluster_failures, cluster_size;
+ uintmax_t cluster_failures, cluster_size, cluster_sleeps;
uintmax_t packet_count, packet_bytes, packet_free, packet_failures;
+ uintmax_t packet_sleeps;
uintmax_t tag_count, tag_bytes;
uintmax_t jumbop_count, jumbop_bytes, jumbop_limit, jumbop_free;
- uintmax_t jumbop_failures, jumbop_size;
+ uintmax_t jumbop_failures, jumbop_sleeps, jumbop_size;
uintmax_t jumbo9_count, jumbo9_bytes, jumbo9_limit, jumbo9_free;
- uintmax_t jumbo9_failures, jumbo9_size;
+ uintmax_t jumbo9_failures, jumbo9_sleeps, jumbo9_size;
uintmax_t jumbo16_count, jumbo16_bytes, jumbo16_limit, jumbo16_free;
- uintmax_t jumbo16_failures, jumbo16_size;
+ uintmax_t jumbo16_failures, jumbo16_sleeps, jumbo16_size;
uintmax_t bytes_inuse, bytes_incache, bytes_total;
int nsfbufs, nsfbufspeak, nsfbufsused;
struct mbstat mbstat;
@@ -127,6 +129,7 @@ mbpr(void *kvmd, u_long mbaddr)
mbuf_bytes = memstat_get_bytes(mtp);
mbuf_free = memstat_get_free(mtp);
mbuf_failures = memstat_get_failures(mtp);
+ mbuf_sleeps = memstat_get_sleeps(mtp);
mbuf_size = memstat_get_size(mtp);
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_PACKET_MEM_NAME);
@@ -138,6 +141,7 @@ mbpr(void *kvmd, u_long mbaddr)
packet_count = memstat_get_count(mtp);
packet_bytes = memstat_get_bytes(mtp);
packet_free = memstat_get_free(mtp);
+ packet_sleeps = memstat_get_sleeps(mtp);
packet_failures = memstat_get_failures(mtp);
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_CLUSTER_MEM_NAME);
@@ -151,6 +155,7 @@ mbpr(void *kvmd, u_long mbaddr)
cluster_limit = memstat_get_countlimit(mtp);
cluster_free = memstat_get_free(mtp);
cluster_failures = memstat_get_failures(mtp);
+ cluster_sleeps = memstat_get_sleeps(mtp);
cluster_size = memstat_get_size(mtp);
mtp = memstat_mtl_find(mtlp, ALLOCATOR_MALLOC, MBUF_TAG_MEM_NAME);
@@ -173,6 +178,7 @@ mbpr(void *kvmd, u_long mbaddr)
jumbop_limit = memstat_get_countlimit(mtp);
jumbop_free = memstat_get_free(mtp);
jumbop_failures = memstat_get_failures(mtp);
+ jumbop_sleeps = memstat_get_sleeps(mtp);
jumbop_size = memstat_get_size(mtp);
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO9_MEM_NAME);
@@ -186,6 +192,7 @@ mbpr(void *kvmd, u_long mbaddr)
jumbo9_limit = memstat_get_countlimit(mtp);
jumbo9_free = memstat_get_free(mtp);
jumbo9_failures = memstat_get_failures(mtp);
+ jumbo9_sleeps = memstat_get_sleeps(mtp);
jumbo9_size = memstat_get_size(mtp);
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO16_MEM_NAME);
@@ -199,6 +206,7 @@ mbpr(void *kvmd, u_long mbaddr)
jumbo16_limit = memstat_get_countlimit(mtp);
jumbo16_free = memstat_get_free(mtp);
jumbo16_failures = memstat_get_failures(mtp);
+ jumbo16_sleeps = memstat_get_sleeps(mtp);
jumbo16_size = memstat_get_size(mtp);
printf("%ju/%ju/%ju mbufs in use (current/cache/total)\n",
@@ -285,7 +293,13 @@ mbpr(void *kvmd, u_long mbaddr)
printf("%ju/%ju/%ju requests for mbufs denied (mbufs/clusters/"
"mbuf+clusters)\n", mbuf_failures, cluster_failures,
packet_failures);
+ printf("%ju/%ju/%ju requests for mbufs delayed (mbufs/clusters/"
+ "mbuf+clusters)\n", mbuf_sleeps, cluster_sleeps,
+ packet_sleeps);
+ printf("%ju/%ju/%ju requests for jumbo clusters delayed "
+ "(%juk/9k/16k)\n", jumbop_sleeps, jumbo9_sleeps,
+ jumbo16_sleeps, jumbop_size / 1024);
printf("%ju/%ju/%ju requests for jumbo clusters denied "
"(%juk/9k/16k)\n", jumbop_failures, jumbo9_failures,
jumbo16_failures, jumbop_size / 1024);
diff --git a/freebsd/usr.bin/netstat/netstat.h b/freebsd/usr.bin/netstat/netstat.h
index 69465cf8..0b2f42ed 100644
--- a/freebsd/usr.bin/netstat/netstat.h
+++ b/freebsd/usr.bin/netstat/netstat.h
@@ -10,10 +10,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -56,7 +52,6 @@ extern int numeric_addr; /* show addresses numerically */
extern int numeric_port; /* show ports numerically */
extern int rflag; /* show routing tables (or routing stats) */
extern int sflag; /* show protocol statistics */
-extern int tflag; /* show i/f watchdog timers */
extern int Tflag; /* show TCP control block info */
extern int Wflag; /* wide display */
extern int xflag; /* extended display, includes all socket buffer info */
@@ -169,7 +164,7 @@ void ddp_stats(u_long, const char *, int, int);
void netgraphprotopr(u_long, const char *, int, int);
#endif
-void unixpr(u_long, u_long, u_long, u_long);
+void unixpr(u_long, u_long, u_long, u_long, u_long);
void esis_stats(u_long, const char *, int, int);
void clnp_stats(u_long, const char *, int, int);
diff --git a/freebsd/usr.bin/netstat/pfkey.c b/freebsd/usr.bin/netstat/pfkey.c
index d3fa2768..45fcb977 100644
--- a/freebsd/usr.bin/netstat/pfkey.c
+++ b/freebsd/usr.bin/netstat/pfkey.c
@@ -42,10 +42,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
diff --git a/freebsd/usr.bin/netstat/route.c b/freebsd/usr.bin/netstat/route.c
index 33374831..3fc7f92d 100644
--- a/freebsd/usr.bin/netstat/route.c
+++ b/freebsd/usr.bin/netstat/route.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
diff --git a/freebsd/usr.bin/netstat/sctp.c b/freebsd/usr.bin/netstat/sctp.c
index f4f191d5..8f87fd5a 100644
--- a/freebsd/usr.bin/netstat/sctp.c
+++ b/freebsd/usr.bin/netstat/sctp.c
@@ -430,7 +430,7 @@ sctp_process_inpcb(struct xsctp_inpcb *xinpcb,
}
xladdr = (struct xsctp_laddr *)(buf + *offset);
if (Lflag && !is_listening) {
- (int)sctp_skip_xinpcb_ifneed(buf, buflen, offset);
+ sctp_skip_xinpcb_ifneed(buf, buflen, offset);
return;
}
diff --git a/freebsd/usr.bin/netstat/unix.c b/freebsd/usr.bin/netstat/unix.c
index 7a209ebc..afb35113 100644
--- a/freebsd/usr.bin/netstat/unix.c
+++ b/freebsd/usr.bin/netstat/unix.c
@@ -12,10 +12,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -198,21 +194,37 @@ fail:
#ifndef __rtems__
void
-unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off)
+unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off,
+ u_long sphead_off)
{
char *buf;
int ret, type;
struct xsocket *so;
struct xunpgen *xug, *oxug;
struct xunpcb *xunp;
+ u_long head_off;
for (type = SOCK_STREAM; type <= SOCK_SEQPACKET; type++) {
if (live)
ret = pcblist_sysctl(type, &buf);
- else
- ret = pcblist_kvm(count_off, gencnt_off,
- type == SOCK_STREAM ? shead_off :
- (type == SOCK_DGRAM ? dhead_off : 0), &buf);
+ else {
+ head_off = 0;
+ switch (type) {
+ case SOCK_STREAM:
+ head_off = shead_off;
+ break;
+
+ case SOCK_DGRAM:
+ head_off = dhead_off;
+ break;
+
+ case SOCK_SEQPACKET:
+ head_off = sphead_off;
+ break;
+ }
+ ret = pcblist_kvm(count_off, gencnt_off, head_off,
+ &buf);
+ }
if (ret == -1)
continue;
if (ret < 0)
diff --git a/rtemsbsd/include/sys/cpuset.h b/rtemsbsd/include/cam/cam_xpt_internal.h
index 936ffd88..936ffd88 100644
--- a/rtemsbsd/include/sys/cpuset.h
+++ b/rtemsbsd/include/cam/cam_xpt_internal.h
diff --git a/rtemsbsd/include/sys/vnode.h b/rtemsbsd/include/cam/cam_xpt_periph.h
index 936ffd88..936ffd88 100644
--- a/rtemsbsd/include/sys/vnode.h
+++ b/rtemsbsd/include/cam/cam_xpt_periph.h
diff --git a/rtemsbsd/include/cam/scsi/smp_all.h b/rtemsbsd/include/cam/scsi/smp_all.h
new file mode 100644
index 00000000..936ffd88
--- /dev/null
+++ b/rtemsbsd/include/cam/scsi/smp_all.h
@@ -0,0 +1 @@
+/* EMPTY */
diff --git a/rtemsbsd/include/machine/_align.h b/rtemsbsd/include/machine/_align.h
new file mode 100644
index 00000000..298e5733
--- /dev/null
+++ b/rtemsbsd/include/machine/_align.h
@@ -0,0 +1 @@
+#include <rtems/bsd/sys/param.h>
diff --git a/rtemsbsd/include/machine/rtems-bsd-syscall-api.h b/rtemsbsd/include/machine/rtems-bsd-syscall-api.h
index 0358df6b..18b7016b 100644
--- a/rtemsbsd/include/machine/rtems-bsd-syscall-api.h
+++ b/rtemsbsd/include/machine/rtems-bsd-syscall-api.h
@@ -90,15 +90,17 @@ ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_
ssize_t sendmsg(int, const struct msghdr *, int);
+int setfib(int);
+
int setsockopt(int, int, int, const void *, socklen_t);
int shutdown(int, int);
int socket(int, int, int);
-int sysctl(int *, u_int, void *, size_t *, void *, size_t);
+int sysctl(const int *, u_int, void *, size_t *, const void *, size_t);
-int sysctlbyname(const char *, void *, size_t *, void *, size_t);
+int sysctlbyname(const char *, void *, size_t *, const void *, size_t);
int sysctlnametomib(const char *, int *, size_t *);
diff --git a/rtemsbsd/include/rtems/bsd/local/bus_if.h b/rtemsbsd/include/rtems/bsd/local/bus_if.h
index a0204db3..c95c7015 100644
--- a/rtemsbsd/include/rtems/bsd/local/bus_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/bus_if.h
@@ -139,6 +139,27 @@ static __inline int BUS_WRITE_IVAR(device_t _dev, device_t _child, int _indx,
return ((bus_write_ivar_t *) _m)(_dev, _child, _indx, _value);
}
+/** @brief Unique descriptor for the BUS_CHILD_DELETED() method */
+extern struct kobjop_desc bus_child_deleted_desc;
+/** @brief A function implementing the BUS_CHILD_DELETED() method */
+typedef void bus_child_deleted_t(device_t _dev, device_t _child);
+/**
+ * @brief Notify a bus that a child was deleted
+ *
+ * Called at the beginning of device_delete_child() to allow the parent
+ * to teardown any bus-specific state for the child.
+ *
+ * @param _dev the device whose child is being deleted
+ * @param _child the child device which is being deleted
+ */
+
+static __inline void BUS_CHILD_DELETED(device_t _dev, device_t _child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_child_deleted);
+ ((bus_child_deleted_t *) _m)(_dev, _child);
+}
+
/** @brief Unique descriptor for the BUS_CHILD_DETACHED() method */
extern struct kobjop_desc bus_child_detached_desc;
/** @brief A function implementing the BUS_CHILD_DETACHED() method */
diff --git a/rtemsbsd/include/rtems/bsd/local/miidevs.h b/rtemsbsd/include/rtems/bsd/local/miidevs.h
index f0f42d41..66cb2ac8 100644
--- a/rtemsbsd/include/rtems/bsd/local/miidevs.h
+++ b/rtemsbsd/include/rtems/bsd/local/miidevs.h
@@ -6,7 +6,7 @@
* generated from:
* FreeBSD
*/
-/*$NetBSD: miidevs,v 1.6 1999/05/14 11:37:30 drochner Exp $*/
+/*$NetBSD: miidevs,v 1.105 2011/11/25 23:28:14 jakllsch Exp $*/
/*-
* Copyright (c) 1998, 1999 The NetBSD Foundation, Inc.
@@ -42,69 +42,77 @@
* List of known MII OUIs.
* For a complete list see http://standards.ieee.org/regauth/oui/
*
- * XXX Vendors do obviously not agree how OUIs (18 bit) are mapped
- * to the 16 bits available in the id registers. The MII_OUI() macro
- * in "mii.h" reflects the most obvious way. If a vendor uses a
- * different mapping, an "xx" prefixed OUI is defined here which is
- * mangled accordingly to compensate.
+ * XXX Vendors do obviously not agree how OUIs (24 bit) are mapped
+ * to the 22 bits available in the id registers.
+ * IEEE 802.3u-1995, subclause 22.2.4.3.1, figure 22-12, depicts the right
+ * mapping; the bit positions are defined in IEEE 802-1990, figure 5.2.
+ * (There is a formal 802.3 interpretation, number 1-07/98 of July 09 1998,
+ * about this.)
+ * The MII_OUI() macro in "mii.h" reflects this.
+ * If a vendor uses a different mapping, an "xx" prefixed OUI is defined here
+ * which is mangled accordingly to compensate.
*/
-#define MII_OUI_AGERE 0x00a0bc /* Agere Systems */
+#define MII_OUI_AGERE 0x00053d /* Agere Systems */
#define MII_OUI_ALTIMA 0x0010a9 /* Altima Communications */
#define MII_OUI_AMD 0x00001a /* Advanced Micro Devices */
-#define MII_OUI_ASIX 0x00602e /* Asix Semiconductor */
-#define MII_OUI_ATHEROS 0x001374 /* Atheros Communications */
#define MII_OUI_BROADCOM 0x001018 /* Broadcom Corporation */
#define MII_OUI_BROADCOM2 0x000af7 /* Broadcom Corporation */
+#define MII_OUI_BROADCOM3 0x001be9 /* Broadcom Corporation */
#define MII_OUI_CICADA 0x0003F1 /* Cicada Semiconductor */
#define MII_OUI_DAVICOM 0x00606e /* Davicom Semiconductor */
+#define MII_OUI_ENABLESEMI 0x0010dd /* Enable Semiconductor */
#define MII_OUI_ICPLUS 0x0090c3 /* IC Plus Corp. */
#define MII_OUI_ICS 0x00a0be /* Integrated Circuit Systems */
-#define MII_OUI_INTEL 0x00aa00 /* Intel */
-#define MII_OUI_JATO 0x00e083 /* Jato Technologies */
-#define MII_OUI_JMICRON 0x001b8c /* JMicron Technologies */
+#define MII_OUI_INTEL 0x00aa00 /* Intel Corporation */
+#define MII_OUI_JMICRON 0x00d831 /* JMicron Technologies */
#define MII_OUI_LEVEL1 0x00207b /* Level 1 */
+#define MII_OUI_MARVELL 0x005043 /* Marvell Semiconductor */
+#define MII_OUI_MYSON 0x00c0b4 /* Myson Technology */
#define MII_OUI_NATSEMI 0x080017 /* National Semiconductor */
+#define MII_OUI_PMCSIERRA 0x00e004 /* PMC-Sierra */
#define MII_OUI_QUALSEMI 0x006051 /* Quality Semiconductor */
-#define MII_OUI_RDC 0x000bb4 /* RDC Semiconductor */
-#define MII_OUI_REALTEK 0x000020 /* RealTek Semicondctor */
-#define MII_OUI_SEEQ 0x00a07d /* Seeq */
+#define MII_OUI_RDC 0x00d02d /* RDC Semiconductor */
+#define MII_OUI_REALTEK 0x00e04c /* RealTek Semicondctor */
+#define MII_OUI_SEEQ 0x00a07d /* Seeq Technology */
#define MII_OUI_SIS 0x00e006 /* Silicon Integrated Systems */
-#define MII_OUI_SMSC 0x0005be /* SMSC */
-#define MII_OUI_TDK 0x00c039 /* TDK */
#define MII_OUI_TI 0x080028 /* Texas Instruments */
-#define MII_OUI_VITESSE 0x0001c1 /* Vitesse Semiconductor */
+#define MII_OUI_TSC 0x00c039 /* TDK Semiconductor */
#define MII_OUI_XAQTI 0x00e0ae /* XaQti Corp. */
-#define MII_OUI_MARVELL 0x005043 /* Marvell Semiconductor */
-#define MII_OUI_xxMARVELL 0x000ac2 /* Marvell Semiconductor */
-/* in the 79c873, AMD uses another OUI (which matches Davicom!) */
-#define MII_OUI_xxAMD 0x00606e /* Advanced Micro Devices */
+/* Some Intel 82553's use an alternative OUI. */
+#define MII_OUI_xxINTEL 0x001f00 /* Intel Corporation */
-/* Intel 82553 A/B steppings */
-#define MII_OUI_xxINTEL 0x00f800 /* Intel */
+/* Some VIA 6122's use an alternative OUI. */
+#define MII_OUI_xxCICADA 0x00c08f /* Cicada Semiconductor */
-/* some vendors have the bits swapped within bytes
- (ie, ordered as on the wire) */
-#define MII_OUI_xxALTIMA 0x000895 /* Altima Communications */
+/* bad bitorder (bits "g" and "h" (= MSBs byte 1) lost) */
+#define MII_OUI_yyAMD 0x000058 /* Advanced Micro Devices */
+#define MII_OUI_xxATHEROS 0x00c82e /* Atheros Communications */
#define MII_OUI_xxBROADCOM 0x000818 /* Broadcom Corporation */
#define MII_OUI_xxBROADCOM_ALT1 0x0050ef /* Broadcom Corporation */
-#define MII_OUI_xxBROADCOM_ALT2 0x00d897 /* Broadcom Corporation */
-#define MII_OUI_xxICS 0x00057d /* Integrated Circuit Systems */
-#define MII_OUI_xxSEEQ 0x0005be /* Seeq */
-#define MII_OUI_xxSIS 0x000760 /* Silicon Integrated Systems */
-#define MII_OUI_xxTI 0x100014 /* Texas Instruments */
-#define MII_OUI_xxXAQTI 0x350700 /* XaQti Corp. */
+#define MII_OUI_xxDAVICOM 0x000676 /* Davicom Semiconductor */
+#define MII_OUI_yyINTEL 0x005500 /* Intel Corporation */
+#define MII_OUI_xxJATO 0x0007c1 /* Jato Technologies */
+#define MII_OUI_xxMARVELL 0x000ac2 /* Marvell Semiconductor */
+#define MII_OUI_xxMYSON 0x00032d /* Myson Technology */
+#define MII_OUI_xxNATSEMI 0x1000e8 /* National Semiconductor */
+#define MII_OUI_xxQUALSEMI 0x00068a /* Quality Semiconductor */
+#define MII_OUI_xxTSC 0x00039c /* TDK Semiconductor */
+#define MII_OUI_xxVITESSE 0x008083 /* Vitesse Semiconductor */
-/* Level 1 is completely different - from right to left.
- (Two bits get lost in the third OUI byte.) */
-#define MII_OUI_xxLEVEL1 0x1e0400 /* Level 1 */
+/* bad byteorder (bits "q" and "r" (= LSBs byte 3) lost) */
+#define MII_OUI_xxLEVEL1 0x782000 /* Level 1 */
+#define MII_OUI_xxXAQTI 0xace000 /* XaQti Corp. */
/* Don't know what's going on here. */
-#define MII_OUI_xxDAVICOM 0x006040 /* Davicom Semiconductor */
-
-/* This is the OUI of the gigE PHY in the RealTek 8169S/8110S/8211B chips */
-#define MII_OUI_xxREALTEK 0x000732 /* */
+#define MII_OUI_xxASIX 0x000674 /* Asix Semiconductor */
+#define MII_OUI_yyDAVICOM 0x000602 /* Davicom Semiconductor */
+#define MII_OUI_xxICPLUS 0x0009c3 /* IC Plus Corp. */
+#define MII_OUI_xxPMCSIERRA 0x0009c0 /* PMC-Sierra */
+#define MII_OUI_xxPMCSIERRA2 0x009057 /* PMC-Sierra */
+#define MII_OUI_xxREALTEK 0x000732 /* RealTek Semicondctor */
+#define MII_OUI_yyREALTEK 0x000004 /* RealTek Semicondctor */
/*
* List of known models. Grouped by oui.
@@ -117,164 +125,208 @@
#define MII_STR_AGERE_ET1011C "ET1011C 10/100/1000baseT PHY"
/* Altima Communications PHYs */
-#define MII_MODEL_xxALTIMA_AC101 0x0021
-#define MII_STR_xxALTIMA_AC101 "AC101 10/100 media interface"
-#define MII_MODEL_xxALTIMA_AC101L 0x0012
-#define MII_STR_xxALTIMA_AC101L "AC101L 10/100 media interface"
-#define MII_MODEL_xxALTIMA_ACXXX 0x0001
-#define MII_STR_xxALTIMA_ACXXX "ACXXX 10/100 media interface"
+#define MII_MODEL_ALTIMA_ACXXX 0x0001
+#define MII_STR_ALTIMA_ACXXX "ACXXX 10/100 media interface"
+#define MII_MODEL_ALTIMA_AC101L 0x0012
+#define MII_STR_ALTIMA_AC101L "AC101L 10/100 media interface"
+#define MII_MODEL_ALTIMA_AC101 0x0021
+#define MII_STR_ALTIMA_AC101 "AC101 10/100 media interface"
+/* AMD Am79C87[45] have ALTIMA OUI */
+#define MII_MODEL_ALTIMA_Am79C875 0x0014
+#define MII_STR_ALTIMA_Am79C875 "Am79C875 10/100 media interface"
+#define MII_MODEL_ALTIMA_Am79C874 0x0021
+#define MII_STR_ALTIMA_Am79C874 "Am79C874 10/100 media interface"
/* Advanced Micro Devices PHYs */
-#define MII_MODEL_AMD_79c973phy 0x0036
-#define MII_STR_AMD_79c973phy "Am79c973 internal PHY"
-#define MII_MODEL_AMD_79c978 0x0039
-#define MII_STR_AMD_79c978 "Am79c978 HomePNA PHY"
-#define MII_MODEL_xxAMD_79C873 0x0000
-#define MII_STR_xxAMD_79C873 "Am79C873/DM9101 10/100 media interface"
-
-/* Asix semiconductor PHYs. */
-#define MII_MODEL_ASIX_AX88X9X 0x0031
-#define MII_STR_ASIX_AX88X9X "Ax88x9x internal PHY"
-
-/* Atheros Communications/Attansic PHYs. */
-#define MII_MODEL_ATHEROS_F1 0x0001
-#define MII_STR_ATHEROS_F1 "Atheros F1 10/100/1000 PHY"
-#define MII_MODEL_ATHEROS_F2 0x0002
-#define MII_STR_ATHEROS_F2 "Atheros F2 10/100 PHY"
-#define MII_MODEL_ATHEROS_F1_7 0x0007
-#define MII_STR_ATHEROS_F1_7 "Atheros F1 10/100/1000 PHY"
-
-/* Broadcom Corp. PHYs. */
-#define MII_MODEL_BROADCOM_3C905B 0x0012
-#define MII_STR_BROADCOM_3C905B "3c905B 10/100 internal PHY"
-#define MII_MODEL_BROADCOM_3C905C 0x0017
-#define MII_STR_BROADCOM_3C905C "3c905C 10/100 internal PHY"
-#define MII_MODEL_BROADCOM_BCM5201 0x0021
-#define MII_STR_BROADCOM_BCM5201 "BCM5201 10/100baseTX PHY"
-#define MII_MODEL_BROADCOM_BCM5214 0x0028
-#define MII_STR_BROADCOM_BCM5214 "BCM5214 Quad 10/100 PHY"
-#define MII_MODEL_BROADCOM_BCM5221 0x001e
-#define MII_STR_BROADCOM_BCM5221 "BCM5221 10/100baseTX PHY"
-#define MII_MODEL_BROADCOM_BCM5222 0x0032
-#define MII_STR_BROADCOM_BCM5222 "BCM5222 Dual 10/100 PHY"
-#define MII_MODEL_BROADCOM_BCM4401 0x0036
-#define MII_STR_BROADCOM_BCM4401 "BCM4401 10/100baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5400 0x0004
-#define MII_STR_xxBROADCOM_BCM5400 "Broadcom 1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5401 0x0005
-#define MII_STR_xxBROADCOM_BCM5401 "BCM5401 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5411 0x0007
-#define MII_STR_xxBROADCOM_BCM5411 "BCM5411 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5754 0x000e
-#define MII_STR_xxBROADCOM_BCM5754 "BCM5754 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5752 0x0010
-#define MII_STR_xxBROADCOM_BCM5752 "BCM5752 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5701 0x0011
-#define MII_STR_xxBROADCOM_BCM5701 "BCM5701 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5706 0x0015
-#define MII_STR_xxBROADCOM_BCM5706 "BCM5706 10/100/1000baseTX/SX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5703 0x0016
-#define MII_STR_xxBROADCOM_BCM5703 "BCM5703 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5704 0x0019
-#define MII_STR_xxBROADCOM_BCM5704 "BCM5704 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5705 0x001a
-#define MII_STR_xxBROADCOM_BCM5705 "BCM5705 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5750 0x0018
-#define MII_STR_xxBROADCOM_BCM5750 "BCM5750 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM54K2 0x002e
-#define MII_STR_xxBROADCOM_BCM54K2 "BCM54K2 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5714 0x0034
-#define MII_STR_xxBROADCOM_BCM5714 "BCM5714 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5780 0x0035
-#define MII_STR_xxBROADCOM_BCM5780 "BCM5780 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_BCM5708C 0x0036
-#define MII_STR_xxBROADCOM_BCM5708C "BCM5708C 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5755 0x000c
-#define MII_STR_xxBROADCOM_ALT1_BCM5755 "BCM5755 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5787 0x000e
-#define MII_STR_xxBROADCOM_ALT1_BCM5787 "BCM5787 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5708S 0x0015
-#define MII_STR_xxBROADCOM_ALT1_BCM5708S "BCM5708S 1000/2500BaseSX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5709CAX 0x002c
-#define MII_STR_xxBROADCOM_ALT1_BCM5709CAX "BCM5709C(AX) 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5722 0x002d
-#define MII_STR_xxBROADCOM_ALT1_BCM5722 "BCM5722 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5784 0x003a
-#define MII_STR_xxBROADCOM_ALT1_BCM5784 "BCM5784 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5709C 0x003c
-#define MII_STR_xxBROADCOM_ALT1_BCM5709C "BCM5709C 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5761 0x003d
-#define MII_STR_xxBROADCOM_ALT1_BCM5761 "BCM5761 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT1_BCM5709S 0x003f
-#define MII_STR_xxBROADCOM_ALT1_BCM5709S "BCM5709S/5720S 1000/2500baseSX PHY"
-#define MII_MODEL_xxBROADCOM_ALT2_BCM57780 0x0019
-#define MII_STR_xxBROADCOM_ALT2_BCM57780 "BCM57780 1000BASE-T media interface"
-#define MII_MODEL_xxBROADCOM_ALT2_BCM5717C 0x0020
-#define MII_STR_xxBROADCOM_ALT2_BCM5717C "BCM5717C 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT2_BCM5719C 0x0022
-#define MII_STR_xxBROADCOM_ALT2_BCM5719C "BCM5719C 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT2_BCM57765 0x0024
-#define MII_STR_xxBROADCOM_ALT2_BCM57765 "BCM57765 10/100/1000baseTX PHY"
-#define MII_MODEL_xxBROADCOM_ALT2_BCM5720C 0x0036
-#define MII_STR_xxBROADCOM_ALT2_BCM5720C "BCM5720C 10/100/1000baseTX PHY"
+/* see Davicom DM9101 for Am79C873 */
+#define MII_MODEL_yyAMD_79C972_10T 0x0001
+#define MII_STR_yyAMD_79C972_10T "Am79C972 internal 10BASE-T interface"
+#define MII_MODEL_yyAMD_79c973phy 0x0036
+#define MII_STR_yyAMD_79c973phy "Am79C973 internal 10/100 media interface"
+#define MII_MODEL_yyAMD_79c901 0x0037
+#define MII_STR_yyAMD_79c901 "Am79C901 10BASE-T interface"
+#define MII_MODEL_yyAMD_79c901home 0x0039
+#define MII_STR_yyAMD_79c901home "Am79C901 HomePNA 1.0 interface"
+
+/* Atheros Communications/Attansic PHYs */
+#define MII_MODEL_xxATHEROS_F1 0x0001
+#define MII_STR_xxATHEROS_F1 "Atheros F1 10/100/1000 PHY"
+#define MII_MODEL_xxATHEROS_F2 0x0002
+#define MII_STR_xxATHEROS_F2 "Atheros F2 10/100 PHY"
+#define MII_MODEL_xxATHEROS_F1_7 0x0007
+#define MII_STR_xxATHEROS_F1_7 "Atheros F1 10/100/1000 PHY"
+
+/* Asix semiconductor PHYs */
+#define MII_MODEL_xxASIX_AX88X9X 0x0031
+#define MII_STR_xxASIX_AX88X9X "Ax88x9x internal PHY"
+
+/* Broadcom Corp. PHYs */
+#define MII_MODEL_xxBROADCOM_3C905B 0x0012
+#define MII_STR_xxBROADCOM_3C905B "Broadcom 3c905B internal PHY"
+#define MII_MODEL_xxBROADCOM_3C905C 0x0017
+#define MII_STR_xxBROADCOM_3C905C "Broadcom 3c905C internal PHY"
+#define MII_MODEL_xxBROADCOM_BCM5201 0x0021
+#define MII_STR_xxBROADCOM_BCM5201 "BCM5201 10/100 media interface"
+#define MII_MODEL_xxBROADCOM_BCM5214 0x0028
+#define MII_STR_xxBROADCOM_BCM5214 "BCM5214 Quad 10/100 media interface"
+#define MII_MODEL_xxBROADCOM_BCM5221 0x001e
+#define MII_STR_xxBROADCOM_BCM5221 "BCM5221 10/100 media interface"
+#define MII_MODEL_xxBROADCOM_BCM5222 0x0032
+#define MII_STR_xxBROADCOM_BCM5222 "BCM5222 Dual 10/100 media interface"
+#define MII_MODEL_xxBROADCOM_BCM4401 0x0036
+#define MII_STR_xxBROADCOM_BCM4401 "BCM4401 10/100 media interface"
+#define MII_MODEL_xxBROADCOM_BCM5365 0x0037
+#define MII_STR_xxBROADCOM_BCM5365 "BCM5365 10/100 5-port PHY switch"
+#define MII_MODEL_BROADCOM_BCM5400 0x0004
+#define MII_STR_BROADCOM_BCM5400 "BCM5400 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5401 0x0005
+#define MII_STR_BROADCOM_BCM5401 "BCM5401 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5411 0x0007
+#define MII_STR_BROADCOM_BCM5411 "BCM5411 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5464 0x000b
+#define MII_STR_BROADCOM_BCM5464 "BCM5464 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5461 0x000c
+#define MII_STR_BROADCOM_BCM5461 "BCM5461 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5462 0x000d
+#define MII_STR_BROADCOM_BCM5462 "BCM5462 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5421 0x000e
+#define MII_STR_BROADCOM_BCM5421 "BCM5421 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5752 0x0010
+#define MII_STR_BROADCOM_BCM5752 "BCM5752 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5701 0x0011
+#define MII_STR_BROADCOM_BCM5701 "BCM5701 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5706 0x0015
+#define MII_STR_BROADCOM_BCM5706 "BCM5706 1000BASE-T/SX media interface"
+#define MII_MODEL_BROADCOM_BCM5703 0x0016
+#define MII_STR_BROADCOM_BCM5703 "BCM5703 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5750 0x0018
+#define MII_STR_BROADCOM_BCM5750 "BCM5750 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5704 0x0019
+#define MII_STR_BROADCOM_BCM5704 "BCM5704 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5705 0x001a
+#define MII_STR_BROADCOM_BCM5705 "BCM5705 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM54K2 0x002e
+#define MII_STR_BROADCOM_BCM54K2 "BCM54K2 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5714 0x0034
+#define MII_STR_BROADCOM_BCM5714 "BCM5714 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5780 0x0035
+#define MII_STR_BROADCOM_BCM5780 "BCM5780 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5708C 0x0036
+#define MII_STR_BROADCOM_BCM5708C "BCM5708C 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5325 0x0003
+#define MII_STR_BROADCOM2_BCM5325 "BCM5325 10/100 5-port PHY switch"
#define MII_MODEL_BROADCOM2_BCM5906 0x0004
-#define MII_STR_BROADCOM2_BCM5906 "BCM5906 10/100baseTX PHY"
+#define MII_STR_BROADCOM2_BCM5906 "BCM5906 10/100baseTX media interface"
+#define MII_MODEL_BROADCOM2_BCM5481 0x000a
+#define MII_STR_BROADCOM2_BCM5481 "BCM5481 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5482 0x000b
+#define MII_STR_BROADCOM2_BCM5482 "BCM5482 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5755 0x000c
+#define MII_STR_BROADCOM2_BCM5755 "BCM5755 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5754 0x000e
+#define MII_STR_BROADCOM2_BCM5754 "BCM5754/5787 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5708S 0x0015
+#define MII_STR_BROADCOM2_BCM5708S "BCM5708S 1000/2500baseSX PHY"
+#define MII_MODEL_BROADCOM2_BCM5785 0x0016
+#define MII_STR_BROADCOM2_BCM5785 "BCM5785 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5709CAX 0x002c
+#define MII_STR_BROADCOM2_BCM5709CAX "BCM5709CAX 10/100/1000baseT PHY"
+#define MII_MODEL_BROADCOM2_BCM5722 0x002d
+#define MII_STR_BROADCOM2_BCM5722 "BCM5722 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5784 0x003a
+#define MII_STR_BROADCOM2_BCM5784 "BCM5784 10/100/1000baseT PHY"
+#define MII_MODEL_BROADCOM2_BCM5709C 0x003c
+#define MII_STR_BROADCOM2_BCM5709C "BCM5709 10/100/1000baseT PHY"
+#define MII_MODEL_BROADCOM2_BCM5761 0x003d
+#define MII_STR_BROADCOM2_BCM5761 "BCM5761 10/100/1000baseT PHY"
+#define MII_MODEL_BROADCOM2_BCM5709S 0x003f
+#define MII_STR_BROADCOM2_BCM5709S "BCM5709S/5720S 1000/2500baseSX PHY"
+#define MII_MODEL_BROADCOM3_BCM57780 0x0019
+#define MII_STR_BROADCOM3_BCM57780 "BCM57780 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM3_BCM5717C 0x0020
+#define MII_STR_BROADCOM3_BCM5717C "BCM5717C 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM3_BCM5719C 0x0022
+#define MII_STR_BROADCOM3_BCM5719C "BCM5719C 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM3_BCM57765 0x0024
+#define MII_STR_BROADCOM3_BCM57765 "BCM57765 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM3_BCM5720C 0x0036
+#define MII_STR_BROADCOM3_BCM5720C "BCM5720C 1000BASE-T media interface"
+#define MII_MODEL_xxBROADCOM_ALT1_BCM5906 0x0004
+#define MII_STR_xxBROADCOM_ALT1_BCM5906 "BCM5906 10/100baseTX media interface"
/* Cicada Semiconductor PHYs (now owned by Vitesse?) */
-#define MII_MODEL_CICADA_CS8201 0x0001
-#define MII_STR_CICADA_CS8201 "Cicada CS8201 10/100/1000TX PHY"
-#define MII_MODEL_CICADA_CS8204 0x0004
-#define MII_STR_CICADA_CS8204 "Cicada CS8204 10/100/1000TX PHY"
-#define MII_MODEL_CICADA_VSC8211 0x000b
-#define MII_STR_CICADA_VSC8211 "Cicada VSC8211 10/100/1000TX PHY"
-#define MII_MODEL_CICADA_CS8201A 0x0020
-#define MII_STR_CICADA_CS8201A "Cicada CS8201 10/100/1000TX PHY"
-#define MII_MODEL_CICADA_CS8201B 0x0021
-#define MII_STR_CICADA_CS8201B "Cicada CS8201 10/100/1000TX PHY"
-#define MII_MODEL_CICADA_CS8244 0x002c
-#define MII_STR_CICADA_CS8244 "Cicada CS8244 10/100/1000TX PHY"
-#define MII_MODEL_VITESSE_VSC8601 0x0002
-#define MII_STR_VITESSE_VSC8601 "Vitesse VSC8601 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_CS8201 0x0001
+#define MII_STR_xxCICADA_CS8201 "Cicada CS8201 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_CS8204 0x0004
+#define MII_STR_xxCICADA_CS8204 "Cicada CS8204 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_VSC8211 0x000b
+#define MII_STR_xxCICADA_VSC8211 "Cicada VSC8211 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_CS8201A 0x0020
+#define MII_STR_xxCICADA_CS8201A "Cicada CS8201 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_CS8201B 0x0021
+#define MII_STR_xxCICADA_CS8201B "Cicada CS8201 10/100/1000TX PHY"
+#define MII_MODEL_xxCICADA_CS8244 0x002c
+#define MII_STR_xxCICADA_CS8244 "Cicada CS8244 10/100/1000TX PHY"
+#define MII_MODEL_xxVITESSE_VSC8601 0x0002
+#define MII_STR_xxVITESSE_VSC8601 "Vitesse VSC8601 10/100/1000TX PHY"
/* Davicom Semiconductor PHYs */
-#define MII_MODEL_DAVICOM_DM9102 0x0004
-#define MII_STR_DAVICOM_DM9102 "DM9102 10/100 media interface"
+/* AMD Am79C873 seems to be a relabeled DM9101 */
#define MII_MODEL_xxDAVICOM_DM9101 0x0000
-#define MII_STR_xxDAVICOM_DM9101 "DM9101 10/100 media interface"
-
-/* Integrated Circuit Systems PHYs */
-#define MII_MODEL_xxICS_1889 0x0001
-#define MII_STR_xxICS_1889 "ICS1889 10/100 media interface"
-#define MII_MODEL_xxICS_1890 0x0002
-#define MII_STR_xxICS_1890 "ICS1890 10/100 media interface"
-#define MII_MODEL_xxICS_1892 0x0003
-#define MII_STR_xxICS_1892 "ICS1892 10/100 media interface"
-#define MII_MODEL_xxICS_1893 0x0004
-#define MII_STR_xxICS_1893 "ICS1893 10/100 media interface"
+#define MII_STR_xxDAVICOM_DM9101 "DM9101 (AMD Am79C873) 10/100 media interface"
+#define MII_MODEL_xxDAVICOM_DM9102 0x0004
+#define MII_STR_xxDAVICOM_DM9102 "DM9102 10/100 media interface"
+#define MII_MODEL_yyDAVICOM_DM9101 0x0000
+#define MII_STR_yyDAVICOM_DM9101 "DM9101 10/100 media interface"
/* IC Plus Corp. PHYs */
-#define MII_MODEL_ICPLUS_IP101 0x0005
-#define MII_STR_ICPLUS_IP101 "IC Plus 10/100 PHY"
-#define MII_MODEL_ICPLUS_IP1000A 0x0008
-#define MII_STR_ICPLUS_IP1000A "IC Plus 10/100/1000 media interface"
-#define MII_MODEL_ICPLUS_IP1001 0x0019
-#define MII_STR_ICPLUS_IP1001 "IC Plus IP1001 10/100/1000 media interface"
-
-/* Intel PHYs */
-#define MII_MODEL_xxINTEL_I82553AB 0x0000
-#define MII_STR_xxINTEL_I82553AB "i83553 10/100 media interface"
-#define MII_MODEL_INTEL_I82555 0x0015
-#define MII_STR_INTEL_I82555 "i82555 10/100 media interface"
-#define MII_MODEL_INTEL_I82562EM 0x0032
-#define MII_STR_INTEL_I82562EM "i82562EM 10/100 media interface"
-#define MII_MODEL_INTEL_I82562ET 0x0033
-#define MII_STR_INTEL_I82562ET "i82562ET 10/100 media interface"
-#define MII_MODEL_INTEL_I82553C 0x0035
-#define MII_STR_INTEL_I82553C "i82553 10/100 media interface"
+#define MII_MODEL_xxICPLUS_IP101 0x0005
+#define MII_STR_xxICPLUS_IP101 "IP101 10/100 PHY"
+#define MII_MODEL_xxICPLUS_IP1000A 0x0008
+#define MII_STR_xxICPLUS_IP1000A "IP100A 10/100/1000 media interface"
+#define MII_MODEL_xxICPLUS_IP1001 0x0019
+#define MII_STR_xxICPLUS_IP1001 "IP1001 10/100/1000 media interface"
+
+/* Integrated Circuit Systems PHYs */
+#define MII_MODEL_ICS_1889 0x0001
+#define MII_STR_ICS_1889 "ICS1889 10/100 media interface"
+#define MII_MODEL_ICS_1890 0x0002
+#define MII_STR_ICS_1890 "ICS1890 10/100 media interface"
+#define MII_MODEL_ICS_1892 0x0003
+#define MII_STR_ICS_1892 "ICS1892 10/100 media interface"
+#define MII_MODEL_ICS_1893 0x0004
+#define MII_STR_ICS_1893 "ICS1893 10/100 media interface"
+
+/* Intel Corporation PHYs */
+#define MII_MODEL_xxINTEL_I82553 0x0000
+#define MII_STR_xxINTEL_I82553 "i82553 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82555 0x0015
+#define MII_STR_yyINTEL_I82555 "i82555 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82562EH 0x0017
+#define MII_STR_yyINTEL_I82562EH "i82562EH HomePNA interface"
+#define MII_MODEL_yyINTEL_I82562G 0x0031
+#define MII_STR_yyINTEL_I82562G "i82562G 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82562EM 0x0032
+#define MII_STR_yyINTEL_I82562EM "i82562EM 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82562ET 0x0033
+#define MII_STR_yyINTEL_I82562ET "i82562ET 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82553 0x0035
+#define MII_STR_yyINTEL_I82553 "i82553 10/100 media interface"
+#define MII_MODEL_yyINTEL_I82566 0x0039
+#define MII_STR_yyINTEL_I82566 "i82566 10/100/1000 media interface"
+#define MII_MODEL_INTEL_I82577 0x0005
+#define MII_STR_INTEL_I82577 "i82577 10/100/1000 media interface"
+#define MII_MODEL_INTEL_I82579 0x0009
+#define MII_STR_INTEL_I82579 "i82579 10/100/1000 media interface"
+#define MII_MODEL_xxMARVELL_I82563 0x000a
+#define MII_STR_xxMARVELL_I82563 "i82563 10/100/1000 media interface"
+
+#define MII_MODEL_yyINTEL_IGP01E1000 0x0038
+#define MII_STR_yyINTEL_IGP01E1000 "Intel IGP01E1000 Gigabit PHY"
/* Jato Technologies PHYs */
-#define MII_MODEL_JATO_BASEX 0x0000
-#define MII_STR_JATO_BASEX "Jato 1000baseX media interface"
+#define MII_MODEL_xxJATO_BASEX 0x0000
+#define MII_STR_xxJATO_BASEX "Jato 1000baseX media interface"
/* JMicron Technologies PHYs */
#define MII_MODEL_JMICRON_JMP211 0x0021
@@ -285,109 +337,147 @@
/* Level 1 PHYs */
#define MII_MODEL_xxLEVEL1_LXT970 0x0000
#define MII_STR_xxLEVEL1_LXT970 "LXT970 10/100 media interface"
+#define MII_MODEL_LEVEL1_LXT971 0x000e
+#define MII_STR_LEVEL1_LXT971 "LXT971/2 10/100 media interface"
+#define MII_MODEL_LEVEL1_LXT973 0x0021
+#define MII_STR_LEVEL1_LXT973 "LXT973 10/100 Dual PHY"
+#define MII_MODEL_LEVEL1_LXT974 0x0004
+#define MII_STR_LEVEL1_LXT974 "LXT974 10/100 Quad PHY"
+#define MII_MODEL_LEVEL1_LXT975 0x0005
+#define MII_STR_LEVEL1_LXT975 "LXT975 10/100 Quad PHY"
+#define MII_MODEL_LEVEL1_LXT1000_OLD 0x0003
+#define MII_STR_LEVEL1_LXT1000_OLD "LXT1000 1000BASE-T media interface"
+#define MII_MODEL_LEVEL1_LXT1000 0x000c
+#define MII_STR_LEVEL1_LXT1000 "LXT1000 1000BASE-T media interface"
+
+/* Marvell Semiconductor PHYs */
+#define MII_MODEL_xxMARVELL_E1000 0x0000
+#define MII_STR_xxMARVELL_E1000 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1011 0x0002
+#define MII_STR_xxMARVELL_E1011 "Marvell 88E1011 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1000_3 0x0003
+#define MII_STR_xxMARVELL_E1000_3 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1000S 0x0004
+#define MII_STR_xxMARVELL_E1000S "Marvell 88E1000S Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1000_5 0x0005
+#define MII_STR_xxMARVELL_E1000_5 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1101 0x0006
+#define MII_STR_xxMARVELL_E1101 "Marvell 88E1101 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E3082 0x0008
+#define MII_STR_xxMARVELL_E3082 "Marvell 88E3082 10/100 Fast Ethernet PHY"
+#define MII_MODEL_xxMARVELL_E1112 0x0009
+#define MII_STR_xxMARVELL_E1112 "Marvell 88E1112 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1149 0x000b
+#define MII_STR_xxMARVELL_E1149 "Marvell 88E1149 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1111 0x000c
+#define MII_STR_xxMARVELL_E1111 "Marvell 88E1111 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1145 0x000d
+#define MII_STR_xxMARVELL_E1145 "Marvell 88E1145 Quad Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1116 0x0021
+#define MII_STR_xxMARVELL_E1116 "Marvell 88E1116 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1116R 0x0024
+#define MII_STR_xxMARVELL_E1116R "Marvell 88E1116R Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1118 0x0022
+#define MII_STR_xxMARVELL_E1118 "Marvell 88E1118 Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1149R 0x0025
+#define MII_STR_xxMARVELL_E1149R "Marvell 88E1149R Quad Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E3016 0x0026
+#define MII_STR_xxMARVELL_E3016 "Marvell 88E3016 10/100 Fast Ethernet PHY"
+#define MII_MODEL_xxMARVELL_PHYG65G 0x0027
+#define MII_STR_xxMARVELL_PHYG65G "Marvell PHYG65G Gigabit PHY"
+#define MII_MODEL_xxMARVELL_E1116R_29 0x0029
+#define MII_STR_xxMARVELL_E1116R_29 "Marvell 88E1116R Gigabit PHY"
+#define MII_MODEL_MARVELL_E1000 0x0005
+#define MII_STR_MARVELL_E1000 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_MARVELL_E1011 0x0002
+#define MII_STR_MARVELL_E1011 "Marvell 88E1011 Gigabit PHY"
+#define MII_MODEL_MARVELL_E1000_3 0x0003
+#define MII_STR_MARVELL_E1000_3 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_MARVELL_E1000_5 0x0005
+#define MII_STR_MARVELL_E1000_5 "Marvell 88E1000 Gigabit PHY"
+#define MII_MODEL_MARVELL_E1111 0x000c
+#define MII_STR_MARVELL_E1111 "Marvell 88E1111 Gigabit PHY"
+
+/* Myson Technology PHYs */
+#define MII_MODEL_xxMYSON_MTD972 0x0000
+#define MII_STR_xxMYSON_MTD972 "MTD972 10/100 media interface"
+#define MII_MODEL_MYSON_MTD803 0x0000
+#define MII_STR_MYSON_MTD803 "MTD803 3-in-1 media interface"
/* National Semiconductor PHYs */
-#define MII_MODEL_NATSEMI_DP83840 0x0000
-#define MII_STR_NATSEMI_DP83840 "DP83840 10/100 media interface"
-#define MII_MODEL_NATSEMI_DP83843 0x0001
-#define MII_STR_NATSEMI_DP83843 "DP83843 10/100 media interface"
-#define MII_MODEL_NATSEMI_DP83815 0x0002
-#define MII_STR_NATSEMI_DP83815 "DP83815 10/100 media interface"
-#define MII_MODEL_NATSEMI_DP83847 0x0003
-#define MII_STR_NATSEMI_DP83847 "DP83847 10/100 media interface"
-#define MII_MODEL_NATSEMI_DP83891 0x0005
-#define MII_STR_NATSEMI_DP83891 "DP83891 10/100/1000 media interface"
-#define MII_MODEL_NATSEMI_DP83861 0x0006
-#define MII_STR_NATSEMI_DP83861 "DP83861 10/100/1000 media interface"
-#define MII_MODEL_NATSEMI_DP83865 0x0007
-#define MII_STR_NATSEMI_DP83865 "DP83865 10/100/1000 media interface"
-#define MII_MODEL_NATSEMI_DP83849 0x000a
-#define MII_STR_NATSEMI_DP83849 "DP83849 10/100 media interface"
+#define MII_MODEL_xxNATSEMI_DP83840 0x0000
+#define MII_STR_xxNATSEMI_DP83840 "DP83840 10/100 media interface"
+#define MII_MODEL_xxNATSEMI_DP83843 0x0001
+#define MII_STR_xxNATSEMI_DP83843 "DP83843 10/100 media interface"
+#define MII_MODEL_xxNATSEMI_DP83815 0x0002
+#define MII_STR_xxNATSEMI_DP83815 "DP83815 10/100 media interface"
+#define MII_MODEL_xxNATSEMI_DP83847 0x0003
+#define MII_STR_xxNATSEMI_DP83847 "DP83847 10/100 media interface"
+#define MII_MODEL_xxNATSEMI_DP83891 0x0005
+#define MII_STR_xxNATSEMI_DP83891 "DP83891 1000BASE-T media interface"
+#define MII_MODEL_xxNATSEMI_DP83861 0x0006
+#define MII_STR_xxNATSEMI_DP83861 "DP83861 1000BASE-T media interface"
+#define MII_MODEL_xxNATSEMI_DP83865 0x0007
+#define MII_STR_xxNATSEMI_DP83865 "DP83865 1000BASE-T media interface"
+#define MII_MODEL_xxNATSEMI_DP83849 0x000a
+#define MII_STR_xxNATSEMI_DP83849 "DP83849 10/100 media interface"
+
+/* PMC Sierra PHYs */
+#define MII_MODEL_xxPMCSIERRA_PM8351 0x0000
+#define MII_STR_xxPMCSIERRA_PM8351 "PM8351 OctalPHY Gigabit interface"
+#define MII_MODEL_xxPMCSIERRA2_PM8352 0x0002
+#define MII_STR_xxPMCSIERRA2_PM8352 "PM8352 OctalPHY Gigabit interface"
+#define MII_MODEL_xxPMCSIERRA2_PM8353 0x0003
+#define MII_STR_xxPMCSIERRA2_PM8353 "PM8353 QuadPHY Gigabit interface"
+#define MII_MODEL_PMCSIERRA_PM8354 0x0004
+#define MII_STR_PMCSIERRA_PM8354 "PM8354 QuadPHY Gigabit interface"
/* Quality Semiconductor PHYs */
-#define MII_MODEL_QUALSEMI_QS6612 0x0000
-#define MII_STR_QUALSEMI_QS6612 "QS6612 10/100 media interface"
+#define MII_MODEL_xxQUALSEMI_QS6612 0x0000
+#define MII_STR_xxQUALSEMI_QS6612 "QS6612 10/100 media interface"
/* RDC Semiconductor PHYs */
#define MII_MODEL_RDC_R6040 0x0003
#define MII_STR_RDC_R6040 "R6040 10/100 media interface"
-/* RealTek Semiconductor PHYs */
-#define MII_MODEL_REALTEK_RTL8201L 0x0020
-#define MII_STR_REALTEK_RTL8201L "RTL8201L 10/100 media interface"
-#define MII_MODEL_xxREALTEK_RTL8305SC 0x0005
-#define MII_STR_xxREALTEK_RTL8305SC "RTL8305SC 10/100 802.1q switch"
+/* RealTek Semicondctor PHYs */
+#define MII_MODEL_yyREALTEK_RTL8201L 0x0020
+#define MII_STR_yyREALTEK_RTL8201L "RTL8201L 10/100 media interface"
#define MII_MODEL_xxREALTEK_RTL8169S 0x0011
-#define MII_STR_xxREALTEK_RTL8169S "RTL8169S/8110S/8211B media interface"
-
-/* Seeq PHYs */
-#define MII_MODEL_xxSEEQ_80220 0x0003
-#define MII_STR_xxSEEQ_80220 "Seeq 80220 10/100 media interface"
-#define MII_MODEL_xxSEEQ_84220 0x0004
-#define MII_STR_xxSEEQ_84220 "Seeq 84220 10/100 media interface"
+#define MII_STR_xxREALTEK_RTL8169S "RTL8169S/8110S/8211 1000BASE-T media interface"
+#define MII_MODEL_REALTEK_RTL8305SC 0x0005
+#define MII_STR_REALTEK_RTL8305SC "RTL8305SC 10/100 802.1q switch"
+#define MII_MODEL_REALTEK_RTL8201E 0x0008
+#define MII_STR_REALTEK_RTL8201E "RTL8201E 10/100 media interface"
+#define MII_MODEL_REALTEK_RTL8169S 0x0011
+#define MII_STR_REALTEK_RTL8169S "RTL8169S/8110S/8211 1000BASE-T media interface"
+
+/* Seeq Seeq PHYs */
+#define MII_MODEL_SEEQ_80220 0x0003
+#define MII_STR_SEEQ_80220 "Seeq 80220 10/100 media interface"
+#define MII_MODEL_SEEQ_84220 0x0004
+#define MII_STR_SEEQ_84220 "Seeq 84220 10/100 media interface"
+#define MII_MODEL_SEEQ_80225 0x0008
+#define MII_STR_SEEQ_80225 "Seeq 80225 10/100 media interface"
/* Silicon Integrated Systems PHYs */
-#define MII_MODEL_xxSIS_900 0x0000
-#define MII_STR_xxSIS_900 "SiS 900 10/100 media interface"
-
-/* SMSC PHYs */
-#define MII_MODEL_SMSC_LAN83C183 0x0004
-#define MII_STR_SMSC_LAN83C183 "SMSC LAN83C183 10/100 media interface"
-
-/* TDK */
-#define MII_MODEL_TDK_78Q2120 0x0014
-#define MII_STR_TDK_78Q2120 "TDK 78Q2120 media interface"
+#define MII_MODEL_SIS_900 0x0000
+#define MII_STR_SIS_900 "SiS 900 10/100 media interface"
/* Texas Instruments PHYs */
-#define MII_MODEL_xxTI_TLAN10T 0x0001
-#define MII_STR_xxTI_TLAN10T "ThunderLAN 10baseT media interface"
-#define MII_MODEL_xxTI_100VGPMI 0x0002
-#define MII_STR_xxTI_100VGPMI "ThunderLAN 100VG-AnyLan media interface"
-
-/* XaQti Corp. PHYs. */
-#define MII_MODEL_XAQTI_XMACII 0x0000
-#define MII_STR_XAQTI_XMACII "XaQti Corp. XMAC II gigabit interface"
-
-/* Marvell Semiconductor PHYs */
-#define MII_MODEL_MARVELL_E1000 0x0000
-#define MII_STR_MARVELL_E1000 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1011 0x0002
-#define MII_STR_MARVELL_E1011 "Marvell 88E1011 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1000_3 0x0003
-#define MII_STR_MARVELL_E1000_3 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1000S 0x0004
-#define MII_STR_MARVELL_E1000S "Marvell 88E1000S Gigabit PHY"
-#define MII_MODEL_MARVELL_E1000_5 0x0005
-#define MII_STR_MARVELL_E1000_5 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1101 0x0006
-#define MII_STR_MARVELL_E1101 "Marvell 88E1101 Gigabit PHY"
-#define MII_MODEL_MARVELL_E3082 0x0008
-#define MII_STR_MARVELL_E3082 "Marvell 88E3082 10/100 Fast Ethernet PHY"
-#define MII_MODEL_MARVELL_E1112 0x0009
-#define MII_STR_MARVELL_E1112 "Marvell 88E1112 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1149 0x000b
-#define MII_STR_MARVELL_E1149 "Marvell 88E1149 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1111 0x000c
-#define MII_STR_MARVELL_E1111 "Marvell 88E1111 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1116 0x0021
-#define MII_STR_MARVELL_E1116 "Marvell 88E1116 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1116R 0x0024
-#define MII_STR_MARVELL_E1116R "Marvell 88E1116R Gigabit PHY"
-#define MII_MODEL_MARVELL_E1118 0x0022
-#define MII_STR_MARVELL_E1118 "Marvell 88E1118 Gigabit PHY"
-#define MII_MODEL_MARVELL_E1149R 0x0025
-#define MII_STR_MARVELL_E1149R "Marvell 88E1149R Quad Gigabit PHY"
-#define MII_MODEL_MARVELL_E3016 0x0026
-#define MII_STR_MARVELL_E3016 "Marvell 88E3016 10/100 Fast Ethernet PHY"
-#define MII_MODEL_MARVELL_PHYG65G 0x0027
-#define MII_STR_MARVELL_PHYG65G "Marvell PHYG65G Gigabit PHY"
-#define MII_MODEL_xxMARVELL_E1000 0x0005
-#define MII_STR_xxMARVELL_E1000 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_xxMARVELL_E1011 0x0002
-#define MII_STR_xxMARVELL_E1011 "Marvell 88E1011 Gigabit PHY"
-#define MII_MODEL_xxMARVELL_E1000_3 0x0003
-#define MII_STR_xxMARVELL_E1000_3 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_xxMARVELL_E1000_5 0x0005
-#define MII_STR_xxMARVELL_E1000_5 "Marvell 88E1000 Gigabit PHY"
-#define MII_MODEL_xxMARVELL_E1111 0x000c
-#define MII_STR_xxMARVELL_E1111 "Marvell 88E1111 Gigabit PHY"
+#define MII_MODEL_TI_TLAN10T 0x0001
+#define MII_STR_TI_TLAN10T "ThunderLAN 10BASE-T media interface"
+#define MII_MODEL_TI_100VGPMI 0x0002
+#define MII_STR_TI_100VGPMI "ThunderLAN 100VG-AnyLan media interface"
+#define MII_MODEL_TI_TNETE2101 0x0003
+#define MII_STR_TI_TNETE2101 "TNETE2101 media interface"
+
+/* TDK Semiconductor PHYs */
+#define MII_MODEL_xxTSC_78Q2120 0x0014
+#define MII_STR_xxTSC_78Q2120 "78Q2120 10/100 media interface"
+#define MII_MODEL_xxTSC_78Q2121 0x0015
+#define MII_STR_xxTSC_78Q2121 "78Q2121 100BASE-TX media interface"
+
+/* XaQti Corp. PHYs */
+#define MII_MODEL_xxXAQTI_XMACII 0x0000
+#define MII_STR_xxXAQTI_XMACII "XaQti Corp. XMAC II gigabit interface"
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_capsicum.h b/rtemsbsd/include/rtems/bsd/local/opt_capsicum.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_capsicum.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_ofed.h b/rtemsbsd/include/rtems/bsd/local/opt_ofed.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_ofed.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_pcbgroup.h b/rtemsbsd/include/rtems/bsd/local/opt_pcbgroup.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_pcbgroup.h
diff --git a/rtemsbsd/include/rtems/bsd/local/pcib_if.h b/rtemsbsd/include/rtems/bsd/local/pcib_if.h
index 22c387e9..d8f20c78 100644
--- a/rtemsbsd/include/rtems/bsd/local/pcib_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/pcib_if.h
@@ -134,4 +134,17 @@ static __inline int PCIB_MAP_MSI(device_t pcib, device_t dev, int irq,
return ((pcib_map_msi_t *) _m)(pcib, dev, irq, addr, data);
}
+/** @brief Unique descriptor for the PCIB_POWER_FOR_SLEEP() method */
+extern struct kobjop_desc pcib_power_for_sleep_desc;
+/** @brief A function implementing the PCIB_POWER_FOR_SLEEP() method */
+typedef int pcib_power_for_sleep_t(device_t pcib, device_t dev, int *pstate);
+
+static __inline int PCIB_POWER_FOR_SLEEP(device_t pcib, device_t dev,
+ int *pstate)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)pcib)->ops,pcib_power_for_sleep);
+ return ((pcib_power_for_sleep_t *) _m)(pcib, dev, pstate);
+}
+
#endif /* _pcib_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/usbdevs.h b/rtemsbsd/include/rtems/bsd/local/usbdevs.h
index 63f285c3..0a3d105c 100644
--- a/rtemsbsd/include/rtems/bsd/local/usbdevs.h
+++ b/rtemsbsd/include/rtems/bsd/local/usbdevs.h
@@ -330,6 +330,7 @@
#define USB_VENDOR_AVISION 0x0638 /* Avision */
#define USB_VENDOR_TEAC 0x0644 /* TEAC */
#define USB_VENDOR_ACTON 0x0647 /* Acton Research Corp. */
+#define USB_VENDOR_OPTO 0x065a /* Optoelectronics Co., Ltd */
#define USB_VENDOR_SGI 0x065e /* Silicon Graphics */
#define USB_VENDOR_SANWASUPPLY 0x0663 /* Sanwa Supply */
#define USB_VENDOR_MEGATEC 0x0665 /* Megatec */
@@ -684,6 +685,7 @@
#define USB_VENDOR_SWEEX2 0x177f /* Sweex */
#define USB_VENDOR_METAGEEK 0x1781 /* MetaGeek */
#define USB_VENDOR_KAMSTRUP 0x17a8 /* Kamstrup A/S */
+#define USB_VENDOR_LENOVO 0x17ef /* Lenovo */
#define USB_VENDOR_WAVESENSE 0x17f4 /* WaveSense */
#define USB_VENDOR_VAISALA 0x1843 /* Vaisala */
#define USB_VENDOR_AMIT 0x18c5 /* AMIT */
@@ -728,7 +730,6 @@
#define USB_VENDOR_VIALABS 0x2109 /* VIA Labs */
#define USB_VENDOR_ERICSSON 0x2282 /* Ericsson */
#define USB_VENDOR_MOTOROLA2 0x22b8 /* Motorola */
-#define USB_VENDOR_WETELECOM 0x22de /* WeTelecom */
#define USB_VENDOR_TRIPPLITE 0x2478 /* Tripp-Lite */
#define USB_VENDOR_HIROSE 0x2631 /* Hirose Electric */
#define USB_VENDOR_NHJ 0x2770 /* NHJ */
@@ -1088,6 +1089,7 @@
#define USB_PRODUCT_APPLE_IPHONE_3G 0x1292 /* iPhone 3G */
#define USB_PRODUCT_APPLE_IPHONE_3GS 0x1294 /* iPhone 3GS */
#define USB_PRODUCT_APPLE_IPHONE_4 0x1297 /* iPhone 4 */
+#define USB_PRODUCT_APPLE_IPHONE_4S 0x12a0 /* iPhone 4S */
#define USB_PRODUCT_APPLE_IPHONE_5 0x12a8 /* iPhone 5 */
#define USB_PRODUCT_APPLE_IPAD 0x129a /* iPad */
#define USB_PRODUCT_APPLE_ETHERNET 0x1402 /* Ethernet A1277 */
@@ -1493,7 +1495,6 @@
/* D-Link products */
/*product DLINK DSBS25 0x0100 DSB-S25 serial*/
#define USB_PRODUCT_DLINK_DUBE100 0x1a00 /* 10/100 Ethernet */
-#define USB_PRODUCT_DLINK_DUBE100C1 0x1a02 /* DUB-E100 rev C1 */
#define USB_PRODUCT_DLINK_DSB650TX4 0x200c /* 10/100 Ethernet */
#define USB_PRODUCT_DLINK_DWL120E 0x3200 /* DWL-120 rev E */
#define USB_PRODUCT_DLINK_DWL122 0x3700 /* DWL-122 */
@@ -1517,6 +1518,8 @@
#define USB_PRODUCT_DLINK_DSB650TX2 0x4102 /* 10/100 Ethernet */
#define USB_PRODUCT_DLINK_DSB650 0xabc1 /* 10/100 Ethernet */
#define USB_PRODUCT_DLINK_DUBH7 0xf103 /* DUB-H7 USB 2.0 7-Port Hub */
+#define USB_PRODUCT_DLINK_DWR510_CD 0xa805 /* DWR-510 CD-ROM Mode */
+#define USB_PRODUCT_DLINK_DWR510 0x7e12 /* DWR-510 */
#define USB_PRODUCT_DLINK2_DWA120 0x3a0c /* DWA-120 */
#define USB_PRODUCT_DLINK2_DWA120_NF 0x3a0d /* DWA-120 (no firmware) */
#define USB_PRODUCT_DLINK2_DWLG122C1 0x3c03 /* DWL-G122 c1 */
@@ -1556,7 +1559,6 @@
#define USB_PRODUCT_EDIMAX_RT2870_1 0x7711 /* RT2870 */
#define USB_PRODUCT_EDIMAX_EW7717 0x7717 /* EW-7717 */
#define USB_PRODUCT_EDIMAX_EW7718 0x7718 /* EW-7718 */
-#define USB_PRODUCT_EDIMAX_EW7811UN 0x7811 /* EW-7811Un */
/* eGalax Products */
#define USB_PRODUCT_EGALAX_TPANEL 0x0001 /* Touch Panel */
@@ -1723,6 +1725,7 @@
#define USB_PRODUCT_FTDI_SERIAL_2232D 0x9e90 /* FT2232D Dual port Serial */
#define USB_PRODUCT_FTDI_SERIAL_4232H 0x6011 /* FT4232H Quad port Serial */
#define USB_PRODUCT_FTDI_BEAGLEBONE 0xa6d0 /* BeagleBone */
+#define USB_PRODUCT_FTDI_KTLINK 0xbbe2 /* KT-LINK Embedded Hackers Multitool */
#define USB_PRODUCT_FTDI_TURTELIZER2 0xbdc8 /* egnite Turtelizer 2 JTAG/RS232 Adapter */
/* Gude Analog- und Digitalsysteme products also uses FTDI's id: */
#define USB_PRODUCT_FTDI_TACTRIX_OPENPORT_13M 0xcc48 /* OpenPort 1.3 Mitsubishi */
@@ -2425,6 +2428,9 @@
/* Leadtek products */
#define USB_PRODUCT_LEADTEK_9531 0x2101 /* 9531 GPS */
+/* Lenovo products */
+#define USB_PRODUCT_LENOVO_ETHERNET 0x7203 /* USB 2.0 Ethernet */
+
/* Lexar products */
#define USB_PRODUCT_LEXAR_JUMPSHOT 0x0001 /* jumpSHOT CompactFlash Reader */
#define USB_PRODUCT_LEXAR_CF_READER 0xb002 /* USB CF Reader */
@@ -2469,7 +2475,6 @@
#define USB_PRODUCT_LOGITECH_PAGESCAN 0x040f /* PageScan */
#define USB_PRODUCT_LOGITECH_QUICKCAMWEB 0x0801 /* QuickCam Web */
#define USB_PRODUCT_LOGITECH_QUICKCAMPRO 0x0810 /* QuickCam Pro */
-#define USB_PRODUCT_LOGITECH_WEBCAMC100 0X0817 /* Webcam C100 */
#define USB_PRODUCT_LOGITECH_QUICKCAMEXP 0x0840 /* QuickCam Express */
#define USB_PRODUCT_LOGITECH_QUICKCAM 0x0850 /* QuickCam */
#define USB_PRODUCT_LOGITECH_QUICKCAMPRO3 0x0990 /* QuickCam Pro 9000 */
@@ -2498,6 +2503,7 @@
#define USB_PRODUCT_LOGITEC_RT2870_3 0x0164 /* RT2870 */
#define USB_PRODUCT_LOGITEC_LANW300NU2 0x0166 /* LAN-W300N/U2 */
#define USB_PRODUCT_LOGITEC_LANW150NU2 0x0168 /* LAN-W150N/U2 */
+#define USB_PRODUCT_LOGITEC_LANW300NU2S 0x0169 /* LAN-W300N/U2S */
/* Longcheer Holdings, Ltd. products */
#define USB_PRODUCT_LONGCHEER_WM66 0x6061 /* Longcheer WM66 HSDPA */
@@ -2947,6 +2953,7 @@
#define USB_PRODUCT_MOTOROLA2_E398 0x4810 /* E398 Mobile Phone */
#define USB_PRODUCT_MOTOROLA2_USBLAN 0x600c /* USBLAN */
#define USB_PRODUCT_MOTOROLA2_USBLAN2 0x6027 /* USBLAN */
+#define USB_PRODUCT_MOTOROLA2_MB886 0x710f /* MB886 Mobile Phone (Atria HD) */
#define USB_PRODUCT_MOTOROLA4_RT2770 0x9031 /* RT2770 */
#define USB_PRODUCT_MOTOROLA4_RT3070 0x9032 /* RT3070 */
@@ -3066,6 +3073,7 @@
#define USB_PRODUCT_NOVATEL_MC950D 0x4400 /* Novatel MC950D HSUPA */
#define USB_PRODUCT_NOVATEL_ZEROCD 0x5010 /* Novatel ZeroCD */
#define USB_PRODUCT_NOVATEL_ZEROCD2 0x5030 /* Novatel ZeroCD */
+#define USB_PRODUCT_NOVATEL_MIFI2200 0x5041 /* Novatel MiFi 2200 CDMA */
#define USB_PRODUCT_NOVATEL_U727_2 0x5100 /* Merlin U727 CDMA */
#define USB_PRODUCT_NOVATEL_U760 0x6000 /* Novatel U760 */
#define USB_PRODUCT_NOVATEL_MC760 0x6002 /* Novatel MC760 */
@@ -3148,6 +3156,13 @@
#define USB_PRODUCT_OPTION_ICON505 0xd055 /* Globetrotter iCON 505 */
#define USB_PRODUCT_OPTION_ICON452 0x7901 /* Globetrotter iCON 452 */
+/* Optoelectronics Co., Ltd */
+#define USB_PRODUCT_OPTO_BARCODE 0x0001 /* Barcode Reader */
+#define USB_PRODUCT_OPTO_OPTICONCODE 0x0009 /* Opticon Code Reader */
+#define USB_PRODUCT_OPTO_BARCODE_1 0xa002 /* Barcode Reader */
+#define USB_PRODUCT_OPTO_CRD7734 0xc000 /* USB Cradle CRD-7734-RU */
+#define USB_PRODUCT_OPTO_CRD7734_1 0xc001 /* USB Cradle CRD-7734-RU */
+
/* OvisLink product */
#define USB_PRODUCT_OVISLINK_RT3072 0x3072 /* RT3072 */
@@ -3263,6 +3278,7 @@
#define USB_PRODUCT_PLANEX2_GWUS54HP 0xab01 /* GW-US54HP */
#define USB_PRODUCT_PLANEX2_GWUS300MINIS 0xab24 /* GW-US300MiniS */
#define USB_PRODUCT_PLANEX2_RT3070 0xab25 /* RT3070 */
+#define USB_PRODUCT_PLANEX2_MZKUE150N 0xab2f /* MZK-UE150N */
#define USB_PRODUCT_PLANEX2_GWUS54MINI2 0xab50 /* GW-US54Mini2 */
#define USB_PRODUCT_PLANEX2_GWUS54SG 0xc002 /* GW-US54SG */
#define USB_PRODUCT_PLANEX2_GWUS54GZL 0xc007 /* GW-US54GZL */
@@ -3354,6 +3370,8 @@
/* Qualcomm products */
#define USB_PRODUCT_QUALCOMM_CDMA_MSM 0x6000 /* CDMA Technologies MSM phone */
+#define USB_PRODUCT_QUALCOMM_NTT_L02C_MODEM 0x618f /* NTT DOCOMO L-02C */
+#define USB_PRODUCT_QUALCOMM_NTT_L02C_STORAGE 0x61dd /* NTT DOCOMO L-02C */
#define USB_PRODUCT_QUALCOMM2_MF330 0x6613 /* MF330 */
#define USB_PRODUCT_QUALCOMM2_RWT_FCT 0x3100 /* RWT FCT-CDMA 2000 1xRTT modem */
#define USB_PRODUCT_QUALCOMM2_CDMA_MSM 0x3196 /* CDMA Technologies MSM modem */
@@ -3890,7 +3908,6 @@
#define USB_PRODUCT_SMC2_2020HUB 0x2020 /* USB Hub */
#define USB_PRODUCT_SMC2_2514HUB 0x2514 /* USB Hub */
#define USB_PRODUCT_SMC3_2662WUSB 0xa002 /* 2662W-AR Wireless */
-#define USB_PRODUCT_SMC2_LAN9514_ETH 0xec00 /* USB/Ethernet */
/* SOHOware products */
#define USB_PRODUCT_SOHOWARE_NUB100 0x9100 /* 10/100 USB Ethernet */
@@ -3991,7 +4008,6 @@
/* Super Top products */
#define USB_PRODUCT_SUPERTOP_IDE 0x6600 /* USB-IDE */
-#define USB_PRODUCT_SUPERTOP_FLASHDRIVE 0x121c /* extrememory Snippy */
/* Syntech products */
#define USB_PRODUCT_SYNTECH_CPT8001C 0x0001 /* CPT-8001C Barcode scanner */
@@ -4205,9 +4221,6 @@
#define USB_PRODUCT_WESTERN_MYBOOK 0x0901 /* MyBook External HDD */
#define USB_PRODUCT_WESTERN_MYPASSWORD 0x0704 /* MyPassword External HDD */
-/* WeTelecom products */
-#define USB_PRODUCT_WETELECOM_WM_D200 0x6801 /* WM-D200 */
-
/* WIENER Plein & Baus GmbH products */
#define USB_PRODUCT_WIENERPLEINBAUS_PL512 0x0010 /* PL512 PSU */
#define USB_PRODUCT_WIENERPLEINBAUS_RCM 0x0011 /* RCM Remote Control */
diff --git a/rtemsbsd/include/rtems/bsd/local/usbdevs_data.h b/rtemsbsd/include/rtems/bsd/local/usbdevs_data.h
index 4a0c6b94..9ac09b57 100644
--- a/rtemsbsd/include/rtems/bsd/local/usbdevs_data.h
+++ b/rtemsbsd/include/rtems/bsd/local/usbdevs_data.h
@@ -1306,6 +1306,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"iPhone 4",
},
{
+ USB_VENDOR_APPLE, USB_PRODUCT_APPLE_IPHONE_4S,
+ 0,
+ "Apple Computer",
+ "iPhone 4S",
+ },
+ {
USB_VENDOR_APPLE, USB_PRODUCT_APPLE_IPHONE_5,
0,
"Apple Computer",
@@ -3010,12 +3016,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"10/100 Ethernet",
},
{
- USB_VENDOR_DLINK, USB_PRODUCT_DLINK_DUBE100C1,
- 0,
- "D-Link",
- "DUB-E100 rev C1",
- },
- {
USB_VENDOR_DLINK, USB_PRODUCT_DLINK_DSB650TX4,
0,
"D-Link",
@@ -3154,6 +3154,18 @@ const struct usb_knowndev usb_knowndevs[] = {
"DUB-H7 USB 2.0 7-Port Hub",
},
{
+ USB_VENDOR_DLINK, USB_PRODUCT_DLINK_DWR510_CD,
+ 0,
+ "D-Link",
+ "DWR-510 CD-ROM Mode",
+ },
+ {
+ USB_VENDOR_DLINK, USB_PRODUCT_DLINK_DWR510,
+ 0,
+ "D-Link",
+ "DWR-510",
+ },
+ {
USB_VENDOR_DLINK2, USB_PRODUCT_DLINK2_DWA120,
0,
"D-Link",
@@ -3328,12 +3340,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"EW-7718",
},
{
- USB_VENDOR_EDIMAX, USB_PRODUCT_EDIMAX_EW7811UN,
- 0,
- "Edimax",
- "EW-7811Un",
- },
- {
USB_VENDOR_EGALAX, USB_PRODUCT_EGALAX_TPANEL,
0,
"eGalax, Inc.",
@@ -3994,6 +4000,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"BeagleBone",
},
{
+ USB_VENDOR_FTDI, USB_PRODUCT_FTDI_KTLINK,
+ 0,
+ "Future Technology Devices",
+ "KT-LINK Embedded Hackers Multitool",
+ },
+ {
USB_VENDOR_FTDI, USB_PRODUCT_FTDI_TURTELIZER2,
0,
"Future Technology Devices",
@@ -7342,6 +7354,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"9531 GPS",
},
{
+ USB_VENDOR_LENOVO, USB_PRODUCT_LENOVO_ETHERNET,
+ 0,
+ "Lenovo",
+ "USB 2.0 Ethernet",
+ },
+ {
USB_VENDOR_LEXAR, USB_PRODUCT_LEXAR_JUMPSHOT,
0,
"Lexar Media",
@@ -7528,12 +7546,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"QuickCam Pro",
},
{
- USB_VENDOR_LOGITECH, USB_PRODUCT_LOGITECH_WEBCAMC100,
- 0,
- "Logitech",
- "Webcam C100",
- },
- {
USB_VENDOR_LOGITECH, USB_PRODUCT_LOGITECH_QUICKCAMEXP,
0,
"Logitech",
@@ -7690,6 +7702,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"LAN-W150N/U2",
},
{
+ USB_VENDOR_LOGITEC, USB_PRODUCT_LOGITEC_LANW300NU2S,
+ 0,
+ "Logitec",
+ "LAN-W300N/U2S",
+ },
+ {
USB_VENDOR_LONGCHEER, USB_PRODUCT_LONGCHEER_WM66,
0,
"Longcheer Holdings, Ltd.",
@@ -10006,6 +10024,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"USBLAN",
},
{
+ USB_VENDOR_MOTOROLA2, USB_PRODUCT_MOTOROLA2_MB886,
+ 0,
+ "Motorola",
+ "MB886 Mobile Phone (Atria HD)",
+ },
+ {
USB_VENDOR_MOTOROLA4, USB_PRODUCT_MOTOROLA4_RT2770,
0,
"Motorola",
@@ -10504,6 +10528,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"Novatel ZeroCD",
},
{
+ USB_VENDOR_NOVATEL, USB_PRODUCT_NOVATEL_MIFI2200,
+ 0,
+ "Novatel Wireless",
+ "Novatel MiFi 2200 CDMA",
+ },
+ {
USB_VENDOR_NOVATEL, USB_PRODUCT_NOVATEL_U727_2,
0,
"Novatel Wireless",
@@ -10906,6 +10936,36 @@ const struct usb_knowndev usb_knowndevs[] = {
"Globetrotter iCON 452",
},
{
+ USB_VENDOR_OPTO, USB_PRODUCT_OPTO_BARCODE,
+ 0,
+ "Optoelectronics Co., Ltd",
+ "Barcode Reader",
+ },
+ {
+ USB_VENDOR_OPTO, USB_PRODUCT_OPTO_OPTICONCODE,
+ 0,
+ "Optoelectronics Co., Ltd",
+ "Opticon Code Reader",
+ },
+ {
+ USB_VENDOR_OPTO, USB_PRODUCT_OPTO_BARCODE_1,
+ 0,
+ "Optoelectronics Co., Ltd",
+ "Barcode Reader",
+ },
+ {
+ USB_VENDOR_OPTO, USB_PRODUCT_OPTO_CRD7734,
+ 0,
+ "Optoelectronics Co., Ltd",
+ "USB Cradle CRD-7734-RU",
+ },
+ {
+ USB_VENDOR_OPTO, USB_PRODUCT_OPTO_CRD7734_1,
+ 0,
+ "Optoelectronics Co., Ltd",
+ "USB Cradle CRD-7734-RU",
+ },
+ {
USB_VENDOR_OVISLINK, USB_PRODUCT_OVISLINK_RT3072,
0,
"OvisLink",
@@ -11410,6 +11470,12 @@ const struct usb_knowndev usb_knowndevs[] = {
"RT3070",
},
{
+ USB_VENDOR_PLANEX2, USB_PRODUCT_PLANEX2_MZKUE150N,
+ 0,
+ "Planex Communications",
+ "MZK-UE150N",
+ },
+ {
USB_VENDOR_PLANEX2, USB_PRODUCT_PLANEX2_GWUS54MINI2,
0,
"Planex Communications",
@@ -11800,6 +11866,18 @@ const struct usb_knowndev usb_knowndevs[] = {
"CDMA Technologies MSM phone",
},
{
+ USB_VENDOR_QUALCOMM, USB_PRODUCT_QUALCOMM_NTT_L02C_MODEM,
+ 0,
+ "Qualcomm",
+ "NTT DOCOMO L-02C",
+ },
+ {
+ USB_VENDOR_QUALCOMM, USB_PRODUCT_QUALCOMM_NTT_L02C_STORAGE,
+ 0,
+ "Qualcomm",
+ "NTT DOCOMO L-02C",
+ },
+ {
USB_VENDOR_QUALCOMM2, USB_PRODUCT_QUALCOMM2_MF330,
0,
"Qualcomm",
@@ -14488,12 +14566,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"2662W-AR Wireless",
},
{
- USB_VENDOR_SMC2, USB_PRODUCT_SMC2_LAN9514_ETH,
- 0,
- "Standard Microsystems",
- "USB/Ethernet",
- },
- {
USB_VENDOR_SOHOWARE, USB_PRODUCT_SOHOWARE_NUB100,
0,
"SOHOware",
@@ -14896,12 +14968,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"USB-IDE",
},
{
- USB_VENDOR_SUPERTOP, USB_PRODUCT_SUPERTOP_FLASHDRIVE,
- 0,
- "Super Top",
- "extrememory Snippy",
- },
- {
USB_VENDOR_SYNTECH, USB_PRODUCT_SYNTECH_CPT8001C,
0,
"Syntech Information",
@@ -15574,12 +15640,6 @@ const struct usb_knowndev usb_knowndevs[] = {
"MyPassword External HDD",
},
{
- USB_VENDOR_WETELECOM, USB_PRODUCT_WETELECOM_WM_D200,
- 0,
- "WeTelecom",
- "WM-D200",
- },
- {
USB_VENDOR_WIENERPLEINBAUS, USB_PRODUCT_WIENERPLEINBAUS_PL512,
0,
"WIENER Plein & Baus GmbH.",
@@ -17542,6 +17602,12 @@ const struct usb_knowndev usb_knowndevs[] = {
NULL,
},
{
+ USB_VENDOR_OPTO, 0,
+ USB_KNOWNDEV_NOPROD,
+ "Optoelectronics Co., Ltd",
+ NULL,
+ },
+ {
USB_VENDOR_SGI, 0,
USB_KNOWNDEV_NOPROD,
"Silicon Graphics",
@@ -19666,6 +19732,12 @@ const struct usb_knowndev usb_knowndevs[] = {
NULL,
},
{
+ USB_VENDOR_LENOVO, 0,
+ USB_KNOWNDEV_NOPROD,
+ "Lenovo",
+ NULL,
+ },
+ {
USB_VENDOR_WAVESENSE, 0,
USB_KNOWNDEV_NOPROD,
"WaveSense",
@@ -19930,12 +20002,6 @@ const struct usb_knowndev usb_knowndevs[] = {
NULL,
},
{
- USB_VENDOR_WETELECOM, 0,
- USB_KNOWNDEV_NOPROD,
- "WeTelecom",
- NULL,
- },
- {
USB_VENDOR_TRIPPLITE, 0,
USB_KNOWNDEV_NOPROD,
"Tripp-Lite",
diff --git a/rtemsbsd/include/rtems/bsd/sys/_types.h b/rtemsbsd/include/rtems/bsd/sys/_types.h
index f1af2cbc..76ec2440 100644
--- a/rtemsbsd/include/rtems/bsd/sys/_types.h
+++ b/rtemsbsd/include/rtems/bsd/sys/_types.h
@@ -156,12 +156,20 @@ typedef char vm_memattr_t;
typedef int accmode_t;
#define _ACCMODE_T_DECLARED
+typedef uint64_t cap_rights_t;
+
+typedef int32_t __clockid_t;
+
typedef const char *c_caddr_t;
typedef uint32_t cpumask_t;
+typedef int cpulevel_t;
+
typedef int cpusetid_t;
+typedef int cpuwhich_t;
+
#define _DEV_T_DECLARED
typedef uint32_t __fixpt_t;
@@ -169,6 +177,8 @@ typedef __fixpt_t fixpt_t;
#define _GID_T_DECLARED
+typedef int64_t id_t;
+
typedef uint32_t in_addr_t;
#define _IN_ADDR_T_DECLARED
diff --git a/rtemsbsd/include/rtems/bsd/sys/param.h b/rtemsbsd/include/rtems/bsd/sys/param.h
index 1479d8e3..76ba0b7b 100644
--- a/rtemsbsd/include/rtems/bsd/sys/param.h
+++ b/rtemsbsd/include/rtems/bsd/sys/param.h
@@ -334,4 +334,10 @@ __END_DECLS
#define member2struct(s, m, x) \
((struct s *)(void *)((char *)(x) - offsetof(struct s, m)))
+/*
+ * Access a variable length array that has been declared as a fixed
+ * length array.
+ */
+#define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset])
+
#endif /* _RTEMS_BSD_SYS_PARAM_H_ */
diff --git a/rtemsbsd/include/sys/_stdint.h b/rtemsbsd/include/sys/_stdint.h
new file mode 100644
index 00000000..9a6118bd
--- /dev/null
+++ b/rtemsbsd/include/sys/_stdint.h
@@ -0,0 +1 @@
+#include <stdint.h>
diff --git a/rtemsbsd/include/sys/rangelock.h b/rtemsbsd/include/sys/rangelock.h
new file mode 100644
index 00000000..936ffd88
--- /dev/null
+++ b/rtemsbsd/include/sys/rangelock.h
@@ -0,0 +1 @@
+/* EMPTY */
diff --git a/rtemsbsd/local/bus_if.c b/rtemsbsd/local/bus_if.c
index 31259ce6..558888af 100644
--- a/rtemsbsd/local/bus_if.c
+++ b/rtemsbsd/local/bus_if.c
@@ -16,7 +16,7 @@
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
-#include <rtems/bsd/sys/types.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <rtems/bsd/local/bus_if.h>
@@ -79,6 +79,14 @@ struct kobjop_desc bus_write_ivar_desc = {
0, &bus_write_ivar_method_default
};
+struct kobj_method bus_child_deleted_method_default = {
+ &bus_child_deleted_desc, (kobjop_t) kobj_error_method
+};
+
+struct kobjop_desc bus_child_deleted_desc = {
+ 0, &bus_child_deleted_method_default
+};
+
struct kobj_method bus_child_detached_method_default = {
&bus_child_detached_desc, (kobjop_t) kobj_error_method
};
diff --git a/rtemsbsd/local/pcib_if.c b/rtemsbsd/local/pcib_if.c
index cc036b4f..07d7abb5 100644
--- a/rtemsbsd/local/pcib_if.c
+++ b/rtemsbsd/local/pcib_if.c
@@ -99,3 +99,11 @@ struct kobjop_desc pcib_map_msi_desc = {
0, &pcib_map_msi_method_default
};
+struct kobj_method pcib_power_for_sleep_method_default = {
+ &pcib_power_for_sleep_desc, (kobjop_t) kobj_error_method
+};
+
+struct kobjop_desc pcib_power_for_sleep_desc = {
+ 0, &pcib_power_for_sleep_method_default
+};
+
diff --git a/rtemsbsd/rtems/rtems-bsd-sysctl.c b/rtemsbsd/rtems/rtems-bsd-sysctl.c
index f9ecab81..7e087385 100644
--- a/rtemsbsd/rtems/rtems-bsd-sysctl.c
+++ b/rtemsbsd/rtems/rtems-bsd-sysctl.c
@@ -46,11 +46,11 @@
#include <string.h>
int sysctl(
- int *name,
+ const int *name,
u_int namelen,
void *oldp,
size_t *oldlenp,
- void *newp,
+ const void *newp,
size_t newlen
)
{
diff --git a/rtemsbsd/rtems/rtems-bsd-sysctlbyname.c b/rtemsbsd/rtems/rtems-bsd-sysctlbyname.c
index ced85f7c..ab3e8083 100644
--- a/rtemsbsd/rtems/rtems-bsd-sysctlbyname.c
+++ b/rtemsbsd/rtems/rtems-bsd-sysctlbyname.c
@@ -29,7 +29,7 @@ __FBSDID("$FreeBSD$");
int
sysctlbyname(const char *name, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
+ const void *newp, size_t newlen)
{
int real_oid[CTL_MAXNAME+2];
int error;
diff --git a/testsuite/selectpollkqueue01/test_main.c b/testsuite/selectpollkqueue01/test_main.c
index d1b9a8b0..468095ed 100644
--- a/testsuite/selectpollkqueue01/test_main.c
+++ b/testsuite/selectpollkqueue01/test_main.c
@@ -878,7 +878,7 @@ test_kqueue_write(test_context *ctx)
assert(event.filter == EVFILT_WRITE);
assert(event.flags == 0);
assert(event.fflags == 0);
- assert(event.data == 18432);
+ assert(event.data == 20428);
assert(event.udata == TEST_UDATA);
n = write(afd, &ctx->buf[0], 1);